예제 #1
0
    def test_parse_with_qualifiers(self):
        """Qualified years keep the bare year and record a qualifier note."""
        # Trailing question mark is reported as an uncertainty.
        uncertain = parse('1985?')
        assert uncertain.year == u'1985', uncertain
        assert uncertain.qualifier == u'Uncertainty : 1985?', uncertain.qualifier

        # 'circa' prefix written without a space after the dot.
        circa_tight = parse('c.1780')
        assert circa_tight.year == u'1780', circa_tight
        assert circa_tight.qualifier == u"Note 'circa' : c.1780", circa_tight

        # 'circa' prefix written with a space after the dot.
        circa_spaced = parse('c. 1780')
        assert circa_spaced.year == u'1780', circa_spaced
        assert circa_spaced.qualifier.startswith(u"Note 'circa'"), circa_spaced
예제 #2
0
    def test_parse_with_qualifiers(self):
        """Qualified years keep the bare year and record a qualifier note."""
        # Trailing question mark is reported as an uncertainty.
        uncertain = parse('1985?')
        assert uncertain.year == u'1985', uncertain
        assert uncertain.qualifier == u'Uncertainty : 1985?', uncertain.qualifier

        # 'circa' prefix written without a space after the dot.
        circa_tight = parse('c.1780')
        assert circa_tight.year == u'1780', circa_tight
        assert circa_tight.qualifier == u"Note 'circa' : c.1780", circa_tight

        # 'circa' prefix written with a space after the dot.
        circa_spaced = parse('c. 1780')
        assert circa_spaced.year == u'1780', circa_spaced
        assert circa_spaced.qualifier.startswith(u"Note 'circa'"), circa_spaced
예제 #3
0
    def test_parse(self):
        """parse() accepts a datetime, an integer year and date strings."""
        from_datetime = parse(datetime.datetime(2000, 1, 23))
        assert from_datetime.year == '2000'

        month_and_year = parse('March 1762')
        assert str(month_and_year) == '1762-03'

        from_int = parse(1966)
        assert str(from_int) == '1966'

        # Day value above 12, so only a day-first reading is possible.
        slashed = parse('22/07/2010')
        assert slashed.month == '07', slashed.month
예제 #4
0
    def test_parse(self):
        """parse() accepts a datetime, an integer year and date strings."""
        from_datetime = parse(datetime.datetime(2000, 1, 23))
        assert from_datetime.year == '2000'

        month_and_year = parse('March 1762')
        assert str(month_and_year) == '1762-03'

        from_int = parse(1966)
        assert str(from_int) == '1966'

        # Day value above 12, so only a day-first reading is possible.
        slashed = parse('22/07/2010')
        assert slashed.month == '07', slashed.month
예제 #5
0
 def test_ambiguous(self):
     """Parse a 'year/year' string; no expectation is asserted yet."""
     # TODO: have to be careful here ...
     ambiguous = parse('1068/1069')
예제 #6
0
 def test_parse_wildcards(self):
     """A wildcard year such as '198?' yields an empty year."""
     wildcard = parse('198?')
     # expect this to not parse
     assert wildcard.year == '', wildcard.year
예제 #7
0
 def test_parse_with_none(self):
     """Parsing None gives None back."""
     assert parse(None) is None
예제 #8
0
 def test_parse_ambiguous_day_month(self):
     """'05/07/2010' is expected to be read day-first: month 07, day 05."""
     fd = parse('05/07/2010')
     assert fd.month == '07', fd.month
     # Report the day (not the month) when the day assertion fails.
     assert fd.day == '05', fd.day
예제 #9
0
    def get_combined_index_data(self):
        """Build the combined index fields for the current item.

        Returns:
            dict: always contains ``date`` (``None`` when no usable
            production start date was found) and ``date_granularity`` (the
            number of digits in the parsed start date, ``0`` when there is
            none). ``title``, ``description``, ``authors`` and
            ``media_urls`` are only present when the corresponding element
            yields a value.

        Raises:
            KeyError: if an image URL ends in an extension that is not a key
                of ``self.media_mime_types``.
        """
        combined_index_data = {}

        title = self._get_text_or_none('.//oai:title')
        if title:
            combined_index_data['title'] = title

        description = self._get_text_or_none('.//oai:description')
        if description:
            combined_index_data['description'] = description

        date = None
        date_granularity = 0
        # Initialised up front so a missing or digit-less value can never
        # leave these names unbound further down.
        date_start = None
        date_end = None

        date_start_raw = self._get_text_or_none('.//oai:production.date.start')
        # Skip the date when 5 or more digits are concatenated, because we
        # don't know how to parse those numbers.
        if date_start_raw and not re.search(r'\d{5}', date_start_raw):
            # Only hand values containing at least one digit to the parser.
            if re.search(r'\d', date_start_raw):
                date_start = parse(date_start_raw.strip())

            date_end_raw = self._get_text_or_none('.//oai:production.date.end')
            # The end date is optional: the lookup may return nothing.
            if date_end_raw and re.search(r'\d', date_end_raw):
                date_end = parse(date_end_raw.strip())

        # Treat "nothing parsed" and "parsed to an empty date" alike.
        start_iso = date_start.isoformat() if date_start is not None else ''
        end_iso = date_end.isoformat() if date_end is not None else ''

        # Only save a date if a start date actually got parsed.
        if start_iso:
            date_granularity = len(re.findall(r'\d', start_iso))

            if not end_iso or start_iso == end_iso:
                # No distinct end date: the start date is the date.
                date = date_start.as_datetime()
            elif len(start_iso) == 4:
                # Year-only dates are averaged as integers to also support
                # before-Christ years (which datetime doesn't support).
                # Floor division keeps the result an int under both
                # classic and true division.
                year_start = int(date_start.year)
                average_year = year_start + (int(date_end.year) - year_start) // 2
                date = parse(average_year).as_datetime()
            else:
                # Dates with months/days: take the midpoint using datetime.
                start_datetime = date_start.as_datetime()
                date = start_datetime + (date_end.as_datetime() - start_datetime) / 2

        combined_index_data['date'] = date
        combined_index_data['date_granularity'] = date_granularity

        authors = self._get_text_or_none('.//oai:creator')
        if authors:
            combined_index_data['authors'] = [authors]

        mediums = self.original_item.findall('.//oai:imageUrl', namespaces=self.namespaces)
        if mediums:
            combined_index_data['media_urls'] = []
            for medium in mediums:
                # NOTE: the content type is looked up by file extension; an
                # extension missing from media_mime_types raises KeyError.
                combined_index_data['media_urls'].append(
                    {
                        'original_url': unicode(medium.text.strip()),
                        'content_type': self.media_mime_types[
                            unicode(medium.text).strip().split('.')[-1].lower()
                        ]
                    }
                )

        return combined_index_data
예제 #10
0
 def test_small_years_with_zeros(self):
     """A zero-padded year keeps its padding as text and is 23 as a float."""
     padded = parse('0023')
     assert str(padded) == '0023', padded
     assert padded.as_float() == 23, padded.as_float()
예제 #11
0
 def test_ambiguous(self):
     """Parse a 'year/year' string; no expectation is asserted yet."""
     # TODO: have to be careful here ...
     ambiguous = parse('1068/1069')
예제 #12
0
 def test_parse_wildcards(self):
     """A wildcard year such as '198?' yields an empty year."""
     wildcard = parse('198?')
     # expect this to not parse
     assert wildcard.year == '', wildcard.year
예제 #13
0
 def test_parse_with_none(self):
     """Parsing None gives None back."""
     assert parse(None) is None
예제 #14
0
 def test_parse_ambiguous_day_month(self):
     """'05/07/2010' is expected to be read day-first: month 07, day 05."""
     fd = parse('05/07/2010')
     assert fd.month == '07', fd.month
     # Report the day (not the month) when the day assertion fails.
     assert fd.day == '05', fd.day
예제 #15
0
 def test_small_years_with_zeros(self):
     """A zero-padded year keeps its padding as text and is 23 as a float."""
     padded = parse('0023')
     assert str(padded) == '0023', padded
     assert padded.as_float() == 23, padded.as_float()
예제 #16
0
 def test_years_with_alpha_prefix(self):
     """A year with a leading letter ('p1980') renders as the bare year."""
     prefixed = parse("p1980")
     assert str(prefixed) == "1980", prefixed
예제 #17
0
 def test_years_with_alpha_prefix(self):
     """A year with a leading letter ('p1980') renders as the bare year."""
     prefixed = parse("p1980")
     assert str(prefixed) == "1980", prefixed
예제 #18
0
    def get_combined_index_data(self):
        """Build the combined index fields for the current item.

        Returns:
            dict: always contains ``date`` (``None`` when no usable
            production start date was found) and ``date_granularity`` (the
            number of digits in the parsed start date, ``0`` when there is
            none). ``title``, ``description``, ``authors`` and
            ``media_urls`` are only present when the corresponding element
            yields a value.

        Raises:
            KeyError: if an image URL ends in an extension that is not a key
                of ``self.media_mime_types``.
        """
        combined_index_data = {}

        title = self._get_text_or_none('.//oai:title')
        if title:
            combined_index_data['title'] = title

        description = self._get_text_or_none('.//oai:description')
        if description:
            combined_index_data['description'] = description

        date = None
        date_granularity = 0
        # Initialised up front so a missing or digit-less value can never
        # leave these names unbound further down.
        date_start = None
        date_end = None

        date_start_raw = self._get_text_or_none('.//oai:production.date.start')
        # Skip the date when 5 or more digits are concatenated, because we
        # don't know how to parse those numbers.
        if date_start_raw and not re.search(r'\d{5}', date_start_raw):
            # Only hand values containing at least one digit to the parser.
            if re.search(r'\d', date_start_raw):
                date_start = parse(date_start_raw.strip())

            date_end_raw = self._get_text_or_none(
                './/oai:production.date.end')
            # The end date is optional: the lookup may return nothing.
            if date_end_raw and re.search(r'\d', date_end_raw):
                date_end = parse(date_end_raw.strip())

        # Treat "nothing parsed" and "parsed to an empty date" alike.
        start_iso = date_start.isoformat() if date_start is not None else ''
        end_iso = date_end.isoformat() if date_end is not None else ''

        # Only save a date if a start date actually got parsed.
        if start_iso:
            date_granularity = len(re.findall(r'\d', start_iso))

            if not end_iso or start_iso == end_iso:
                # No distinct end date: the start date is the date.
                date = date_start.as_datetime()
            elif len(start_iso) == 4:
                # Year-only dates are averaged as integers to also support
                # before-Christ years (which datetime doesn't support).
                # Floor division keeps the result an int under both
                # classic and true division.
                year_start = int(date_start.year)
                average_year = year_start + (
                    int(date_end.year) - year_start) // 2
                date = parse(average_year).as_datetime()
            else:
                # Dates with months/days: take the midpoint using datetime.
                start_datetime = date_start.as_datetime()
                date = start_datetime + (
                    date_end.as_datetime() - start_datetime) / 2

        combined_index_data['date'] = date
        combined_index_data['date_granularity'] = date_granularity

        authors = self._get_text_or_none('.//oai:creator')
        if authors:
            combined_index_data['authors'] = [authors]

        mediums = self.original_item.findall('.//oai:imageUrl',
                                             namespaces=self.namespaces)
        if mediums:
            combined_index_data['media_urls'] = []
            for medium in mediums:
                # NOTE: the content type is looked up by file extension; an
                # extension missing from media_mime_types raises KeyError.
                combined_index_data['media_urls'].append({
                    'original_url':
                    unicode(medium.text.strip()),
                    'content_type':
                    self.media_mime_types[unicode(
                        medium.text).strip().split('.')[-1].lower()]
                })

        return combined_index_data