def test_parse_with_qualifiers(self):
    fd = parse('1985?')
    assert fd.year == u'1985', fd
    assert fd.qualifier == u'Uncertainty : 1985?', fd.qualifier
    fd = parse('c.1780')
    assert fd.year == u'1780', fd
    assert fd.qualifier == u"Note 'circa' : c.1780", fd
    fd = parse('c. 1780')
    assert fd.year == u'1780', fd
    assert fd.qualifier.startswith(u"Note 'circa'"), fd
def test_parse(self):
    d1 = datetime.datetime(2000, 1, 23)
    fd = parse(d1)
    assert fd.year == '2000'
    fd = parse('March 1762')
    assert str(fd) == '1762-03'
    fd = parse(1966)
    assert str(fd) == '1966'
    fd = parse('22/07/2010')
    assert fd.month == '07', fd.month
def test_ambiguous(self):
    # TODO: have to be careful here ...
    fd = parse('1068/1069')
def test_parse_wildcards(self):
    fd = parse('198?')
    assert fd.year == '', fd.year  # expect this to not parse
def test_parse_with_none(self):
    d1 = parse(None)
    assert d1 is None
def test_parse_ambiguous_day_month(self):
    fd = parse('05/07/2010')
    assert fd.month == '07', fd.month
    assert fd.day == '05', fd.day
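# The two slash-date tests above assume a day-first (DD/MM/YYYY) convention,
# so '05/07/2010' yields day '05' and month '07'. A minimal illustration of
# that convention using only the standard library (an illustrative helper,
# not the parse() implementation under test):
import datetime

def _parse_day_first(value):
    # strptime with %d/%m/%Y reads the first field as the day
    return datetime.datetime.strptime(value, '%d/%m/%Y').date()

# _parse_day_first('05/07/2010') -> datetime.date(2010, 7, 5)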
def get_combined_index_data(self):
    combined_index_data = {}

    title = self._get_text_or_none('.//oai:title')
    if title:
        combined_index_data['title'] = title

    description = self._get_text_or_none('.//oai:description')
    if description:
        combined_index_data['description'] = description

    date = None
    date_granularity = 0

    # Get date start
    date_start_raw = self._get_text_or_none('.//oai:production.date.start')

    # Skip date if 5 or more digits are concatenated, because we
    # don't know how to parse those numbers
    if date_start_raw:
        if not re.search(r'\d{5}', date_start_raw):
            if re.search(r'\d', date_start_raw):
                date_start = parse(date_start_raw.strip())

                # Get date end
                date_end_raw = self._get_text_or_none(
                    './/oai:production.date.end')
                if re.search(r'\d', date_end_raw):
                    date_end = parse(date_end_raw.strip())

                # Only save date if something got parsed
                if date_start.isoformat():
                    # Determine granularity
                    date_granularity = len(
                        re.findall(r'\d', date_start.isoformat()))

                    # Take average of start and end date if end date does
                    # not exist and they are not the same
                    if not date_end.isoformat():
                        date = date_start.as_datetime()
                    elif date_start.isoformat() == date_end.isoformat():
                        date = date_start.as_datetime()
                    else:
                        # Take the average of years using integers to also
                        # support before christ years (which datetime
                        # doesn't support)
                        if len(date_start.isoformat()) == 4:
                            average_year = int(date_start.year) + (
                                int(date_end.year) -
                                int(date_start.year)) / 2
                            date = parse(average_year).as_datetime()
                        # Take averages for dates with months and days
                        # using datetime
                        else:
                            date = date_start.as_datetime() + (
                                date_end.as_datetime() -
                                date_start.as_datetime()) / 2

    combined_index_data['date'] = date
    combined_index_data['date_granularity'] = date_granularity

    authors = self._get_text_or_none('.//oai:creator')
    if authors:
        combined_index_data['authors'] = [authors]

    mediums = self.original_item.findall('.//oai:imageUrl',
                                         namespaces=self.namespaces)
    if mediums:
        combined_index_data['media_urls'] = []
        for medium in mediums:
            # if medium.text.strip().split('.')[-1].lower() in self.media_mime_types:
            combined_index_data['media_urls'].append({
                'original_url': unicode(medium.text.strip()),
                'content_type': self.media_mime_types[
                    unicode(medium.text).strip().split('.')[-1].lower()]
            })

    return combined_index_data
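# The year-averaging branch above can be shown in isolation. A minimal
# sketch (a hypothetical helper, not part of the original item class),
# assuming start/end are plain integer years so that before-Christ years
# (negative integers, which datetime cannot represent) still work:
def _average_year(start_year, end_year):
    # Integer midpoint between the two years, e.g. (1900, 1910) -> 1905
    # and (-50, 10) -> -20. The floor division mirrors the Python 2
    # integer division used by the / 2 expression above.
    return int(start_year) + (int(end_year) - int(start_year)) // 2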
def test_small_years_with_zeros(self):
    in1 = '0023'
    fd = parse(in1)
    assert str(fd) == '0023', fd
    assert fd.as_float() == 23, fd.as_float()
def test_years_with_alpha_prefix(self):
    in1 = "p1980"
    fd = parse(in1)
    assert str(fd) == "1980", fd
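# The test above expects parse() to drop a leading alphabetic qualifier such
# as the "p" in "p1980". A minimal sketch of that kind of extraction,
# assuming a simple regex is enough for these inputs (it also preserves the
# leading zeros exercised by test_small_years_with_zeros); the real parse()
# handles more cases, such as rejecting wildcards like '198?':
import re

def _extract_year(value):
    match = re.search(r'\d{1,4}', value)
    return match.group(0) if match else ''

# _extract_year('p1980') -> '1980'; _extract_year('0023') -> '0023'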