def __call__(self,values):
    """Join *values* into one string, parse it as a date and format it.

    The pieces in ``values`` are concatenated, both guillemet characters
    (U+00AB and U+00BB) are removed, and the remaining text is parsed by a
    fresh ``DateDataParser``. The parsed ``date_obj`` is returned formatted
    as ``"%B %d, %Y, %H:%M:%S"``.
    """
    parser = DateDataParser()
    text = ''.join(values)
    # Strip the opening and closing guillemets before parsing.
    for guillemet in (u'\u00ab', u'\u00bb'):
        text = text.replace(guillemet, "")
    date_data = parser.get_date_data(text)
    parsed = date_data['date_obj']
    return parsed.strftime("%B %d, %Y, %H:%M:%S")
def __init__(self, config=None):
    """Initialise reminder state and an English-only date parser.

    ``config`` is accepted but not read by this initialiser.
    """
    # Parser preferences: ambiguous dates resolve to the future and are
    # read in day-month-year order.
    parser_settings = {'PREFER_DATES_FROM': 'future', 'DATE_ORDER': 'DMY'}

    self.active_reminder = {}
    # Captures whatever sits between the first '[' and the last ']'.
    self.regex = r'\[(.*)\]'
    self.settings = parser_settings
    self.parser = DateDataParser(
        languages=['en'],
        allow_redetect_language=False,
        settings=self.settings,
    )
def given_parser(self, settings=None):
    """Install the freshness-parser patches and build ``self.parser``.

    ``freshness_date_parser.get_date_data`` is wrapped so that its latest
    return value is stored on ``self.freshness_result``. The freshness
    parser is then replaced by a wrapping ``Mock`` whose ``now`` and whose
    module-level ``datetime.utcnow`` both yield ``self.now``.
    """
    def remember_result(original_get_date_data):
        @wraps(original_get_date_data)
        def recorder(*args, **kwargs):
            outcome = original_get_date_data(*args, **kwargs)
            self.freshness_result = outcome
            return self.freshness_result
        return recorder

    # The recording wrapper must be in place before the Mock wraps the
    # parser object, so the statement order below is significant.
    recording_get_date_data = remember_result(
        freshness_date_parser.get_date_data)
    self.add_patch(patch.object(
        freshness_date_parser, 'get_date_data', recording_get_date_data))

    self.freshness_parser = Mock(wraps=freshness_date_parser)
    self.add_patch(patch.object(self.freshness_parser, 'now', self.now))

    # Freeze utcnow() inside the freshness parser module.
    frozen_datetime = Mock(wraps=dateparser.freshness_date_parser.datetime)
    frozen_datetime.utcnow = Mock(return_value=self.now)
    self.add_patch(patch(
        'dateparser.freshness_date_parser.datetime', new=frozen_datetime))

    self.add_patch(patch(
        'dateparser.date.freshness_date_parser', new=self.freshness_parser))
    self.parser = DateDataParser(settings=settings)
def __call__(self, values):
    """Turn the processed values into date strings using ``self.format``.

    The values are first run through the parent processor. Each resulting
    item is then handled as follows:

    * ``dict`` / ``list`` items are passed through unchanged;
    * any other item is parsed with ``DateDataParser`` and, when a date is
      found, appended as ``date.strftime(self.format)``;
    * items that cannot be parsed (or formatted) are dropped.

    Returns the list of passed-through items and formatted date strings.
    """
    values = super(Date, self).__call__(values)
    dates = []
    for text in values:
        if isinstance(text, (dict, list)):
            # Structured values are kept as-is and must not be handed to
            # the date parser, which only understands strings.
            dates.append(text)
            continue
        try:
            # A new parser per item keeps language detection independent
            # between items, exactly as before.
            date = DateDataParser().get_date_data(text)['date_obj']
            if date is not None:
                dates.append(date.strftime(self.format))
        except ValueError:
            # Best effort: unparseable / unformattable values are skipped.
            pass
    return dates
def search_parse(self, shortname, text, settings):
    """Find date substrings in *text* and parse each of them.

    Returns a list of ``(substring, date_obj)`` tuples built from the
    results of ``self.parse_found_objects``.
    """
    translated, original = self.search(shortname, text, settings)

    # splitting done by spaces and some dictionary items contain spaces,
    # so for these languages the original (untranslated) text is parsed
    # with a parser for that language instead of the English translation.
    bad_translate_with_search = ['vi', 'hu']
    if shortname in bad_translate_with_search:
        parser_languages, to_parse = [shortname], original
    else:
        parser_languages, to_parse = ['en'], translated

    parser = DateDataParser(languages=parser_languages, settings=settings)
    parsed, substrings = self.parse_found_objects(
        parser=parser,
        to_parse=to_parse,
        original=original,
        translated=translated,
        settings=settings,
    )
    # Swap the parser's settings for a fresh Settings() instance afterwards.
    parser._settings = Settings()

    date_objects = [entry['date_obj'] for entry in parsed]
    return list(zip(substrings, date_objects))
def given_parser(self):
    """Patch ``date_parser`` to record its results and build ``self.parser``.

    ``date_parser.parse`` is wrapped so that every return value is stored
    on ``self.date_result``; ``dateparser.date.date_parser`` is then
    replaced by a ``Mock`` wrapping the real parser.
    """
    def remember_result(original_parse):
        @wraps(original_parse)
        def recorder(date_string):
            outcome = original_parse(date_string)
            self.date_result = outcome
            return self.date_result
        return recorder

    recording_parse = remember_result(date_parser.parse)
    self.add_patch(patch.object(date_parser, 'parse', recording_parse))

    self.date_parser = Mock(wraps=date_parser)
    self.add_patch(
        patch('dateparser.date.date_parser', new=self.date_parser))
    self.parser = DateDataParser()
def __call__(self, values):
    """Turn the processed values into date strings using ``self.format``.

    The values are first run through the parent processor. ``dict`` and
    ``list`` items are passed through unchanged. Every other item is parsed
    with a ``DateDataParser`` configured with ``PREFER_DAY_OF_MONTH`` set to
    ``'first'`` and appended as ``date.strftime(self.format)``. Items that
    cannot be parsed are dropped.

    Returns the list of passed-through items and formatted date strings.
    """
    values = super(Date, self).__call__(values)
    dates = []
    for text in values:
        if isinstance(text, (dict, list)):
            # Structured values are kept as-is and must not be handed to
            # the date parser, which only understands strings.
            dates.append(text)
            continue
        try:
            # A new parser per item keeps language detection independent
            # between items, exactly as before.
            date = DateDataParser(settings={
                'PREFER_DAY_OF_MONTH': 'first'
            }).get_date_data(text)['date_obj']
            dates.append(date.strftime(self.format))
        except (ValueError, AttributeError):
            # ValueError: parsing/formatting failed.
            # AttributeError: no date was found, so ``date`` is None.
            pass
    return dates
def given_parser(self):
    """Pin the freshness parser's ``now`` and build ``self.parser``.

    ``freshness_date_parser.now`` is replaced by ``self.now``, its
    ``get_date_data`` is wrapped so the latest result is stored on
    ``self.freshness_result``, and ``dateparser.date.freshness_date_parser``
    is swapped for a ``Mock`` wrapping the real parser.
    """
    self.add_patch(patch.object(freshness_date_parser, 'now', self.now))

    def remember_result(original_get_date_data):
        @wraps(original_get_date_data)
        def recorder(date_string):
            outcome = original_get_date_data(date_string)
            self.freshness_result = outcome
            return self.freshness_result
        return recorder

    recording_get_date_data = remember_result(
        freshness_date_parser.get_date_data)
    self.add_patch(patch.object(
        freshness_date_parser, 'get_date_data', recording_get_date_data))

    self.freshness_parser = Mock(wraps=freshness_date_parser)
    self.add_patch(patch(
        'dateparser.date.freshness_date_parser', new=self.freshness_parser))
    self.parser = DateDataParser()
def parse_url(self, response):
    """Yield one ``{"url", "release_date"}`` dict per linked article title.

    The release date is read once from the page's cite-metadata date span,
    parsed with ``DateDataParser`` and formatted as ``YYYY-MM-DD``; every
    yielded item carries that same date. Article links are resolved against
    ``response.url``.
    """
    date_xpath = "//span[@class='highwire-cite-metadata-date highwire-cite-metadata']//text()"
    raw_date = response.xpath(date_xpath).extract_first()
    date_data = DateDataParser().get_date_data(raw_date)
    release_date = date_data['date_obj'].strftime("%Y-%m-%d")

    title_links = response.xpath("//a[@class='highwire-cite-linked-title']")
    for link in title_links:
        href = link.xpath("./@href").extract_first()
        absolute_url = urlparse.urljoin(response.url, href)
        yield {
            "url" : absolute_url,
            "release_date" : release_date
        }
def __init__(self, config):
    """Read the ``timeliness`` options from the config and build the parser.

    Raises:
        ValueError: if ``timeliness_strategy`` is missing or empty.
    """
    super(RelevancePeriodExtractor, self).__init__(config)

    timeliness_params = self.config['timeliness']
    option = timeliness_params.get
    self.extract_period = option('extract_period', False)
    self.timeliness_strategy = option('timeliness_strategy', [])
    self.date_order = option('date_order', 'DMY')
    self.max_empty_relevance_period = option('max_empty_relevance_period', 10)

    if not self.timeliness_strategy:
        raise ValueError('You need to provide values for "timeliness_strategy."')

    # Run the data package completeness check on the source file before
    # the date parser is set up.
    checker = DataPackageChecker(self.config)
    checker.check_database_completeness([self.source_file])

    parser_settings = {
        'RETURN_AS_TIMEZONE_AWARE': False,
        'PREFER_DAY_OF_MONTH': 'last',
        'PREFER_DATES_FROM': 'past',
        'SKIP_TOKENS': ['to'],
        'DATE_ORDER': self.date_order,
    }
    self.date_parser = DateDataParser(
        allow_redetect_language=True, settings=parser_settings)
def parse_html(html):
    """Parse data from string containing HTML.

    The week-period text found in the page is stored in the
    ``original_week_period`` column; the date at its end (``<digits>
    <word> <2-4 digits>``) is stored as ``original_date_text`` and parsed
    as a Turkish, day-first date into the ``date`` column. The
    ``data-detay`` column is converted to floats using Turkish number
    formatting.

    Returns a DataFrame with the columns ``data_adi``, ``data_detay``,
    ``original_week_period``, ``original_date_text`` and ``date``.

    Raises:
        Exception: if no date can be found at the end of the week-period
            text.
    """
    soup = BeautifulSoup(html, 'html.parser')
    df = find_data(soup)

    # append original date column
    original_week_period = find_week_date(soup)
    df['original_week_period'] = original_week_period

    # extract date value from text (raw string: \d, \s and \w are regex
    # escapes, not Python string escapes)
    date_search = re.search(r'(\d+\s+\w+\s+\d{2,4})$', original_week_period,
                            re.IGNORECASE)
    if not date_search:
        raise Exception(
            f"Couldn't extract date from date text {original_week_period}.")
    original_date_text = date_search.group(1)
    df['original_date_text'] = original_date_text

    # parse date
    ddp = DateDataParser(languages=['tr'], settings={'DATE_ORDER': 'DMY'})
    df['date'] = ddp.get_date_data(original_date_text).date_obj

    # parse numeric 'vaka sayısı' figure using TR locale; the locale is
    # process-wide state, so put the previous setting back afterwards
    previous_numeric_locale = locale.setlocale(locale.LC_NUMERIC)
    locale.setlocale(locale.LC_NUMERIC, 'tr_TR')
    try:
        df['data-detay'] = df['data-detay'].apply(locale.atof)
    finally:
        locale.setlocale(locale.LC_NUMERIC, previous_numeric_locale)

    # remove dash from column names
    df = df.rename(columns={
        'data-adi': 'data_adi',
        'data-detay': 'data_detay'
    })

    return df[[
        'data_adi', 'data_detay', 'original_week_period',
        'original_date_text', 'date'
    ]]
def given_parser(self, *args, **kwds):
    """Patch ``date_parser`` to record its results and build ``self.parser``.

    ``date_parser.parse`` is wrapped so that every return value is stored
    on ``self.date_result``; ``dateparser.date.date_parser`` is replaced by
    a ``Mock`` wrapping the real parser. All positional and keyword
    arguments are forwarded to ``DateDataParser``.
    """
    def remember_result(original_parse):
        @wraps(original_parse)
        def recorder(*call_args, **call_kwargs):
            outcome = original_parse(*call_args, **call_kwargs)
            self.date_result = outcome
            return self.date_result
        return recorder

    recording_parse = remember_result(date_parser.parse)
    self.add_patch(patch.object(date_parser, "parse", recording_parse))

    self.date_parser = Mock(wraps=date_parser)
    self.add_patch(
        patch("dateparser.date.date_parser", new=self.date_parser))
    self.parser = DateDataParser(*args, **kwds)
class Parser():
    """Estimate the calendar date a relative date string refers to.

    Wraps a ``DateDataParser`` and memoises parse results per instance.
    """

    def __init__(self, languages=('en',)):
        """Create the underlying parser.

        Args:
            languages: language codes handed to ``DateDataParser``.
                Defaults to English only; ``None`` is passed through
                unchanged.
        """
        # The default is an immutable tuple (never a shared mutable list);
        # tuples are converted so the parser still receives a list.
        if isinstance(languages, tuple):
            languages = list(languages)
        self.parser = DateDataParser(languages=languages)
        # Per-instance cache: decorating the method itself would key the
        # cache on ``self`` and keep every instance alive in a class-level
        # cache.
        self._cached_parse = lru_cache(maxsize=256)(self._parse_uncached)

    def _parse_uncached(self, s):
        """Parse *s* and return its ``date_obj`` (``None`` when unparsed)."""
        # Index access works for both dict results and DateData objects.
        return self.parser.get_date_data(s)['date_obj']

    def _parse(self, s):
        """Return the memoised parse result for *s*."""
        return self._cached_parse(s)

    def parse_date(self, d, t):
        """Shift the scrape time *t* back by the age expressed in *d*.

        Args:
            d: date text to parse (its distance from now is the age).
            t: ISO-format timestamp of when the text was scraped.

        Returns:
            ``(date, truncated)``. ``truncated`` is True when the age could
            not be computed and a fixed 30-day age was used instead.
        """
        try:
            delta = datetime.now() - self._parse(d)
            truncated = False
        except TypeError:
            # Raised when the subtraction is not possible, e.g. the text
            # did not parse and ``_parse`` returned None.
            delta = timedelta(days=30)
            truncated = True
        scraped = datetime.fromisoformat(t)
        return (scraped - delta).date(), truncated
def given_parser(self):
    """Install the freshness-parser patches and build ``self.parser``.

    ``freshness_date_parser.get_date_data`` is wrapped so the latest result
    is stored on ``self.freshness_result``, ``datetime.utcnow`` inside the
    freshness parser module is frozen to ``self.now``, and
    ``dateparser.date.freshness_date_parser`` is replaced by a ``Mock``
    wrapping the real parser.
    """
    def remember_result(original_get_date_data):
        @wraps(original_get_date_data)
        def recorder(date_string):
            outcome = original_get_date_data(date_string)
            self.freshness_result = outcome
            return self.freshness_result
        return recorder

    recording_get_date_data = remember_result(
        freshness_date_parser.get_date_data)
    self.add_patch(patch.object(
        freshness_date_parser, 'get_date_data', recording_get_date_data))

    self.freshness_parser = Mock(wraps=freshness_date_parser)

    # Freeze utcnow() inside the freshness parser module.
    frozen_datetime = Mock(wraps=dateparser.freshness_date_parser.datetime)
    frozen_datetime.utcnow = Mock(return_value=self.now)
    self.add_patch(patch(
        'dateparser.freshness_date_parser.datetime', new=frozen_datetime))

    self.add_patch(patch(
        'dateparser.date.freshness_date_parser', new=self.freshness_parser))
    self.parser = DateDataParser()
def _get_date_delta(self, date_val: str):
    """Return ``(days, date)`` for *date_val*.

    ``days`` is derived from the encrypted "tweak" value: the decoded
    ciphertext is reduced modulo ``self.range`` and offset by
    ``self.lower_range_days``. ``date`` is *date_val* parsed with
    ``self.format`` when that is set and matches, otherwise parsed by
    ``DateDataParser`` in strict mode.
    """
    field_ref = self._get_field_ref("tweak")
    if not field_ref:
        # No tweak field available: encrypt an all-zero 16-character tweak.
        default_tweak = "0000000000000000"
        encrypted = self._fpe_ff1.encrypt(default_tweak.encode())
    else:
        cleaned, _ = fpe_base.cleanup_value(field_ref.value, field_ref.radix)
        padded = str(cleaned).zfill(16)
        encrypted = self._fpe_ff1.encrypt(padded.encode(), field_ref.radix)

    decoded = self._fpe_ff1.decode(encrypted)
    days = int(decoded) % self.range + self.lower_range_days

    parsed_date = None
    if self.format:
        try:
            parsed_date = datetime.strptime(date_val, self.format).date()
        except ValueError:
            # Explicit format did not match; fall back to the generic parser.
            pass

    if not parsed_date:
        strict_parser = DateDataParser(settings={"STRICT_PARSING": True})
        date_data = strict_parser.get_date_data(date_val)
        parsed_date = date_data["date_obj"].date()

    return days, parsed_date
def when_date_is_parsed_using_with_datedataparser(self, dt_string):
    """Parse *dt_string* with a language-detecting ``DateDataParser``.

    The parser is built with ``detect_languages`` as its language detection
    function; the resulting ``date_obj`` is stored on ``self.result``.
    """
    parser = DateDataParser(detect_languages_function=detect_languages)
    date_data = parser.get_date_data(dt_string)
    self.result = date_data["date_obj"]
class TestDateParser(BaseTestCase):
    """Data-driven tests for date strings handled by ``date_parser``.

    The tests follow a given/when/then layout: ``given_*`` helpers install
    patches and set up inputs, ``when_date_is_parsed`` runs
    ``DateDataParser.get_date_data`` and ``then_*`` helpers assert on the
    result.
    """

    def setUp(self):
        super(TestDateParser, self).setUp()
        # Placeholders; each is assigned by the given_*/when_* helpers.
        self.date_string = NotImplemented
        self.parser = NotImplemented
        self.result = NotImplemented
        self.date_parser = NotImplemented
        self.date_result = NotImplemented

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 \xe0 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er F\xe9vrier \xe0 21:24', datetime(2012, 2, 1, 21, 24)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        # Turkish dates
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 13:13 AM', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        # Miscellaneous
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('vendredi, d\xe9cembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 D\xe9c 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 D\xe9cembre 2014 \xe0 09:00', datetime(2014, 12, 11, 9, 0)),
        param('f\xe9v 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
    ])
    def test_dates_parsing(self, date_string, expected):
        # Each date string must parse to exactly `expected` with period
        # 'day', with utcnow frozen and a zero local timezone offset.
        self.given_utcnow(datetime(2012, 11, 13))  # Tuesday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        # Same flow as test_dates_parsing, but with the local timezone
        # offset patched to +1 hour and utcnow left unpatched.
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(''),
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
    ])
    def test_dates_not_parsed(self, date_string):
        # These inputs must yield a result whose 'date_obj' is None.
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        # Incomplete dates with PREFER_DATES_FROM set to 'past'.
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        # Incomplete dates with PREFER_DATES_FROM set to 'future'.
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        # Incomplete dates with PREFER_DATES_FROM set to 'current_period'.
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year-only strings with PREFER_DAY_OF_MONTH set to 'current';
        # `today` is the frozen utcnow value.
        self.given_configuration('PREFER_DAY_OF_MONTH', 'current')
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year-only strings with PREFER_DAY_OF_MONTH set to 'last'.
        self.given_configuration('PREFER_DAY_OF_MONTH', 'last')
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year-only strings with PREFER_DAY_OF_MONTH set to 'first'.
        self.given_configuration('PREFER_DAY_OF_MONTH', 'first')
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(
            self, prefer_day_of_month=None):
        # A string that names its day must parse to that day under every
        # PREFER_DAY_OF_MONTH value.
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(datetime(2015, 2, 12))
        self.given_parser()
        self.given_date_string('24 April 2012')
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
            self, date_string):
        # Calls DateParser directly (not DateDataParser) and expects a
        # ValueError whose message matches the given pattern.
        with self.assertRaisesRegexp(ValueError, 'Day not in range for month'):
            DateParser().parse(date_string)

    def given_utcnow(self, now):
        # Replace `datetime` in dateparser.date_parser with a wrapping Mock
        # whose utcnow() returns `now`.
        datetime_mock = Mock(wraps=datetime)
        datetime_mock.utcnow = Mock(return_value=now)
        self.add_patch(
            patch('dateparser.date_parser.datetime', new=datetime_mock))

    def given_local_tz_offset(self, offset):
        # `offset` is in hours; it is converted to a timedelta in seconds.
        self.add_patch(
            patch.object(dateparser.timezone_parser,
                         'local_tz_offset',
                         new=timedelta(seconds=3600 * offset)))

    def given_date_string(self, date_string):
        self.date_string = date_string

    def given_parser(self):
        # Wrap date_parser.parse so its latest return value is kept on
        # self.date_result, then swap dateparser.date.date_parser for a
        # Mock wrapping the real parser and build the DateDataParser.
        def collecting_get_date_data(parse):
            @wraps(parse)
            def wrapped(date_string):
                self.date_result = parse(date_string)
                return self.date_result
            return wrapped
        self.add_patch(
            patch.object(date_parser,
                         'parse',
                         collecting_get_date_data(date_parser.parse)))

        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(
            patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser()

    def given_configuration(self, key, value):
        # Override a single attribute of the `settings` object.
        self.add_patch(patch.object(settings, key, new=value))

    def when_date_is_parsed(self):
        self.result = self.parser.get_date_data(self.date_string)

    def then_period_is(self, period):
        self.assertEqual(period, self.result['period'])

    def then_date_obj_exactly_is(self, expected):
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_not_parsed(self):
        self.assertIsNone(self.result['date_obj'],
                          '"%s" should not be parsed' % self.date_string)

    def then_date_was_parsed_by_date_parser(self):
        # The final date must equal what the wrapped date_parser.parse
        # returned (recorded by given_parser).
        self.assertEqual(self.result['date_obj'], self.date_result)
def __init__(self, host, user, password):
    """Create the JIRA client and a Portuguese/English date parser.

    The client authenticates with basic auth using *user* and *password*
    and is limited to a single retry.
    """
    credentials = (user, password)
    self.jira = JIRA(host, basic_auth=credentials, max_retries=1)
    self.ddp = DateDataParser(languages=['pt', 'en'])
class TestDateParser(BaseTestCase): def setUp(self): super(TestDateParser, self).setUp() self.parser = NotImplemented self.result = NotImplemented self.date_parser = NotImplemented self.date_result = NotImplemented @parameterized.expand([ # English dates param('[Sept] 04, 2014.', datetime(2014, 9, 4)), param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)), param('10:04am EDT', datetime(2012, 11, 13, 14, 4)), param('Friday', datetime(2012, 11, 9)), param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)), param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)), param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)), param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)), param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)), param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)), param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)), param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)), param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)), param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)), param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)), param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)), param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)), # French dates param('11 Mai 2014', datetime(2014, 5, 11)), param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)), param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)), param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)), param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)), param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)), param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)), param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)), # Spanish dates param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)), param('Miércoles 20 de Noviembre de 2013', 
datetime(2013, 11, 20)), param('12 de junio del 2012', datetime(2012, 6, 12)), param('13 Ago, 2014', datetime(2014, 8, 13)), param('13 Septiembre, 2014', datetime(2014, 9, 13)), param('11 Marzo, 2014', datetime(2014, 3, 11)), param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)), param('Vi 17:15', datetime(2012, 11, 9, 17, 15)), # Dutch dates param('11 augustus 2014', datetime(2014, 8, 11)), param('14 januari 2014', datetime(2014, 1, 14)), param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)), # Italian dates param('16 giu 2014', datetime(2014, 6, 16)), param('26 gennaio 2014', datetime(2014, 1, 26)), param('Ven 18:23', datetime(2012, 11, 9, 18, 23)), # Portuguese dates param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)), param('13 Setembro, 2014', datetime(2014, 9, 13)), param('Sab 3:03', datetime(2012, 11, 10, 3, 3)), # Russian dates param('10 мая', datetime(2012, 5, 10)), # forum.codenet.ru param('26 апреля', datetime(2012, 4, 26)), param('20 ноября 2013', datetime(2013, 11, 20)), param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)), param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)), param('09 августа 2012', datetime(2012, 8, 9, 0, 0)), param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)), param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)), param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)), param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г. 
19:13', datetime(2005, 8, 13, 19, 13)), # Turkish dates param('11 Ağustos, 2014', datetime(2014, 8, 11)), param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)), # forum.andronova.net param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)), param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)), # forum.ceviz.net # Romanian dates param('13 iunie 2013', datetime(2013, 6, 13)), param('14 aprilie 2014', datetime(2014, 4, 14)), param('18 martie 2012', datetime(2012, 3, 18)), param('S 14:14', datetime(2012, 11, 10, 14, 14)), param('12-Iun-2013', datetime(2013, 6, 12)), # German dates param('21. Dezember 2013', datetime(2013, 12, 21)), param('19. Februar 2012', datetime(2012, 2, 19)), param('26. Juli 2014', datetime(2014, 7, 26)), param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)), param('12-Mär-2014', datetime(2014, 3, 12)), param('Mit 13:14', datetime(2012, 11, 7, 13, 14)), # Czech dates param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)), param('13 Srpen, 2014', datetime(2014, 8, 13)), param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)), # Thai dates param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)), param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)), param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)), param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)), param('11 ก.พ. 
2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)), # Vietnamese dates param('Thứ năm', datetime(2012, 11, 8)), # Thursday param('Thứ sáu', datetime(2012, 11, 9)), # Friday param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)), # bpsosrcs.wordpress.com param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)), # Belarusian dates param('11 траўня', datetime(2012, 5, 11)), param('4 мая', datetime(2012, 5, 4)), param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)), param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)), param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)), # Ukrainian dates param('2015-кві-12', datetime(2015, 4, 12)), param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)), param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)), param('вів о 14:04', datetime(2012, 11, 6, 14, 4)), # Tagalog dates param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)), param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)), param('2 hun', datetime(2012, 6, 2)), param('Lin 16:16', datetime(2012, 11, 11, 16, 16)), # Japanese dates param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)), param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)), # Numeric dates param('06-17-2014', datetime(2014, 6, 17)), param('13/03/2014', datetime(2014, 3, 13)), param('11. 12. 
2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)), # Miscellaneous dates param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)), param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)), param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)), param('1 сер 2015', datetime(2015, 8, 1, 0, 0)), # Chinese dates param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)), param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)), param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)), # Greek dates param('19 Ιουνίου 2016', datetime(2016, 6, 19, 0, 0)), param('8 Ιανουαρίου 2015', datetime(2015, 1, 8, 0, 0)), param('4 Μαρτίου 2015', datetime(2015, 3, 4, 0, 0)), param('29 Δεκεμβρίου 2015', datetime(2015, 12, 29, 0, 0)), param('4 Απριλίου 2015', datetime(2015, 4, 4, 0, 0)), param('19 Φεβρουαρίου 2015', datetime(2015, 2, 19, 0, 0)), param('16 Μαΐου 2015', datetime(2015, 5, 16, 0, 0)), param('21 Αυγούστου 2014', datetime(2014, 8, 21, 0, 0)), param('30 Σεπτεμβρίου 2014', datetime(2014, 9, 30, 0, 0)), param('24 Οκτωβρίου 2014', datetime(2014, 10, 24, 0, 0)), param('1 Ιουλίου 2014', datetime(2014, 7, 1, 0, 0)), param('27 Νοεμβρίου 2014', datetime(2014, 11, 27, 0, 0)), # Arabic dates param('١٦ أكتوبر، ٢٠١٥', datetime(2015, 10, 16, 0, 0)), param('١٦ يونيو، ٢٠١٦', datetime(2016, 6, 16, 0, 0)), # Korean param('2016년 6월 18일', datetime(2016, 6, 18, 0, 0)), # Hindi param('27 अगस्त 2014', datetime(2014, 8, 27, 0, 0)), param('8 दिसंबर 2014', datetime(2014, 12, 8, 0, 0)), param('23 फ़रवरी 2014', datetime(2014, 2, 23, 0, 0)), param('10 सितंबर 2014', datetime(2014, 9, 10, 0, 0)), param('11 अक्तूबर 2014', datetime(2014, 10, 11, 0, 0)), param('12 नवंबर 2014', datetime(2014, 11, 12, 0, 0)), param('16 जनवरी 2014', datetime(2014, 1, 16, 0, 0)), param('1 जून 2014', datetime(2014, 6, 1, 0, 0)), param('25 अप्रैल 2014', datetime(2014, 4, 25, 0, 0)), param('19 मई 2015', datetime(2015, 5, 19, 0, 0)), param('2 मार्च 2015', datetime(2015, 3, 2, 0, 0)), param('1 जुलाई 2015', datetime(2015, 7, 1, 0, 0)), # Swedish param('27 augusti 
2014', datetime(2014, 8, 27, 0, 0)), param('7 mars 2011', datetime(2011, 3, 7, 0, 0)), param('30 januari 2015', datetime(2015, 1, 30, 0, 0)), param('28 februari 2015', datetime(2015, 2, 28, 0, 0)), # Norwegian param('5. januar 2014', datetime(2014, 1, 5, 0, 0)), param('12. februar 2014', datetime(2014, 2, 12, 0, 0)), param('12. mars 2013', datetime(2013, 3, 12, 0, 0)), param('4. april 2014', datetime(2014, 4, 4, 0, 0)), param('8. mai 2016', datetime(2016, 5, 8, 0, 0)), param('11. juni 2012', datetime(2012, 6, 11, 0, 0)), param('29. juli 2012', datetime(2012, 7, 29, 0, 0)), param('18. august 2012', datetime(2012, 8, 18, 0, 0)), param('1. september 2012', datetime(2012, 9, 1, 0, 0)), param('6. oktober 2014', datetime(2014, 10, 6, 0, 0)), param('28. desember 2014', datetime(2014, 12, 28, 0, 0)), ]) def test_dates_parsing(self, date_string, expected): self.given_utcnow(datetime(2012, 11, 13)) # Tuesday self.given_local_tz_offset(0) self.given_parser(settings={'NORMALIZE': False}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) @parameterized.expand([ # English dates param('[Sept] 04, 2014.', datetime(2014, 9, 4)), param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)), param('10:04am EDT', datetime(2012, 11, 13, 14, 4)), param('Friday', datetime(2012, 11, 9)), param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)), param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)), param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)), param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)), param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)), param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)), param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)), param('8:25 a.m. Dec. 
12, 2014', datetime(2014, 12, 12, 8, 25)), param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)), param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)), param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)), param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)), param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)), # French dates param('11 Mai 2014', datetime(2014, 5, 11)), param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)), param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), #wrong param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)), param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)), param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)), param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)), param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)), param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)), # Spanish dates param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)), param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)), param('12 de junio del 2012', datetime(2012, 6, 12)), param('13 Ago, 2014', datetime(2014, 8, 13)), param('13 Septiembre, 2014', datetime(2014, 9, 13)), param('11 Marzo, 2014', datetime(2014, 3, 11)), param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)), param('Vi 17:15', datetime(2012, 11, 9, 17, 15)), # Dutch dates param('11 augustus 2014', datetime(2014, 8, 11)), param('14 januari 2014', datetime(2014, 1, 14)), param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)), # Italian dates param('16 giu 2014', datetime(2014, 6, 16)), param('26 gennaio 2014', datetime(2014, 1, 26)), param('Ven 18:23', datetime(2012, 11, 9, 18, 23)), # Portuguese dates param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)), param('13 Setembro, 2014', datetime(2014, 9, 13)), param('Sab 3:03', datetime(2012, 11, 10, 3, 3)), # Russian dates param('10 мая', datetime(2012, 5, 10)), # 
forum.codenet.ru param('26 апреля', datetime(2012, 4, 26)), param('20 ноября 2013', datetime(2013, 11, 20)), param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)), param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)), param('09 августа 2012', datetime(2012, 8, 9, 0, 0)), param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)), param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)), param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)), param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)), # Turkish dates param('11 Ağustos, 2014', datetime(2014, 8, 11)), param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)), # forum.andronova.net param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)), param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)), # forum.ceviz.net # Romanian dates param('13 iunie 2013', datetime(2013, 6, 13)), param('14 aprilie 2014', datetime(2014, 4, 14)), param('18 martie 2012', datetime(2012, 3, 18)), param('S 14:14', datetime(2012, 11, 10, 14, 14)), param('12-Iun-2013', datetime(2013, 6, 12)), # German dates param('21. Dezember 2013', datetime(2013, 12, 21)), param('19. Februar 2012', datetime(2012, 2, 19)), param('26. Juli 2014', datetime(2014, 7, 26)), param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)), param('12-Mär-2014', datetime(2014, 3, 12)), param('Mit 13:14', datetime(2012, 11, 7, 13, 14)), # Czech dates param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)), param('13 Srpen, 2014', datetime(2014, 8, 13)), param('čtv 14. 
lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)), # Thai dates param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)), param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)), param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)), param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)), param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)), # Vietnamese dates param('Thứ năm', datetime(2012, 11, 8)), # Thursday param('Thứ sáu', datetime(2012, 11, 9)), # Friday param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)), # bpsosrcs.wordpress.com param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)), # Belarusian dates param('11 траўня', datetime(2012, 5, 11)), param('4 мая', datetime(2012, 5, 4)), param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)), param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)), param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)), # Ukrainian dates param('2015-кві-12', datetime(2015, 4, 12)), param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)), param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)), param('вів о 14:04', datetime(2012, 11, 6, 14, 4)), # Filipino dates param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)), param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)), param('2 hun', datetime(2012, 6, 2)), param('Lin 16:16', datetime(2012, 11, 11, 16, 16)), # Japanese dates param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)), param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)), # Numeric dates param('06-17-2014', datetime(2014, 6, 17)), param('13/03/2014', datetime(2014, 3, 13)), param('11. 12. 
2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)), # Miscellaneous dates param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)), param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)), param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)), param('1 сер 2015', datetime(2015, 8, 1, 0, 0)), ]) def test_dates_parsing_with_normalization(self, date_string, expected): self.given_utcnow(datetime(2012, 11, 13)) # Tuesday self.given_local_tz_offset(0) self.given_parser(settings={'NORMALIZE': True}) self.when_date_is_parsed(normalize_unicode(date_string)) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)), param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 8, 8)), param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)), param('15 May 2004', datetime(2004, 5, 15, 0, 0)), param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)), ]) def test_parsing_with_time_zones(self, date_string, expected): self.given_local_tz_offset(+1) self.given_parser() self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)), param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)), param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)), param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)), param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)), ]) def test_parsing_with_utc_offsets(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser() self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) def test_empty_dates_string_is_not_parsed(self): self.when_date_is_parsed_by_date_parser('') 
self.then_error_was_raised(ValueError, ["Empty string"]) @parameterized.expand([ param('invalid date string'), param('Aug 7, 2014Aug 7, 2014'), param('24h ago'), ]) def test_dates_not_parsed(self, date_string): self.when_date_is_parsed_by_date_parser(date_string) self.then_error_was_raised(ValueError, ["unknown string format"]) @parameterized.expand([ param('10 December', datetime(2014, 12, 10)), param('March', datetime(2014, 3, 15)), param('Friday', datetime(2015, 2, 13)), param('Monday', datetime(2015, 2, 9)), param('10:00PM', datetime(2015, 2, 14, 22, 00)), param('16:10', datetime(2015, 2, 14, 16, 10)), param('14:05', datetime(2015, 2, 15, 14, 5)), ]) def test_preferably_past_dates(self, date_string, expected): self.given_utcnow(datetime(2015, 2, 15, 15, 30)) # Sunday self.given_local_tz_offset(0) self.given_parser(settings={'PREFER_DATES_FROM': 'past'}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('10 December', datetime(2015, 12, 10)), param('March', datetime(2015, 3, 15)), param('Friday', datetime(2015, 2, 20)), param('Monday', datetime(2015, 2, 16)), param('10:00PM', datetime(2015, 2, 15, 22, 00)), param('16:10', datetime(2015, 2, 15, 16, 10)), param('14:05', datetime(2015, 2, 16, 14, 5)), ]) def test_preferably_future_dates(self, date_string, expected): self.given_utcnow(datetime(2015, 2, 15, 15, 30)) # Sunday self.given_local_tz_offset(0) self.given_parser(settings={'PREFER_DATES_FROM': 'future'}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('10 December', datetime(2015, 12, 10)), param('March', datetime(2015, 3, 15)), param('Friday', datetime(2015, 2, 13)), param('10:00PM', datetime(2015, 2, 15, 22, 00)), param('16:10', datetime(2015, 2, 15, 16, 10)), param('14:05', datetime(2015, 2, 15, 14, 5)), ]) def test_dates_without_preference(self, 
date_string, expected): self.given_utcnow(datetime(2015, 2, 15, 15, 30)) # Sunday self.given_local_tz_offset(0) self.given_parser(settings={'PREFER_DATES_FROM': 'current_period'}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)), param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)), param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)), param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)), param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)), param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)), ]) def test_dates_with_day_missing_prefering_current_day_of_month( self, date_string, today=None, expected=None): self.given_utcnow(today) self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'current'}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)), param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)), param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)), param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)), param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)), param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)), ]) def test_dates_with_day_missing_prefering_last_day_of_month( self, date_string, today=None, expected=None): self.given_utcnow(today) self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'last'}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) 
# NOTE(review): the functions below take `self`; they appear to be methods of
# a test case whose `class` line precedes this chunk -- confirm and re-indent
# when the file's layout is restored.
@parameterized.expand([
    param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
    param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
    param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
    param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
    param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
    param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
])
def test_dates_with_day_missing_prefering_first_day_of_month(
        self, date_string, today=None, expected=None):
    """Check that month/year strings resolve to day 1 with PREFER_DAY_OF_MONTH='first'.

    Each case patches "now" to ``today`` and expects the first day of the
    parsed month, whatever day of the month ``today`` falls on.
    """
    self.given_utcnow(today)
    self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'first'})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

@parameterized.expand([
    param(prefer_day_of_month='current'),
    param(prefer_day_of_month='last'),
    param(prefer_day_of_month='first'),
])
def test_that_day_preference_does_not_affect_dates_with_explicit_day(
        self, prefer_day_of_month=None):
    """Check that an explicit day wins over every PREFER_DAY_OF_MONTH value.

    '24 April 2012' must parse to 24 April 2012 for 'current', 'last' and
    'first' alike.
    """
    self.given_utcnow(datetime(2015, 2, 12))
    self.given_parser(
        settings={'PREFER_DAY_OF_MONTH': prefer_day_of_month})
    self.when_date_is_parsed('24 April 2012')
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(datetime(2012, 4, 24))

def test_date_is_parsed_when_skip_tokens_are_supplied(self):
    """Check that a trailing 'de' is tolerated when SKIP_TOKENS contains 'de'."""
    self.given_utcnow(datetime(2015, 2, 12))
    self.given_parser(settings={'SKIP_TOKENS': ['de']})
    self.when_date_is_parsed('24 April 2012 de')
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(datetime(2012, 4, 24))

@parameterized.expand([
    param('29 February 2015'),
    param('32 January 2015'),
    param('31 April 2015'),
    param('31 June 2015'),
    param('31 September 2015'),
])
def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
        self, date_string):
    """Check that a day number beyond the month's length yields a ValueError.

    The error is captured by ``when_date_is_parsed_by_date_parser`` and then
    checked against the 'Day not in range for month' message.
    """
    self.when_date_is_parsed_by_date_parser(date_string)
    self.then_error_was_raised(ValueError, ['Day not in range for month'])
# NOTE(review): the functions below take `self`; they appear to be methods of
# a test case whose `class` line precedes this chunk -- confirm and re-indent
# when the file's layout is restored.
@parameterized.expand([
    param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
    param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
    param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
])
def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
    """Check that an ISO timestamp parses the same for every `languages` value."""
    self.given_local_tz_offset(0)
    self.given_parser(languages=languages)
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

@parameterized.expand([
    param('10 December', expected=datetime(2015, 12, 10), period='day'),
    param('March', expected=datetime(2015, 3, 15), period='month'),
    param('April', expected=datetime(2015, 4, 15), period='month'),
    param('December', expected=datetime(2015, 12, 15), period='month'),
    param('Friday', expected=datetime(2015, 2, 13), period='day'),
    param('Monday', expected=datetime(2015, 2, 9), period='day'),
    param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
    param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
    param('2014', expected=datetime(2014, 2, 15), period='year'),
    param('2008', expected=datetime(2008, 2, 15), period='year'),
])
def test_extracted_period(self, date_string, expected=None, period=None):
    """Check both the parsed date and the reported 'period' of the result."""
    self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
    self.given_local_tz_offset(0)
    self.given_parser()
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)
    self.then_period_is(period)

def given_utcnow(self, now):
    """Patch `dateparser.date_parser.datetime` so that `utcnow()` returns `now`.

    The replacement is a Mock wrapping the real `datetime`; only `utcnow`
    is overridden.
    """
    datetime_mock = Mock(wraps=datetime)
    datetime_mock.utcnow = Mock(return_value=now)
    self.add_patch(
        patch('dateparser.date_parser.datetime', new=datetime_mock))

def given_local_tz_offset(self, offset):
    """Patch `dateparser.timezone_parser.local_tz_offset` to `offset` hours."""
    self.add_patch(
        patch.object(dateparser.timezone_parser,
                     'local_tz_offset',
                     new=timedelta(seconds=3600 * offset)))

def given_parser(self, *args, **kwds):
    """Create `self.parser` with `date_parser.parse` instrumented.

    `date_parser.parse` is replaced by a wrapper that records its return
    value in `self.date_result`, and `dateparser.date.date_parser` is
    replaced by a Mock wrapping `date_parser`. `*args` / `**kwds` are passed
    to `DateDataParser` unchanged.
    """
    def collecting_get_date_data(parse):
        @wraps(parse)
        def wrapped(*args, **kwargs):
            # Remember what the real parse returned so the test can later
            # compare it with the final result.
            self.date_result = parse(*args, **kwargs)
            return self.date_result
        return wrapped
    self.add_patch(
        patch.object(date_parser,
                     'parse',
                     collecting_get_date_data(date_parser.parse)))

    self.date_parser = Mock(wraps=date_parser)
    self.add_patch(
        patch('dateparser.date.date_parser', new=self.date_parser))
    self.parser = DateDataParser(*args, **kwds)

def when_date_is_parsed(self, date_string):
    """Parse `date_string` with `self.parser` and keep the result dict."""
    self.result = self.parser.get_date_data(date_string)

def when_date_is_parsed_by_date_parser(self, date_string):
    """Parse with a fresh `DateParser()`; store any exception in `self.error`."""
    try:
        self.result = DateParser().parse(date_string)
    except Exception as error:
        # Captured, not re-raised: the error is inspected by a later step.
        self.error = error

def then_period_is(self, period):
    """Assert the result's 'period' entry equals `period`."""
    self.assertEqual(period, self.result['period'])

def then_date_obj_exactly_is(self, expected):
    """Assert the result's 'date_obj' entry equals `expected`."""
    self.assertEqual(expected, self.result['date_obj'])

def then_date_was_parsed_by_date_parser(self):
    """Assert the instrumented `parse` ran and produced the final date.

    `self.date_result` must no longer be `NotImplemented`, and its first
    element must equal the result's 'date_obj'.
    """
    self.assertNotEqual(NotImplemented, self.date_result, "Date was not parsed")
    self.assertEqual(self.result['date_obj'], self.date_result[0])
class TestDateParser(BaseTestCase):
    """Tests for date strings that are handled by `date_parser.parse`.

    The `given_*` helpers install patches (current time, local timezone
    offset, settings, an instrumented `date_parser.parse`), the `when_*`
    helpers run the parser, and the `then_*` helpers assert on the outcome.
    """

    def setUp(self):
        """Reset per-test state to `NotImplemented` sentinels.

        `then_date_was_parsed_by_date_parser` relies on `self.date_result`
        still being `NotImplemented` when the patched parse never ran.
        """
        super(TestDateParser, self).setUp()
        self.parser = NotImplemented
        self.result = NotImplemented
        self.date_parser = NotImplemented
        self.date_result = NotImplemented

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
    ])
    def test_dates_parsing(self, date_string, expected):
        """Check multi-language date strings against a fixed 'now'.

        'now' is patched to 2012-11-13 and the local offset to 0; every case
        must report period 'day'.
        """
        self.given_utcnow(datetime(2012, 11, 13))  # Tuesday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 1, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        """Check timezone-abbreviation handling with the local offset patched to +1 hour."""
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        """Check numeric UTC offsets with the local offset patched to 0."""
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        """Check that an empty string yields a ValueError mentioning 'Empty string'."""
        self.when_date_is_parsed_by_date_parser('')
        self.then_error_was_raised(ValueError, ["Empty string"])

    @parameterized.expand([
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
        param('24h ago'),
    ])
    def test_dates_not_parsed(self, date_string):
        """Check that unparseable strings yield an 'unknown string format' ValueError."""
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, ["unknown string format"])

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        """Check incomplete dates with PREFER_DATES_FROM='past' and 'now' at 2015-02-15 15:30."""
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        """Check incomplete dates with PREFER_DATES_FROM='future' and 'now' at 2015-02-15 15:30."""
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        """Check incomplete dates with PREFER_DATES_FROM='current_period'."""
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(
            self, date_string, today=None, expected=None):
        """Check month/year strings with PREFER_DAY_OF_MONTH='current'.

        The expected day is today's day of month, or the month's last day
        when the parsed month is shorter (e.g. 31 January -> 28 February).
        """
        self.given_configuration('PREFER_DAY_OF_MONTH', 'current')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(
            self, date_string, today=None, expected=None):
        """Check month/year strings resolve to the month's last day with PREFER_DAY_OF_MONTH='last'."""
        self.given_configuration('PREFER_DAY_OF_MONTH', 'last')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(
            self, date_string, today=None, expected=None):
        """Check month/year strings resolve to day 1 with PREFER_DAY_OF_MONTH='first'."""
        self.given_configuration('PREFER_DAY_OF_MONTH', 'first')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(
            self, prefer_day_of_month=None):
        """Check that an explicit day wins over every PREFER_DAY_OF_MONTH value."""
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(datetime(2015, 2, 12))
        self.given_parser()
        self.when_date_is_parsed('24 April 2012')
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
            self, date_string):
        """Check that a day number beyond the month's length yields a ValueError."""
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, ['Day not in range for month'])

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        """Check that an ISO timestamp parses the same for every `languages` value."""
        self.given_local_tz_offset(0)
        self.given_parser(languages=languages)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        """Check both the parsed date and the reported 'period' of the result."""
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    def given_utcnow(self, now):
        """Patch `dateparser.date_parser.datetime` so that `utcnow()` returns `now`.

        The replacement is a Mock wrapping the real `datetime`; only
        `utcnow` is overridden.
        """
        datetime_mock = Mock(wraps=datetime)
        datetime_mock.utcnow = Mock(return_value=now)
        self.add_patch(
            patch('dateparser.date_parser.datetime', new=datetime_mock))

    def given_local_tz_offset(self, offset):
        """Patch `dateparser.timezone_parser.local_tz_offset` to `offset` hours."""
        self.add_patch(
            patch.object(dateparser.timezone_parser,
                         'local_tz_offset',
                         new=timedelta(seconds=3600 * offset)))

    def given_parser(self, *args, **kwds):
        """Create `self.parser` with `date_parser.parse` instrumented.

        `date_parser.parse` is replaced by a wrapper that records its return
        value in `self.date_result`, and `dateparser.date.date_parser` is
        replaced by a Mock wrapping `date_parser`. `*args` / `**kwds` are
        passed to `DateDataParser` unchanged.
        """
        def collecting_get_date_data(parse):
            @wraps(parse)
            def wrapped(*parse_args, **parse_kwargs):
                # Forward whatever the caller passed: a wrapper limited to a
                # single positional argument would raise TypeError as soon as
                # parse() is invoked with any additional argument.
                self.date_result = parse(*parse_args, **parse_kwargs)
                return self.date_result
            return wrapped
        self.add_patch(
            patch.object(date_parser,
                         'parse',
                         collecting_get_date_data(date_parser.parse)))

        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(
            patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def given_configuration(self, key, value):
        """Patch attribute `key` of the `settings` object to `value` for this test."""
        self.add_patch(patch.object(settings, key, new=value))

    def when_date_is_parsed(self, date_string):
        """Parse `date_string` with `self.parser` and keep the result dict."""
        self.result = self.parser.get_date_data(date_string)

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse with a fresh `DateParser()`; store any exception in `self.error`."""
        try:
            self.result = DateParser().parse(date_string)
        except Exception as error:
            # Captured, not re-raised: the error is inspected by a later step.
            self.error = error

    def then_period_is(self, period):
        """Assert the result's 'period' entry equals `period`."""
        self.assertEqual(period, self.result['period'])

    def then_date_obj_exactly_is(self, expected):
        """Assert the result's 'date_obj' entry equals `expected`."""
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_parsed_by_date_parser(self):
        """Assert the instrumented `parse` ran and produced the final date.

        `self.date_result` must no longer be `NotImplemented`, and its first
        element must equal the result's 'date_obj'.
        """
        self.assertNotEqual(NotImplemented, self.date_result, "Date was not parsed")
        self.assertEqual(self.result['date_obj'], self.date_result[0])
class RelevancePeriodExtractor(Task):

    """A Task runner that extracts the period a source's content refers to
    (is relevant for).
    """

    def __init__(self, config):
        """Read the "timeliness" options from the config and build the parser.

            Args:
                config: task configuration; its "timeliness" section is read

            Raises:
                ValueError: if "timeliness_strategy" is missing or empty
        """

        super(RelevancePeriodExtractor, self).__init__(config)
        timeliness_params = self.config['timeliness']
        self.extract_period = timeliness_params.get('extract_period', False)
        self.timeliness_strategy = timeliness_params.get('timeliness_strategy', [])
        self.date_order = timeliness_params.get('date_order', 'DMY')
        self.max_empty_relevance_period = timeliness_params.get(
            'max_empty_relevance_period', 10)
        if not self.timeliness_strategy:
            raise ValueError('You need to provide values for "timeliness_strategy."')
        datapackage_check = DataPackageChecker(self.config)
        datapackage_check.check_database_completeness([self.source_file])
        settings = {'RETURN_AS_TIMEZONE_AWARE': False,
                    'PREFER_DAY_OF_MONTH': 'last',
                    'PREFER_DATES_FROM': 'past',
                    'SKIP_TOKENS': ['to'],
                    'DATE_ORDER': self.date_order}
        self.date_parser = DateDataParser(allow_redetect_language=True,
                                          settings=settings)

    def run(self):
        """Try to identify the relevance period of sources.

            Every source gets a "period_id" of the form "start/end" (each
            formatted as %d-%m-%Y) and the source file is rewritten.

            Raises:
                exceptions.UnableToAssessTimeliness: if the percentage of
                    sources without a period exceeds
                    "max_empty_relevance_period"
        """

        sources = self.extract_period_from_sources()
        empty_period_sources = [source for source in sources
                                if source['period_id'] is None]
        if sources:
            empty_period_percent = (len(empty_period_sources) * 100) / len(sources)
            empty_period_percent = round(empty_period_percent)
        else:
            # An empty source file has nothing unidentified; avoid dividing
            # by zero and fall through to rewriting the (empty) file.
            empty_period_percent = 0
        if empty_period_percent > int(self.max_empty_relevance_period):
            msg = ('The relevance period couldn\'t be identified for'
                   ' {0}% of sources therefore timeliness cannot be'
                   ' assessed. Please provide more fields for "timeliness_'
                   'strategy", set "assess_timeliness" to false or increase'
                   ' "max_empty_relevance_period".').format(empty_period_percent)
            raise exceptions.UnableToAssessTimeliness(msg)

        for source in sources:
            if source['period_id'] is None:
                # No period found: fall back to the creation date for both
                # ends of the period.
                creation_date = utilities.date_from_string(source['created_at'])
                dates = [creation_date, creation_date]
            else:
                period_start, period_end = source['period_id']
                dates = [period_start.date(), period_end.date()]
            # Anything that is not a date (e.g. an unparsable creation date)
            # is written as an empty string.
            dates = [date.strftime('%d-%m-%Y') if isinstance(date, datetime.date)
                     else '' for date in dates]
            source['period_id'] = '/'.join(dates)
        self.update_sources_period(sources)

    def extract_period_from_sources(self):
        """Try to extract relevance period for each source or return None.

            Returns:
                a list with every row of the source file; each row has a
                "period_id" key holding the result of `identify_period`

            Raises:
                ValueError: if none of the "timeliness_strategy" fields is
                    present in the source file header
        """

        sources = []
        with compat.UnicodeDictReader(self.source_file) as source_file:
            timeliness_set = set(self.timeliness_strategy)
            found_fields = timeliness_set.intersection(set(source_file.header))
            if not found_fields:
                raise ValueError(('At least one of the "timeliness_strategy" '
                                  'fields must be present in your "source_file".'))
            if not found_fields.issuperset(timeliness_set):
                missing_fields = timeliness_set.difference(found_fields)
                print(('Fields "{0}" from "timeliness_strategy" were not found '
                       'in your `source_file`').format(missing_fields))
            for source in source_file:
                timeliness_fields = {field: val for field, val in source.items()
                                     if field in self.timeliness_strategy}
                extracted_period = self.identify_period(timeliness_fields)
                source['period_id'] = extracted_period
                sources.append(source)
        return sources

    def identify_period(self, source=None):
        """Try to identify the period of a source based on timeliness strategy

            Args:
                source: a dict corresponding to a source_file row

            Returns:
                the first truthy result of `resolve_period` for a strategy
                field (fields are tried in "timeliness_strategy" order),
                or None
        """

        source = {} if source is None else source
        field_dates = {}
        for field in self.timeliness_strategy:
            value = source.get(field, '')
            if not value:
                continue
            field_dates[field] = self.extract_dates(value)

        for field in self.timeliness_strategy:
            dates = field_dates.get(field, [])
            if not dates:
                continue
            period = resolve_period(dates)
            if period:
                break
            # It means we have more than 2 dates: try to narrow them down to
            # the ones that also appear in another strategy field.
            # NOTE(review): a period found here does not leave the outer
            # loop, so it is overwritten by the next field or reset by the
            # `else` below - confirm whether it was meant to be returned.
            other_fields = list(self.timeliness_strategy)
            other_fields.remove(field)
            other_values = [field_dates.get(other_field, [])
                            for other_field in other_fields]
            for values in other_values:
                date_objects = set(date['date_obj'] for date in dates)
                common_values = [date for date in values
                                 if date['date_obj'] in date_objects]
                period = resolve_period(common_values)
                if period:
                    break
        else:
            # No field resolved to a period directly.
            period = None
        return period

    def extract_dates(self, line=""):
        """Try to extract dates from a line

            Args:
                line: a string that could contain a date or time range

            Returns:
                a list of date-data dicts with a non-None "date_obj", at
                most one per distinct "date_obj"
        """

        dates = []
        # Fully numeric dates such as 01/02/2015 or 2015-02-01.
        potential_dates = re.findall(r'[0-9]+[\W_][0-9]+[\W_][0-9]+', line)
        line_words = re.sub(r'[\W_]+', ' ', line).split()
        years = filter_years(line_words)
        for word in years:
            if re.search(r'[a-zA-Z]', word):
                # The year is glued to letters; hand the whole word to the
                # parser and stop scanning.
                potential_dates.append(word)
                break
            for index, entry in enumerate(line_words):
                if entry != word:
                    continue
                date = self.scan_for_date(line_words, index)
                if not date:
                    # Nothing parsable around this year, so there is no
                    # range end to look behind either.
                    continue
                potential_dates.append(date)
                # Try to find a range
                if date['period'] != 'year' and date['date_obj']:
                    range_start = self.scan_for_range(line_words, index, date)
                    if not range_start:
                        continue
                    if range_start['date_obj'] < date['date_obj']:
                        potential_dates.append(range_start)

        for potential_date in potential_dates:
            try:
                dates.append(self.date_parser.get_date_data(potential_date))
            except TypeError:
                # Entries added by the scan above are already date-data
                # dicts; the parser rejects them and they are kept as they are.
                if isinstance(potential_date, dict):
                    dates.append(potential_date)
            except ValueError:
                # Unparsable candidate: drop it.
                continue
        dates = [date for date in dates if date['date_obj'] is not None]
        # Keep a single entry per distinct date object.
        dates = list({date['date_obj']: date for date in dates}.values())
        return dates

    def scan_for_date(self, line_words, year_index):
        """Scan around the year for a date as complete as possible

            Args:
                line_words: a list of words (strings)
                year_index: index of a string from line_word that contains a year

            Returns:
                a date-data dict, or None if nothing could be parsed
        """

        # Up to two words before the year plus the year itself. The start is
        # clamped because a negative start would wrap around to the end of
        # the list and drop the word right before the year.
        window_start = max(year_index - 2, 0)
        date_parts = line_words[window_start:year_index + 1]
        potential_date = self.create_date_from_parts(date_parts)
        if not potential_date or potential_date['period'] == 'year':
            # Nothing better than a bare year on the left: try the words that
            # follow the year, reversed so the year still comes last.
            new_parts = list(reversed(line_words[year_index:year_index + 3]))
            new_potential_date = self.create_date_from_parts(new_parts)
            if new_potential_date:
                potential_date = new_potential_date
        return potential_date

    def scan_for_range(self, line_words, year_index, range_end):
        """Scan to the left of the year whose corresponding date has been
        extracted to see if there is a range.

            Args:
                line_words: a list of words (strings)
                year_index: index of a string from line_word that contains a year
                range_end: date that has already been extracted from the year at
                            year_index, potentially end of range

            Returns:
                a date-data dict with the same "period" as range_end,
                or None
        """

        if range_end['period'] == 'month':
            scan_start = year_index - 2
            scan_end = year_index - 4
        else:
            scan_start = year_index - 3
            scan_end = year_index - 5
        # Clamp both bounds: negative values would wrap around and select
        # words from the end of the line instead of the left of the year.
        window_start = max(scan_end, 0)
        window_stop = max(scan_start + 1, 0)
        range_start_parts = line_words[window_start:window_stop]
        # Keep only the words that can be parsed as (part of) a date.
        range_start_parts = [part for part in range_start_parts
                             if self.create_date_from_parts([part]) is not None]
        years = filter_years(range_start_parts)
        if years:
            # A year inside the window: discard the whole window.
            range_start_parts = []
        if range_start_parts:
            # Complete the range start with the month/year of the range end.
            if len(range_start_parts) == 1 and range_end['period'] == 'day':
                range_start_parts.append(compat.str(range_end['date_obj'].month))
            range_start_parts.append(compat.str(range_end['date_obj'].year))
        # Returns None for an empty list, so range_start is always bound.
        range_start = self.create_date_from_parts(range_start_parts)
        if range_start and range_start['period'] != range_end['period']:
            range_start = None
        return range_start

    def create_date_from_parts(self, date_parts=None):
        """Try to create a date object with date_parser or return None.

            Args:
                date_parts: a list of words (strings); it is not modified

            Returns:
                the date data for the longest suffix of date_parts that
                parses to a non-None "date_obj", or None
        """

        if not date_parts:
            return None
        # Work on a copy so the caller's list is left untouched.
        parts = list(date_parts)
        if len(parts) == 2 and all(el.isdigit() for el in parts):
            # Two numeric parts: prepend a day of '31' before parsing.
            parts.insert(0, '31')
        for index in range(len(parts)):
            potential_date = ' '.join(parts[index:])
            try:
                date = self.date_parser.get_date_data(potential_date)
            except (ValueError, TypeError):
                date = None
            if date and date.get('date_obj') is not None:
                return date
        return None

    def update_sources_period(self, new_sources):
        """Overwrite source_file with the identified period_id

            Args:
                new_sources: a list of source dicts that include "period_id"
        """

        source_resource = utilities.get_datapackage_resource(self.source_file,
                                                             self.datapackage)
        source_idx = self.datapackage.resources.index(source_resource)
        source_schema_dict = self.datapackage.resources[source_idx].descriptor['schema']
        updates = {'fields': [{'name': 'period_id',
                               'type': 'string',
                               'title': 'The period source data is relevant for.'}]}
        utilities.deep_update_dict(source_schema_dict, updates)
        source_schema = SchemaModel(source_schema_dict)

        with compat.UnicodeWriter(self.source_file) as source_file:
            source_file.writerow(source_schema.headers)
            for row in utilities.dicts_to_schema_rows(new_sources, source_schema):
                source_file.writerow(row)
def _parser_get_date(self, date_string, date_formats, languages):
    """Parse date_string with a fresh DateDataParser built from languages.

    A new parser is created on every call; date_formats is forwarded
    unchanged to its get_date_data, whose result is returned as is.
    """
    return DateDataParser(languages).get_date_data(date_string, date_formats)
class TestFreshnessDateDataParser(BaseTestCase): def setUp(self): super(TestFreshnessDateDataParser, self).setUp() self.now = datetime(2014, 9, 1, 10, 30) self.date_string = NotImplemented self.parser = NotImplemented self.result = NotImplemented self.freshness_parser = NotImplemented self.freshness_result = NotImplemented self.date = NotImplemented self.time = NotImplemented @parameterized.expand([ # English dates param('yesterday', ago={'days': 1}, period='day'), param('the day before yesterday', ago={'days': 2}, period='day'), param('today', ago={'days': 0}, period='day'), param('an hour ago', ago={'hours': 1}, period='day'), param('about an hour ago', ago={'hours': 1}, period='day'), param('a day ago', ago={'days': 1}, period='day'), param('a week ago', ago={'weeks': 1}, period='week'), param('one week ago', ago={'weeks': 1}, period='week'), param('2 hours ago', ago={'hours': 2}, period='day'), param('about 23 hours ago', ago={'hours': 23}, period='day'), param('1 year 2 months', ago={'years': 1, 'months': 2}, period='month'), param('1 year, 09 months,01 weeks', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 year 11 months', ago={'years': 1, 'months': 11}, period='month'), param('1 year 12 months', ago={'years': 1, 'months': 12}, period='month'), param('15 hr', ago={'hours': 15}, period='day'), param('15 hrs', ago={'hours': 15}, period='day'), param('2 min', ago={'minutes': 2}, period='day'), param('2 mins', ago={'minutes': 2}, period='day'), param('3 sec', ago={'seconds': 3}, period='day'), param('1000 years ago', ago={'years': 1000}, period='year'), param('2013 years ago', ago={'years': 2013}, period='year'), # We've fixed .now in setUp param('5000 months ago', ago={'years': 416, 'months': 8}, period='month'), param('{} months ago'.format(2013 * 12 + 8), ago={'years': 2013, 'months': 8}, period='month'), param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 
'minutes': 1}, period='day'), param('just now', ago={'seconds': 0}, period='day'), # French dates param("Aujourd'hui", ago={'days': 0}, period='day'), param("Aujourd’hui", ago={'days': 0}, period='day'), param("Aujourdʼhui", ago={'days': 0}, period='day'), param("Aujourdʻhui", ago={'days': 0}, period='day'), param("Aujourd՚hui", ago={'days': 0}, period='day'), param("Aujourdꞌhui", ago={'days': 0}, period='day'), param("Aujourd'hui", ago={'days': 0}, period='day'), param("Aujourd′hui", ago={'days': 0}, period='day'), param("Aujourd‵hui", ago={'days': 0}, period='day'), param("Aujourdʹhui", ago={'days': 0}, period='day'), param("Aujourd'hui", ago={'days': 0}, period='day'), param("Hier", ago={'days': 1}, period='day'), param("Avant-hier", ago={'days': 2}, period='day'), param('Il ya un jour', ago={'days': 1}, period='day'), param('Il ya une heure', ago={'hours': 1}, period='day'), param('Il ya 2 heures', ago={'hours': 2}, period='day'), param('Il ya environ 23 heures', ago={'hours': 23}, period='day'), param('1 an 2 mois', ago={'years': 1, 'months': 2}, period='month'), param('1 année, 09 mois, 01 semaines', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 an 11 mois', ago={'years': 1, 'months': 11}, period='month'), param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), param('Il y a 40 min', ago={'minutes': 40}, period='day'), # German dates param('Heute', ago={'days': 0}, period='day'), param('Gestern', ago={'days': 1}, period='day'), param('vorgestern', ago={'days': 2}, period='day'), param('vor einem Tag', ago={'days': 1}, period='day'), param('vor einer Stunden', ago={'hours': 1}, period='day'), param('Vor 2 Stunden', ago={'hours': 2}, period='day'), param('Vor 2 Stunden', ago={'hours': 2}, period='day'), param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'), param('1 Jahr 2 Monate', ago={'years': 1, 'months': 2}, 
period='month'), param('1 Jahr, 09 Monate, 01 Wochen', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 Jahr 11 Monate', ago={'years': 1, 'months': 11}, period='month'), param('vor 29h', ago={'hours': 29}, period='day'), param('vor 29m', ago={'minutes': 29}, period='day'), param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Italian dates param('oggi', ago={'days': 0}, period='day'), param('ieri', ago={'days': 1}, period='day'), param('2 ore fa', ago={'hours': 2}, period='day'), param('circa 23 ore fa', ago={'hours': 23}, period='day'), param('1 anno 2 mesi', ago={'years': 1, 'months': 2}, period='month'), param('1 anno, 09 mesi, 01 settimane', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 anno 11 mesi', ago={'years': 1, 'months': 11}, period='month'), param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Portuguese dates param('ontem', ago={'days': 1}, period='day'), param('anteontem', ago={'days': 2}, period='day'), param('hoje', ago={'days': 0}, period='day'), param('uma hora atrás', ago={'hours': 1}, period='day'), param('1 segundo atrás', ago={'seconds': 1}, period='day'), param('um dia atrás', ago={'days': 1}, period='day'), param('uma semana atrás', ago={'weeks': 1}, period='week'), param('2 horas atrás', ago={'hours': 2}, period='day'), param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'), param('1 ano 2 meses', ago={'years': 1, 'months': 2}, period='month'), param('1 ano, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 ano 11 meses', ago={'years': 1, 'months': 11}, period='month'), param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # 
Turkish dates param('Dün', ago={'days': 1}, period='day'), param('Bugün', ago={'days': 0}, period='day'), param('2 saat önce', ago={'hours': 2}, period='day'), param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'), param('1 yıl 2 ay', ago={'years': 1, 'months': 2}, period='month'), param('1 yıl, 09 ay, 01 hafta', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 yıl 11 ay', ago={'years': 1, 'months': 11}, period='month'), param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Russian dates param('сегодня', ago={'days': 0}, period='day'), param('Вчера в', ago={'days': 1}, period='day'), param('вчера', ago={'days': 1}, period='day'), param('2 часа назад', ago={'hours': 2}, period='day'), param('час назад', ago={'hours': 1}, period='day'), param('минуту назад', ago={'minutes': 1}, period='day'), param('2 ч. 21 мин. назад', ago={'hours': 2, 'minutes': 21}, period='day'), param('около 23 часов назад', ago={'hours': 23}, period='day'), param('1 год 2 месяца', ago={'years': 1, 'months': 2}, period='month'), param('1 год, 09 месяцев, 01 недель', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 год 11 месяцев', ago={'years': 1, 'months': 11}, period='month'), param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Czech dates param('Dnes', ago={'days': 0}, period='day'), param('Včera', ago={'days': 1}, period='day'), param('Předevčírem', ago={'days': 2}, period='day'), param('Před 2 hodinami', ago={'hours': 2}, period='day'), param('před přibližně 23 hodin', ago={'hours': 23}, period='day'), param('1 rok 2 měsíce', ago={'years': 1, 'months': 2}, period='month'), param('1 rok, 09 měsíců, 01 týdnů', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 rok 11 měsíců', ago={'years': 1, 'months': 11}, 
period='month'), param('3 dny', ago={'days': 3}, period='day'), param('3 hodiny', ago={'hours': 3}, period='day'), param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Spanish dates param('anteayer', ago={'days': 2}, period='day'), param('ayer', ago={'days': 1}, period='day'), param('hoy', ago={'days': 0}, period='day'), param('hace una hora', ago={'hours': 1}, period='day'), param('Hace un día', ago={'days': 1}, period='day'), param('Hace una semana', ago={'weeks': 1}, period='week'), param('Hace 2 horas', ago={'hours': 2}, period='day'), param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'), param('1 año 2 meses', ago={'years': 1, 'months': 2}, period='month'), param('1 año, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 año 11 meses', ago={'years': 1, 'months': 11}, period='month'), param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Chinese dates param('昨天', ago={'days': 1}, period='day'), param('前天', ago={'days': 2}, period='day'), param('2小时前', ago={'hours': 2}, period='day'), param('约23小时前', ago={'hours': 23}, period='day'), param('1年2个月', ago={'years': 1, 'months': 2}, period='month'), param('1年09月,01周', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1年11个月', ago={'years': 1, 'months': 11}, period='month'), param('1年,1月,1周,1天,1小时,1分钟前', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Arabic dates param('اليوم', ago={'days': 0}, period='day'), param('يوم أمس', ago={'days': 1}, period='day'), param('منذ يومين', ago={'days': 2}, period='day'), param('منذ 3 أيام', ago={'days': 3}, period='day'), param('منذ 21 أيام', ago={'days': 21}, period='day'), param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة', ago={'years': 1, 
'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Thai dates param('วันนี้', ago={'days': 0}, period='day'), param('เมื่อวานนี้', ago={'days': 1}, period='day'), param('2 วัน', ago={'days': 2}, period='day'), param('2 ชั่วโมง', ago={'hours': 2}, period='day'), param('23 ชม.', ago={'hours': 23}, period='day'), param('2 สัปดาห์ 3 วัน', ago={'weeks': 2, 'days': 3}, period='day'), param('1 ปี 9 เดือน 1 สัปดาห์', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Vietnamese dates param('Hôm nay', ago={'days': 0}, period='day'), param('Hôm qua', ago={'days': 1}, period='day'), param('2 giờ', ago={'hours': 2}, period='day'), param('2 tuần 3 ngày', ago={'weeks': 2, 'days': 3}, period='day'), # following test unsupported, refer to discussion at: # http://github.com/scrapinghub/dateparser/issues/33 #param('1 năm 1 tháng 1 tuần 1 ngày 1 giờ 1 chút', # ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, # period='day'), # Belarusian dates param('сёння', ago={'days': 0}, period='day'), param('учора ў', ago={'days': 1}, period='day'), param('ўчора', ago={'days': 1}, period='day'), param('пазаўчора', ago={'days': 2}, period='day'), param('2 гадзіны таму назад', ago={'hours': 2}, period='day'), param('2 гадзіны таму', ago={'hours': 2}, period='day'), param('гадзіну назад', ago={'hours': 1}, period='day'), param('хвіліну таму', ago={'minutes': 1}, period='day'), param('2 гадзіны 21 хвіл. 
назад', ago={'hours': 2, 'minutes': 21}, period='day'), param('каля 23 гадзін назад', ago={'hours': 23}, period='day'), param('1 год 2 месяцы', ago={'years': 1, 'months': 2}, period='month'), param('1 год, 09 месяцаў, 01 тыдзень', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('2 гады 3 месяцы', ago={'years': 2, 'months': 3}, period='month'), param('5 гадоў, 1 месяц, 6 тыдняў, 3 дні, 5 гадзін 1 хвіліну і 3 секунды таму назад', ago={'years': 5, 'months': 1, 'weeks': 6, 'days': 3, 'hours': 5, 'minutes': 1, 'seconds': 3}, period='day'), # Polish dates param("wczoraj", ago={'days': 1}, period='day'), param("1 godz. 2 minuty temu", ago={'hours': 1, 'minutes': 2}, period='day'), param("2 lata, 3 miesiące, 1 tydzień, 2 dni, 4 godziny, 15 minut i 25 sekund temu", ago={'years': 2, 'months': 3, 'weeks': 1, 'days': 2, 'hours': 4, 'minutes': 15, 'seconds': 25}, period='day'), param("2 minuty temu", ago={'minutes': 2}, period='day'), param("15 minut temu", ago={'minutes': 15}, period='day'), ]) def test_relative_dates(self, date_string, ago, period): self.given_parser(settings={'NORMALIZE': False}) self.given_date_string(date_string) self.when_date_is_parsed() self.then_error_was_not_raised() self.then_date_was_parsed_by_freshness_parser() self.then_date_obj_is_exactly_this_time_ago(ago) self.then_period_is(period) @parameterized.expand([ # English dates param('yesterday', ago={'days': 1}, period='day'), param('the day before yesterday', ago={'days': 2}, period='day'), param('today', ago={'days': 0}, period='day'), param('an hour ago', ago={'hours': 1}, period='day'), param('about an hour ago', ago={'hours': 1}, period='day'), param('a day ago', ago={'days': 1}, period='day'), param('a week ago', ago={'weeks': 1}, period='week'), param('one week ago', ago={'weeks': 1}, period='week'), param('2 hours ago', ago={'hours': 2}, period='day'), param('about 23 hours ago', ago={'hours': 23}, period='day'), param('1 year 2 months', ago={'years': 1, 'months': 2}, 
period='month'), param('1 year, 09 months,01 weeks', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 year 11 months', ago={'years': 1, 'months': 11}, period='month'), param('1 year 12 months', ago={'years': 1, 'months': 12}, period='month'), param('15 hr', ago={'hours': 15}, period='day'), param('15 hrs', ago={'hours': 15}, period='day'), param('2 min', ago={'minutes': 2}, period='day'), param('2 mins', ago={'minutes': 2}, period='day'), param('3 sec', ago={'seconds': 3}, period='day'), param('1000 years ago', ago={'years': 1000}, period='year'), param('2013 years ago', ago={'years': 2013}, period='year'), # We've fixed .now in setUp param('5000 months ago', ago={'years': 416, 'months': 8}, period='month'), param('{} months ago'.format(2013 * 12 + 8), ago={'years': 2013, 'months': 8}, period='month'), param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), param('just now', ago={'seconds': 0}, period='day'), # French dates param("Aujourd'hui", ago={'days': 0}, period='day'), param("Aujourd’hui", ago={'days': 0}, period='day'), param("Aujourdʼhui", ago={'days': 0}, period='day'), param("Aujourdʻhui", ago={'days': 0}, period='day'), param("Aujourd՚hui", ago={'days': 0}, period='day'), param("Aujourdꞌhui", ago={'days': 0}, period='day'), param("Aujourd'hui", ago={'days': 0}, period='day'), param("Aujourd′hui", ago={'days': 0}, period='day'), param("Aujourd‵hui", ago={'days': 0}, period='day'), param("Aujourdʹhui", ago={'days': 0}, period='day'), param("Aujourd'hui", ago={'days': 0}, period='day'), param("Hier", ago={'days': 1}, period='day'), param("Avant-hier", ago={'days': 2}, period='day'), param('Il ya un jour', ago={'days': 1}, period='day'), param('Il ya une heure', ago={'hours': 1}, period='day'), param('Il ya 2 heures', ago={'hours': 2}, period='day'), param('Il ya environ 23 heures', ago={'hours': 23}, period='day'), param('1 an 2 mois', 
ago={'years': 1, 'months': 2}, period='month'), param('1 année, 09 mois, 01 semaines', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 an 11 mois', ago={'years': 1, 'months': 11}, period='month'), param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), param('Il y a 40 min', ago={'minutes': 40}, period='day'), # German dates param('Heute', ago={'days': 0}, period='day'), param('Gestern', ago={'days': 1}, period='day'), param('vorgestern', ago={'days': 2}, period='day'), param('vor einem Tag', ago={'days': 1}, period='day'), param('vor einer Stunden', ago={'hours': 1}, period='day'), param('Vor 2 Stunden', ago={'hours': 2}, period='day'), param('Vor 2 Stunden', ago={'hours': 2}, period='day'), param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'), param('1 Jahr 2 Monate', ago={'years': 1, 'months': 2}, period='month'), param('1 Jahr, 09 Monate, 01 Wochen', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 Jahr 11 Monate', ago={'years': 1, 'months': 11}, period='month'), param('vor 29h', ago={'hours': 29}, period='day'), param('vor 29m', ago={'minutes': 29}, period='day'), param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Italian dates param('oggi', ago={'days': 0}, period='day'), param('ieri', ago={'days': 1}, period='day'), param('2 ore fa', ago={'hours': 2}, period='day'), param('circa 23 ore fa', ago={'hours': 23}, period='day'), param('1 anno 2 mesi', ago={'years': 1, 'months': 2}, period='month'), param('1 anno, 09 mesi, 01 settimane', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 anno 11 mesi', ago={'years': 1, 'months': 11}, period='month'), param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 
'minutes': 1}, period='day'), # Portuguese dates param('ontem', ago={'days': 1}, period='day'), param('anteontem', ago={'days': 2}, period='day'), param('hoje', ago={'days': 0}, period='day'), param('uma hora atrás', ago={'hours': 1}, period='day'), param('1 segundo atrás', ago={'seconds': 1}, period='day'), param('um dia atrás', ago={'days': 1}, period='day'), param('uma semana atrás', ago={'weeks': 1}, period='week'), param('2 horas atrás', ago={'hours': 2}, period='day'), param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'), param('1 ano 2 meses', ago={'years': 1, 'months': 2}, period='month'), param('1 ano, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 ano 11 meses', ago={'years': 1, 'months': 11}, period='month'), param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Turkish dates param('Dün', ago={'days': 1}, period='day'), param('Bugün', ago={'days': 0}, period='day'), param('2 saat önce', ago={'hours': 2}, period='day'), param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'), param('1 yıl 2 ay', ago={'years': 1, 'months': 2}, period='month'), param('1 yıl, 09 ay, 01 hafta', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 yıl 11 ay', ago={'years': 1, 'months': 11}, period='month'), param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Russian dates param('сегодня', ago={'days': 0}, period='day'), param('Вчера в', ago={'days': 1}, period='day'), param('вчера', ago={'days': 1}, period='day'), param('2 часа назад', ago={'hours': 2}, period='day'), param('час назад', ago={'hours': 1}, period='day'), param('минуту назад', ago={'minutes': 1}, period='day'), param('2 ч. 21 мин. 
назад', ago={'hours': 2, 'minutes': 21}, period='day'), param('около 23 часов назад', ago={'hours': 23}, period='day'), param('1 год 2 месяца', ago={'years': 1, 'months': 2}, period='month'), param('1 год, 09 месяцев, 01 недель', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 год 11 месяцев', ago={'years': 1, 'months': 11}, period='month'), param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Czech dates param('Dnes', ago={'days': 0}, period='day'), param('Včera', ago={'days': 1}, period='day'), param('Předevčírem', ago={'days': 2}, period='day'), param('Před 2 hodinami', ago={'hours': 2}, period='day'), param('před přibližně 23 hodin', ago={'hours': 23}, period='day'), param('1 rok 2 měsíce', ago={'years': 1, 'months': 2}, period='month'), param('1 rok, 09 měsíců, 01 týdnů', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 rok 11 měsíců', ago={'years': 1, 'months': 11}, period='month'), param('3 dny', ago={'days': 3}, period='day'), param('3 hodiny', ago={'hours': 3}, period='day'), param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Spanish dates param('anteayer', ago={'days': 2}, period='day'), param('ayer', ago={'days': 1}, period='day'), param('hoy', ago={'days': 0}, period='day'), param('hace una hora', ago={'hours': 1}, period='day'), param('Hace un día', ago={'days': 1}, period='day'), param('Hace una semana', ago={'weeks': 1}, period='week'), param('Hace 2 horas', ago={'hours': 2}, period='day'), param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'), param('1 año 2 meses', ago={'years': 1, 'months': 2}, period='month'), param('1 año, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 año 11 meses', ago={'years': 1, 'months': 11}, period='month'), 
param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Chinese dates param('昨天', ago={'days': 1}, period='day'), param('前天', ago={'days': 2}, period='day'), param('2小时前', ago={'hours': 2}, period='day'), param('约23小时前', ago={'hours': 23}, period='day'), param('1年2个月', ago={'years': 1, 'months': 2}, period='month'), param('1年09月,01周', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1年11个月', ago={'years': 1, 'months': 11}, period='month'), param('1年,1月,1周,1天,1小时,1分钟前', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Arabic dates param('اليوم', ago={'days': 0}, period='day'), param('يوم أمس', ago={'days': 1}, period='day'), param('منذ يومين', ago={'days': 2}, period='day'), param('منذ 3 أيام', ago={'days': 3}, period='day'), param('منذ 21 أيام', ago={'days': 21}, period='day'), param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Thai dates param('วันนี้', ago={'days': 0}, period='day'), param('เมื่อวานนี้', ago={'days': 1}, period='day'), param('2 วัน', ago={'days': 2}, period='day'), param('2 ชั่วโมง', ago={'hours': 2}, period='day'), param('23 ชม.', ago={'hours': 23}, period='day'), param('2 สัปดาห์ 3 วัน', ago={'weeks': 2, 'days': 3}, period='day'), param('1 ปี 9 เดือน 1 สัปดาห์', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที', ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1}, period='day'), # Vietnamese dates param('Hôm nay', ago={'days': 0}, period='day'), param('Hôm qua', ago={'days': 1}, period='day'), param('2 tuần 3 ngày', ago={'weeks': 2, 'days': 3}, period='day'), # Belarusian dates param('сёння', ago={'days': 0}, period='day'), param('учора ў', ago={'days': 1}, period='day'), param('ўчора', 
ago={'days': 1}, period='day'), param('пазаўчора', ago={'days': 2}, period='day'), param('2 гадзіны таму назад', ago={'hours': 2}, period='day'), param('2 гадзіны таму', ago={'hours': 2}, period='day'), param('гадзіну назад', ago={'hours': 1}, period='day'), param('хвіліну таму', ago={'minutes': 1}, period='day'), param('2 гадзіны 21 хвіл. назад', ago={'hours': 2, 'minutes': 21}, period='day'), param('каля 23 гадзін назад', ago={'hours': 23}, period='day'), param('1 год 2 месяцы', ago={'years': 1, 'months': 2}, period='month'), param('1 год, 09 месяцаў, 01 тыдзень', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'), param('2 гады 3 месяцы', ago={'years': 2, 'months': 3}, period='month'), param('5 гадоў, 1 месяц, 6 тыдняў, 3 дні, 5 гадзін 1 хвіліну і 3 секунды таму назад', ago={'years': 5, 'months': 1, 'weeks': 6, 'days': 3, 'hours': 5, 'minutes': 1, 'seconds': 3}, period='day'), # Polish dates param("wczoraj", ago={'days': 1}, period='day'), param("1 godz. 2 minuty temu", ago={'hours': 1, 'minutes': 2}, period='day'), param("2 lata, 3 miesiące, 1 tydzień, 2 dni, 4 godziny, 15 minut i 25 sekund temu", ago={'years': 2, 'months': 3, 'weeks': 1, 'days': 2, 'hours': 4, 'minutes': 15, 'seconds': 25}, period='day'), param("2 minuty temu", ago={'minutes': 2}, period='day'), param("15 minut temu", ago={'minutes': 15}, period='day'), ]) def test_normalized_relative_dates(self, date_string, ago, period): date_string = normalize_unicode(date_string) self.given_parser(settings={'NORMALIZE': True}) self.given_date_string(date_string) self.when_date_is_parsed() self.then_error_was_not_raised() self.then_date_was_parsed_by_freshness_parser() self.then_date_obj_is_exactly_this_time_ago(ago) self.then_period_is(period) @parameterized.expand([ param('15th of Aug, 2014 Diane Bennett'), ]) def test_insane_dates(self, date_string): self.given_parser() self.given_date_string(date_string) self.when_date_is_parsed() self.then_error_was_not_raised() self.then_date_was_not_parsed() 
@parameterized.expand([ param('5000 years ago'), param('2014 years ago'), # We've fixed .now in setUp param('{} months ago'.format(2013 * 12 + 9)), ]) def test_dates_not_supported_by_date_time(self, date_string): self.given_parser() self.given_date_string(date_string) self.when_date_is_parsed() self.then_error_was_raised(ValueError, ['year is out of range', "('year must be in 1..9999'"]) @parameterized.expand([ param('несколько секунд назад', boundary={'seconds': 45}, period='day'), param('há alguns segundos', boundary={'seconds': 45}, period='day'), ]) def test_inexplicit_dates(self, date_string, boundary, period): self.given_parser() self.given_date_string(date_string) self.when_date_is_parsed() self.then_error_was_not_raised() self.then_date_was_parsed_by_freshness_parser() self.then_period_is(period) self.then_date_obj_is_between(self.now - timedelta(**boundary), self.now) @parameterized.expand([ param('Today at 9 pm', date(2014, 9, 1), time(21, 0)), param('Today at 11:20 am', date(2014, 9, 1), time(11, 20)), param('Yesterday 1:20 pm', date(2014, 8, 31), time(13, 20)), param('the day before yesterday 16:50', date(2014, 8, 30), time(16, 50)), param('2 Tage 18:50', date(2014, 8, 30), time(18, 50)), param('1 day ago at 2 PM', date(2014, 8, 31), time(14, 0)), param('Dnes v 12:40', date(2014, 9, 1), time(12, 40)), param('1 week ago at 12:00 am', date(2014, 8, 25), time(0, 0)), ]) def test_freshness_date_with_time(self, date_string, date, time): self.given_parser() self.given_date_string(date_string) self.when_date_is_parsed() self.then_date_is(date) self.then_time_is(time) @parameterized.expand([ param('2 hours ago', 'Asia/Karachi', date(2014, 9, 1), time(13, 30)), param('3 hours ago', 'Europe/Paris', date(2014, 9, 1), time(9, 30)), param('5 hours ago', 'US/Eastern', date(2014, 9, 1), time(1, 30)), # date in DST range param('Today at 9 pm', 'Asia/Karachi', date(2014, 9, 1), time(21, 0)), # time given, hence, no shift applies ]) def 
test_freshness_date_with_pytz_timezones(self, date_string, timezone, date, time): self.given_parser(settings={'TIMEZONE': timezone}) self.given_date_string(date_string) self.when_date_is_parsed() self.then_date_is(date) self.then_time_is(time) @parameterized.expand([ param('2 hours ago', 'PKT', date(2014, 9, 1), time(13, 30)), param('5 hours ago', 'EST', date(2014, 9, 1), time(0, 30)), param('3 hours ago', 'MET', date(2014, 9, 1), time(8, 30)), ]) def test_freshness_date_with_timezone_abbreviations(self, date_string, timezone, date, time): self.given_parser(settings={'TIMEZONE': timezone}) self.given_date_string(date_string) self.when_date_is_parsed() self.then_date_is(date) self.then_time_is(time) @parameterized.expand([ param('2 hours ago', '+05:00', date(2014, 9, 1), time(13, 30)), param('5 hours ago', '-05:00', date(2014, 9, 1), time(0, 30)), param('3 hours ago', '+01:00', date(2014, 9, 1), time(8, 30)), ]) def test_freshness_date_with_timezone_utc_offset(self, date_string, timezone, date, time): self.given_parser(settings={'TIMEZONE': timezone}) self.given_date_string(date_string) self.when_date_is_parsed() self.then_date_is(date) self.then_time_is(time) def given_date_string(self, date_string): self.date_string = date_string def given_parser(self, settings=None): def collecting_get_date_data(get_date_data): @wraps(get_date_data) def wrapped(*args, **kwargs): self.freshness_result = get_date_data(*args, **kwargs) return self.freshness_result return wrapped self.add_patch(patch.object(freshness_date_parser, 'get_date_data', collecting_get_date_data(freshness_date_parser.get_date_data))) self.freshness_parser = Mock(wraps=freshness_date_parser) self.add_patch(patch.object(self.freshness_parser, 'now', self.now)) dt_mock = Mock(wraps=dateparser.freshness_date_parser.datetime) dt_mock.utcnow = Mock(return_value=self.now) self.add_patch(patch('dateparser.freshness_date_parser.datetime', new=dt_mock)) self.add_patch(patch('dateparser.date.freshness_date_parser', 
new=self.freshness_parser)) self.parser = DateDataParser(settings=settings) def when_date_is_parsed(self): try: self.result = self.parser.get_date_data(self.date_string) except Exception as error: self.error = error def then_date_is(self, date): self.assertEqual(date, self.result['date_obj'].date()) def then_time_is(self, time): self.assertEqual(time, self.result['date_obj'].time()) def then_period_is(self, period): self.assertEqual(period, self.result['period']) def then_date_obj_is_between(self, low_boundary, high_boundary): self.assertGreater(self.result['date_obj'], low_boundary) self.assertLess(self.result['date_obj'], high_boundary) def then_date_obj_is_exactly_this_time_ago(self, ago): self.assertEqual(self.now - relativedelta(**ago), self.result['date_obj']) def then_date_was_not_parsed(self): self.assertIsNone(self.result['date_obj'], '"%s" should not be parsed' % self.date_string) def then_date_was_parsed_by_freshness_parser(self): self.assertEqual(self.result, self.freshness_result) def then_error_was_not_raised(self): self.assertEqual(NotImplemented, self.error)
class TestDateParser(BaseTestCase):
    """Tests for parsing absolute date strings with ``DateDataParser``.

    The tests are written in a given/when/then style: ``given_*`` helpers
    install patches (clock, local timezone offset, settings, parser),
    ``when_*`` helpers run the parse and ``then_*`` helpers assert on the
    stored outcome.
    """

    def setUp(self):
        """Reset per-test state; ``NotImplemented`` marks "not set yet"."""
        super(TestDateParser, self).setUp()
        self.parser = NotImplemented
        self.result = NotImplemented
        self.date_parser = NotImplemented
        self.date_result = NotImplemented

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
    ])
    def test_dates_parsing(self, date_string, expected):
        """Each string parses to ``expected`` with period ``'day'``.

        The clock is pinned to 2012-11-13 and the local offset to zero, so
        strings without a year or a date resolve deterministically.
        """
        self.given_utcnow(datetime(2012, 11, 13))  # Tuesday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 1, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        """Strings with timezone abbreviations, local offset patched to +1h."""
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        """Strings with numeric UTC offsets, local offset patched to zero."""
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        """An empty string makes ``DateParser().parse`` raise ``ValueError``."""
        self.when_date_is_parsed_by_date_parser('')
        self.then_error_was_raised(ValueError, ["Empty string"])

    @parameterized.expand([
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
        param('24h ago'),
    ])
    def test_dates_not_parsed(self, date_string):
        """Unparseable strings make ``DateParser().parse`` raise ``ValueError``."""
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, ["unknown string format"])

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        """Incomplete dates with ``PREFER_DATES_FROM`` patched to ``'past'``."""
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        """Incomplete dates with ``PREFER_DATES_FROM`` patched to ``'future'``."""
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        """Incomplete dates with ``PREFER_DATES_FROM`` = ``'current_period'``."""
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        """Month/year strings with ``PREFER_DAY_OF_MONTH`` = ``'current'``.

        The cases with ``today`` on the 31st expect the last day of a
        shorter target month.
        """
        self.given_configuration('PREFER_DAY_OF_MONTH', 'current')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        """Month/year strings with ``PREFER_DAY_OF_MONTH`` = ``'last'``."""
        self.given_configuration('PREFER_DAY_OF_MONTH', 'last')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        """Month/year strings with ``PREFER_DAY_OF_MONTH`` = ``'first'``."""
        self.given_configuration('PREFER_DAY_OF_MONTH', 'first')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        """An explicit day is kept for every ``PREFER_DAY_OF_MONTH`` value."""
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(datetime(2015, 2, 12))
        self.given_parser()
        self.when_date_is_parsed('24 April 2012')
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string):
        """A day number beyond the month's length raises ``ValueError``."""
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, ['Day not in range for month'])

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        """An ISO timestamp parses for each ``languages`` value in the table."""
        self.given_local_tz_offset(0)
        self.given_parser(languages=languages)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        """Both the parsed datetime and the reported ``period`` are checked."""
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    def given_utcnow(self, now):
        """Patch ``dateparser.date_parser.datetime`` so ``utcnow()`` returns ``now``.

        The replacement is a ``Mock`` wrapping the real ``datetime``, with
        only ``utcnow`` overridden.
        """
        datetime_mock = Mock(wraps=datetime)
        datetime_mock.utcnow = Mock(return_value=now)
        self.add_patch(patch('dateparser.date_parser.datetime', new=datetime_mock))

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset`` to ``offset`` hours."""
        self.add_patch(
            patch.object(dateparser.timezone_parser, 'local_tz_offset', new=timedelta(seconds=3600 * offset))
        )

    def given_parser(self, *args, **kwds):
        """Create ``self.parser`` and record what ``date_parser.parse`` returns.

        ``date_parser.parse`` is replaced with a wrapper that stores each
        return value in ``self.date_result``; ``dateparser.date.date_parser``
        is then replaced with a ``Mock`` wrapping ``date_parser``. Positional
        and keyword arguments are forwarded to ``DateDataParser``.
        """
        def collecting_get_date_data(parse):
            @wraps(parse)
            def wrapped(date_string):
                self.date_result = parse(date_string)
                return self.date_result
            return wrapped
        self.add_patch(patch.object(date_parser, 'parse', collecting_get_date_data(date_parser.parse)))
        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def given_configuration(self, key, value):
        """Patch attribute ``key`` of the ``settings`` object to ``value``."""
        self.add_patch(patch.object(settings, key, new=value))

    def when_date_is_parsed(self, date_string):
        """Parse via ``self.parser.get_date_data``; exceptions propagate."""
        self.result = self.parser.get_date_data(date_string)

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse via a fresh ``DateParser``, capturing any exception.

        The exception is stored in ``self.error``, which this class's
        ``setUp`` does not initialise.
        NOTE(review): presumably ``BaseTestCase`` initialises ``self.error``
        and provides ``then_error_was_raised`` -- confirm.
        """
        try:
            self.result = DateParser().parse(date_string)
        except Exception as error:
            self.error = error

    def then_period_is(self, period):
        """Assert that the result's ``'period'`` equals ``period``."""
        self.assertEqual(period, self.result['period'])

    def then_date_obj_exactly_is(self, expected):
        """Assert that the result's ``'date_obj'`` equals ``expected``."""
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_parsed_by_date_parser(self):
        """Assert the wrapped ``date_parser.parse`` produced the result.

        ``self.date_result`` stays ``NotImplemented`` unless the wrapper
        installed by ``given_parser`` was called.
        """
        self.assertNotEqual(NotImplemented, self.date_result, "Date was not parsed")
        self.assertEqual(self.result['date_obj'], self.date_result[0])
class TestFreshnessDateDataParser(BaseTestCase):
    """Tests for relative ("freshness") date strings such as "2 hours ago".

    The tests are written in a given/when/then style. ``setUp`` fixes the
    reference time ``self.now`` to 2014-09-01 10:30; ``given_parser``
    patches ``freshness_date_parser.now`` with it, so every expected value
    is computed relative to that instant.
    """

    def setUp(self):
        """Reset per-test state; ``NotImplemented`` marks "not set yet"."""
        super(TestFreshnessDateDataParser, self).setUp()
        self.now = datetime(2014, 9, 1, 10, 30)
        self.date_string = NotImplemented
        self.parser = NotImplemented
        self.result = NotImplemented
        self.freshness_parser = NotImplemented
        self.freshness_result = NotImplemented
        self.exception = NotImplemented
        self.date = NotImplemented
        self.time = NotImplemented

    @parameterized.expand([
        # English dates
        param('yesterday', ago={'days': 1}, period='day'),
        param('the day before yesterday', ago={'days': 2}, period='day'),
        param('today', ago={'days': 0}, period='day'),
        param('an hour ago', ago={'hours': 1}, period='day'),
        param('about an hour ago', ago={'hours': 1}, period='day'),
        param('a day ago', ago={'days': 1}, period='day'),
        param('a week ago', ago={'weeks': 1}, period='week'),
        param('one week ago', ago={'weeks': 1}, period='week'),
        param('2 hours ago', ago={'hours': 2}, period='day'),
        param('about 23 hours ago', ago={'hours': 23}, period='day'),
        param('1 year 2 months', ago={'years': 1, 'months': 2}, period='month'),
        param('1 year, 09 months,01 weeks', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 year 11 months', ago={'years': 1, 'months': 11}, period='month'),
        param('1 year 12 months', ago={'years': 1, 'months': 12}, period='month'),
        param('15 hr', ago={'hours': 15}, period='day'),
        param('15 hrs', ago={'hours': 15}, period='day'),
        param('2 min', ago={'minutes': 2}, period='day'),
        param('2 mins', ago={'minutes': 2}, period='day'),
        param('3 sec', ago={'seconds': 3}, period='day'),
        param('1000 years ago', ago={'years': 1000}, period='year'),
        param('2013 years ago', ago={'years': 2013}, period='year'),  # We've fixed .now in setUp
        param('5000 months ago', ago={'years': 416, 'months': 8}, period='month'),
        param('{} months ago'.format(2013 * 12 + 8), ago={'years': 2013, 'months': 8}, period='month'),
        param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # French dates
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Hier", ago={'days': 1}, period='day'),
        param("Avant-hier", ago={'days': 2}, period='day'),
        param('Il ya un jour', ago={'days': 1}, period='day'),
        param('Il ya une heure', ago={'hours': 1}, period='day'),
        param('Il ya 2 heures', ago={'hours': 2}, period='day'),
        param('Il ya environ 23 heures', ago={'hours': 23}, period='day'),
        param('1 an 2 mois', ago={'years': 1, 'months': 2}, period='month'),
        param('1 année, 09 mois, 01 semaines', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 an 11 mois', ago={'years': 1, 'months': 11}, period='month'),
        param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('Il y a 40 min', ago={'minutes': 40}, period='day'),
        # German dates
        param('Heute', ago={'days': 0}, period='day'),
        param('Gestern', ago={'days': 1}, period='day'),
        param('vorgestern', ago={'days': 2}, period='day'),
        param('vor einem Tag', ago={'days': 1}, period='day'),
        param('vor einer Stunden', ago={'hours': 1}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        # NOTE(review): identical to the previous case -- possibly a
        # lower-case variant was intended; confirm before removing.
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'),
        param('1 Jahr 2 Monate', ago={'years': 1, 'months': 2}, period='month'),
        param('1 Jahr, 09 Monate, 01 Wochen', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 Jahr 11 Monate', ago={'years': 1, 'months': 11}, period='month'),
        param('vor 29h', ago={'hours': 29}, period='day'),
        param('vor 29m', ago={'minutes': 29}, period='day'),
        param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Italian dates
        param('oggi', ago={'days': 0}, period='day'),
        param('ieri', ago={'days': 1}, period='day'),
        param('2 ore fa', ago={'hours': 2}, period='day'),
        param('circa 23 ore fa', ago={'hours': 23}, period='day'),
        param('1 anno 2 mesi', ago={'years': 1, 'months': 2}, period='month'),
        param('1 anno, 09 mesi, 01 settimane', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 anno 11 mesi', ago={'years': 1, 'months': 11}, period='month'),
        param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Portuguese dates
        param('ontem', ago={'days': 1}, period='day'),
        param('anteontem', ago={'days': 2}, period='day'),
        param('hoje', ago={'days': 0}, period='day'),
        param('uma hora atrás', ago={'hours': 1}, period='day'),
        param('um dia atrás', ago={'days': 1}, period='day'),
        param('uma semana atrás', ago={'weeks': 1}, period='week'),
        param('2 horas atrás', ago={'hours': 2}, period='day'),
        param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'),
        param('1 ano 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 ano, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 ano 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Turkish dates
        param('Dün', ago={'days': 1}, period='day'),
        param('2 saat önce', ago={'hours': 2}, period='day'),
        param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'),
        param('1 yıl 2 ay', ago={'years': 1, 'months': 2}, period='month'),
        param('1 yıl, 09 ay, 01 hafta', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 yıl 11 ay', ago={'years': 1, 'months': 11}, period='month'),
        param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Russian dates
        param('сегодня', ago={'days': 0}, period='day'),
        param('Вчера в', ago={'days': 1}, period='day'),
        param('вчера', ago={'days': 1}, period='day'),
        param('2 часа назад', ago={'hours': 2}, period='day'),
        param('час назад', ago={'hours': 1}, period='day'),
        param('минуту назад', ago={'minutes': 1}, period='day'),
        param('2 ч. 21 мин. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('около 23 часов назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяца', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцев, 01 недель', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 год 11 месяцев', ago={'years': 1, 'months': 11}, period='month'),
        param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Czech dates
        param('Před 2 hodinami', ago={'hours': 2}, period='day'),
        param('před přibližně 23 hodin', ago={'hours': 23}, period='day'),
        param('1 rok 2 měsíce', ago={'years': 1, 'months': 2}, period='month'),
        param('1 rok, 09 měsíců, 01 týdnů', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 rok 11 měsíců', ago={'years': 1, 'months': 11}, period='month'),
        param('3 dny', ago={'days': 3}, period='day'),
        param('3 hodiny', ago={'hours': 3}, period='day'),
        param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Spanish dates
        param('anteayer', ago={'days': 2}, period='day'),
        param('ayer', ago={'days': 1}, period='day'),
        param('hoy', ago={'days': 0}, period='day'),
        param('hace una hora', ago={'hours': 1}, period='day'),
        param('Hace un día', ago={'days': 1}, period='day'),
        param('Hace una semana', ago={'weeks': 1}, period='week'),
        param('Hace 2 horas', ago={'hours': 2}, period='day'),
        param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'),
        param('1 año 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 año, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 año 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Chinese dates
        param('昨天', ago={'days': 1}, period='day'),
        param('前天', ago={'days': 2}, period='day'),
        param('2小时前', ago={'hours': 2}, period='day'),
        param('约23小时前', ago={'hours': 23}, period='day'),
        param('1年2个月', ago={'years': 1, 'months': 2}, period='month'),
        param('1年09月,01周', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1年11个月', ago={'years': 1, 'months': 11}, period='month'),
        param('1年,1月,1周,1天,1小时,1分钟前',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Arabic dates
        param('اليوم', ago={'days': 0}, period='day'),
        param('يوم أمس', ago={'days': 1}, period='day'),
        param('منذ يومين', ago={'days': 2}, period='day'),
        param('منذ 3 أيام', ago={'days': 3}, period='day'),
        param('منذ 21 أيام', ago={'days': 21}, period='day'),
        param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Thai dates
        param('วันนี้', ago={'days': 0}, period='day'),
        param('เมื่อวานนี้', ago={'days': 1}, period='day'),
        param('2 วัน', ago={'days': 2}, period='day'),
        param('2 ชั่วโมง', ago={'hours': 2}, period='day'),
        param('23 ชม.', ago={'hours': 23}, period='day'),
        param('2 สัปดาห์ 3 วัน', ago={'weeks': 2, 'days': 3}, period='day'),
        param('1 ปี 9 เดือน 1 สัปดาห์', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        # Vietnamese dates
        param('Hôm nay', ago={'days': 0}, period='day'),
        param('Hôm qua', ago={'days': 1}, period='day'),
        param('2 giờ', ago={'hours': 2}, period='day'),
        param('2 tuần 3 ngày', ago={'weeks': 2, 'days': 3}, period='day'),
        # following test unsupported, refer to discussion at:
        # http://github.com/scrapinghub/dateparser/issues/33
        # param('1 năm 1 tháng 1 tuần 1 ngày 1 giờ 1 chút',
        #       ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
        #       period='day'),
    ])
    def test_relative_dates(self, date_string, ago, period):
        """Each string resolves to exactly ``self.now - relativedelta(**ago)``."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_date_obj_is_exactly_this_time_ago(ago)
        self.then_period_is(period)

    @parameterized.expand([
        param('15th of Aug, 2014 Diane Bennett'),
    ])
    def test_insane_dates(self, date_string):
        """A date followed by unrelated words yields no ``date_obj`` and no error."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('5000 years ago'),
        param('2014 years ago'),  # We've fixed .now in setUp
        param('{} months ago'.format(2013 * 12 + 9)),
    ])
    def test_dates_not_supported_by_date_time(self, date_string):
        """Offsets reaching before year 1 raise ``ValueError``."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_raised(ValueError, 'year is out of range')

    @parameterized.expand([
        param('несколько секунд назад', boundary={'seconds': 45}, period='day'),
        param('há alguns segundos', boundary={'seconds': 45}, period='day'),
    ])
    def test_inexplicit_dates(self, date_string, boundary, period):
        """"A few seconds ago" lands strictly within ``boundary`` before now."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_period_is(period)
        self.then_date_obj_is_between(self.now - timedelta(**boundary), self.now)

    @parameterized.expand([
        param('Today at 9 pm', date(2014, 9, 1), time(21, 0)),
        param('Today at 11:20 am', date(2014, 9, 1), time(11, 20)),
        param('Yesterday 1:20 pm', date(2014, 8, 31), time(13, 20)),
        param('the day before yesterday 16:50', date(2014, 8, 30), time(16, 50)),
        param('2 Tage 18:50', date(2014, 8, 30), time(18, 50)),
        param('1 day ago at 2 PM', date(2014, 8, 31), time(14, 0)),
    ])
    def test_freshness_date_with_time(self, date_string, date, time):
        """A relative day combined with an explicit clock time."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_is(date)
        self.then_time_is(time)

    def given_date_string(self, date_string):
        """Remember the string that ``when_date_is_parsed`` will parse."""
        self.date_string = date_string

    def given_parser(self):
        """Create ``self.parser`` with the clock pinned and results recorded.

        ``freshness_date_parser.now`` is patched to ``self.now``, and
        ``get_date_data`` is replaced with a wrapper that stores each return
        value in ``self.freshness_result``. ``dateparser.date.freshness_date_parser``
        is then replaced with a ``Mock`` wrapping the parser.
        """
        self.add_patch(patch.object(freshness_date_parser, 'now', self.now))

        def collecting_get_date_data(get_date_data):
            @wraps(get_date_data)
            def wrapped(date_string):
                self.freshness_result = get_date_data(date_string)
                return self.freshness_result
            return wrapped

        self.add_patch(patch.object(freshness_date_parser,
                                    'get_date_data',
                                    collecting_get_date_data(freshness_date_parser.get_date_data)))
        self.freshness_parser = Mock(wraps=freshness_date_parser)
        self.add_patch(patch('dateparser.date.freshness_date_parser', new=self.freshness_parser))
        self.parser = DateDataParser()

    def when_date_is_parsed(self):
        """Parse ``self.date_string``; any exception goes to ``self.exception``."""
        try:
            self.result = self.parser.get_date_data(self.date_string)
        except Exception as error:
            self.exception = error

    def then_date_is(self, date):
        """Assert the date part of the result's ``'date_obj'``."""
        self.assertEqual(date, self.result['date_obj'].date())

    def then_time_is(self, time):
        """Assert the time part of the result's ``'date_obj'``."""
        self.assertEqual(time, self.result['date_obj'].time())

    def then_period_is(self, period):
        """Assert that the result's ``'period'`` equals ``period``."""
        self.assertEqual(period, self.result['period'])

    def then_date_obj_is_between(self, low_boundary, high_boundary):
        """Assert ``low_boundary < date_obj < high_boundary`` (both strict)."""
        self.assertGreater(self.result['date_obj'], low_boundary)
        self.assertLess(self.result['date_obj'], high_boundary)

    def then_date_obj_is_exactly_this_time_ago(self, ago):
        """Assert ``date_obj == self.now - relativedelta(**ago)``."""
        self.assertEqual(self.now - relativedelta(**ago), self.result['date_obj'])

    def then_date_was_not_parsed(self):
        """Assert that the result's ``'date_obj'`` is ``None``."""
        self.assertIsNone(self.result['date_obj'], '"%s" should not be parsed' % self.date_string)

    def then_date_was_parsed_by_freshness_parser(self):
        """Assert the result is what the wrapped freshness parser returned."""
        self.assertEqual(self.result, self.freshness_result)

    def then_error_was_not_raised(self):
        """Assert that ``when_date_is_parsed`` captured no exception."""
        self.assertEqual(NotImplemented, self.exception)

    def then_error_was_raised(self, error_cls, expected_regexp=None):
        """Assert an ``error_cls`` exception was captured and matches a pattern.

        :param error_cls: expected exception class.
        :param expected_regexp: optional pattern string or compiled regex
            searched for in ``str(self.exception)``; skipped when ``None``.
        :raises self.failureException: when the message does not match.
        """
        self.assertIsInstance(self.exception, error_cls)
        if expected_regexp is None:
            return
        # Duck-type the pattern instead of testing against ``basestring``:
        # that builtin does not exist on Python 3, so the old check raised
        # NameError there. Anything without ``.search`` gets compiled.
        if not hasattr(expected_regexp, 'search'):
            expected_regexp = re.compile(expected_regexp)
        if not expected_regexp.search(str(self.exception)):
            raise self.failureException('"%s" does not match "%s"' %
                                        (expected_regexp.pattern, str(self.exception)))
def _parse_date(self, string):
    """Parse *string* with a fresh ``DateDataParser`` and return its date part.

    Raises:
        RuntimeError: if the parser reports no ``date_obj`` for *string*.
    """
    parsed = DateDataParser().get_date_data(string)['date_obj']
    if parsed is not None:
        # Only the calendar date is returned; the time component is dropped.
        return parsed.date()
    raise RuntimeError('Unable to parse date: {!r}'.format(string))
def adapt(self, text, htmlpage=None):
    """Return the ``date_obj`` parsed from *text*, or ``None`` on ``ValueError``.

    ``htmlpage`` is accepted but not used by this method.
    """
    try:
        parsed = DateDataParser().get_date_data(text)['date_obj']
    except ValueError:
        # Parsing problems are reported to the caller as "no value".
        parsed = None
    return parsed
def date_to_datetime_range(text, relative_base=None, prefer_dates_from='past'):
    """Convert a textual date expression into a ``[start, end]`` datetime pair.

    The following forms are tried in order:

    1. relative ranges (text matched by ``relative_duration_range_re``),
       which yield ``[parsed_start, relative_base]``;
    2. ordinal expressions such as "the second month of 2006"
       (``ordinal_date_re``), for the units ``day``, ``week`` and ``month``;
    3. decades such as "1950s", which yield a ten-year range;
    4. anything else, via ``DateDataParser``; the range width follows the
       period the parser reports (``day``, ``month`` or ``year``).

    Args:
        text: The date expression to interpret.
        relative_base: Datetime that relative expressions are resolved
            against. Defaults to ``doc_date`` from the surrounding scope.
        prefer_dates_from: Value passed through as dateparser's
            ``PREFER_DATES_FROM`` setting.

    Returns:
        A two-element list ``[start, end]`` of datetimes, or ``None`` when the
        text cannot be interpreted (including ordinals that are out of range
        for their unit, and parser periods other than day/month/year).

    Raises:
        Exception: if ``ordinal_date_re`` matches a unit other than
            ``day``, ``week`` or ``month``.
    """
    if relative_base is None:
        relative_base = doc_date
    # Handle relative date ranges like "the past ___ days"
    relative_num_days = re.sub(relative_duration_range_re, "", text)
    if len(relative_num_days) < len(text):
        # The substitution removed something, so the text was a relative
        # range; parse what is left to find where the range starts.
        num_days_datetime_range = date_to_datetime_range(
            relative_num_days)
        if not num_days_datetime_range:
            return None
        return [num_days_datetime_range[0], relative_base]
    text = clean_date_str(text)
    if len(text) < 3:
        return None
    # Handle ordinal dates like "the second month of 2006"
    match = ordinal_date_re.match(text)
    if match:
        match_dict = match.groupdict()
        if match_dict['ordinal']:
            ordinal_number = ORDINALS.index(match_dict['ordinal']) + 1
        else:
            ordinal_number = int(match_dict['ordinal_number'])
        unit = match_dict['unit']
        rest = match_dict['rest']
        if unit == 'day':
            return date_to_datetime_range(
                str(ordinal_number) + " " + rest)
        elif unit == 'week':
            if ordinal_number > 4:
                return
            parsed_remainder = date_to_datetime_range("1 " + rest)
            if not parsed_remainder:
                return
            week_start = parsed_remainder[0]
            week_start = date_to_datetime_range(
                "Sunday",
                # A day is added because if the base date is on Sunday
                # the prior sunday will be used.
                relative_base=week_start + relativedelta(days=1))[0]
            # Step forward one Sunday at a time to reach the requested week.
            for _ in range(ordinal_number - 1):
                week_start = date_to_datetime_range(
                    "Sunday",
                    relative_base=week_start + relativedelta(days=1),
                    prefer_dates_from='future')[0]
            return [week_start, week_start + relativedelta(days=7)]
        elif unit == 'month':
            # datetime() raises ValueError for months outside 1..12, so an
            # expression like "the 13th month of 2006" is treated as
            # unparseable, mirroring the out-of-range guard for weeks.
            if not 1 <= ordinal_number <= 12:
                return None
            month_name = datetime.datetime(2000,
                                           ordinal_number,
                                           1).strftime("%B ")
            return date_to_datetime_range(month_name + rest)
        else:
            raise Exception("Unknown time unit: " + unit)
    # handle dates like "1950s" since dateparser doesn't
    decade_match = re.match(r"(\d{4})s", text)
    if decade_match:
        decade = int(decade_match.groups()[0])
        return [
            datetime.datetime(decade, 1, 1),
            datetime.datetime(decade + 10, 1, 1)
        ]
    parser = DateDataParser(
        ['en'],
        settings={
            'RELATIVE_BASE': relative_base or datetime.datetime.now(),
            'PREFER_DATES_FROM': prefer_dates_from
        })
    try:
        # A trailing " year" is stripped before the text reaches the parser.
        text = re.sub(r" year$", "", text)
        date_data = parser.get_date_data(text)
    except (TypeError, ValueError):
        return
    if date_data['date_obj']:
        date = date_data['date_obj']
        if date_data['period'] == 'day':
            return [date, date + relativedelta(days=1)]
        elif date_data['period'] == 'month':
            # Snap to the first day of the month so the range covers all of it.
            date = datetime.datetime(date.year, date.month, 1)
            return [date, date + relativedelta(months=1)]
        elif date_data['period'] == 'year':
            # Snap to January 1st so the range covers the whole year.
            date = datetime.datetime(date.year, 1, 1)
            return [date, date + relativedelta(years=1)]
class TestDateParser(BaseTestCase):
    """Tests for absolute-date parsing through ``DateDataParser``.

    The tests follow a given/when/then layout: ``given_*`` helpers install
    patches and build the parser, ``when_*`` helpers run the parse, and
    ``then_*`` helpers assert on ``self.result`` / ``self.date_result``.
    """

    def setUp(self):
        """Reset per-test state to ``NotImplemented`` sentinels.

        ``then_date_was_parsed_by_date_parser`` compares ``self.date_result``
        against this sentinel to detect that ``date_parser.parse`` never ran.
        """
        super(TestDateParser, self).setUp()
        self.parser = NotImplemented
        self.result = NotImplemented
        self.date_parser = NotImplemented
        self.date_result = NotImplemented

    # Absolute dates in many languages, parsed with NORMALIZE disabled and a
    # fixed RELATIVE_BASE of 2012-11-13; every case must report period "day".
    @parameterized.expand(
        [
            # English dates
            param("[Sept] 04, 2014.", datetime(2014, 9, 4)),
            param("Tuesday Jul 22, 2014", datetime(2014, 7, 22)),
            param("10:04am EDT", datetime(2012, 11, 13, 14, 4)),
            param("Friday", datetime(2012, 11, 9)),
            param("November 19, 2014 at noon", datetime(2014, 11, 19, 12, 0)),
            param("December 13, 2014 at midnight", datetime(2014, 12, 13, 0, 0)),
            param("Nov 25 2014 10:17 pm EST", datetime(2014, 11, 26, 3, 17)),
            param("Wed Aug 05 12:00:00 EDT 2015", datetime(2015, 8, 5, 16, 0)),
            param("April 9, 2013 at 6:11 a.m.", datetime(2013, 4, 9, 6, 11)),
            param("Aug. 9, 2012 at 2:57 p.m.", datetime(2012, 8, 9, 14, 57)),
            param("December 10, 2014, 11:02:21 pm", datetime(2014, 12, 10, 23, 2, 21)),
            param("8:25 a.m. Dec. 12, 2014", datetime(2014, 12, 12, 8, 25)),
            param("2:21 p.m., December 11, 2014", datetime(2014, 12, 11, 14, 21)),
            param("Fri, 12 Dec 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50)),
            param("20 Mar 2013 10h11", datetime(2013, 3, 20, 10, 11)),
            param("10:06am Dec 11, 2014", datetime(2014, 12, 11, 10, 6)),
            param("19 February 2013 year 09:10", datetime(2013, 2, 19, 9, 10)),
            # French dates
            param("11 Mai 2014", datetime(2014, 5, 11)),
            param("dimanche, 11 Mai 2014", datetime(2014, 5, 11)),
            param("22 janvier 2015 à 14h40", datetime(2015, 1, 22, 14, 40)),
            param("Dimanche 1er Février à 21:24", datetime(2012, 2, 1, 21, 24)),
            param("vendredi, décembre 5 2014.", datetime(2014, 12, 5, 0, 0)),
            param("le 08 Déc 2014 15:11", datetime(2014, 12, 8, 15, 11)),
            param("Le 11 Décembre 2014 à 09:00", datetime(2014, 12, 11, 9, 0)),
            param("fév 15, 2013", datetime(2013, 2, 15, 0, 0)),
            param("Jeu 15:12", datetime(2012, 11, 8, 15, 12)),
            # Spanish dates
            param("Martes 21 de Octubre de 2014", datetime(2014, 10, 21)),
            param("Miércoles 20 de Noviembre de 2013", datetime(2013, 11, 20)),
            param("12 de junio del 2012", datetime(2012, 6, 12)),
            param("13 Ago, 2014", datetime(2014, 8, 13)),
            param("13 Septiembre, 2014", datetime(2014, 9, 13)),
            param("11 Marzo, 2014", datetime(2014, 3, 11)),
            param("julio 5, 2015 en 1:04 pm", datetime(2015, 7, 5, 13, 4)),
            param("Vi 17:15", datetime(2012, 11, 9, 17, 15)),
            # Dutch dates
            param("11 augustus 2014", datetime(2014, 8, 11)),
            param("14 januari 2014", datetime(2014, 1, 14)),
            param("vr jan 24, 2014 12:49", datetime(2014, 1, 24, 12, 49)),
            # Italian dates
            param("16 giu 2014", datetime(2014, 6, 16)),
            param("26 gennaio 2014", datetime(2014, 1, 26)),
            param("Ven 18:23", datetime(2012, 11, 9, 18, 23)),
            # Portuguese dates
            param("sexta-feira, 10 de junho de 2014 14:52", datetime(2014, 6, 10, 14, 52)),
            param("13 Setembro, 2014", datetime(2014, 9, 13)),
            param("Sab 3:03", datetime(2012, 11, 10, 3, 3)),
            # Russian dates
            param("10 мая", datetime(2012, 5, 10)),  # forum.codenet.ru
            param("26 апреля", datetime(2012, 4, 26)),
            param("20 ноября 2013", datetime(2013, 11, 20)),
            param("28 октября 2014 в 07:54", datetime(2014, 10, 28, 7, 54)),
            param("13 января 2015 г. в 13:34", datetime(2015, 1, 13, 13, 34)),
            param("09 августа 2012", datetime(2012, 8, 9, 0, 0)),
            param("Авг 26, 2015 15:12", datetime(2015, 8, 26, 15, 12)),
            param("2 Декабрь 95 11:15", datetime(1995, 12, 2, 11, 15)),
            param("13 янв. 2005 19:13", datetime(2005, 1, 13, 19, 13)),
            param("13 авг. 2005 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005г. 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005 г. 19:13", datetime(2005, 8, 13, 19, 13)),
            # Turkish dates
            param("11 Ağustos, 2014", datetime(2014, 8, 11)),
            param("08.Haziran.2014, 11:07", datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
            param("17.Şubat.2014, 17:51", datetime(2014, 2, 17, 17, 51)),
            param("14-Aralık-2012, 20:56", datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
            # Romanian dates
            param("13 iunie 2013", datetime(2013, 6, 13)),
            param("14 aprilie 2014", datetime(2014, 4, 14)),
            param("18 martie 2012", datetime(2012, 3, 18)),
            param("S 14:14", datetime(2012, 11, 10, 14, 14)),
            param("12-Iun-2013", datetime(2013, 6, 12)),
            # German dates
            param("21. Dezember 2013", datetime(2013, 12, 21)),
            param("19. Februar 2012", datetime(2012, 2, 19)),
            param("26. Juli 2014", datetime(2014, 7, 26)),
            param("18.10.14 um 22:56 Uhr", datetime(2014, 10, 18, 22, 56)),
            param("12-Mär-2014", datetime(2014, 3, 12)),
            param("Mit 13:14", datetime(2012, 11, 7, 13, 14)),
            # Czech dates
            param("pon 16. čer 2014 10:07:43", datetime(2014, 6, 16, 10, 7, 43)),
            param("13 Srpen, 2014", datetime(2014, 8, 13)),
            param("čtv 14. lis 2013 12:38:43", datetime(2013, 11, 14, 12, 38, 43)),
            # Thai dates
            param("ธันวาคม 11, 2014, 08:55:08 PM", datetime(2014, 12, 11, 20, 55, 8)),
            param("22 พฤษภาคม 2012, 22:12", datetime(2012, 5, 22, 22, 12)),
            param("11 กุมภา 2020, 8:13 AM", datetime(2020, 2, 11, 8, 13)),
            param("1 เดือนตุลาคม 2005, 1:00 AM", datetime(2005, 10, 1, 1, 0)),
            param("11 ก.พ. 2020, 1:13 pm", datetime(2020, 2, 11, 13, 13)),
            # Vietnamese dates
            param("Thứ năm", datetime(2012, 11, 8)),  # Thursday
            param("Thứ sáu", datetime(2012, 11, 9)),  # Friday
            param("Tháng Mười Hai 29, 2013, 14:14", datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
            param("05 Tháng một 2015 - 03:54 AM", datetime(2015, 1, 5, 3, 54)),
            # Belarusian dates
            param("11 траўня", datetime(2012, 5, 11)),
            param("4 мая", datetime(2012, 5, 4)),
            param("Чацвер 06 жніўня 2015", datetime(2015, 8, 6)),
            param("Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін", datetime(2015, 3, 14, 7, 10)),
            param("5 жніўня 2015 года у 13:34", datetime(2015, 8, 5, 13, 34)),
            # Ukrainian dates
            param("2015-кві-12", datetime(2015, 4, 12)),
            param("21 чер 2013 3:13", datetime(2013, 6, 21, 3, 13)),
            param("12 лютого 2012, 13:12:23", datetime(2012, 2, 12, 13, 12, 23)),
            param("вів о 14:04", datetime(2012, 11, 6, 14, 4)),
            # Tagalog dates
            param("12 Hulyo 2003 13:01", datetime(2003, 7, 12, 13, 1)),
            param("1978, 1 Peb, 7:05 PM", datetime(1978, 2, 1, 19, 5)),
            param("2 hun", datetime(2012, 6, 2)),
            param("Lin 16:16", datetime(2012, 11, 11, 16, 16)),
            # Japanese dates
            param("2016年3月20日(日) 21時40分", datetime(2016, 3, 20, 21, 40)),
            param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
            # Numeric dates
            param("06-17-2014", datetime(2014, 6, 17)),
            param("13/03/2014", datetime(2014, 3, 13)),
            param("11. 12. 2014, 08:45:39", datetime(2014, 11, 12, 8, 45, 39)),
            # Miscellaneous dates
            param("1 Ni 2015", datetime(2015, 4, 1, 0, 0)),
            param("1 Mar 2015", datetime(2015, 3, 1, 0, 0)),
            param("1 Paz 2015", datetime(2015, 10, 1, 0, 0)),
            param("1 сер 2015", datetime(2015, 8, 1, 0, 0)),
            param("2016020417:10", datetime(2016, 2, 4, 17, 10)),
            # Chinese dates
            param("2015年04月08日10:05", datetime(2015, 4, 8, 10, 5)),
            param("2012年12月20日10:35", datetime(2012, 12, 20, 10, 35)),
            param("2016年 2月 5日", datetime(2016, 2, 5, 0, 0)),
        ]
    )
    def test_dates_parsing(self, date_string, expected):
        self.given_local_tz_offset(0)
        self.given_parser(settings={"NORMALIZE": False, "RELATIVE_BASE": datetime(2012, 11, 13)})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    # Round trip: str() of the datetime parsed from "today" must parse back
    # to the same value (including microseconds) as the RELATIVE_BASE.
    def test_stringified_datetime_should_parse_fine(self):
        expected_date = datetime(2012, 11, 13, 10, 15, 5, 330256)
        self.given_parser(settings={"RELATIVE_BASE": expected_date})
        date_string = str(self.parser.get_date_data("today")["date_obj"])
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected_date)

    # Same style of fixture table as test_dates_parsing, but with NORMALIZE
    # enabled and the input passed through normalize_unicode() first.
    @parameterized.expand(
        [
            # English dates
            param("[Sept] 04, 2014.", datetime(2014, 9, 4)),
            param("Tuesday Jul 22, 2014", datetime(2014, 7, 22)),
            param("10:04am EDT", datetime(2012, 11, 13, 14, 4)),
            param("Friday", datetime(2012, 11, 9)),
            param("November 19, 2014 at noon", datetime(2014, 11, 19, 12, 0)),
            param("December 13, 2014 at midnight", datetime(2014, 12, 13, 0, 0)),
            param("Nov 25 2014 10:17 pm EST", datetime(2014, 11, 26, 3, 17)),
            param("Wed Aug 05 12:00:00 EDT 2015", datetime(2015, 8, 5, 16, 0)),
            param("April 9, 2013 at 6:11 a.m.", datetime(2013, 4, 9, 6, 11)),
            param("Aug. 9, 2012 at 2:57 p.m.", datetime(2012, 8, 9, 14, 57)),
            param("December 10, 2014, 11:02:21 pm", datetime(2014, 12, 10, 23, 2, 21)),
            param("8:25 a.m. Dec. 12, 2014", datetime(2014, 12, 12, 8, 25)),
            param("2:21 p.m., December 11, 2014", datetime(2014, 12, 11, 14, 21)),
            param("Fri, 12 Dec 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50)),
            param("20 Mar 2013 10h11", datetime(2013, 3, 20, 10, 11)),
            param("10:06am Dec 11, 2014", datetime(2014, 12, 11, 10, 6)),
            param("19 February 2013 year 09:10", datetime(2013, 2, 19, 9, 10)),
            # French dates
            param("11 Mai 2014", datetime(2014, 5, 11)),
            param("dimanche, 11 Mai 2014", datetime(2014, 5, 11)),
            param("22 janvier 2015 à 14h40", datetime(2015, 1, 22, 14, 40)),  # wrong
            param("Dimanche 1er Février à 21:24", datetime(2012, 2, 1, 21, 24)),
            param("vendredi, décembre 5 2014.", datetime(2014, 12, 5, 0, 0)),
            param("le 08 Déc 2014 15:11", datetime(2014, 12, 8, 15, 11)),
            param("Le 11 Décembre 2014 à 09:00", datetime(2014, 12, 11, 9, 0)),
            param("fév 15, 2013", datetime(2013, 2, 15, 0, 0)),
            param("Jeu 15:12", datetime(2012, 11, 8, 15, 12)),
            # Spanish dates
            param("Martes 21 de Octubre de 2014", datetime(2014, 10, 21)),
            param("Miércoles 20 de Noviembre de 2013", datetime(2013, 11, 20)),
            param("12 de junio del 2012", datetime(2012, 6, 12)),
            param("13 Ago, 2014", datetime(2014, 8, 13)),
            param("13 Septiembre, 2014", datetime(2014, 9, 13)),
            param("11 Marzo, 2014", datetime(2014, 3, 11)),
            param("julio 5, 2015 en 1:04 pm", datetime(2015, 7, 5, 13, 4)),
            param("Vi 17:15", datetime(2012, 11, 9, 17, 15)),
            # Dutch dates
            param("11 augustus 2014", datetime(2014, 8, 11)),
            param("14 januari 2014", datetime(2014, 1, 14)),
            param("vr jan 24, 2014 12:49", datetime(2014, 1, 24, 12, 49)),
            # Italian dates
            param("16 giu 2014", datetime(2014, 6, 16)),
            param("26 gennaio 2014", datetime(2014, 1, 26)),
            param("Ven 18:23", datetime(2012, 11, 9, 18, 23)),
            # Portuguese dates
            param("sexta-feira, 10 de junho de 2014 14:52", datetime(2014, 6, 10, 14, 52)),
            param("13 Setembro, 2014", datetime(2014, 9, 13)),
            param("Sab 3:03", datetime(2012, 11, 10, 3, 3)),
            # Russian dates
            param("10 мая", datetime(2012, 5, 10)),  # forum.codenet.ru
            param("26 апреля", datetime(2012, 4, 26)),
            param("20 ноября 2013", datetime(2013, 11, 20)),
            param("28 октября 2014 в 07:54", datetime(2014, 10, 28, 7, 54)),
            param("13 января 2015 г. в 13:34", datetime(2015, 1, 13, 13, 34)),
            param("09 августа 2012", datetime(2012, 8, 9, 0, 0)),
            param("Авг 26, 2015 15:12", datetime(2015, 8, 26, 15, 12)),
            param("2 Декабрь 95 11:15", datetime(1995, 12, 2, 11, 15)),
            param("13 янв. 2005 19:13", datetime(2005, 1, 13, 19, 13)),
            param("13 авг. 2005 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005г. 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005 г. 19:13", datetime(2005, 8, 13, 19, 13)),
            # Turkish dates
            param("11 Ağustos, 2014", datetime(2014, 8, 11)),
            param("08.Haziran.2014, 11:07", datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
            param("17.Şubat.2014, 17:51", datetime(2014, 2, 17, 17, 51)),
            param("14-Aralık-2012, 20:56", datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
            # Romanian dates
            param("13 iunie 2013", datetime(2013, 6, 13)),
            param("14 aprilie 2014", datetime(2014, 4, 14)),
            param("18 martie 2012", datetime(2012, 3, 18)),
            param("S 14:14", datetime(2012, 11, 10, 14, 14)),
            param("12-Iun-2013", datetime(2013, 6, 12)),
            # German dates
            param("21. Dezember 2013", datetime(2013, 12, 21)),
            param("19. Februar 2012", datetime(2012, 2, 19)),
            param("26. Juli 2014", datetime(2014, 7, 26)),
            param("18.10.14 um 22:56 Uhr", datetime(2014, 10, 18, 22, 56)),
            param("12-Mär-2014", datetime(2014, 3, 12)),
            param("Mit 13:14", datetime(2012, 11, 7, 13, 14)),
            # Czech dates
            param("pon 16. čer 2014 10:07:43", datetime(2014, 6, 16, 10, 7, 43)),
            param("13 Srpen, 2014", datetime(2014, 8, 13)),
            param("čtv 14. lis 2013 12:38:43", datetime(2013, 11, 14, 12, 38, 43)),
            # Thai dates
            param("ธันวาคม 11, 2014, 08:55:08 PM", datetime(2014, 12, 11, 20, 55, 8)),
            param("22 พฤษภาคม 2012, 22:12", datetime(2012, 5, 22, 22, 12)),
            param("11 กุมภา 2020, 8:13 AM", datetime(2020, 2, 11, 8, 13)),
            param("1 เดือนตุลาคม 2005, 1:00 AM", datetime(2005, 10, 1, 1, 0)),
            param("11 ก.พ. 2020, 1:13 pm", datetime(2020, 2, 11, 13, 13)),
            # Vietnamese dates
            param("Thứ năm", datetime(2012, 11, 8)),  # Thursday
            param("Thứ sáu", datetime(2012, 11, 9)),  # Friday
            param("Tháng Mười Hai 29, 2013, 14:14", datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
            param("05 Tháng một 2015 - 03:54 AM", datetime(2015, 1, 5, 3, 54)),
            # Belarusian dates
            param("11 траўня", datetime(2012, 5, 11)),
            param("4 мая", datetime(2012, 5, 4)),
            param("Чацвер 06 жніўня 2015", datetime(2015, 8, 6)),
            param("Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін", datetime(2015, 3, 14, 7, 10)),
            param("5 жніўня 2015 года у 13:34", datetime(2015, 8, 5, 13, 34)),
            # Ukrainian dates
            param("2015-кві-12", datetime(2015, 4, 12)),
            param("21 чер 2013 3:13", datetime(2013, 6, 21, 3, 13)),
            param("12 лютого 2012, 13:12:23", datetime(2012, 2, 12, 13, 12, 23)),
            param("вів о 14:04", datetime(2012, 11, 6, 14, 4)),
            # Filipino dates
            param("12 Hulyo 2003 13:01", datetime(2003, 7, 12, 13, 1)),
            param("1978, 1 Peb, 7:05 PM", datetime(1978, 2, 1, 19, 5)),
            param("2 hun", datetime(2012, 6, 2)),
            param("Lin 16:16", datetime(2012, 11, 11, 16, 16)),
            # Japanese dates
            param("2016年3月20日(日) 21時40分", datetime(2016, 3, 20, 21, 40)),
            param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
            # Numeric dates
            param("06-17-2014", datetime(2014, 6, 17)),
            param("13/03/2014", datetime(2014, 3, 13)),
            param("11. 12. 2014, 08:45:39", datetime(2014, 11, 12, 8, 45, 39)),
            # Miscellaneous dates
            param("1 Ni 2015", datetime(2015, 4, 1, 0, 0)),
            param("1 Mar 2015", datetime(2015, 3, 1, 0, 0)),
            param("1 Paz 2015", datetime(2015, 10, 1, 0, 0)),
            param("1 сер 2015", datetime(2015, 8, 1, 0, 0)),
        ]
    )
    def test_dates_parsing_with_normalization(self, date_string, expected):
        self.given_local_tz_offset(0)
        self.given_parser(settings={"NORMALIZE": True, "RELATIVE_BASE": datetime(2012, 11, 13)})
        self.when_date_is_parsed(normalize_unicode(date_string))
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    # Inputs carrying a named time zone, parsed with the local offset patched
    # to +1 hour. The expected values equal the input wall-clock time shifted
    # by the named zone's UTC offset (e.g. 4:32 pm EDT -> 20:32).
    @parameterized.expand(
        [
            param("Sep 03 2014 | 4:32 pm EDT", datetime(2014, 9, 3, 20, 32)),
            param("17th October, 2034 @ 01:08 am PDT", datetime(2034, 10, 17, 8, 8)),
            param("15 May 2004 23:24 EDT", datetime(2004, 5, 16, 3, 24)),
            param("15 May 2004", datetime(2004, 5, 15, 0, 0)),
            param("08/17/14 17:00 (PDT)", datetime(2014, 8, 18, 0, 0)),
        ]
    )
    def test_parsing_with_time_zones(self, date_string, expected):
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    # Inputs carrying a numeric UTC offset; the expected values have the
    # offset applied (e.g. 16:10 -0400 -> 20:10).
    @parameterized.expand(
        [
            param("15 May 2004 16:10 -0400", datetime(2004, 5, 15, 20, 10)),
            param("1999-12-31 19:00:00 -0500", datetime(2000, 1, 1, 0, 0)),
            param("1999-12-31 19:00:00 +0500", datetime(1999, 12, 31, 14, 0)),
            param("Fri, 09 Sep 2005 13:51:39 -0700", datetime(2005, 9, 9, 20, 51, 39)),
            param("Fri, 09 Sep 2005 13:51:39 +0000", datetime(2005, 9, 9, 13, 51, 39)),
        ]
    )
    def test_parsing_with_utc_offsets(self, date_string, expected):
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    # An empty string given directly to DateParser must raise ValueError.
    def test_empty_dates_string_is_not_parsed(self):
        self.when_date_is_parsed_by_date_parser("")
        self.then_error_was_raised(ValueError, ["Empty string"])

    # Strings DateParser must reject; each case's message is forwarded to
    # then_error_was_raised together with the expected ValueError type.
    @parameterized.expand(
        [
            param("invalid date string", "Unable to parse: h"),
            param("Aug 7, 2014Aug 7, 2014", "Unable to parse: Aug"),
            param("24h ago", "Unable to parse: h"),
            param("2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00", "Unable to parser: 00:002015"),
        ]
    )
    def test_dates_not_parsed(self, date_string, message):
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, message)

    # Incomplete dates with PREFER_DATES_FROM="past", relative to
    # 2015-02-15 15:30: every expected value is at or before the base.
    @parameterized.expand(
        [
            param("10 December", datetime(2014, 12, 10)),
            param("March", datetime(2014, 3, 15)),
            param("Friday", datetime(2015, 2, 13)),
            param("Monday", datetime(2015, 2, 9)),
            param("10:00PM", datetime(2015, 2, 14, 22, 0)),
            param("16:10", datetime(2015, 2, 14, 16, 10)),
            param("14:05", datetime(2015, 2, 15, 14, 5)),
            param("15 february 15:00", datetime(2015, 2, 15, 15, 0)),
        ]
    )
    def test_preferably_past_dates(self, date_string, expected):
        self.given_parser(settings={"PREFER_DATES_FROM": "past", "RELATIVE_BASE": datetime(2015, 2, 15, 15, 30)})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    # Incomplete dates with PREFER_DATES_FROM="future", relative to
    # 2015-02-15 15:30: every expected value is after the base.
    @parameterized.expand(
        [
            param("10 December", datetime(2015, 12, 10)),
            param("March", datetime(2015, 3, 15)),
            param("Friday", datetime(2015, 2, 20)),
            param("Monday", datetime(2015, 2, 16)),
            param("10:00PM", datetime(2015, 2, 15, 22, 0)),
            param("16:10", datetime(2015, 2, 15, 16, 10)),
            param("14:05", datetime(2015, 2, 16, 14, 5)),
        ]
    )
    def test_preferably_future_dates(self, date_string, expected):
        self.given_local_tz_offset(0)
        self.given_parser(settings={"PREFER_DATES_FROM": "future", "RELATIVE_BASE": datetime(2015, 2, 15, 15, 30)})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    # Incomplete dates with PREFER_DATES_FROM="current_period", relative to
    # 2015-02-15 15:30.
    @parameterized.expand(
        [
            param("10 December", datetime(2015, 12, 10)),
            param("March", datetime(2015, 3, 15)),
            param("Friday", datetime(2015, 2, 13)),
            param("10:00PM", datetime(2015, 2, 15, 22, 00)),
            param("16:10", datetime(2015, 2, 15, 16, 10)),
            param("14:05", datetime(2015, 2, 15, 14, 5)),
        ]
    )
    def test_dates_without_preference(self, date_string, expected):
        self.given_local_tz_offset(0)
        self.given_parser(
            settings={"PREFER_DATES_FROM": "current_period", "RELATIVE_BASE": datetime(2015, 2, 15, 15, 30)}
        )
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    # Month+year inputs with PREFER_DAY_OF_MONTH="current": the day is taken
    # from `today`; the cases show it capped at the target month's last day
    # (e.g. today=Jan 31 -> Feb 28).
    @parameterized.expand(
        [
            param("February 2015", today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
            param("February 2012", today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
            param("March 2015", today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
            param("April 2015", today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
            param("April 2015", today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
            param("December 2014", today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
        ]
    )
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        self.given_parser(settings={"PREFER_DAY_OF_MONTH": "current", "RELATIVE_BASE": today})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    # Month+year inputs with PREFER_DAY_OF_MONTH="last": the expected day is
    # the last day of the parsed month, whatever `today` is.
    @parameterized.expand(
        [
            param("February 2015", today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
            param("February 2012", today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
            param("March 2015", today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
            param("April 2015", today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
            param("April 2015", today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
            param("December 2014", today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
        ]
    )
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        self.given_parser(settings={"PREFER_DAY_OF_MONTH": "last", "RELATIVE_BASE": today})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    # Month+year inputs with PREFER_DAY_OF_MONTH="first": the expected day is
    # always the 1st, whatever `today` is.
    @parameterized.expand(
        [
            param("February 2015", today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
            param("February 2012", today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
            param("March 2015", today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
            param("April 2015", today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
            param("April 2015", today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
            param("December 2014", today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
        ]
    )
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        self.given_parser(settings={"PREFER_DAY_OF_MONTH": "first", "RELATIVE_BASE": today})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    # A date with an explicit day must parse the same under every
    # PREFER_DAY_OF_MONTH value.
    @parameterized.expand(
        [param(prefer_day_of_month="current"), param(prefer_day_of_month="last"), param(prefer_day_of_month="first")]
    )
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        self.given_parser(settings={"PREFER_DAY_OF_MONTH": prefer_day_of_month, "RELATIVE_BASE": datetime(2015, 2, 12)})
        self.when_date_is_parsed("24 April 2012")
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    # A trailing token listed in SKIP_TOKENS ("de") must not prevent parsing.
    def test_date_is_parsed_when_skip_tokens_are_supplied(self):
        self.given_parser(settings={"SKIP_TOKENS": ["de"], "RELATIVE_BASE": datetime(2015, 2, 12)})
        self.when_date_is_parsed("24 April 2012 de")
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    # Day numbers beyond the month's length must raise ValueError. Two
    # messages are passed to then_error_was_raised: a generic one and the
    # case-specific "day must be in 1..N" one.
    @parameterized.expand(
        [
            param("29 February 2015", "day must be in 1..28"),
            param("32 January 2015", "day must be in 1..31"),
            param("31 April 2015", "day must be in 1..30"),
            param("31 June 2015", "day must be in 1..30"),
            param("31 September 2015", "day must be in 1..30"),
        ]
    )
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string, message):
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, ["day is out of range for month", message])

    # The same ISO 8601 timestamp must parse identically for languages
    # ["fr"], ["en"] and an empty list.
    @parameterized.expand(
        [
            param("2015-05-02T10:20:19+0000", languages=["fr"], expected=datetime(2015, 5, 2, 10, 20, 19)),
            param("2015-05-02T10:20:19+0000", languages=["en"], expected=datetime(2015, 5, 2, 10, 20, 19)),
            param("2015-05-02T10:20:19+0000", languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
        ]
    )
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        self.given_local_tz_offset(0)
        self.given_parser(languages=languages, settings={"PREFER_LANGUAGE_DATE_ORDER": False})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    # Checks both the parsed datetime and the reported period ("day",
    # "month" or "year") relative to 2015-02-15 15:30.
    @parameterized.expand(
        [
            param("10 December", expected=datetime(2015, 12, 10), period="day"),
            param("March", expected=datetime(2015, 3, 15), period="month"),
            param("April", expected=datetime(2015, 4, 15), period="month"),
            param("December", expected=datetime(2015, 12, 15), period="month"),
            param("Friday", expected=datetime(2015, 2, 13), period="day"),
            param("Monday", expected=datetime(2015, 2, 9), period="day"),
            param("10:00PM", expected=datetime(2015, 2, 15, 22, 00), period="day"),
            param("16:10", expected=datetime(2015, 2, 15, 16, 10), period="day"),
            param("2014", expected=datetime(2014, 2, 15), period="year"),
            param("2008", expected=datetime(2008, 2, 15), period="year"),
        ]
    )
    def test_extracted_period(self, date_string, expected=None, period=None):
        self.given_local_tz_offset(0)
        self.given_parser(settings={"RELATIVE_BASE": datetime(2015, 2, 15, 15, 30)})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    # Ambiguous numeric dates must be read according to the DATE_ORDER
    # setting (Y/M/D permutations).
    @parameterized.expand(
        [
            param("15-12-18 06:00", expected=datetime(2015, 12, 18, 6, 0), order="YMD"),
            param("15-18-12 06:00", expected=datetime(2015, 12, 18, 6, 0), order="YDM"),
            param("10-11-12 06:00", expected=datetime(2012, 10, 11, 6, 0), order="MDY"),
            param("10-11-12 06:00", expected=datetime(2011, 10, 12, 6, 0), order="MYD"),
            param("10-11-12 06:00", expected=datetime(2011, 12, 10, 6, 0), order="DYM"),
            param("15-12-18 06:00", expected=datetime(2018, 12, 15, 6, 0), order="DMY"),
        ]
    )
    def test_order(self, date_string, expected=None, order=None):
        self.given_parser(settings={"DATE_ORDER": order})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset`` to *offset* hours."""
        self.add_patch(
            patch.object(dateparser.timezone_parser, "local_tz_offset", new=timedelta(seconds=3600 * offset))
        )

    def given_parser(self, *args, **kwds):
        """Build ``self.parser`` with ``date_parser.parse`` instrumented.

        ``date_parser.parse`` is wrapped so that its return value is recorded
        in ``self.date_result``. A ``Mock`` wrapping ``date_parser`` is then
        patched in as ``dateparser.date.date_parser``. All positional and
        keyword arguments are forwarded to ``DateDataParser``.
        """

        def collecting_get_date_data(parse):
            @wraps(parse)
            def wrapped(*args, **kwargs):
                # Keep the raw parse result for later comparison.
                self.date_result = parse(*args, **kwargs)
                return self.date_result

            return wrapped

        self.add_patch(patch.object(date_parser, "parse", collecting_get_date_data(date_parser.parse)))

        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(patch("dateparser.date.date_parser", new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def when_date_is_parsed(self, date_string):
        """Parse *date_string* with ``self.parser`` and store the result in ``self.result``."""
        self.result = self.parser.get_date_data(date_string)

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse *date_string* directly with ``DateParser``.

        Any exception is captured in ``self.error`` instead of propagating.
        NOTE(review): ``then_error_was_raised`` is not defined in this class;
        presumably it is inherited from ``BaseTestCase`` and reads
        ``self.error`` - confirm.
        """
        try:
            self.result = DateParser().parse(date_string)
        except Exception as error:
            self.error = error

    def then_period_is(self, period):
        """Assert that the parsed result reports *period*."""
        self.assertEqual(period, self.result["period"])

    def then_date_obj_exactly_is(self, expected):
        """Assert that the parsed ``date_obj`` equals *expected*."""
        self.assertEqual(expected, self.result["date_obj"])

    def then_date_was_parsed_by_date_parser(self):
        """Assert that the instrumented ``date_parser.parse`` produced the result.

        ``self.date_result`` must no longer hold the ``NotImplemented``
        sentinel from ``setUp``, and its first element must equal the
        ``date_obj`` in ``self.result``.
        """
        self.assertNotEqual(NotImplemented, self.date_result, "Date was not parsed")
        self.assertEqual(self.result["date_obj"], self.date_result[0])
def annotate(self, doc):
    """Find date expressions in ``doc`` and resolve them to datetime ranges.

    If no date is associated with the document (``doc.date is None``), the
    document's date is treated as the most recent past date explicitly
    mentioned in the document; if none is found, the current time is used.

    Candidate spans come from the ``spacy.nes`` tier (label ``DATE``), a
    regex for formatted dates, and bare four-digit years. Adjacent spans,
    "<date> <joiner> <date>" ranges and "since <date>" expressions are
    layered on top, and the best non-overlapping set is parsed.

    Args:
        doc: The document to annotate. The ``structured_data`` and
            ``spacy.nes`` tiers are added to it when missing.

    Returns:
        A dict with two tiers: ``'dates'`` holds ``DateSpan`` objects whose
        range could be resolved (and is not reversed), ``'dates.all'`` holds
        every candidate span, including unparsable and non-specific ones.
    """
    detect_date = doc.date is None
    doc_date = doc.date or datetime.datetime.now()
    strict_parser = DateDataParser(['en'],
                                   settings={'STRICT_PARSING': True})

    def date_to_datetime_range(text,
                               relative_base=None,
                               prefer_dates_from='past'):
        """Return ``[start, end]`` datetimes for ``text``, or None."""
        if relative_base is None:
            # Read at call time, so a document date detected later on is
            # picked up by subsequent calls.
            relative_base = doc_date
        # Handle relative date ranges like "the past ___ days"
        relative_num_days = re.sub(relative_duration_range_re, "", text)
        if len(relative_num_days) < len(text):
            num_days_datetime_range = date_to_datetime_range(
                relative_num_days)
            if not num_days_datetime_range:
                return None
            return [num_days_datetime_range[0], relative_base]
        text = clean_date_str(text)
        if len(text) < 3:
            return None
        # Handle ordinal dates like "the second month of 2006"
        match = ordinal_date_re.match(text)
        if match:
            match_dict = match.groupdict()
            if match_dict['ordinal']:
                ordinal_number = ORDINALS.index(match_dict['ordinal']) + 1
            else:
                ordinal_number = int(match_dict['ordinal_number'])
            unit = match_dict['unit']
            rest = match_dict['rest']
            if unit == 'day':
                return date_to_datetime_range(
                    str(ordinal_number) + " " + rest)
            elif unit == 'week':
                if ordinal_number > 4:
                    return None
                parsed_remainder = date_to_datetime_range("1 " + rest)
                if not parsed_remainder:
                    return None
                week_start = parsed_remainder[0]
                week_start = date_to_datetime_range(
                    "Sunday",
                    # A day is added because if the base date is on Sunday
                    # the prior Sunday will be used.
                    relative_base=week_start + relativedelta(days=1))[0]
                for _ in range(ordinal_number - 1):
                    week_start = date_to_datetime_range(
                        "Sunday",
                        relative_base=week_start + relativedelta(days=1),
                        prefer_dates_from='future')[0]
                return [week_start, week_start + relativedelta(days=7)]
            elif unit == 'month':
                if not 1 <= ordinal_number <= 12:
                    # "the 13th month of ..." is not a date; building a
                    # datetime from it would raise ValueError and abort
                    # the whole annotation.
                    return None
                month_name = datetime.datetime(
                    2000, ordinal_number, 1).strftime("%B ")
                return date_to_datetime_range(month_name + rest)
            else:
                raise Exception("Unknown time unit: " + unit)
        # Handle dates like "1950s" since dateparser doesn't.
        decade_match = re.match(r"(\d{4})s", text)
        if decade_match:
            decade = int(decade_match.groups()[0])
            if decade < datetime.MINYEAR or decade + 10 > datetime.MAXYEAR:
                # e.g. "0000s" or "9995s": outside what datetime can hold.
                return None
            return [
                datetime.datetime(decade, 1, 1),
                datetime.datetime(decade + 10, 1, 1)
            ]
        parser = DateDataParser(
            ['en'],
            settings={
                'RELATIVE_BASE': relative_base or datetime.datetime.now(),
                'PREFER_DATES_FROM': prefer_dates_from
            })
        try:
            text = re.sub(r" year$", "", text)
            date_data = parser.get_date_data(text)
        except (TypeError, ValueError):
            return None
        if date_data['date_obj']:
            date = date_data['date_obj']
            if date_data['period'] == 'day':
                return [date, date + relativedelta(days=1)]
            elif date_data['period'] == 'month':
                date = datetime.datetime(date.year, date.month, 1)
                return [date, date + relativedelta(months=1)]
            elif date_data['period'] == 'year':
                date = datetime.datetime(date.year, 1, 1)
                return [date, date + relativedelta(years=1)]
        # Unparsed text, or a period other than day/month/year.
        return None

    def parse_non_relative_date(text):
        """Return the start datetime of ``text`` if it is not relative."""
        result = date_to_datetime_range(
            text, relative_base=datetime.datetime(900, 1, 1))
        if result and result[0].year > 1000:
            # If the year is less than 1000 assume the year 900
            # base date was used when parsing so the date is relative.
            return result[0]
        return None

    if 'structured_data' not in doc.tiers:
        doc.add_tiers(StructuredDataAnnotator())
    if 'spacy.nes' not in doc.tiers:
        doc.add_tiers(SpacyAnnotator())
    # Create a combined tier of named entities and regex dates.
    date_span_tier = doc.tiers['spacy.nes'].with_label('DATE')
    # Regex for formatted dates
    regex = re.compile(
        r"\b("
        # parenthetical year
        r"((?<=[\[\(])[1-2]\d{3}(?=[\]\)]))|"
        # date MonthName yyyy
        r"(\d{1,2} [a-zA-Z]{3,} \[?\d{4})|"
        # dd-mm-yyyy
        r"(\d{1,2} ?[\/\-] ?\d{1,2} ?[\/\-] ?\d{1,4})|"
        # yyyy-MMM-dd
        r"(\d{1,4} ?[\/\-] ?[a-z]{3,4} ?[\/\-] ?\d{1,4})|"
        # yyyy-mm-dd
        r"(\d{1,4} ?[\/\-] ?\d{1,2} ?[\/\-] ?\d{1,2})"
        r")\b", re.I)
    match_tier = doc.create_regex_tier(regex)
    date_span_tier += match_tier
    # Add year components individually in case the full spans are thrown
    # out. Sometimes extra text is added to dates that makes them invalid,
    # this allows some of the date to be recovered.
    date_span_tier += date_span_tier.match_subspans(r"([1-2]\d{3})")
    # Remove spans that are probably ages.
    date_span_tier = date_span_tier.without_overlaps(
        date_span_tier.match_subspans(r"\bage\b"))
    # Group adjacent date info in case it is parsed as separate chunks.
    # ex: Friday, October 7th 2010.
    adjacent_date_spans = date_span_tier.combined_adjacent_spans(
        max_dist=9)
    grouped_date_spans = []

    def can_combine(text):
        """Return True if ``text`` is not a complete date on its own."""
        if re.match(r"\d{4}", text, re.I):
            # year only date
            return True
        try:
            return strict_parser.get_date_data(text)['date_obj'] is None
        except (TypeError, ValueError):
            return True

    for date_group in adjacent_date_spans:
        date_group_spans = list(date_group.iterate_leaf_base_spans())
        if any(can_combine(span.text) for span in date_group_spans):
            if date_to_datetime_range(date_group.text) is not None:
                grouped_date_spans.append(date_group)
    # Find date ranges by looking for joiner words between dates.
    date_range_joiners = [
        t_span for t_span in doc.tiers['spacy.tokens']
        if re.match(r"(" + DATE_RANGE_JOINERS + r"|\-)$", t_span.text, re.I)
    ]
    date_range_tier = date_span_tier.label_spans('start')\
        .with_following_spans_from(date_range_joiners, max_dist=3)\
        .with_following_spans_from(date_span_tier.label_spans('end'),
                                   max_dist=3)\
        .label_spans('date_range')
    since_tokens = AnnoTier([
        t_span for t_span in doc.tiers['spacy.tokens']
        if 'since' == t_span.token.lemma_
    ], presorted=True).label_spans('since_token')
    since_date_tier = (
        since_tokens.with_following_spans_from(date_span_tier,
                                               allow_overlap=True) +
        date_span_tier.with_contained_spans_from(since_tokens)
    ).label_spans('since_date')
    tier_spans = []
    all_date_spans = AnnoTier(date_range_tier.spans +
                              grouped_date_spans +
                              date_span_tier.spans +
                              since_date_tier.spans)
    if detect_date:
        simple_date_spans = AnnoTier(
            grouped_date_spans +
            date_span_tier.spans).optimal_span_set(prefer='text_length')
        latest_date = None
        for span in simple_date_spans:
            if re.match(r"today|yesterday", span.text, re.I):
                continue
            try:
                span_date = strict_parser.get_date_data(
                    span.text)['date_obj']
            except (TypeError, ValueError):
                continue
            # NOTE(review): this comparison assumes span_date is a naive
            # datetime -- confirm the strict parser never returns an
            # aware one here.
            if span_date and span_date < datetime.datetime.now():
                if not latest_date or span_date > latest_date:
                    latest_date = span_date
        if latest_date:
            doc_date = latest_date
    date_spans_without_structured_data = all_date_spans.without_overlaps(
        doc.tiers['structured_data'])
    date_spans_in_structured_data = []
    dates_by_structured_value = doc.tiers['structured_data.values']\
        .group_spans_by_containing_span(all_date_spans,
                                        allow_partial_containment=False)
    for value_span, date_spans in dates_by_structured_value:
        date_spans_in_structured_data += date_spans
    all_date_spans = AnnoTier(date_spans_without_structured_data.spans +
                              date_spans_in_structured_data
                              ).optimal_span_set(prefer='text_length')
    for date_span in all_date_spans:
        # Parse the span text into one or two components depending on
        # whether it contains multiple dates for specifying a range.
        if date_span.label == 'date_range':
            range_component_dict = date_span.groupdict()
            range_components = [
                range_component_dict['start'][0].text,
                range_component_dict['end'][0].text
            ]
        else:
            # flags must be passed by keyword: the third positional
            # parameter of re.split is maxsplit, not flags.
            range_components = re.split(
                r"\b(?:" + DATE_RANGE_JOINERS + r")\b",
                date_span.text,
                flags=re.I)
            if len(range_components) == 1:
                hyphenated_components = date_span.text.split("-")
                if len(hyphenated_components) == 2:
                    range_components = hyphenated_components
                elif len(hyphenated_components) == 6:
                    # Handle date ranges like 2015-11-3 - 2015-11-6
                    range_components = [
                        '-'.join(hyphenated_components[:3]),
                        '-'.join(hyphenated_components[3:])
                    ]
        if ends_with_timeunit_re.match(
                date_span.text) and not relative_duration_range_re.match(
                    date_span.text):
            # Prevent durations like "5 days" from being parsed as specific
            # dates like "5 days ago"
            continue
        elif len(range_components) == 1:
            if date_span.label == 'since_date':
                date_str = [
                    span for span in date_span.base_spans[0].base_spans
                    if span.label != 'since_token'
                ][0].text
                datetime_range = date_to_datetime_range(date_str)
                if datetime_range is None:
                    continue
                # "since <date>" runs from that date to the document date.
                datetime_range = [datetime_range[0], doc_date]
            else:
                date_str = range_components[0]
                datetime_range = date_to_datetime_range(date_str)
                if datetime_range is None:
                    continue
        elif len(range_components) == 2:
            # Handle partial years (e.g.: 2001-12)
            if re.match(r"\d{1,2}$", range_components[1]):
                if re.match(r".*\d{1,2}$", range_components[0]):
                    characters_to_sub = "1"
                    if len(range_components[1]) > 1:
                        characters_to_sub = "1,2"
                    # Replace the trailing digits of the first component
                    # with the second component's digits.
                    range_components[1] = re.sub(
                        r"\d{" + characters_to_sub + "}$",
                        range_components[1],
                        range_components[0])
            # Check for a non-relative date in the range that can be used
            # as a relative base date for the other date.
            # Example: March 3 to November 2 1984
            non_relative_dates = [
                parse_non_relative_date(text)
                for text in range_components
            ]
            relative_base_date = next(
                (x for x in non_relative_dates if x), doc_date)
            datetime_range_a = date_to_datetime_range(
                range_components[0],
                relative_base=relative_base_date)
            datetime_range_b = date_to_datetime_range(
                range_components[1],
                relative_base=relative_base_date)
            if datetime_range_a is None and datetime_range_b is None:
                continue
            elif datetime_range_a is None:
                datetime_range = datetime_range_b
            elif datetime_range_b is None:
                datetime_range = datetime_range_a
            else:
                # If include_end_date is False treat the span's daterange
                # as ending at the start of the second date component
                # unless a word like "through" is used in the second
                # component.
                if self.include_end_date or\
                   re.search(r"\bthrough\b", date_span.text) or\
                   re.search(r"\b(late|end of)\b", range_components[1]):
                    datetime_range = [
                        datetime_range_a[0],
                        datetime_range_b[1]
                    ]
                else:
                    datetime_range = [
                        datetime_range_a[0],
                        datetime_range_b[0]
                    ]
        else:
            print("Bad date range split:", date_span.text, range_components)
            continue
        # Omit reverse ranges because they usually come from something
        # being incorrectly parsed. The main exception is relative dates
        # like 2 to 3 weeks ago.
        if datetime_range[0] <= datetime_range[1]:
            tier_spans.append(DateSpan(date_span, datetime_range))
    return {
        'dates': AnnoTier(tier_spans, presorted=True),
        # Include unparsable and non-specific dates
        'dates.all': all_date_spans
    }
class TestDateParser(BaseTestCase): def setUp(self): super(TestDateParser, self).setUp() self.parser = NotImplemented self.result = NotImplemented self.date_parser = NotImplemented self.date_result = NotImplemented @parameterized.expand([ # English dates param('[Sept] 04, 2014.', datetime(2014, 9, 4)), param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)), param('10:04am EDT', datetime(2012, 11, 13, 14, 4)), param('Friday', datetime(2012, 11, 9)), param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)), param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)), param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)), param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)), param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)), param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)), param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)), param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)), param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)), param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)), param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)), param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)), param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)), # French dates param('11 Mai 2014', datetime(2014, 5, 11)), param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)), param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)), param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)), param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)), param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)), param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)), param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)), # Spanish dates param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)), param('Miércoles 20 de Noviembre de 2013', 
datetime(2013, 11, 20)), param('12 de junio del 2012', datetime(2012, 6, 12)), param('13 Ago, 2014', datetime(2014, 8, 13)), param('13 Septiembre, 2014', datetime(2014, 9, 13)), param('11 Marzo, 2014', datetime(2014, 3, 11)), param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)), param('Vi 17:15', datetime(2012, 11, 9, 17, 15)), # Dutch dates param('11 augustus 2014', datetime(2014, 8, 11)), param('14 januari 2014', datetime(2014, 1, 14)), param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)), # Italian dates param('16 giu 2014', datetime(2014, 6, 16)), param('26 gennaio 2014', datetime(2014, 1, 26)), param('Ven 18:23', datetime(2012, 11, 9, 18, 23)), # Portuguese dates param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)), param('13 Setembro, 2014', datetime(2014, 9, 13)), param('Sab 3:03', datetime(2012, 11, 10, 3, 3)), # Russian dates param('10 мая', datetime(2012, 5, 10)), # forum.codenet.ru param('26 апреля', datetime(2012, 4, 26)), param('20 ноября 2013', datetime(2013, 11, 20)), param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)), param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)), param('09 августа 2012', datetime(2012, 8, 9, 0, 0)), param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)), param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)), param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)), param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г. 
19:13', datetime(2005, 8, 13, 19, 13)), # Turkish dates param('11 Ağustos, 2014', datetime(2014, 8, 11)), param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)), # forum.andronova.net param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)), param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)), # forum.ceviz.net # Romanian dates param('13 iunie 2013', datetime(2013, 6, 13)), param('14 aprilie 2014', datetime(2014, 4, 14)), param('18 martie 2012', datetime(2012, 3, 18)), param('S 14:14', datetime(2012, 11, 10, 14, 14)), param('12-Iun-2013', datetime(2013, 6, 12)), # German dates param('21. Dezember 2013', datetime(2013, 12, 21)), param('19. Februar 2012', datetime(2012, 2, 19)), param('26. Juli 2014', datetime(2014, 7, 26)), param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)), param('12-Mär-2014', datetime(2014, 3, 12)), param('Mit 13:14', datetime(2012, 11, 7, 13, 14)), # Czech dates param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)), param('13 Srpen, 2014', datetime(2014, 8, 13)), param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)), # Thai dates param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)), param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)), param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)), param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)), param('11 ก.พ. 
2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)), # Vietnamese dates param('Thứ năm', datetime(2012, 11, 8)), # Thursday param('Thứ sáu', datetime(2012, 11, 9)), # Friday param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)), # bpsosrcs.wordpress.com param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)), # Belarusian dates param('11 траўня', datetime(2012, 5, 11)), param('4 мая', datetime(2012, 5, 4)), param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)), param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)), param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)), # Ukrainian dates param('2015-кві-12', datetime(2015, 4, 12)), param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)), param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)), param('вів о 14:04', datetime(2012, 11, 6, 14, 4)), # Tagalog dates param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)), param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)), param('2 hun', datetime(2012, 6, 2)), param('Lin 16:16', datetime(2012, 11, 11, 16, 16)), # Japanese dates param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)), param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)), # Numeric dates param('06-17-2014', datetime(2014, 6, 17)), param('13/03/2014', datetime(2014, 3, 13)), param('11. 12. 
2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)), # Miscellaneous dates param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)), param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)), param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)), param('1 сер 2015', datetime(2015, 8, 1, 0, 0)), param('2016020417:10', datetime(2016, 2, 4, 17, 10)), # Chinese dates param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)), param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)), param('2016年06月30日09时30分', datetime(2016, 6, 30, 9, 30)), param('2016年6月2911:30', datetime(2016, 6, 29, 11, 30)), param('2016年6月29', datetime(2016, 6, 29, 0, 0)), param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)), ]) def test_dates_parsing(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser(settings={'NORMALIZE': False, 'RELATIVE_BASE': datetime(2012, 11, 13)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) def test_stringified_datetime_should_parse_fine(self): expected_date = datetime(2012, 11, 13, 10, 15, 5, 330256) self.given_parser(settings={'RELATIVE_BASE': expected_date}) date_string = str(self.parser.get_date_data('today')['date_obj']) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected_date) @parameterized.expand([ # English dates param('[Sept] 04, 2014.', datetime(2014, 9, 4)), param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)), param('10:04am EDT', datetime(2012, 11, 13, 14, 4)), param('Friday', datetime(2012, 11, 9)), param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)), param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)), param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)), param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)), param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)), param('Aug. 
9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)), param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)), param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)), param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)), param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)), param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)), param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)), param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)), # French dates param('11 Mai 2014', datetime(2014, 5, 11)), param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)), param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), #wrong param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)), param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)), param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)), param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)), param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)), param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)), # Spanish dates param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)), param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)), param('12 de junio del 2012', datetime(2012, 6, 12)), param('13 Ago, 2014', datetime(2014, 8, 13)), param('13 Septiembre, 2014', datetime(2014, 9, 13)), param('11 Marzo, 2014', datetime(2014, 3, 11)), param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)), param('Vi 17:15', datetime(2012, 11, 9, 17, 15)), # Dutch dates param('11 augustus 2014', datetime(2014, 8, 11)), param('14 januari 2014', datetime(2014, 1, 14)), param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)), # Italian dates param('16 giu 2014', datetime(2014, 6, 16)), param('26 gennaio 2014', datetime(2014, 1, 26)), param('Ven 18:23', datetime(2012, 11, 9, 18, 23)), # Portuguese dates param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)), param('13 Setembro, 
2014', datetime(2014, 9, 13)), param('Sab 3:03', datetime(2012, 11, 10, 3, 3)), # Russian dates param('10 мая', datetime(2012, 5, 10)), # forum.codenet.ru param('26 апреля', datetime(2012, 4, 26)), param('20 ноября 2013', datetime(2013, 11, 20)), param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)), param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)), param('09 августа 2012', datetime(2012, 8, 9, 0, 0)), param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)), param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)), param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)), param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)), # Turkish dates param('11 Ağustos, 2014', datetime(2014, 8, 11)), param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)), # forum.andronova.net param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)), param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)), # forum.ceviz.net # Romanian dates param('13 iunie 2013', datetime(2013, 6, 13)), param('14 aprilie 2014', datetime(2014, 4, 14)), param('18 martie 2012', datetime(2012, 3, 18)), param('S 14:14', datetime(2012, 11, 10, 14, 14)), param('12-Iun-2013', datetime(2013, 6, 12)), # German dates param('21. Dezember 2013', datetime(2013, 12, 21)), param('19. Februar 2012', datetime(2012, 2, 19)), param('26. Juli 2014', datetime(2014, 7, 26)), param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)), param('12-Mär-2014', datetime(2014, 3, 12)), param('Mit 13:14', datetime(2012, 11, 7, 13, 14)), # Czech dates param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)), param('13 Srpen, 2014', datetime(2014, 8, 13)), param('čtv 14. 
lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)), # Thai dates param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)), param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)), param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)), param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)), param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)), # Vietnamese dates param('Thứ năm', datetime(2012, 11, 8)), # Thursday param('Thứ sáu', datetime(2012, 11, 9)), # Friday param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)), # bpsosrcs.wordpress.com param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)), # Belarusian dates param('11 траўня', datetime(2012, 5, 11)), param('4 мая', datetime(2012, 5, 4)), param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)), param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)), param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)), # Ukrainian dates param('2015-кві-12', datetime(2015, 4, 12)), param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)), param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)), param('вів о 14:04', datetime(2012, 11, 6, 14, 4)), # Filipino dates param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)), param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)), param('2 hun', datetime(2012, 6, 2)), param('Lin 16:16', datetime(2012, 11, 11, 16, 16)), # Japanese dates param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)), param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)), # Numeric dates param('06-17-2014', datetime(2014, 6, 17)), param('13/03/2014', datetime(2014, 3, 13)), param('11. 12. 
2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)), # Miscellaneous dates param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)), param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)), param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)), param('1 сер 2015', datetime(2015, 8, 1, 0, 0)), ]) def test_dates_parsing_with_normalization(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser(settings={'NORMALIZE': True, 'RELATIVE_BASE': datetime(2012, 11, 13)}) self.when_date_is_parsed(normalize_unicode(date_string)) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)), param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 8, 8)), param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)), param('15 May 2004', datetime(2004, 5, 15, 0, 0)), param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)), ]) def test_parsing_with_time_zones(self, date_string, expected): self.given_local_tz_offset(+1) self.given_parser() self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)), param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)), param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)), param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)), param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)), ]) def test_parsing_with_utc_offsets(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser() self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) def test_empty_dates_string_is_not_parsed(self): self.when_date_is_parsed_by_date_parser('') 
self.then_error_was_raised(ValueError, ["Empty string"]) @parameterized.expand([ param('invalid date string', 'Unable to parse: h'), param('Aug 7, 2014Aug 7, 2014', 'Unable to parse: Aug'), param('24h ago', 'Unable to parse: h'), param('2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00', 'Unable to parser: 00:002015') ]) def test_dates_not_parsed(self, date_string, message): self.when_date_is_parsed_by_date_parser(date_string) self.then_error_was_raised(ValueError, message) @parameterized.expand([ param('10 December', datetime(2014, 12, 10)), param('March', datetime(2014, 3, 15)), param('Friday', datetime(2015, 2, 13)), param('Monday', datetime(2015, 2, 9)), param('10:00PM', datetime(2015, 2, 14, 22, 0)), param('16:10', datetime(2015, 2, 14, 16, 10)), param('14:05', datetime(2015, 2, 15, 14, 5)), param('15 february 15:00', datetime(2015, 2, 15, 15, 0)), ]) def test_preferably_past_dates(self, date_string, expected): self.given_parser(settings={'PREFER_DATES_FROM': 'past', 'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('10 December', datetime(2015, 12, 10)), param('March', datetime(2015, 3, 15)), param('Friday', datetime(2015, 2, 20)), param('Monday', datetime(2015, 2, 16)), param('10:00PM', datetime(2015, 2, 15, 22, 0)), param('16:10', datetime(2015, 2, 15, 16, 10)), param('14:05', datetime(2015, 2, 16, 14, 5)), ]) def test_preferably_future_dates(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser(settings={'PREFER_DATES_FROM': 'future', 'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('10 December', datetime(2015, 12, 10)), param('March', datetime(2015, 3, 15)), param('Friday', datetime(2015, 2, 13)), param('10:00PM', datetime(2015, 2, 
15, 22, 00)), param('16:10', datetime(2015, 2, 15, 16, 10)), param('14:05', datetime(2015, 2, 15, 14, 5)), ]) def test_dates_without_preference(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser(settings={'PREFER_DATES_FROM': 'current_period', 'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)), param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)), param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)), param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)), param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)), param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)), ]) def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None): self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'current', 'RELATIVE_BASE': today}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)), param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)), param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)), param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)), param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)), param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)), ]) def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None): self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'last', 'RELATIVE_BASE': 
today}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)), param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)), param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)), param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)), param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)), param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)), ]) def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None): self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'first', 'RELATIVE_BASE': today}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param(prefer_day_of_month='current'), param(prefer_day_of_month='last'), param(prefer_day_of_month='first'), ]) def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None): self.given_parser(settings={'PREFER_DAY_OF_MONTH': prefer_day_of_month, 'RELATIVE_BASE': datetime(2015, 2, 12)}) self.when_date_is_parsed('24 April 2012') self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(datetime(2012, 4, 24)) def test_date_is_parsed_when_skip_tokens_are_supplied(self): self.given_parser(settings={'SKIP_TOKENS': ['de'], 'RELATIVE_BASE': datetime(2015, 2, 12)}) self.when_date_is_parsed('24 April 2012 de') self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(datetime(2012, 4, 24)) @parameterized.expand([ param('29 February 2015', 'day must be in 1..28'), param('32 January 2015', 'day must be in 1..31'), param('31 April 2015', 'day must be in 1..30'), param('31 June 2015', 'day must be in 1..30'), param('31 September 2015', 'day 
must be in 1..30'), ]) def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string, message): self.when_date_is_parsed_by_date_parser(date_string) self.then_error_was_raised(ValueError, ['day is out of range for month', message]) @parameterized.expand([ param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)), param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)), param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)), ]) def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected): self.given_local_tz_offset(0) self.given_parser(languages=languages, settings={'PREFER_LANGUAGE_DATE_ORDER': False}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('10 December', expected=datetime(2015, 12, 10), period='day'), param('March', expected=datetime(2015, 3, 15), period='month'), param('April', expected=datetime(2015, 4, 15), period='month'), param('December', expected=datetime(2015, 12, 15), period='month'), param('Friday', expected=datetime(2015, 2, 13), period='day'), param('Monday', expected=datetime(2015, 2, 9), period='day'), param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'), param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'), param('2014', expected=datetime(2014, 2, 15), period='year'), param('2008', expected=datetime(2008, 2, 15), period='year'), ]) def test_extracted_period(self, date_string, expected=None, period=None): self.given_local_tz_offset(0) self.given_parser(settings={'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) self.then_period_is(period) @parameterized.expand([ param('15-12-18 06:00', expected=datetime(2015, 12, 18, 6, 
0), order='YMD'), param('15-18-12 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YDM'), param('10-11-12 06:00', expected=datetime(2012, 10, 11, 6, 0), order='MDY'), param('10-11-12 06:00', expected=datetime(2011, 10, 12, 6, 0), order='MYD'), param('10-11-12 06:00', expected=datetime(2011, 12, 10, 6, 0), order='DYM'), param('15-12-18 06:00', expected=datetime(2018, 12, 15, 6, 0), order='DMY'), param('201508', expected=datetime(2015, 8, 20, 0, 0), order='DYM'), param('201508', expected=datetime(2020, 8, 15, 0, 0), order='YDM'), param('201108', expected=datetime(2008, 11, 20, 0, 0), order='DMY'), ]) def test_order(self, date_string, expected=None, order=None): self.given_parser(settings={'DATE_ORDER': order}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) def given_local_tz_offset(self, offset): self.add_patch( patch.object(dateparser.timezone_parser, 'local_tz_offset', new=timedelta(seconds=3600 * offset)) ) def given_parser(self, *args, **kwds): def collecting_get_date_data(parse): @wraps(parse) def wrapped(*args, **kwargs): self.date_result = parse(*args, **kwargs) return self.date_result return wrapped self.add_patch(patch.object(date_parser, 'parse', collecting_get_date_data(date_parser.parse))) self.date_parser = Mock(wraps=date_parser) self.add_patch(patch('dateparser.date.date_parser', new=self.date_parser)) self.parser = DateDataParser(*args, **kwds) def when_date_is_parsed(self, date_string): self.result = self.parser.get_date_data(date_string) def when_date_is_parsed_by_date_parser(self, date_string): try: self.result = DateParser().parse(date_string) except Exception as error: self.error = error def then_period_is(self, period): self.assertEqual(period, self.result['period']) def then_date_obj_exactly_is(self, expected): self.assertEqual(expected, self.result['date_obj']) def then_date_was_parsed_by_date_parser(self): self.assertNotEqual(NotImplemented, self.date_result, "Date 
was not parsed") self.assertEqual(self.result['date_obj'], self.date_result[0])
class Reminder:
    '''
    Chat plugin that lets a user schedule a reminder with ``!remindme``.

    The time period is parsed with dateparser (English only, preferring
    future dates, day-month-year order). An optional reminder text may be
    supplied in square brackets. Reminders live only in the running event
    loop: they are lost when the process restarts.
    '''

    def __init__(self, config=None):
        # ``config`` is accepted for interface compatibility; it is not read here.
        self.active_reminder = {}
        # Captures the optional "[reminder text]" part of the command.
        self.regex = r'\[(.*)\]'
        self.settings = {'PREFER_DATES_FROM': 'future', 'DATE_ORDER': 'DMY'}
        self.parser = DateDataParser(languages=['en'],
                                     allow_redetect_language=False,
                                     settings=self.settings)

    async def send_reminder_start_msg(self, user, channel, client, time):
        '''
        Gives an acknowledgement that the reminder has been set.

        ``time`` is the target datetime; microseconds are dropped for display.
        '''
        time = time.replace(microsecond=0)
        msg = ":+1: %s I'll remind you at %s UTC." % (user, str(time))
        await client.send_message(channel, msg)

    async def send_reminder_end_msg(self, user, channel, client, text):
        '''
        Sends the message when the reminder finishes, including the reminder
        text if one was passed in.
        '''
        if text:
            msg = 'Hello %s, you asked me to remind you of **%s**.' % (user, text)
        else:
            msg = 'Hello %s, you asked me to remind you at this time.' % user
        await client.send_message(channel, msg)

    async def start_reminder_sleep(self, delta, user, channel, client, text, time):
        '''
        Asynchronously sleeps for the reminder length.

        Sends the acknowledgement first, sleeps for ``delta`` (a timedelta),
        then sends the reminder itself.
        '''
        # Send a message that the reminder is going to be set.
        await self.send_reminder_start_msg(user, channel, client, time)
        await asyncio.sleep(delta.total_seconds())
        await self.send_reminder_end_msg(user, channel, client, text)

    def apply_regex(self, msg):
        '''
        Applies the regex to check if the user passed in an optional string
        in square brackets.

        Returns a ``(message_without_brackets, captured_text)`` tuple when a
        bracketed string is present, otherwise ``False``.
        '''
        regex_result = re.search(self.regex, msg)
        if regex_result:
            msg = re.sub(self.regex, '', msg).strip()
            return msg, regex_result.group(1)
        else:
            return False

    def parse_msg(self, msg, user):
        '''
        Parses the message passed along with the !remind command.
        Uses the dateparser library to check if the time string is valid.

        Format: !remindme <time period> [optional string]

        Returns ``(False, error_message)`` when the time cannot be parsed or
        lies in the past, otherwise ``(True, timedelta_until, target_time)``
        where ``target_time`` is a naive datetime expressed in UTC.
        '''
        parsed_time = self.parser.get_date_data(msg)['date_obj']
        if not parsed_time:
            error_msg = ('I could not interept your message %s, try specifing '
                         'the time period in a different format.') % user
            return (False, error_msg)

        # dateparser may hand back a timezone-aware datetime (e.g. when the
        # message names a time zone). Comparing or subtracting that against
        # the naive ``utcnow()`` below raises TypeError, so convert aware
        # values to naive UTC first. Naive values are left untouched.
        offset = parsed_time.utcoffset()
        if offset is not None:
            parsed_time = (parsed_time - offset).replace(tzinfo=None)

        now = datetime.utcnow()
        if parsed_time < now:
            error_msg = ("Dont waste my time %s, you can't expect "
                         "me to remind you of an event in the past.") % user
            return (False, error_msg)

        difference = parsed_time - now
        return (True, difference, parsed_time)

    @register('!remindme')
    async def set_reminder(self, msg, user, channel, client, *args, **kwargs):
        '''
        Main function that is called to set a reminder.

        Calls the helper functions to parse the message and to check that it
        is valid. If the message is valid, the asynchronous sleep function is
        awaited; otherwise the error message is returned.
        Currently loses state on restart; could write/load to a file.
        '''
        reminder_txt = None
        optional_string = self.apply_regex(msg)
        if optional_string:
            msg, reminder_txt = optional_string

        parsed_msg = self.parse_msg(msg, user)
        if not parsed_msg[0]:
            return parsed_msg[1]
        else:
            await self.start_reminder_sleep(parsed_msg[1], user, channel,
                                            client, reminder_txt, parsed_msg[2])
class TestDateParser(BaseTestCase):
    # Given/when/then style tests: ``given_*`` helpers install patches and
    # state, ``when_*`` runs the parser, ``then_*`` assert on the outcome.

    def setUp(self):
        super(TestDateParser, self).setUp()
        # Placeholders; each is filled in by the given_*/when_* helpers.
        for attribute in ('date_string', 'parser', 'result',
                          'date_parser', 'date_result'):
            setattr(self, attribute, NotImplemented)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 \xe0 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er F\xe9vrier \xe0 21:24', datetime(2012, 2, 1, 21, 24)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        # Turkish dates
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        # Miscellaneous
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('vendredi, d\xe9cembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 D\xe9c 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 D\xe9cembre 2014 \xe0 09:00', datetime(2014, 12, 11, 9, 0)),
        param('f\xe9v 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
    ])
    def test_dates_parsing(self, date_string, expected):
        # Fixed "now" and zero local offset keep relative inputs deterministic.
        self.given_utcnow(datetime(2012, 11, 13))  # Tuesday
        self.given_local_tz_offset(0)
        self._parse_and_verify(date_string, expected, period='day')

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        self.given_local_tz_offset(+1)
        self._parse_and_verify(date_string, expected, period='day')

    @parameterized.expand([
        param(''),
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
    ])
    def test_dates_not_parsed(self, date_string):
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self._parse_and_verify(date_string, expected, period='day')

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self._parse_and_verify(date_string, expected, period='day')

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self._parse_and_verify(date_string, expected, period='day')

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        self.given_configuration('PREFER_DAY_OF_MONTH', 'current')
        self.given_utcnow(today)
        self._parse_and_verify(date_string, expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        self.given_configuration('PREFER_DAY_OF_MONTH', 'last')
        self.given_utcnow(today)
        self._parse_and_verify(date_string, expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        self.given_configuration('PREFER_DAY_OF_MONTH', 'first')
        self.given_utcnow(today)
        self._parse_and_verify(date_string, expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(datetime(2015, 2, 12))
        self._parse_and_verify('24 April 2012', datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string):
        # Exercises DateParser directly rather than going through DateDataParser.
        with self.assertRaisesRegexp(ValueError, 'Day not in range for month'):
            DateParser().parse(date_string)

    def _parse_and_verify(self, date_string, expected, period=None):
        """Build the parser, parse ``date_string`` and run the common checks.

        The period assertion is only made when ``period`` is given.
        """
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        if period is not None:
            self.then_period_is(period)
        self.then_date_obj_exactly_is(expected)

    def given_utcnow(self, now):
        """Patch ``dateparser.date_parser.datetime`` so ``utcnow()`` returns ``now``."""
        fake_datetime = Mock(wraps=datetime)
        fake_datetime.utcnow = Mock(return_value=now)
        patcher = patch('dateparser.date_parser.datetime', new=fake_datetime)
        self.add_patch(patcher)

    def given_local_tz_offset(self, offset):
        """Patch the timezone parser's ``local_tz_offset`` to ``offset`` hours."""
        offset_delta = timedelta(seconds=3600 * offset)
        patcher = patch.object(dateparser.timezone_parser, 'local_tz_offset', new=offset_delta)
        self.add_patch(patcher)

    def given_date_string(self, date_string):
        """Remember the string that ``when_date_is_parsed`` will parse."""
        self.date_string = date_string

    def given_parser(self):
        """Create the ``DateDataParser`` under test with ``date_parser.parse`` recorded.

        Every value returned by ``date_parser.parse`` is stored in
        ``self.date_result`` so tests can confirm which parser produced the date.
        """
        original_parse = date_parser.parse

        @wraps(original_parse)
        def recording_parse(date_string):
            outcome = original_parse(date_string)
            self.date_result = outcome
            return outcome

        self.add_patch(patch.object(date_parser, 'parse', recording_parse))
        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser()

    def given_configuration(self, key, value):
        """Patch the ``settings`` attribute ``key`` to ``value``."""
        patcher = patch.object(settings, key, new=value)
        self.add_patch(patcher)

    def when_date_is_parsed(self):
        """Parse the remembered date string and keep the result."""
        self.result = self.parser.get_date_data(self.date_string)

    def then_period_is(self, period):
        """Assert the parsed result's ``period`` equals ``period``."""
        self.assertEqual(period, self.result['period'])

    def then_date_obj_exactly_is(self, expected):
        """Assert the parsed ``date_obj`` equals ``expected``."""
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_not_parsed(self):
        """Assert that parsing produced no ``date_obj``."""
        failure_note = '"%s" should not be parsed' % self.date_string
        self.assertIsNone(self.result['date_obj'], failure_note)

    def then_date_was_parsed_by_date_parser(self):
        """Assert the result's ``date_obj`` is what ``date_parser.parse`` returned."""
        self.assertEqual(self.result['date_obj'], self.date_result)
def __init__(self, languages=('en',)):
    """Create the underlying ``DateDataParser`` for the given languages.

    ``languages`` defaults to English only. The default is an immutable
    tuple so that one list object is not shared between every call.
    """
    # Hand the parser its own list so later mutation by either side cannot
    # leak into the caller's sequence; other values are passed through as-is.
    if isinstance(languages, (list, tuple)):
        languages = list(languages)
    self.parser = DateDataParser(languages=languages)
class TestDateParser(BaseTestCase): def setUp(self): super(TestDateParser, self).setUp() self.parser = NotImplemented self.result = NotImplemented self.date_parser = NotImplemented self.date_result = NotImplemented @parameterized.expand([ # English dates param('[Sept] 04, 2014.', datetime(2014, 9, 4)), param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)), param('Tues 9th Aug, 2015', datetime(2015, 8, 9)), param('10:04am EDT', datetime(2012, 11, 13, 10, 4)), param('Friday', datetime(2012, 11, 9)), param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)), param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)), param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 25, 22, 17)), param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 12, 0)), param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)), param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)), param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)), param('8:25 a.m. Dec. 
12, 2014', datetime(2014, 12, 12, 8, 25)), param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)), param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)), param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)), param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)), param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)), # French dates param('11 Mai 2014', datetime(2014, 5, 11)), param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)), param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)), param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)), param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)), param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)), param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)), param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)), # Spanish dates param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)), param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)), param('12 de junio del 2012', datetime(2012, 6, 12)), param('13 Ago, 2014', datetime(2014, 8, 13)), param('13 Septiembre, 2014', datetime(2014, 9, 13)), param('11 Marzo, 2014', datetime(2014, 3, 11)), param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)), param('Vi 17:15', datetime(2012, 11, 9, 17, 15)), # Dutch dates param('11 augustus 2014', datetime(2014, 8, 11)), param('14 januari 2014', datetime(2014, 1, 14)), param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)), # Italian dates param('16 giu 2014', datetime(2014, 6, 16)), param('26 gennaio 2014', datetime(2014, 1, 26)), param('Ven 18:23', datetime(2012, 11, 9, 18, 23)), # Portuguese dates param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)), param('13 Setembro, 2014', datetime(2014, 9, 13)), param('Sab 3:03', datetime(2012, 11, 10, 3, 3)), # Russian dates param('10 мая', datetime(2012, 5, 10)), # forum.codenet.ru 
param('26 апреля', datetime(2012, 4, 26)), param('20 ноября 2013', datetime(2013, 11, 20)), param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)), param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)), param('13 января 2015 г в 13:34', datetime(2015, 1, 13, 13, 34)), param('09 августа 2012', datetime(2012, 8, 9, 0, 0)), param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)), param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)), param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)), param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г 19:13', datetime(2005, 8, 13, 19, 13)), # Turkish dates param('11 Ağustos, 2014', datetime(2014, 8, 11)), param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)), # forum.andronova.net param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)), param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)), # forum.ceviz.net # Romanian dates param('13 iunie 2013', datetime(2013, 6, 13)), param('14 aprilie 2014', datetime(2014, 4, 14)), param('18 martie 2012', datetime(2012, 3, 18)), param('S 14:14', datetime(2012, 11, 10, 14, 14)), param('12-Iun-2013', datetime(2013, 6, 12)), # German dates param('21. Dezember 2013', datetime(2013, 12, 21)), param('19. Februar 2012', datetime(2012, 2, 19)), param('26. Juli 2014', datetime(2014, 7, 26)), param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)), param('12-Mär-2014', datetime(2014, 3, 12)), param('Mit 13:14', datetime(2012, 11, 7, 13, 14)), # Czech dates param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)), param('13 Srpen, 2014', datetime(2014, 8, 13)), param('čtv 14. 
lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)), # Thai dates param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)), param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)), param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)), param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)), param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)), # Vietnamese dates param('Thứ năm', datetime(2012, 11, 8)), # Thursday param('Thứ sáu', datetime(2012, 11, 9)), # Friday param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)), # bpsosrcs.wordpress.com param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)), # Belarusian dates param('11 траўня', datetime(2012, 5, 11)), param('4 мая', datetime(2012, 5, 4)), param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)), param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)), param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)), # Ukrainian dates param('2015-кві-12', datetime(2015, 4, 12)), param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)), param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)), param('вів о 14:04', datetime(2012, 11, 6, 14, 4)), # Tagalog dates param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)), param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)), param('2 hun', datetime(2012, 6, 2)), param('Lin 16:16', datetime(2012, 11, 11, 16, 16)), # Japanese dates param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)), param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)), # Numeric dates param('06-17-2014', datetime(2014, 6, 17)), param('13/03/2014', datetime(2014, 3, 13)), param('11. 12. 
2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)), # Miscellaneous dates param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)), param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)), param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)), param('1 сер 2015', datetime(2015, 8, 1, 0, 0)), param('2016020417:10', datetime(2016, 2, 4, 17, 10)), # Chinese dates param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)), param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)), param('2016年06月30日09时30分', datetime(2016, 6, 30, 9, 30)), param('2016年6月2911:30', datetime(2016, 6, 29, 11, 30)), param('2016年6月29', datetime(2016, 6, 29, 0, 0)), param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)), param('2016年9月14日晚8:00', datetime(2016, 9, 14, 20, 0)), ]) def test_dates_parsing(self, date_string, expected): self.given_parser(settings={'NORMALIZE': False, 'RELATIVE_BASE': datetime(2012, 11, 13)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) def test_stringified_datetime_should_parse_fine(self): expected_date = datetime(2012, 11, 13, 10, 15, 5, 330256) self.given_parser(settings={'RELATIVE_BASE': expected_date}) date_string = str(self.parser.get_date_data('today')['date_obj']) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected_date) @parameterized.expand([ # English dates param('[Sept] 04, 2014.', datetime(2014, 9, 4)), param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)), param('10:04am EDT', datetime(2012, 11, 13, 10, 4)), param('Friday', datetime(2012, 11, 9)), param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)), param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)), param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 25, 22, 17)), param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 12, 0)), param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)), param('Aug. 
9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)), param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)), param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)), param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)), param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)), param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)), param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)), param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)), # French dates param('11 Mai 2014', datetime(2014, 5, 11)), param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)), param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), #wrong param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)), param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)), param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)), param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)), param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)), param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)), # Spanish dates param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)), param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)), param('12 de junio del 2012', datetime(2012, 6, 12)), param('13 Ago, 2014', datetime(2014, 8, 13)), param('13 Septiembre, 2014', datetime(2014, 9, 13)), param('11 Marzo, 2014', datetime(2014, 3, 11)), param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)), param('Vi 17:15', datetime(2012, 11, 9, 17, 15)), # Dutch dates param('11 augustus 2014', datetime(2014, 8, 11)), param('14 januari 2014', datetime(2014, 1, 14)), param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)), # Italian dates param('16 giu 2014', datetime(2014, 6, 16)), param('26 gennaio 2014', datetime(2014, 1, 26)), param('Ven 18:23', datetime(2012, 11, 9, 18, 23)), # Portuguese dates param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)), param('13 Setembro, 
2014', datetime(2014, 9, 13)), param('Sab 3:03', datetime(2012, 11, 10, 3, 3)), # Russian dates param('10 мая', datetime(2012, 5, 10)), # forum.codenet.ru param('26 апреля', datetime(2012, 4, 26)), param('20 ноября 2013', datetime(2013, 11, 20)), param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)), param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)), param('09 августа 2012', datetime(2012, 8, 9, 0, 0)), param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)), param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)), param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)), param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)), param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)), # Turkish dates param('11 Ağustos, 2014', datetime(2014, 8, 11)), param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)), # forum.andronova.net param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)), param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)), # forum.ceviz.net # Romanian dates param('13 iunie 2013', datetime(2013, 6, 13)), param('14 aprilie 2014', datetime(2014, 4, 14)), param('18 martie 2012', datetime(2012, 3, 18)), param('S 14:14', datetime(2012, 11, 10, 14, 14)), param('12-Iun-2013', datetime(2013, 6, 12)), # German dates param('21. Dezember 2013', datetime(2013, 12, 21)), param('19. Februar 2012', datetime(2012, 2, 19)), param('26. Juli 2014', datetime(2014, 7, 26)), param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)), param('12-Mär-2014', datetime(2014, 3, 12)), param('Mit 13:14', datetime(2012, 11, 7, 13, 14)), # Czech dates param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)), param('13 Srpen, 2014', datetime(2014, 8, 13)), param('čtv 14. 
lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)), # Thai dates param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)), param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)), param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)), param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)), param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)), # Vietnamese dates param('Thứ năm', datetime(2012, 11, 8)), # Thursday param('Thứ sáu', datetime(2012, 11, 9)), # Friday param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)), # bpsosrcs.wordpress.com param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)), # Belarusian dates param('11 траўня', datetime(2012, 5, 11)), param('4 мая', datetime(2012, 5, 4)), param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)), param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)), param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)), # Ukrainian dates param('2015-кві-12', datetime(2015, 4, 12)), param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)), param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)), param('вів о 14:04', datetime(2012, 11, 6, 14, 4)), # Filipino dates param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)), param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)), param('2 hun', datetime(2012, 6, 2)), param('Lin 16:16', datetime(2012, 11, 11, 16, 16)), # Japanese dates param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)), param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)), # Numeric dates param('06-17-2014', datetime(2014, 6, 17)), param('13/03/2014', datetime(2014, 3, 13)), param('11. 12. 
2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)), # Miscellaneous dates param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)), param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)), param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)), param('1 сер 2015', datetime(2015, 8, 1, 0, 0)), ]) def test_dates_parsing_with_normalization(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser(settings={'NORMALIZE': True, 'RELATIVE_BASE': datetime(2012, 11, 13)}) self.when_date_is_parsed(normalize_unicode(date_string)) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)), param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 8, 8)), param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)), param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)), ]) def test_parsing_with_time_zones(self, date_string, expected): self.given_parser(settings={'TO_TIMEZONE': 'UTC'}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)), param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)), param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)), param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)), param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)), ]) def test_parsing_with_utc_offsets(self, date_string, expected): self.given_parser(settings={'TO_TIMEZONE': 'utc'}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_period_is('day') self.then_date_obj_exactly_is(expected) def test_empty_dates_string_is_not_parsed(self): self.when_date_is_parsed_by_date_parser('') self.then_error_was_raised(ValueError, ["Empty string"]) @parameterized.expand([ 
param('invalid date string', 'Unable to parse: h'), param('Aug 7, 2014Aug 7, 2014', 'Unable to parse: Aug'), param('24h ago', 'Unable to parse: h'), param('2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00', 'Unable to parser: 00:002015'), param('8 enero 2013 martes 7:03 AM EST 8 enero 2013 martes 7:03 AM EST', 'Unable to parse: 8') ]) def test_dates_not_parsed(self, date_string, message): self.when_date_is_parsed_by_date_parser(date_string) self.then_error_was_raised(ValueError, message) @parameterized.expand([ param('10 December', datetime(2014, 12, 10)), param('March', datetime(2014, 3, 15)), param('Friday', datetime(2015, 2, 13)), param('Monday', datetime(2015, 2, 9)), param('10:00PM', datetime(2015, 2, 14, 22, 0)), param('16:10', datetime(2015, 2, 14, 16, 10)), param('14:05', datetime(2015, 2, 15, 14, 5)), param('15 february 15:00', datetime(2015, 2, 15, 15, 0)), param('3/3/50', datetime(1950, 3, 3)), param('3/3/94', datetime(1994, 3, 3)), ]) def test_preferably_past_dates(self, date_string, expected): self.given_parser(settings={'PREFER_DATES_FROM': 'past', 'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) @parameterized.expand([ param('10 December', datetime(2015, 12, 10)), param('March', datetime(2015, 3, 15)), param('Friday', datetime(2015, 2, 20)), param('Monday', datetime(2015, 2, 16)), param('10:00PM', datetime(2015, 2, 15, 22, 0)), param('16:10', datetime(2015, 2, 15, 16, 10)), param('14:05', datetime(2015, 2, 16, 14, 5)), param('3/3/50', datetime(2050, 3, 3)), param('3/3/94', datetime(2094, 3, 3)), ]) def test_preferably_future_dates(self, date_string, expected): self.given_local_tz_offset(0) self.given_parser(settings={'PREFER_DATES_FROM': 'future', 'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}) self.when_date_is_parsed(date_string) self.then_date_was_parsed_by_date_parser() self.then_date_obj_exactly_is(expected) 
# NOTE(review): every definition below takes `self` and calls TestCase-style
# assertions, so these appear to be methods of a test class whose header lies
# before this chunk -- indent them to the class body when restoring layout.

# Incomplete date strings parsed with PREFER_DATES_FROM='current_period' and a
# fixed RELATIVE_BASE of 2015-02-15 15:30.
@parameterized.expand([
    param('10 December', datetime(2015, 12, 10)),
    param('March', datetime(2015, 3, 15)),
    param('Friday', datetime(2015, 2, 13)),
    param('10:00PM', datetime(2015, 2, 15, 22, 00)),
    param('16:10', datetime(2015, 2, 15, 16, 10)),
    param('14:05', datetime(2015, 2, 15, 14, 5)),
])
def test_dates_without_preference(self, date_string, expected):
    self.given_local_tz_offset(0)
    self.given_parser(settings={'PREFER_DATES_FROM': 'current_period',
                                'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

# Month/year strings without a day, parsed with PREFER_DAY_OF_MONTH='current';
# `today` is passed through as RELATIVE_BASE. The expected day equals today's
# day, or the month's last day when the month is shorter (e.g. 31 -> 28/29/30).
@parameterized.expand([
    param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
    param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
    param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
    param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
    param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
    param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
])
def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
    self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'current', 'RELATIVE_BASE': today})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

# Same inputs with PREFER_DAY_OF_MONTH='last': every expected value is the
# final day of the parsed month, whatever `today` is.
@parameterized.expand([
    param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
    param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
    param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
    param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
    param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
    param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
])
def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
    self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'last', 'RELATIVE_BASE': today})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

# Same inputs with PREFER_DAY_OF_MONTH='first': every expected value is day 1
# of the parsed month.
@parameterized.expand([
    param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
    param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
    param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
    param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
    param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
    param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
])
def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
    self.given_parser(settings={'PREFER_DAY_OF_MONTH': 'first', 'RELATIVE_BASE': today})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

# A string that already names the day ('24 April 2012') must parse to that day
# under each PREFER_DAY_OF_MONTH value.
@parameterized.expand([
    param(prefer_day_of_month='current'),
    param(prefer_day_of_month='last'),
    param(prefer_day_of_month='first'),
])
def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
    self.given_parser(settings={'PREFER_DAY_OF_MONTH': prefer_day_of_month,
                                'RELATIVE_BASE': datetime(2015, 2, 12)})
    self.when_date_is_parsed('24 April 2012')
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(datetime(2012, 4, 24))

# The trailing 'de' in the input is listed in SKIP_TOKENS; the date is still
# expected to parse to 2012-04-24.
def test_date_is_parsed_when_skip_tokens_are_supplied(self):
    self.given_parser(settings={'SKIP_TOKENS': ['de'],
                                'RELATIVE_BASE': datetime(2015, 2, 12)})
    self.when_date_is_parsed('24 April 2012 de')
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(datetime(2012, 4, 24))

# Day numbers beyond the month's length, fed straight to DateParser().parse.
# Two candidate messages are handed to then_error_was_raised: a fixed one and
# the per-case `message`.
# NOTE(review): then_error_was_raised is defined outside this chunk; presumably
# it accepts any one of the listed messages -- confirm.
@parameterized.expand([
    param('29 February 2015', 'day must be in 1..28'),
    param('32 January 2015', 'day must be in 1..31'),
    param('31 April 2015', 'day must be in 1..30'),
    param('31 June 2015', 'day must be in 1..30'),
    param('31 September 2015', 'day must be in 1..30'),
])
def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string, message):
    self.when_date_is_parsed_by_date_parser(date_string)
    self.then_error_was_raised(ValueError, ['day is out of range for month', message])

# The same ISO-8601 timestamp must parse identically whichever `languages`
# list (including an empty one) the parser is constructed with.
@parameterized.expand([
    param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
    param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
    param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
])
def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
    self.given_local_tz_offset(0)
    self.given_parser(languages=languages, settings={'PREFER_LANGUAGE_DATE_ORDER': False})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

# Checks both the parsed datetime and the reported 'period' ('day', 'month' or
# 'year') against a RELATIVE_BASE of 2015-02-15 15:30.
@parameterized.expand([
    param('10 December', expected=datetime(2015, 12, 10), period='day'),
    param('March', expected=datetime(2015, 3, 15), period='month'),
    param('April', expected=datetime(2015, 4, 15), period='month'),
    param('December', expected=datetime(2015, 12, 15), period='month'),
    param('Friday', expected=datetime(2015, 2, 13), period='day'),
    param('Monday', expected=datetime(2015, 2, 9), period='day'),
    param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
    param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
    param('2014', expected=datetime(2014, 2, 15), period='year'),
    param('2008', expected=datetime(2008, 2, 15), period='year'),
])
def test_extracted_period(self, date_string, expected=None, period=None):
    self.given_local_tz_offset(0)
    self.given_parser(settings={'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)
    self.then_period_is(period)

# Ambiguous numeric dates interpreted under each DATE_ORDER setting; several
# inputs are repeated with a different order and a different expected result.
@parameterized.expand([
    param('15-12-18 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YMD'),
    param('15-18-12 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YDM'),
    param('10-11-12 06:00', expected=datetime(2012, 10, 11, 6, 0), order='MDY'),
    param('10-11-12 06:00', expected=datetime(2011, 10, 12, 6, 0), order='MYD'),
    param('10-11-12 06:00', expected=datetime(2011, 12, 10, 6, 0), order='DYM'),
    param('15-12-18 06:00', expected=datetime(2018, 12, 15, 6, 0), order='DMY'),
    param('201508', expected=datetime(2015, 8, 20, 0, 0), order='DYM'),
    param('201508', expected=datetime(2020, 8, 15, 0, 0), order='YDM'),
    param('201108', expected=datetime(2008, 11, 20, 0, 0), order='DMY'),
    param('2016 july 13.', expected=datetime(2016, 7, 13, 0, 0), order='YMD'),
    param('16 july 13.', expected=datetime(2016, 7, 13, 0, 0), order='YMD'),
])
def test_order(self, date_string, expected=None, order=None):
    self.given_parser(settings={'DATE_ORDER': order})
    self.when_date_is_parsed(date_string)
    self.then_date_was_parsed_by_date_parser()
    self.then_date_obj_exactly_is(expected)

# Patches dateparser.timezone_parser.local_tz_offset with a timedelta of
# `offset` hours (3600 * offset seconds).
def given_local_tz_offset(self, offset):
    self.add_patch(
        patch.object(dateparser.timezone_parser,
                     'local_tz_offset',
                     new=timedelta(seconds=3600 * offset))
    )

# Builds self.parser = DateDataParser(*args, **kwds) after instrumenting
# date_parser: its `parse` is wrapped so each return value is also stored in
# self.date_result, and a Mock wrapping date_parser is installed as
# dateparser.date.date_parser.
def given_parser(self, *args, **kwds):
    def collecting_get_date_data(parse):
        @wraps(parse)
        def wrapped(*args, **kwargs):
            # Record the result so then_date_was_parsed_by_date_parser can
            # compare it with what DateDataParser returned.
            self.date_result = parse(*args, **kwargs)
            return self.date_result
        return wrapped
    self.add_patch(patch.object(date_parser,
                                'parse',
                                collecting_get_date_data(date_parser.parse)))

    self.date_parser = Mock(wraps=date_parser)
    self.add_patch(patch('dateparser.date.date_parser', new=self.date_parser))
    self.parser = DateDataParser(*args, **kwds)

# Parses through the DateDataParser built by given_parser; exceptions propagate.
def when_date_is_parsed(self, date_string):
    self.result = self.parser.get_date_data(date_string)

# Parses with a fresh DateParser directly; any exception is stored in
# self.error instead of being raised, for then_error_was_raised to inspect.
def when_date_is_parsed_by_date_parser(self, date_string):
    try:
        self.result = DateParser().parse(date_string)
    except Exception as error:
        self.error = error

# Asserts on the 'period' entry of the stored result.
def then_period_is(self, period):
    self.assertEqual(period, self.result['period'])

# Asserts on the 'date_obj' entry of the stored result.
def then_date_obj_exactly_is(self, expected):
    self.assertEqual(expected, self.result['date_obj'])

# self.date_result is only set by the wrapper installed in given_parser, so
# this checks it has been replaced and that its first element equals the
# date_obj in self.result.
# NOTE(review): assumes self.date_result starts as NotImplemented in a setUp
# outside this chunk -- confirm.
def then_date_was_parsed_by_date_parser(self):
    self.assertNotEqual(NotImplemented, self.date_result, "Date was not parsed")
    self.assertEqual(self.result['date_obj'], self.date_result[0])
# -*- coding: utf-8 -*- import scrapy import html2text from dateparser.date import DateDataParser import dateparser import logging dparser = DateDataParser(languages=['en'], try_previous_locales=False) # Set html2text configuration html2text.config.IGNORE_ANCHORS = True html2text.config.IGNORE_IMAGES = True html2text.config.IGNORE_EMPHASIS = True html2text.config.BODY_WIDTH = 0 class EastAfrican(scrapy.Spider): """ Spider for the local news site EastAfrican. Works the same for Business Daily (africa) --> contextsIds=539444 --> 74196 artikelen The citizen (Tanzania) --> contextsIds=1765046 --> 71839 artikelen Daily Nation kenya --> contextsIds=1148 --> 474712 artikelen The east african Kenya -->contextsIds=2456 52513 artikelen (vanaf deze site werken) Daily Monitor Uganda --> contextsIds=691150 --> 174375 artikelen """ name = "Kenya_EastAfrican_spider" download_delay = 2 def start_requests(self):
class TestFreshnessDateDataParser(BaseTestCase):
    """Tests for relative ("freshness") date strings parsed by DateDataParser.

    The clock is pinned to ``self.now`` (2014-09-01 10:30) by ``given_parser``,
    so every expected value is computed relative to that instant.
    """

    def setUp(self):
        super(TestFreshnessDateDataParser, self).setUp()
        self.now = datetime(2014, 9, 1, 10, 30)
        # NotImplemented is the "not set yet" placeholder the then_* helpers
        # compare against.
        self.date_string = NotImplemented
        self.parser = NotImplemented
        self.result = NotImplemented
        self.freshness_parser = NotImplemented
        self.freshness_result = NotImplemented
        self.date = NotImplemented
        self.time = NotImplemented

    @parameterized.expand([
        # English dates
        param('yesterday', ago={'days': 1}, period='day'),
        param('the day before yesterday', ago={'days': 2}, period='day'),
        param('today', ago={'days': 0}, period='day'),
        param('an hour ago', ago={'hours': 1}, period='day'),
        param('about an hour ago', ago={'hours': 1}, period='day'),
        param('a day ago', ago={'days': 1}, period='day'),
        param('a week ago', ago={'weeks': 1}, period='week'),
        param('one week ago', ago={'weeks': 1}, period='week'),
        param('2 hours ago', ago={'hours': 2}, period='day'),
        param('about 23 hours ago', ago={'hours': 23}, period='day'),
        param('1 year 2 months', ago={'years': 1, 'months': 2}, period='month'),
        param('1 year, 09 months,01 weeks', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 year 11 months', ago={'years': 1, 'months': 11}, period='month'),
        param('1 year 12 months', ago={'years': 1, 'months': 12}, period='month'),
        param('15 hr', ago={'hours': 15}, period='day'),
        param('15 hrs', ago={'hours': 15}, period='day'),
        param('2 min', ago={'minutes': 2}, period='day'),
        param('2 mins', ago={'minutes': 2}, period='day'),
        param('3 sec', ago={'seconds': 3}, period='day'),
        param('1000 years ago', ago={'years': 1000}, period='year'),
        param('2013 years ago', ago={'years': 2013}, period='year'),  # We've fixed .now in setUp
        param('5000 months ago', ago={'years': 416, 'months': 8}, period='month'),
        param('{} months ago'.format(2013 * 12 + 8), ago={'years': 2013, 'months': 8}, period='month'),
        param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('just now', ago={'seconds': 0}, period='day'),

        # French dates
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd’hui", ago={'days': 0}, period='day'),
        param("Aujourdʼhui", ago={'days': 0}, period='day'),
        param("Aujourdʻhui", ago={'days': 0}, period='day'),
        param("Aujourd՚hui", ago={'days': 0}, period='day'),
        param("Aujourdꞌhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd′hui", ago={'days': 0}, period='day'),
        param("Aujourd‵hui", ago={'days': 0}, period='day'),
        param("Aujourdʹhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Hier", ago={'days': 1}, period='day'),
        param("Avant-hier", ago={'days': 2}, period='day'),
        param('Il ya un jour', ago={'days': 1}, period='day'),
        param('Il ya une heure', ago={'hours': 1}, period='day'),
        param('Il ya 2 heures', ago={'hours': 2}, period='day'),
        param('Il ya environ 23 heures', ago={'hours': 23}, period='day'),
        param('1 an 2 mois', ago={'years': 1, 'months': 2}, period='month'),
        param('1 année, 09 mois, 01 semaines', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 an 11 mois', ago={'years': 1, 'months': 11}, period='month'),
        param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('Il y a 40 min', ago={'minutes': 40}, period='day'),

        # German dates
        param('Heute', ago={'days': 0}, period='day'),
        param('Gestern', ago={'days': 1}, period='day'),
        param('vorgestern', ago={'days': 2}, period='day'),
        param('vor einem Tag', ago={'days': 1}, period='day'),
        param('vor einer Stunden', ago={'hours': 1}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'),
        param('1 Jahr 2 Monate', ago={'years': 1, 'months': 2}, period='month'),
        param('1 Jahr, 09 Monate, 01 Wochen', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 Jahr 11 Monate', ago={'years': 1, 'months': 11}, period='month'),
        param('vor 29h', ago={'hours': 29}, period='day'),
        param('vor 29m', ago={'minutes': 29}, period='day'),
        param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Italian dates
        param('oggi', ago={'days': 0}, period='day'),
        param('ieri', ago={'days': 1}, period='day'),
        param('2 ore fa', ago={'hours': 2}, period='day'),
        param('circa 23 ore fa', ago={'hours': 23}, period='day'),
        param('1 anno 2 mesi', ago={'years': 1, 'months': 2}, period='month'),
        param('1 anno, 09 mesi, 01 settimane', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 anno 11 mesi', ago={'years': 1, 'months': 11}, period='month'),
        param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Portuguese dates
        param('ontem', ago={'days': 1}, period='day'),
        param('anteontem', ago={'days': 2}, period='day'),
        param('hoje', ago={'days': 0}, period='day'),
        param('uma hora atrás', ago={'hours': 1}, period='day'),
        param('1 segundo atrás', ago={'seconds': 1}, period='day'),
        param('um dia atrás', ago={'days': 1}, period='day'),
        param('uma semana atrás', ago={'weeks': 1}, period='week'),
        param('2 horas atrás', ago={'hours': 2}, period='day'),
        param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'),
        param('1 ano 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 ano, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 ano 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Turkish dates
        param('Dün', ago={'days': 1}, period='day'),
        param('Bugün', ago={'days': 0}, period='day'),
        param('2 saat önce', ago={'hours': 2}, period='day'),
        param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'),
        param('1 yıl 2 ay', ago={'years': 1, 'months': 2}, period='month'),
        param('1 yıl, 09 ay, 01 hafta', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 yıl 11 ay', ago={'years': 1, 'months': 11}, period='month'),
        param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Russian dates
        param('сегодня', ago={'days': 0}, period='day'),
        param('Вчера в', ago={'days': 1}, period='day'),
        param('вчера', ago={'days': 1}, period='day'),
        param('2 часа назад', ago={'hours': 2}, period='day'),
        param('час назад', ago={'hours': 1}, period='day'),
        param('минуту назад', ago={'minutes': 1}, period='day'),
        param('2 ч. 21 мин. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('около 23 часов назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяца', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцев, 01 недель', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 год 11 месяцев', ago={'years': 1, 'months': 11}, period='month'),
        param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Czech dates
        param('Dnes', ago={'days': 0}, period='day'),
        param('Včera', ago={'days': 1}, period='day'),
        param('Předevčírem', ago={'days': 2}, period='day'),
        param('Před 2 hodinami', ago={'hours': 2}, period='day'),
        param('před přibližně 23 hodin', ago={'hours': 23}, period='day'),
        param('1 rok 2 měsíce', ago={'years': 1, 'months': 2}, period='month'),
        param('1 rok, 09 měsíců, 01 týdnů', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 rok 11 měsíců', ago={'years': 1, 'months': 11}, period='month'),
        param('3 dny', ago={'days': 3}, period='day'),
        param('3 hodiny', ago={'hours': 3}, period='day'),
        param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Spanish dates
        param('anteayer', ago={'days': 2}, period='day'),
        param('ayer', ago={'days': 1}, period='day'),
        param('hoy', ago={'days': 0}, period='day'),
        param('hace una hora', ago={'hours': 1}, period='day'),
        param('Hace un día', ago={'days': 1}, period='day'),
        param('Hace una semana', ago={'weeks': 1}, period='week'),
        param('Hace 2 horas', ago={'hours': 2}, period='day'),
        param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'),
        param('1 año 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 año, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 año 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Chinese dates
        param('昨天', ago={'days': 1}, period='day'),
        param('前天', ago={'days': 2}, period='day'),
        param('2小时前', ago={'hours': 2}, period='day'),
        param('约23小时前', ago={'hours': 23}, period='day'),
        param('1年2个月', ago={'years': 1, 'months': 2}, period='month'),
        param('1年09月,01周', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1年11个月', ago={'years': 1, 'months': 11}, period='month'),
        param('1年,1月,1周,1天,1小时,1分钟前',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Arabic dates
        param('اليوم', ago={'days': 0}, period='day'),
        param('يوم أمس', ago={'days': 1}, period='day'),
        param('منذ يومين', ago={'days': 2}, period='day'),
        param('منذ 3 أيام', ago={'days': 3}, period='day'),
        param('منذ 21 أيام', ago={'days': 21}, period='day'),
        param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Thai dates
        param('วันนี้', ago={'days': 0}, period='day'),
        param('เมื่อวานนี้', ago={'days': 1}, period='day'),
        param('2 วัน', ago={'days': 2}, period='day'),
        param('2 ชั่วโมง', ago={'hours': 2}, period='day'),
        param('23 ชม.', ago={'hours': 23}, period='day'),
        param('2 สัปดาห์ 3 วัน', ago={'weeks': 2, 'days': 3}, period='day'),
        param('1 ปี 9 เดือน 1 สัปดาห์', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Vietnamese dates
        param('Hôm nay', ago={'days': 0}, period='day'),
        param('Hôm qua', ago={'days': 1}, period='day'),
        param('2 giờ', ago={'hours': 2}, period='day'),
        param('2 tuần 3 ngày', ago={'weeks': 2, 'days': 3}, period='day'),
        # following test unsupported, refer to discussion at:
        # http://github.com/scrapinghub/dateparser/issues/33
        #param('1 năm 1 tháng 1 tuần 1 ngày 1 giờ 1 chút',
        #      ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
        #      period='day'),

        # Belarusian dates
        param('сёння', ago={'days': 0}, period='day'),
        param('учора ў', ago={'days': 1}, period='day'),
        param('ўчора', ago={'days': 1}, period='day'),
        param('пазаўчора', ago={'days': 2}, period='day'),
        param('2 гадзіны таму назад', ago={'hours': 2}, period='day'),
        param('2 гадзіны таму', ago={'hours': 2}, period='day'),
        param('гадзіну назад', ago={'hours': 1}, period='day'),
        param('хвіліну таму', ago={'minutes': 1}, period='day'),
        param('2 гадзіны 21 хвіл. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('каля 23 гадзін назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяцы', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцаў, 01 тыдзень', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('2 гады 3 месяцы', ago={'years': 2, 'months': 3}, period='month'),
        param('5 гадоў, 1 месяц, 6 тыдняў, 3 дні, 5 гадзін 1 хвіліну і 3 секунды таму назад',
              ago={'years': 5, 'months': 1, 'weeks': 6, 'days': 3, 'hours': 5, 'minutes': 1, 'seconds': 3},
              period='day'),

        # Polish dates
        param("wczoraj", ago={'days': 1}, period='day'),
        param("1 godz. 2 minuty temu", ago={'hours': 1, 'minutes': 2}, period='day'),
        param("2 lata, 3 miesiące, 1 tydzień, 2 dni, 4 godziny, 15 minut i 25 sekund temu",
              ago={'years': 2, 'months': 3, 'weeks': 1, 'days': 2, 'hours': 4, 'minutes': 15, 'seconds': 25},
              period='day'),
        param("2 minuty temu", ago={'minutes': 2}, period='day'),
        param("15 minut temu", ago={'minutes': 15}, period='day'),
    ])
    def test_relative_dates(self, date_string, ago, period):
        """Each string must resolve to exactly ``self.now - relativedelta(**ago)``."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_date_obj_is_exactly_this_time_ago(ago)
        self.then_period_is(period)

    @parameterized.expand([
        param('15th of Aug, 2014 Diane Bennett'),
    ])
    def test_insane_dates(self, date_string):
        """Parsing must not raise, and must yield ``date_obj`` of None."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('5000 years ago'),
        param('2014 years ago'),  # We've fixed .now in setUp
        param('{} months ago'.format(2013 * 12 + 9)),
    ])
    def test_dates_not_supported_by_date_time(self, date_string):
        """Offsets reaching before year 1 must surface as a ValueError."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_raised(
            ValueError, ['year is out of range', "('year must be in 1..9999'"])

    @parameterized.expand([
        param('несколько секунд назад', boundary={'seconds': 45}, period='day'),
        param('há alguns segundos', boundary={'seconds': 45}, period='day'),
    ])
    def test_inexplicit_dates(self, date_string, boundary, period):
        """Vague strings must land strictly between ``now - boundary`` and ``now``."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_period_is(period)
        self.then_date_obj_is_between(self.now - timedelta(**boundary), self.now)

    # NOTE: in the four tests below the `date` / `time` parameters shadow the
    # imported classes inside the method body only; the decorators are
    # evaluated at class level and still see the classes.
    @parameterized.expand([
        param('Today at 9 pm', date(2014, 9, 1), time(21, 0)),
        param('Today at 11:20 am', date(2014, 9, 1), time(11, 20)),
        param('Yesterday 1:20 pm', date(2014, 8, 31), time(13, 20)),
        param('the day before yesterday 16:50', date(2014, 8, 30), time(16, 50)),
        param('2 Tage 18:50', date(2014, 8, 30), time(18, 50)),
        param('1 day ago at 2 PM', date(2014, 8, 31), time(14, 0)),
        param('Dnes v 12:40', date(2014, 9, 1), time(12, 40)),
        param('1 week ago at 12:00 am', date(2014, 8, 25), time(0, 0)),
    ])
    def test_freshness_date_with_time(self, date_string, date, time):
        """Relative day plus explicit clock time yields that date and time."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # when_date_is_parsed stores exceptions instead of raising; check here
        # so a parse failure is reported as such rather than as a TypeError on
        # the NotImplemented placeholder below.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', 'Asia/Karachi', date(2014, 9, 1), time(13, 30)),
        param('3 hours ago', 'Europe/Paris', date(2014, 9, 1), time(9, 30)),
        param('5 hours ago', 'US/Eastern', date(2014, 9, 1), time(1, 30)),  # date in DST range
        param('Today at 9 pm', 'Asia/Karachi', date(2014, 9, 1), time(21, 0)),  # time given, hence, no shift applies
    ])
    def test_freshness_date_with_pytz_timezones(self, date_string, timezone, date, time):
        """TIMEZONE given as a tz database name."""
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # Report a swallowed parsing exception before subscripting the result.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', 'PKT', date(2014, 9, 1), time(13, 30)),
        param('5 hours ago', 'EST', date(2014, 9, 1), time(0, 30)),
        param('3 hours ago', 'MET', date(2014, 9, 1), time(8, 30)),
    ])
    def test_freshness_date_with_timezone_abbreviations(
            self, date_string, timezone, date, time):
        """TIMEZONE given as an abbreviation."""
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # Report a swallowed parsing exception before subscripting the result.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', '+05:00', date(2014, 9, 1), time(13, 30)),
        param('5 hours ago', '-05:00', date(2014, 9, 1), time(0, 30)),
        param('3 hours ago', '+01:00', date(2014, 9, 1), time(8, 30)),
    ])
    def test_freshness_date_with_timezone_utc_offset(self, date_string, timezone, date, time):
        """TIMEZONE given as a ``+HH:MM`` / ``-HH:MM`` offset string."""
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # Report a swallowed parsing exception before subscripting the result.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    def given_date_string(self, date_string):
        """Remember the string that ``when_date_is_parsed`` will parse."""
        self.date_string = date_string

    def given_parser(self, settings=None):
        """Build ``self.parser`` with the freshness parser instrumented.

        ``freshness_date_parser.get_date_data`` is wrapped so its return value
        is also stored in ``self.freshness_result``; the module's ``datetime``
        is replaced by a mock whose ``utcnow`` returns ``self.now``; and a Mock
        wrapping the freshness parser (with ``now`` patched to ``self.now``) is
        installed as ``dateparser.date.freshness_date_parser``.
        """
        def collecting_get_date_data(get_date_data):
            @wraps(get_date_data)
            def wrapped(*args, **kwargs):
                self.freshness_result = get_date_data(*args, **kwargs)
                return self.freshness_result
            return wrapped
        self.add_patch(
            patch.object(
                freshness_date_parser, 'get_date_data',
                collecting_get_date_data(freshness_date_parser.get_date_data)))

        self.freshness_parser = Mock(wraps=freshness_date_parser)
        self.add_patch(patch.object(self.freshness_parser, 'now', self.now))

        dt_mock = Mock(wraps=dateparser.freshness_date_parser.datetime)
        dt_mock.utcnow = Mock(return_value=self.now)
        self.add_patch(
            patch('dateparser.freshness_date_parser.datetime', new=dt_mock))
        self.add_patch(
            patch('dateparser.date.freshness_date_parser', new=self.freshness_parser))
        self.parser = DateDataParser(settings=settings)

    def when_date_is_parsed(self):
        """Parse ``self.date_string``; store the result, or the exception in ``self.error``."""
        try:
            self.result = self.parser.get_date_data(self.date_string)
        except Exception as error:
            # Deliberately captured: then_error_was_raised /
            # then_error_was_not_raised inspect it afterwards.
            self.error = error

    def then_date_is(self, date):
        self.assertEqual(date, self.result['date_obj'].date())

    def then_time_is(self, time):
        self.assertEqual(time, self.result['date_obj'].time())

    def then_period_is(self, period):
        self.assertEqual(period, self.result['period'])

    def then_date_obj_is_between(self, low_boundary, high_boundary):
        """Assert ``low_boundary < date_obj < high_boundary`` (both exclusive)."""
        self.assertGreater(self.result['date_obj'], low_boundary)
        self.assertLess(self.result['date_obj'], high_boundary)

    def then_date_obj_is_exactly_this_time_ago(self, ago):
        self.assertEqual(self.now - relativedelta(**ago), self.result['date_obj'])

    def then_date_was_not_parsed(self):
        self.assertIsNone(self.result['date_obj'], '"%s" should not be parsed' % self.date_string)

    def then_date_was_parsed_by_freshness_parser(self):
        """The parser's result must be what the wrapped freshness parser returned."""
        self.assertEqual(self.result, self.freshness_result)

    def then_error_was_not_raised(self):
        """``self.error`` must still hold the NotImplemented placeholder."""
        self.assertEqual(NotImplemented, self.error)
class TestDateParser(BaseTestCase):
    """Given/when/then style tests for parsing absolute date strings.

    The ``given_*`` helpers build a ``DateDataParser`` (and install patches),
    the ``when_*`` helpers run the parse and store the outcome on ``self``,
    and the ``then_*`` helpers assert on the stored outcome.
    """

    def setUp(self):
        super(TestDateParser, self).setUp()
        # NotImplemented is used as a "not set yet" sentinel so that the
        # then_* helpers can tell whether a given/when step actually ran.
        self.parser = NotImplemented
        self.result = NotImplemented
        self.date_parser = NotImplemented
        self.date_result = NotImplemented

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('Tues 9th Aug, 2015', datetime(2015, 8, 9)),
        param('10:04am', datetime(2012, 11, 13, 10, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm', datetime(2014, 11, 25, 22, 17)),
        param('Wed Aug 05 12:00:00 2015', datetime(2015, 8, 5, 12, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        param('21 January 2012 13:11:23.678', datetime(2012, 1, 21, 13, 11, 23, 678000)),
        param('1/1/16 9:02:43.1', datetime(2016, 1, 1, 9, 2, 43, 100000)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com # NOQA
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 13, 14, 4)),
        # Tagalog dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
        param('2016020417:10', datetime(2016, 2, 4, 17, 10)),
        # Chinese dates
        param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)),
        param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)),
        param('2016年06月30日09时30分', datetime(2016, 6, 30, 9, 30)),
        param('2016年6月2911:30', datetime(2016, 6, 29, 11, 30)),
        param('2016年6月29', datetime(2016, 6, 29, 0, 0)),
        param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)),
        param('2016年9月14日晚8:00', datetime(2016, 9, 14, 20, 0)),
        # Bulgarian
        param('25 ян 2016', datetime(2016, 1, 25, 0, 0)),
        param('23 декември 2013 15:10:01', datetime(2013, 12, 23, 15, 10, 1)),
        # Bangla dates
        param('[সেপ্টেম্বর] 04, 2014.', datetime(2014, 9, 4)),
        param('মঙ্গলবার জুলাই 22, 2014', datetime(2014, 7, 22)),
        param('শুক্রবার', datetime(2012, 11, 9)),
        param('শুক্র, 12 ডিসেম্বর 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('1লা জানুয়ারী 2015', datetime(2015, 1, 1)),
        param('25শে মার্চ 1971', datetime(1971, 3, 25)),
        param('8ই মে 2002', datetime(2002, 5, 8)),
        param('10:06am ডিসেম্বর 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 ফেব্রুয়ারী 2013 সাল 09:10', datetime(2013, 2, 19, 9, 10)),
        # Hindi dates
        param('11 जुलाई 1994, 11:12', datetime(1994, 7, 11, 11, 12)),
        param('१७ अक्टूबर २०१८', datetime(2018, 10, 17, 0, 0)),
        param('12 जनवरी 1997 11:08 अपराह्न', datetime(1997, 1, 12, 23, 8)),
        # Georgian dates
        param('2011 წლის 17 მარტი, ოთხშაბათი', datetime(2011, 3, 17, 0, 0)),
        param('2015 წ. 12 ივნ, 15:34', datetime(2015, 6, 12, 15, 34))
    ])
    def test_dates_parsing(self, date_string, expected):
        # Parses each string with NORMALIZE off and a fixed RELATIVE_BASE of
        # 2012-11-13, so partial inputs (weekday only, time only, day+month)
        # resolve to deterministic dates.
        self.given_parser(settings={
            'NORMALIZE': False,
            'RELATIVE_BASE': datetime(2012, 11, 13)
        })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_stringified_datetime_should_parse_fine(self):
        # Round trip: str() of the datetime returned for 'today' must parse
        # back to the same datetime, microseconds included.
        expected_date = datetime(2012, 11, 13, 10, 15, 5, 330256)
        self.given_parser(settings={'RELATIVE_BASE': expected_date})
        date_string = str(self.parser.get_date_data('today')['date_obj'])
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected_date)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am', datetime(2012, 11, 13, 10, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm', datetime(2014, 11, 25, 22, 17)),
        param('Wed Aug 05 12:00:00 2015', datetime(2015, 8, 5, 12, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),  # wrong
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com # NOQA
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 13, 14, 4)),
        # Filipino dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Bangla dates
        param('[সেপ্টেম্বর] 04, 2014.', datetime(2014, 9, 4)),
        param('মঙ্গলবার জুলাই 22, 2014', datetime(2014, 7, 22)),
        param('শুক্রবার', datetime(2012, 11, 9)),
        param('শুক্র, 12 ডিসেম্বর 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('1লা জানুয়ারী 2015', datetime(2015, 1, 1)),
        param('25শে মার্চ 1971', datetime(1971, 3, 25)),
        param('8ই মে 2002', datetime(2002, 5, 8)),
        param('10:06am ডিসেম্বর 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 ফেব্রুয়ারী 2013 সাল 09:10', datetime(2013, 2, 19, 9, 10)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
        # Bulgarian
        param('24 ян 2015г.', datetime(2015, 1, 24, 0, 0)),
        # Hindi dates
        param('बुधवार 24 मई 1997 12:09', datetime(1997, 5, 24, 12, 9)),
        param('28 दिसम्बर 2000 , 01:09:08', datetime(2000, 12, 28, 1, 9, 8)),
        param('१६ दिसम्बर १९७१', datetime(1971, 12, 16, 0, 0)),
        param('सन् 1989 11 फ़रवरी 09:43', datetime(1989, 2, 11, 9, 43)),
    ])
    def test_dates_parsing_with_normalization(self, date_string, expected):
        # Same idea as test_dates_parsing, but with NORMALIZE on and the
        # input passed through normalize_unicode() before parsing.
        self.given_local_tz_offset(0)
        self.given_parser(settings={
            'NORMALIZE': True,
            'RELATIVE_BASE': datetime(2012, 11, 13)
        })
        self.when_date_is_parsed(normalize_unicode(date_string))
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 8, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)),
    ])
    def test_parsing_with_time_zones_and_converting_to_UTC(
            self, date_string, expected):
        # Expected values are the input wall-clock times shifted to UTC.
        self.given_parser(settings={'TO_TIMEZONE': 'UTC'})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is('UTC')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', 'EDT', datetime(2014, 9, 3, 16, 32)),
        param('17th October, 2034 @ 01:08 am PDT', 'PDT', datetime(2034, 10, 17, 1, 8)),
        param('15 May 2004 23:24 EDT', 'EDT', datetime(2004, 5, 15, 23, 24)),
        param('08/17/14 17:00 (PDT)', 'PDT', datetime(2014, 8, 17, 17, 0)),
        param('15 May 2004 16:10 -0400', '-04:00', datetime(2004, 5, 15, 16, 10)),
        param('1999-12-31 19:00:00 -0500', '-05:00', datetime(1999, 12, 31, 19, 0)),
        param('1999-12-31 19:00:00 +0500', '+05:00', datetime(1999, 12, 31, 19, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', '-07:00', datetime(2005, 9, 9, 13, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', '+00:00', datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_dateparser_should_return_tzaware_date_when_tz_info_present_in_date_string(
            self, date_string, timezone_str, expected):
        # No settings: the wall-clock time is kept as written and the parsed
        # tzinfo's repr must contain timezone_str.
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is(timezone_str)
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', 'UTC', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', 'UTC', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', 'UTC', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', 'GMT', datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', 'GMT', datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_dateparser_should_return_date_in_setting_timezone_if_timezone_info_present_both_in_datestring_and_given_in_settings(
            self, date_string, setting_timezone, expected):
        # The string carries an offset AND the TIMEZONE setting is given; the
        # expected values are expressed in the setting's timezone.
        self.given_parser(settings={'TIMEZONE': setting_timezone})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is(setting_timezone)
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)),
        param('Fri Sep 23 2016 10:34:51 GMT+0800 (CST)', datetime(2016, 9, 23, 2, 34, 51)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        # TO_TIMEZONE is given in lower case here ('utc'), yet the parsed
        # tzinfo is checked against 'UTC'.
        self.given_parser(settings={'TO_TIMEZONE': 'utc'})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is('UTC')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        self.when_date_is_parsed_by_date_parser('')
        self.then_error_was_raised(ValueError, ["Empty string"])

    @parameterized.expand([
        param('invalid date string', 'Unable to parse: invalid'),
        param('Aug 7, 2014Aug 7, 2014', 'Unable to parse: Aug'),
        param('24h ago', 'Unable to parse: h'),
        param('2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00', 'Unable to parse: 00:002015'),
        param(
            '8 enero 2013 martes 7:03 AM EST 8 enero 2013 martes 7:03 AM EST',
            'Unable to parse: 8'),
        param('12/09/18567', 'Unable to parse: 18567'),
    ])
    def test_dates_not_parsed(self, date_string, message):
        self.when_date_is_parsed_by_date_parser(date_string)
        # NOTE(review): the other callers in this class pass a *list* of
        # substrings as the second argument; here a bare string is passed.
        # then_error_was_raised is defined outside this class -- confirm it
        # does not iterate the string character by character, which would
        # make this check much weaker than intended.
        self.then_error_was_raised(ValueError, message)

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('Sunday', datetime(2015, 2, 8)),  # current day
        param('10:00PM', datetime(2015, 2, 14, 22, 0)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
        param('15 february 15:00', datetime(2015, 2, 15, 15, 0)),
        param('3/3/50', datetime(1950, 3, 3)),
        param('3/3/94', datetime(1994, 3, 3)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        # RELATIVE_BASE is 2015-02-15 15:30; ambiguous inputs must resolve
        # to a moment at or before that base.
        self.given_parser(
            settings={
                'PREFER_DATES_FROM': 'past',
                'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)
            })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Sunday', datetime(2015, 2, 22)),  # current day
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 0)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
        param('3/3/50', datetime(2050, 3, 3)),
        param('3/3/94', datetime(2094, 3, 3)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        # Mirror of the "past" test: same base, ambiguous inputs must
        # resolve to a moment after it.
        self.given_local_tz_offset(0)
        self.given_parser(
            settings={
                'PREFER_DATES_FROM': 'future',
                'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)
            })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Sunday', datetime(2015, 2, 15)),  # current weekday
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        # With 'current_period' the time-only inputs stay on the base day
        # (2015-02-15) whether they fall before or after 15:30.
        self.given_local_tz_offset(0)
        self.given_parser(
            settings={
                'PREFER_DATES_FROM': 'current_period',
                'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)
            })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_preferring_current_day_of_month(
            self, date_string, today=None, expected=None):
        # The day is taken from `today`; the cases with today on the 31st
        # expect the last valid day when the target month is shorter.
        self.given_parser(settings={
            'PREFER_DAY_OF_MONTH': 'current',
            'RELATIVE_BASE': today
        })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_preferring_last_day_of_month(
            self, date_string, today=None, expected=None):
        self.given_parser(settings={
            'PREFER_DAY_OF_MONTH': 'last',
            'RELATIVE_BASE': today
        })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_preferring_first_day_of_month(
            self, date_string, today=None, expected=None):
        self.given_parser(settings={
            'PREFER_DAY_OF_MONTH': 'first',
            'RELATIVE_BASE': today
        })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(
            self, prefer_day_of_month=None):
        # The input names its own day (24), so every preference must yield
        # the same date.
        self.given_parser(
            settings={
                'PREFER_DAY_OF_MONTH': prefer_day_of_month,
                'RELATIVE_BASE': datetime(2015, 2, 12)
            })
        self.when_date_is_parsed('24 April 2012')
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    def test_date_is_parsed_when_skip_tokens_are_supplied(self):
        # The trailing 'de' is listed in SKIP_TOKENS and must not prevent
        # the date from being parsed.
        self.given_parser(settings={
            'SKIP_TOKENS': ['de'],
            'RELATIVE_BASE': datetime(2015, 2, 12)
        })
        self.when_date_is_parsed('24 April 2012 de')
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015', 'day must be in 1..28'),
        param('32 January 2015', 'day must be in 1..31'),
        param('31 April 2015', 'day must be in 1..30'),
        param('31 June 2015', 'day must be in 1..30'),
        param('31 September 2015', 'day must be in 1..30'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
            self, date_string, message):
        self.when_date_is_parsed_by_date_parser(date_string)
        # Two alternative messages are supplied: a generic one and the
        # per-case one from the parameter table.
        self.then_error_was_raised(ValueError, ['day is out of range for month', message])

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        self.given_local_tz_offset(0)
        self.given_parser(languages=languages, settings={'PREFER_LOCALE_DATE_ORDER': False})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        # Drop tzinfo so the result can be compared with the naive expected
        # datetime.
        self.result['date_obj'] = self.result['date_obj'].replace(tzinfo=None)
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        # Epoch timestamps.
        param('1484823450', expected=datetime(2017, 1, 19, 10, 57, 30)),
        param('1436745600000', expected=datetime(2015, 7, 13, 0, 0)),
        param('1015673450', expected=datetime(2002, 3, 9, 11, 30, 50)),
        param('2016-09-23T02:54:32.845Z', expected=datetime(2016, 9, 23, 2, 54, 32, 845000,
                                                            tzinfo=StaticTzInfo('Z', timedelta(0))))
    ])
    def test_parse_timestamp(self, date_string, expected):
        # Unlike most tests here, this one does not assert that date_parser
        # produced the result -- only the final date_obj is checked.
        self.given_local_tz_offset(0)
        self.given_parser(settings={'TO_TIMEZONE': 'UTC'})
        self.when_date_is_parsed(date_string)
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        # 'period' reflects the granularity of the input: month-only inputs
        # give 'month', year-only inputs give 'year', everything else 'day'.
        self.given_local_tz_offset(0)
        self.given_parser(
            settings={'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('12th December 2019 19:00', expected=datetime(2019, 12, 12, 19, 0), period='time'),
        param('9 Jan 11 0:00', expected=datetime(2011, 1, 9, 0, 0), period='time'),
    ])
    def test_period_is_time_if_return_time_as_period_setting_applied_and_time_component_present(
            self, date_string, expected=None, period=None):
        self.given_parser(settings={'RETURN_TIME_AS_PERIOD': True})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('16:00', expected=datetime(2018, 12, 13, 16, 0), period='time'),
        param('Monday 7:15 AM', expected=datetime(2018, 12, 10, 7, 15), period='time'),
    ])
    def test_period_is_time_if_return_time_as_period_and_relative_base_settings_applied_and_time_component_present(
            self, date_string, expected=None, period=None):
        self.given_parser(
            settings={
                'RETURN_TIME_AS_PERIOD': True,
                'RELATIVE_BASE': datetime(2018, 12, 13, 15, 15)
            })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('12th March 2010', expected=datetime(2010, 3, 12, 0, 0), period='day'),
        param('21-12-19', expected=datetime(2019, 12, 21, 0, 0), period='day'),
    ])
    def test_period_is_day_if_return_time_as_period_setting_applied_and_time_component_is_not_present(
            self, date_string, expected=None, period=None):
        self.given_parser(settings={'RETURN_TIME_AS_PERIOD': True})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('16:00', expected=datetime(2017, 1, 10, 16, 0), period='day'),
        param('Monday 7:15 AM', expected=datetime(2017, 1, 9, 7, 15), period='day'),
    ])
    def test_period_is_day_if_return_time_as_period_setting_not_applied(
            self, date_string, expected=None, period=None):
        self.given_parser(
            settings={
                'RETURN_TIME_AS_PERIOD': False,
                'RELATIVE_BASE': datetime(2017, 1, 10, 15, 15)
            })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('15-12-18 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YMD'),
        param('15-18-12 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YDM'),
        param('10-11-12 06:00', expected=datetime(2012, 10, 11, 6, 0), order='MDY'),
        param('10-11-12 06:00', expected=datetime(2011, 10, 12, 6, 0), order='MYD'),
        param('10-11-12 06:00', expected=datetime(2011, 12, 10, 6, 0), order='DYM'),
        param('15-12-18 06:00', expected=datetime(2018, 12, 15, 6, 0), order='DMY'),
        param('12/09/08 04:23:15.567', expected=datetime(2008, 9, 12, 4, 23, 15, 567000), order='DMY'),
        param('10/9/1914 03:07:09.788888 pm', expected=datetime(1914, 10, 9, 15, 7, 9, 788888), order='MDY'),
        param('1-8-09 07:12:49 AM', expected=datetime(2009, 1, 8, 7, 12, 49), order='MDY'),
        param('201508', expected=datetime(2015, 8, 20, 0, 0), order='DYM'),
        param('201508', expected=datetime(2020, 8, 15, 0, 0), order='YDM'),
        param('201108', expected=datetime(2008, 11, 20, 0, 0), order='DMY'),
        param('2016 july 13.', expected=datetime(2016, 7, 13, 0, 0), order='YMD'),
        param('16 july 13.', expected=datetime(2016, 7, 13, 0, 0), order='YMD'),
        param('Sunday 23 May 1856 12:09:08 AM', expected=datetime(1856, 5, 23, 0, 9, 8), order='DMY'),
    ])
    def test_order(self, date_string, expected=None, order=None):
        # The same digit string is expected to map to different dates
        # depending solely on the DATE_ORDER setting.
        self.given_parser(settings={'DATE_ORDER': order})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10.1.2019', expected=datetime(2019, 1, 10, 0, 0),
              languages=['de'], settings={'PREFER_DAY_OF_MONTH': 'first'}),
        param('10.1.2019', expected=datetime(2019, 1, 10, 0, 0), languages=['de']),
        param('10.1.2019', expected=datetime(2019, 10, 1, 0, 0),
              settings={'DATE_ORDER': 'MDY'}),
        param('03/11/2559 05:13', datetime(2559, 3, 11, 5, 13), languages=["th"],
              settings={"DATE_ORDER": "MDY"}),
        param('03/15/2559 05:13', datetime(2559, 3, 15, 5, 13), languages=["th"],
              settings={"DATE_ORDER": "MDY"})
    ])
    def test_if_settings_provided_date_order_is_retained(
            self, date_string, expected=None, languages=None, settings=None):
        self.given_parser(languages=languages, settings=settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('::', None),
        param('..', None),
        param(' ', None),
        param('--', None),
        param('//', None),
        param('++', None),
    ])
    def test_parsing_strings_containing_only_separator_tokens(
            self, date_string, expected):
        # Separator-only input must yield date_obj None (with period 'day')
        # rather than raising out of get_date_data.
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset`` to ``offset`` hours."""
        self.add_patch(
            patch.object(dateparser.timezone_parser,
                         'local_tz_offset',
                         new=timedelta(seconds=3600 * offset)))

    def given_parser(self, *args, **kwds):
        """Create ``self.parser`` and instrument ``date_parser`` to record its output.

        All positional and keyword arguments are forwarded unchanged to
        ``DateDataParser``.
        """
        def collecting_get_date_data(parse):
            # Wrap `parse` so that every return value is also stored on the
            # test case as self.date_result.
            @wraps(parse)
            def wrapped(*args, **kwargs):
                self.date_result = parse(*args, **kwargs)
                return self.date_result
            return wrapped
        self.add_patch(
            patch.object(date_parser, 'parse',
                         collecting_get_date_data(date_parser.parse)))

        # Mock(wraps=...) delegates calls to the real date_parser.
        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(
            patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def when_date_is_parsed(self, date_string):
        """Parse via ``self.parser.get_date_data`` and store the result; errors propagate."""
        self.result = self.parser.get_date_data(date_string)

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse via a fresh ``DateParser``; any exception is stored on ``self.error``."""
        try:
            self.result = DateParser().parse(date_string)
        except Exception as error:
            # Captured instead of raised so a later then_* step can assert
            # on it.
            self.error = error

    def then_period_is(self, period):
        """Assert that ``self.result['period']`` equals ``period``."""
        self.assertEqual(period, self.result['period'])

    def then_date_obj_exactly_is(self, expected):
        """Assert that ``self.result['date_obj']`` equals ``expected``."""
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_parsed_by_date_parser(self):
        """Assert that the patched ``date_parser.parse`` ran and produced the result."""
        # date_result still holding the setUp sentinel means the wrapper
        # installed by given_parser was never invoked.
        self.assertNotEqual(NotImplemented, self.date_result, "Date was not parsed")
        self.assertEqual(self.result['date_obj'], self.date_result[0])

    def then_timezone_parsed_is(self, tzstr):
        """Assert ``tzstr`` occurs in the repr of the result's tzinfo, then strip tzinfo.

        Side effect: ``self.result['date_obj']`` is replaced by a tz-naive
        copy, so callers can follow up with ``then_date_obj_exactly_is``
        using a naive expected datetime.
        """
        self.assertTrue(tzstr in repr(self.result['date_obj'].tzinfo))
        self.result['date_obj'] = self.result['date_obj'].replace(tzinfo=None)
fmt = None try: fmt = params[0]['value'] except Exception, e: pass if fmt: try: d = parse(value) value = arrow.get(d, 'Asia/Shanghai').format(fmt) except Exception, e: try: value = arrow.get(value, 'Asia/Shanghai').format(fmt) except Exception, e: try: ddp = DateDataParser() ret = ddp.get_date_data(value) dateobj = ret['date_obj'] ts = arrow.get(dateobj).timestamp + 8 * 60 * 60 value = arrow.get(ts).format(fmt) except Exception, e: pass return value def substrFilter(value, params=None): p = p1 = None try: p = params[0]['value'] p1 = params[1]['value']