Exemple #1
0
	def __call__(self, values):
		"""Join the fragments, strip guillemets and return the date as text.

		The joined text is parsed with a fresh ``DateDataParser`` and the
		resulting date is rendered as "%B %d, %Y, %H:%M:%S".

		NOTE(review): when the parser yields no date (``date_obj`` is None)
		the ``strftime`` call below raises AttributeError.
		"""
		text = ''.join(values)
		# Remove the opening and closing guillemets before parsing.
		for guillemet in (u'\u00ab', u'\u00bb'):
			text = text.replace(guillemet, "")
		parsed = DateDataParser().get_date_data(text)['date_obj']
		return parsed.strftime("%B %d, %Y, %H:%M:%S")
Exemple #2
0
 def __init__(self, config=None):
     """Initialise reminder state and an English-only date parser.

     ``config`` is accepted but not read by this initializer.
     """
     # Starts out empty.
     self.active_reminder = {}
     # Captures whatever sits between square brackets.
     self.regex = r'\[(.*)\]'
     # Parser settings: prefer future dates, read dates as day-month-year.
     self.settings = dict(PREFER_DATES_FROM='future', DATE_ORDER='DMY')
     parser_options = {
         'languages': ['en'],
         'allow_redetect_language': False,
         'settings': self.settings,
     }
     self.parser = DateDataParser(**parser_options)
    def given_parser(self, settings=None):
        """Create ``self.parser`` with the freshness parser wrapped and its clock fixed.

        ``freshness_date_parser.get_date_data`` is patched so that every
        result is also stored in ``self.freshness_result``; the parser's
        ``now`` and the module's ``datetime.utcnow`` are replaced with
        ``self.now``.
        """
        def remember_result(original):
            @wraps(original)
            def recorder(*args, **kwargs):
                outcome = original(*args, **kwargs)
                self.freshness_result = outcome
                return outcome

            return recorder

        recording_patch = patch.object(
            freshness_date_parser, 'get_date_data',
            remember_result(freshness_date_parser.get_date_data))
        self.add_patch(recording_patch)

        # Wrapping mock standing in for the real freshness parser.
        self.freshness_parser = Mock(wraps=freshness_date_parser)
        now_patch = patch.object(self.freshness_parser, 'now', self.now)
        self.add_patch(now_patch)

        # Stand-in for the module's ``datetime`` whose ``utcnow`` is fixed.
        frozen_datetime = Mock(wraps=dateparser.freshness_date_parser.datetime)
        frozen_datetime.utcnow = Mock(return_value=self.now)
        self.add_patch(
            patch('dateparser.freshness_date_parser.datetime',
                  new=frozen_datetime))
        self.add_patch(
            patch('dateparser.date.freshness_date_parser',
                  new=self.freshness_parser))

        self.parser = DateDataParser(settings=settings)
Exemple #4
0
 def __call__(self, values):
     """Format every parseable value as a date string using ``self.format``.

     Values are first run through the parent processor.  ``dict`` and
     ``list`` items are passed through unchanged; every other item is
     parsed with ``DateDataParser`` and skipped when no date can be
     produced from it.

     Returns:
         A list holding the passed-through items and the formatted dates,
         in input order.
     """
     values = super(Date, self).__call__(values)
     dates = []
     for text in values:
         if isinstance(text, (dict, list)):
             # Structured values are kept as they are and must not fall
             # through to the date parser below.
             dates.append(text)
             continue
         try:
             # A fresh parser per value, exactly as before.
             date = DateDataParser().get_date_data(text)['date_obj']
             if date is None:
                 # Nothing recognisable as a date: skip the value instead
                 # of failing on ``None.strftime``.
                 continue
             dates.append(date.strftime(self.format))
         except ValueError:
             # Best effort: unparseable or unformattable values are dropped.
             pass
     return dates
Exemple #5
0
 def __call__(self, values):
     """Format every parseable value as a date string using ``self.format``.

     Values are first run through the parent processor.  ``dict`` and
     ``list`` items are passed through unchanged; every other item is
     parsed with ``DateDataParser`` and skipped when no date can be
     produced from it.

     Returns:
         A list holding the passed-through items and the formatted dates,
         in input order.
     """
     values = super(Date, self).__call__(values)
     dates = []
     for text in values:
         if isinstance(text, (dict, list)):
             # Structured values are kept as they are and must not fall
             # through to the date parser below.
             dates.append(text)
             continue
         try:
             # A fresh parser per value, exactly as before.
             date = DateDataParser().get_date_data(text)['date_obj']
             if date is None:
                 # Nothing recognisable as a date: skip the value instead
                 # of failing on ``None.strftime``.
                 continue
             dates.append(date.strftime(self.format))
         except ValueError:
             # Best effort: unparseable or unformattable values are dropped.
             pass
     return dates
Exemple #6
0
 def search_parse(self, shortname, text, settings):
     """Search ``text`` for date substrings and parse each one.

     For most languages the translated substrings are parsed with an
     English parser; for the languages listed below the original
     substrings are parsed with a parser for ``shortname`` itself.

     Returns:
         A list of ``(substring, date_obj)`` pairs.
     """
     translated, original = self.search(shortname, text, settings)
     # splitting done by spaces and some dictionary items contain spaces
     bad_translate_with_search = ['vi', 'hu']
     if shortname in bad_translate_with_search:
         parser_languages, to_parse = [shortname], original
     else:
         parser_languages, to_parse = ['en'], translated
     parser = DateDataParser(languages=parser_languages, settings=settings)
     parsed, substrings = self.parse_found_objects(
         parser=parser,
         to_parse=to_parse,
         original=original,
         translated=translated,
         settings=settings)
     # Replace the parser's settings with a default ``Settings`` object.
     parser._settings = Settings()
     date_objects = [entry['date_obj'] for entry in parsed]
     return list(zip(substrings, date_objects))
Exemple #7
0
 def search_parse(self, shortname, text, settings):
     """Search ``text`` for date substrings and parse each one.

     For most languages the translated substrings are parsed with an
     English parser; for the languages listed below the original
     substrings are parsed with a parser for ``shortname`` itself.

     Returns:
         A list of ``(substring, date_obj)`` pairs.
     """
     translated, original = self.search(shortname, text, settings)
     # splitting done by spaces and some dictionary items contain spaces
     bad_translate_with_search = ['vi', 'hu']
     if shortname in bad_translate_with_search:
         parser_languages, to_parse = [shortname], original
     else:
         parser_languages, to_parse = ['en'], translated
     parser = DateDataParser(languages=parser_languages, settings=settings)
     parsed, substrings = self.parse_found_objects(
         parser=parser,
         to_parse=to_parse,
         original=original,
         translated=translated,
         settings=settings)
     # Replace the parser's settings with a default ``Settings`` object.
     parser._settings = Settings()
     date_objects = [entry['date_obj'] for entry in parsed]
     return list(zip(substrings, date_objects))
    def given_parser(self):
        """Create ``self.parser`` with ``date_parser`` wrapped for inspection.

        ``date_parser.parse`` is patched so each result is also stored in
        ``self.date_result``, and ``dateparser.date.date_parser`` is replaced
        by a wrapping ``Mock`` kept in ``self.date_parser``.
        """
        def remember_result(original):
            @wraps(original)
            def recorder(date_string):
                outcome = original(date_string)
                self.date_result = outcome
                return outcome
            return recorder

        recording_patch = patch.object(
            date_parser, 'parse', remember_result(date_parser.parse))
        self.add_patch(recording_patch)

        self.date_parser = Mock(wraps=date_parser)
        module_patch = patch('dateparser.date.date_parser', new=self.date_parser)
        self.add_patch(module_patch)
        self.parser = DateDataParser()
Exemple #9
0
 def __call__(self, values):
     """Format every parseable value as a date string using ``self.format``.

     Values are first run through the parent processor.  ``dict`` and
     ``list`` items are passed through unchanged; every other item is
     parsed with a ``DateDataParser`` configured with
     ``PREFER_DAY_OF_MONTH='first'`` and skipped when it cannot be turned
     into a formatted date.

     Returns:
         A list holding the passed-through items and the formatted dates,
         in input order.
     """
     values = super(Date, self).__call__(values)
     dates = []
     for text in values:
         if isinstance(text, (dict, list)):
             # Structured values are kept as they are and must not fall
             # through to the date parser below.
             dates.append(text)
             continue
         try:
             date = DateDataParser(settings={
                 'PREFER_DAY_OF_MONTH': 'first'
             }).get_date_data(text)['date_obj']
             dates.append(date.strftime(self.format))
         except (ValueError, AttributeError):
             # Best effort: values that cannot be parsed (including a
             # ``date_obj`` of None) or formatted are dropped.
             pass
     return dates
    def given_parser(self):
        """Create ``self.parser`` with the freshness parser patched for the test.

        ``freshness_date_parser.now`` is replaced with ``self.now``, its
        ``get_date_data`` is patched so each result is also stored in
        ``self.freshness_result``, and ``dateparser.date.freshness_date_parser``
        is replaced by a wrapping ``Mock`` kept in ``self.freshness_parser``.
        """
        now_patch = patch.object(freshness_date_parser, 'now', self.now)
        self.add_patch(now_patch)

        def remember_result(original):
            @wraps(original)
            def recorder(date_string):
                outcome = original(date_string)
                self.freshness_result = outcome
                return outcome
            return recorder

        recording_patch = patch.object(
            freshness_date_parser,
            'get_date_data',
            remember_result(freshness_date_parser.get_date_data))
        self.add_patch(recording_patch)

        self.freshness_parser = Mock(wraps=freshness_date_parser)
        module_patch = patch('dateparser.date.freshness_date_parser',
                             new=self.freshness_parser)
        self.add_patch(module_patch)
        self.parser = DateDataParser()
Exemple #11
0
 def __init__(self, config=None):
     """Initialise reminder state and an English-only date parser.

     ``config`` is accepted but not read by this initializer.
     """
     # Starts out empty.
     self.active_reminder = {}
     # Captures whatever sits between square brackets.
     self.regex = r'\[(.*)\]'
     # Parser settings: prefer future dates, read dates as day-month-year.
     self.settings = dict(PREFER_DATES_FROM='future', DATE_ORDER='DMY')
     parser_options = {
         'languages': ['en'],
         'allow_redetect_language': False,
         'settings': self.settings,
     }
     self.parser = DateDataParser(**parser_options)
Exemple #12
0
 def parse_url(self, response):
     """Yield one ``{"url", "release_date"}`` dict per linked article title.

     The release date is read once from the page's citation-date span,
     parsed with ``DateDataParser`` and formatted as ``%Y-%m-%d``; the same
     value is attached to every yielded URL.
     """
     date_xpath = "//span[@class='highwire-cite-metadata-date highwire-cite-metadata']//text()"
     raw_date = response.xpath(date_xpath).extract_first()
     parsed_date = DateDataParser().get_date_data(raw_date)['date_obj']
     release_date = parsed_date.strftime("%Y-%m-%d")
     for link in response.xpath("//a[@class='highwire-cite-linked-title']"):
         href = link.xpath("./@href").extract_first()
         # Resolve the (possibly relative) link against the page URL.
         absolute_url = urlparse.urljoin(response.url, href)
         yield {"url": absolute_url, "release_date": release_date}
 def __init__(self, config):
     """Read the ``timeliness`` options and prepare the date parser.

     Raises:
         ValueError: if ``timeliness_strategy`` is missing or empty.
     """
     super(RelevancePeriodExtractor, self).__init__(config)
     options = self.config['timeliness']
     # (attribute name, fallback used when the option is absent)
     option_defaults = (
         ('extract_period', False),
         ('timeliness_strategy', []),
         ('date_order', 'DMY'),
         ('max_empty_relevance_period', 10),
     )
     for option_name, fallback in option_defaults:
         setattr(self, option_name, options.get(option_name, fallback))
     if not self.timeliness_strategy:
         raise ValueError('You need to provide values for "timeliness_strategy."')
     DataPackageChecker(self.config).check_database_completeness([self.source_file])
     parser_settings = {
         'RETURN_AS_TIMEZONE_AWARE': False,
         'PREFER_DAY_OF_MONTH': 'last',
         'PREFER_DATES_FROM': 'past',
         'SKIP_TOKENS': ['to'],
         'DATE_ORDER': self.date_order,
     }
     self.date_parser = DateDataParser(allow_redetect_language=True,
                                       settings=parser_settings)
def parse_html(html):
    """Parse data from string containing HTML.

    The week-period text returned by ``find_week_date`` must end with a date
    of the form "<digits> <word> <2-4 digits>"; that date is parsed as
    Turkish with day-month-year order and stored in the ``date`` column.

    Returns a DataFrame.

    Raises:
        Exception: if no trailing date can be extracted from the week-period
            text.
    """

    soup = BeautifulSoup(html, 'html.parser')
    df = find_data(soup)

    # append original date column
    original_week_period = find_week_date(soup)
    df['original_week_period'] = original_week_period

    # extract date value from text
    # (raw string: "\d", "\s" and "\w" are invalid escapes in a plain literal)
    date_search = re.search(r'(\d+\s+\w+\s+\d{2,4})$', original_week_period,
                            re.IGNORECASE)
    if not date_search:
        raise Exception(
            f"Couldn't extract date from date text {original_week_period}.")
    original_date_text = date_search.group(1)
    df['original_date_text'] = original_date_text

    # parse date
    ddp = DateDataParser(languages=['tr'], settings={'DATE_ORDER': 'DMY'})
    df['date'] = ddp.get_date_data(original_date_text).date_obj

    # parse numeric 'vaka sayısı' figure using TR locale
    # NOTE(review): this changes LC_NUMERIC for the whole process and is not
    # restored afterwards — confirm callers are fine with that.
    locale.setlocale(locale.LC_NUMERIC, 'tr_TR')
    df['data-detay'] = df['data-detay'].apply(locale.atof)

    # remove dash from column names
    df = df.rename(columns={
        'data-adi': 'data_adi',
        'data-detay': 'data_detay'
    })

    return df[[
        'data_adi', 'data_detay', 'original_week_period', 'original_date_text',
        'date'
    ]]
    def given_parser(self, *args, **kwds):
        """Create ``self.parser`` with ``date_parser`` wrapped for inspection.

        ``date_parser.parse`` is patched so each result is also stored in
        ``self.date_result``, and ``dateparser.date.date_parser`` is replaced
        by a wrapping ``Mock`` kept in ``self.date_parser``.  All arguments
        are forwarded to ``DateDataParser``.
        """
        def remember_result(original):
            @wraps(original)
            def recorder(*args, **kwargs):
                outcome = original(*args, **kwargs)
                self.date_result = outcome
                return outcome

            return recorder

        recording_patch = patch.object(
            date_parser, "parse", remember_result(date_parser.parse))
        self.add_patch(recording_patch)

        self.date_parser = Mock(wraps=date_parser)
        module_patch = patch("dateparser.date.date_parser", new=self.date_parser)
        self.add_patch(module_patch)
        self.parser = DateDataParser(*args, **kwds)
    def given_parser(self):
        """Create ``self.parser`` with ``date_parser`` wrapped for inspection.

        ``date_parser.parse`` is patched so each result is also stored in
        ``self.date_result``, and ``dateparser.date.date_parser`` is replaced
        by a wrapping ``Mock`` kept in ``self.date_parser``.
        """
        def remember_result(original):
            @wraps(original)
            def recorder(date_string):
                outcome = original(date_string)
                self.date_result = outcome
                return outcome
            return recorder

        recording_patch = patch.object(
            date_parser, 'parse', remember_result(date_parser.parse))
        self.add_patch(recording_patch)

        self.date_parser = Mock(wraps=date_parser)
        module_patch = patch('dateparser.date.date_parser', new=self.date_parser)
        self.add_patch(module_patch)
        self.parser = DateDataParser()
Exemple #17
0
class Parser():
    """Convert a scraped date description into a calendar date.

    The description is parsed with ``DateDataParser``; its distance from the
    current time is then subtracted from the scrape timestamp.
    """

    def __init__(self, languages=None):
        """Create the underlying parser.

        Args:
            languages: language codes handed to ``DateDataParser``;
                defaults to ``['en']``.
        """
        # A ``None`` default avoids sharing one mutable list between calls.
        if languages is None:
            languages = ['en']
        self.parser = DateDataParser(languages=languages)
        # Per-instance cache.  Decorating the method itself with lru_cache
        # would key every entry on ``self`` and keep each instance alive in
        # one class-wide cache.
        self._parse = lru_cache(maxsize=256)(self._parse_uncached)

    def _parse_uncached(self, s):
        """Return the ``date_obj`` parsed from ``s`` (None when unparseable)."""
        # Item access works both for a plain dict result and for the
        # ``DateData`` object returned by newer dateparser releases.
        return self.parser.get_date_data(s)['date_obj']

    def parse_date(self, d, t):
        """Shift the scrape time ``t`` back by the age described by ``d``.

        Args:
            d: date description to parse.
            t: ISO-format timestamp of the scrape.

        Returns:
            A ``(date, truncated)`` tuple.  ``truncated`` is True when the
            age could not be computed from ``d`` and a fixed 30-day age was
            used instead.
        """
        try:
            delta = datetime.now() - self._parse(d)
            truncated = False
        except TypeError:
            # Subtraction failed (e.g. ``_parse`` returned None): fall back
            # to a fixed age.
            delta = timedelta(days=30)
            truncated = True
        scraped = datetime.fromisoformat(t)
        return (scraped - delta).date(), truncated
    def given_parser(self):
        """Create ``self.parser`` with the freshness parser wrapped and ``utcnow`` fixed.

        ``freshness_date_parser.get_date_data`` is patched so each result is
        also stored in ``self.freshness_result``; the ``datetime`` used by
        ``dateparser.freshness_date_parser`` is replaced by a mock whose
        ``utcnow`` returns ``self.now``.
        """
        def remember_result(original):
            @wraps(original)
            def recorder(date_string):
                outcome = original(date_string)
                self.freshness_result = outcome
                return outcome
            return recorder

        recording_patch = patch.object(
            freshness_date_parser,
            'get_date_data',
            remember_result(freshness_date_parser.get_date_data))
        self.add_patch(recording_patch)

        self.freshness_parser = Mock(wraps=freshness_date_parser)

        # Stand-in for the module's ``datetime`` whose ``utcnow`` is fixed.
        frozen_datetime = Mock(wraps=dateparser.freshness_date_parser.datetime)
        frozen_datetime.utcnow = Mock(return_value=self.now)
        datetime_patch = patch('dateparser.freshness_date_parser.datetime',
                               new=frozen_datetime)
        self.add_patch(datetime_patch)
        module_patch = patch('dateparser.date.freshness_date_parser',
                             new=self.freshness_parser)
        self.add_patch(module_patch)
        self.parser = DateDataParser()
    def _get_date_delta(self, date_val: str):
        """Return ``(days, date)`` for ``date_val``.

        ``days`` is derived from the FF1-encrypted "tweak" value (or from a
        fixed all-zero tweak when no tweak field is referenced), reduced
        modulo ``self.range`` and offset by ``self.lower_range_days``.
        ``date`` is ``date_val`` parsed with ``self.format`` when that is set
        and matches, otherwise with a strict ``DateDataParser``.
        """
        tweak_ref = self._get_field_ref("tweak")
        if not tweak_ref:
            encrypted = self._fpe_ff1.encrypt("0000000000000000".encode())
        else:
            cleaned, _ = fpe_base.cleanup_value(tweak_ref.value, tweak_ref.radix)
            # Left-pad the cleaned tweak with zeros to 16 characters.
            padded = str(cleaned).zfill(16)
            encrypted = self._fpe_ff1.encrypt(padded.encode(), tweak_ref.radix)

        decoded = self._fpe_ff1.decode(encrypted)
        days = int(decoded) % self.range + self.lower_range_days

        parsed = None
        if self.format:
            try:
                parsed = datetime.strptime(date_val, self.format).date()
            except ValueError:
                # The explicit format did not match; use the generic parser.
                pass
        if not parsed:
            date_data = DateDataParser(settings={
                "STRICT_PARSING": True
            }).get_date_data(date_val)
            parsed = date_data["date_obj"].date()
        return days, parsed
Exemple #20
0
 def when_date_is_parsed_using_with_datedataparser(self, dt_string):
     """Parse ``dt_string`` with a language-detecting parser and keep the date in ``self.result``."""
     parser = DateDataParser(detect_languages_function=detect_languages)
     date_data = parser.get_date_data(dt_string)
     self.result = date_data["date_obj"]
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Reset every per-test attribute to the ``NotImplemented`` placeholder."""
        super(TestDateParser, self).setUp()
        placeholders = (
            'date_string',
            'parser',
            'result',
            'date_parser',
            'date_result',
        )
        for attribute in placeholders:
            setattr(self, attribute, NotImplemented)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 \xe0 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er F\xe9vrier \xe0 21:24',
              datetime(2012, 2, 1, 21, 24)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52',
              datetime(2014, 6, 10, 14, 52)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        # Turkish dates
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11,
                                                 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20,
                                                56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM',
              datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 13:13 AM', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14',
              datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        # Miscellaneous
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm',
              datetime(2014, 12, 10, 23, 2, 21)),
        param('vendredi, d\xe9cembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 D\xe9c 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 D\xe9cembre 2014 \xe0 09:00',
              datetime(2014, 12, 11, 9, 0)),
        param('f\xe9v 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
    ])
    def test_dates_parsing(self, date_string, expected):
        """Parse ``date_string`` with "now" fixed at 2012-11-13 and no local offset.

        The result must come from the date parser, have period ``'day'`` and
        equal ``expected``.
        """
        self.given_utcnow(datetime(2012, 11, 13))  # Tuesday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT',
              datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        """Parse ``date_string`` with the local offset patched to +1 hour.

        The result must come from the date parser, have period ``'day'`` and
        equal ``expected``.
        """
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(''),
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
    ])
    def test_dates_not_parsed(self, date_string):
        """Check that ``date_string`` yields a ``date_obj`` of None."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        """Parse ``date_string`` with ``PREFER_DATES_FROM`` set to ``'past'``.

        "Now" is fixed at 2015-02-15 15:30 with no local offset; the result
        must equal ``expected``.
        """
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        """Parse ``date_string`` with ``PREFER_DATES_FROM`` set to ``'future'``.

        "Now" is fixed at 2015-02-15 15:30 with no local offset; the result
        must equal ``expected``.
        """
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        """Parse ``date_string`` with ``PREFER_DATES_FROM`` set to ``'current_period'``.

        "Now" is fixed at 2015-02-15 15:30 with no local offset; the result
        must equal ``expected``.
        """
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 31),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 25)),
        param('April 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 28)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(
            self, date_string, today=None, expected=None):
        """Parse a day-less date with ``PREFER_DAY_OF_MONTH`` set to ``'current'``.

        "Now" is fixed at ``today``; the result must equal ``expected``.
        """
        self.given_configuration('PREFER_DAY_OF_MONTH', 'current')
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 1),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 1),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 31)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 30)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(
            self, date_string, today=None, expected=None):
        """Parse a day-less date with ``PREFER_DAY_OF_MONTH`` set to ``'last'``.

        "Now" is fixed at ``today``; the result must equal ``expected``.
        """
        self.given_configuration('PREFER_DAY_OF_MONTH', 'last')
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 8),
              expected=datetime(2015, 2, 1)),
        param('February 2012',
              today=datetime(2015, 1, 7),
              expected=datetime(2012, 2, 1)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 1)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 1)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 1)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(
            self, date_string, today=None, expected=None):
        """Parse a day-less date with ``PREFER_DAY_OF_MONTH`` set to ``'first'``.

        "Now" is fixed at ``today``; the result must equal ``expected``.
        """
        self.given_configuration('PREFER_DAY_OF_MONTH', 'first')
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(
            self, prefer_day_of_month=None):
        """Check that '24 April 2012' parses to 2012-04-24 for every ``PREFER_DAY_OF_MONTH`` value."""
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(datetime(2015, 2, 12))
        self.given_parser()
        self.given_date_string('24 April 2012')
        self.when_date_is_parsed()
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
            self, date_string):
        """Check that ``DateParser().parse`` raises ValueError matching 'Day not in range for month'."""
        # NOTE(review): ``assertRaisesRegexp`` is a deprecated alias of
        # ``assertRaisesRegex`` on Python 3 — confirm which name the
        # supported interpreters provide before renaming.
        with self.assertRaisesRegexp(ValueError, 'Day not in range for month'):
            DateParser().parse(date_string)

    def given_utcnow(self, now):
        """Patch ``dateparser.date_parser.datetime`` so ``utcnow()`` returns ``now``."""
        frozen_datetime = Mock(wraps=datetime)
        frozen_datetime.utcnow = Mock(return_value=now)
        clock_patch = patch('dateparser.date_parser.datetime',
                            new=frozen_datetime)
        self.add_patch(clock_patch)

    def given_local_tz_offset(self, offset):
        """Patch ``timezone_parser.local_tz_offset`` to ``offset`` hours."""
        timezone_module = dateparser.timezone_parser
        offset_delta = timedelta(seconds=3600 * offset)
        offset_patch = patch.object(timezone_module, 'local_tz_offset',
                                    new=offset_delta)
        self.add_patch(offset_patch)

    def given_date_string(self, date_string):
        """Store the string that ``when_date_is_parsed`` will parse."""
        self.date_string = date_string

    def given_parser(self):
        """Create ``self.parser`` with ``date_parser`` wrapped for inspection.

        ``date_parser.parse`` is patched so each result is also stored in
        ``self.date_result``, and ``dateparser.date.date_parser`` is replaced
        by a wrapping ``Mock`` kept in ``self.date_parser``.
        """
        def remember_result(original):
            @wraps(original)
            def recorder(date_string):
                outcome = original(date_string)
                self.date_result = outcome
                return outcome

            return recorder

        recording_patch = patch.object(
            date_parser, 'parse', remember_result(date_parser.parse))
        self.add_patch(recording_patch)

        self.date_parser = Mock(wraps=date_parser)
        module_patch = patch('dateparser.date.date_parser',
                             new=self.date_parser)
        self.add_patch(module_patch)
        self.parser = DateDataParser()

    def given_configuration(self, key, value):
        """Patch attribute ``key`` of ``settings`` to ``value`` via ``add_patch``."""
        self.add_patch(patch.object(settings, key, new=value))

    def when_date_is_parsed(self):
        """Parse ``self.date_string`` with ``self.parser`` and keep the outcome in ``self.result``."""
        self.result = self.parser.get_date_data(self.date_string)

    def then_period_is(self, period):
        """Assert that the parsed result's ``'period'`` equals ``period``."""
        self.assertEqual(period, self.result['period'])

    def then_date_obj_exactly_is(self, expected):
        """Assert that the parsed result's ``'date_obj'`` equals ``expected``."""
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_not_parsed(self):
        """Assert that parsing produced no date (``'date_obj'`` is None)."""
        parsed = self.result['date_obj']
        failure_message = '"%s" should not be parsed' % self.date_string
        self.assertIsNone(parsed, failure_message)

    def then_date_was_parsed_by_date_parser(self):
        """Assert that the result's ``'date_obj'`` equals the value recorded from ``date_parser.parse``."""
        self.assertEqual(self.result['date_obj'], self.date_result)
Exemple #22
0
 def __init__(self, host, user, password):
     """Connect to JIRA with basic auth and prepare a Portuguese/English date parser."""
     credentials = (user, password)
     self.jira = JIRA(host, basic_auth=credentials, max_retries=1)
     parser_languages = ['pt', 'en']
     self.ddp = DateDataParser(languages=parser_languages)
Exemple #23
0
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Reset the per-test attributes to the ``NotImplemented`` sentinel."""
        super(TestDateParser, self).setUp()
        # One chained assignment: all four slots start out as "not set yet".
        self.parser = self.result = self.date_parser = self.date_result = \
            NotImplemented

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm',
              datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52',
              datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11,
                                                 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20,
                                                56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM',
              datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14',
              datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін',
              datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 6, 14, 4)),
        # Tagalog dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
        # Chinese dates
        param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)),
        param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)),
        param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)),
        # Greek dates
        param('19 Ιουνίου 2016', datetime(2016, 6, 19, 0, 0)),
        param('8 Ιανουαρίου 2015', datetime(2015, 1, 8, 0, 0)),
        param('4 Μαρτίου 2015', datetime(2015, 3, 4, 0, 0)),
        param('29 Δεκεμβρίου 2015', datetime(2015, 12, 29, 0, 0)),
        param('4 Απριλίου 2015', datetime(2015, 4, 4, 0, 0)),
        param('19 Φεβρουαρίου 2015', datetime(2015, 2, 19, 0, 0)),
        param('16 Μαΐου 2015', datetime(2015, 5, 16, 0, 0)),
        param('21 Αυγούστου 2014', datetime(2014, 8, 21, 0, 0)),
        param('30 Σεπτεμβρίου 2014', datetime(2014, 9, 30, 0, 0)),
        param('24 Οκτωβρίου 2014', datetime(2014, 10, 24, 0, 0)),
        param('1 Ιουλίου 2014', datetime(2014, 7, 1, 0, 0)),
        param('27 Νοεμβρίου 2014', datetime(2014, 11, 27, 0, 0)),
        # Arabic dates
        param('١٦ أكتوبر، ٢٠١٥', datetime(2015, 10, 16, 0, 0)),
        param('١٦ يونيو، ٢٠١٦', datetime(2016, 6, 16, 0, 0)),
        # Korean
        param('2016년 6월 18일', datetime(2016, 6, 18, 0, 0)),
        # Hindi
        param('27 अगस्त 2014', datetime(2014, 8, 27, 0, 0)),
        param('8 दिसंबर 2014', datetime(2014, 12, 8, 0, 0)),
        param('23 फ़रवरी 2014', datetime(2014, 2, 23, 0, 0)),
        param('10 सितंबर 2014', datetime(2014, 9, 10, 0, 0)),
        param('11 अक्तूबर 2014', datetime(2014, 10, 11, 0, 0)),
        param('12 नवंबर 2014', datetime(2014, 11, 12, 0, 0)),
        param('16 जनवरी 2014', datetime(2014, 1, 16, 0, 0)),
        param('1 जून 2014', datetime(2014, 6, 1, 0, 0)),
        param('25 अप्रैल 2014', datetime(2014, 4, 25, 0, 0)),
        param('19 मई 2015', datetime(2015, 5, 19, 0, 0)),
        param('2 मार्च 2015', datetime(2015, 3, 2, 0, 0)),
        param('1 जुलाई 2015', datetime(2015, 7, 1, 0, 0)),
        # Swedish
        param('27 augusti 2014', datetime(2014, 8, 27, 0, 0)),
        param('7 mars 2011', datetime(2011, 3, 7, 0, 0)),
        param('30 januari 2015', datetime(2015, 1, 30, 0, 0)),
        param('28 februari 2015', datetime(2015, 2, 28, 0, 0)),
        # Norwegian
        param('5. januar 2014', datetime(2014, 1, 5, 0, 0)),
        param('12. februar 2014', datetime(2014, 2, 12, 0, 0)),
        param('12. mars 2013', datetime(2013, 3, 12, 0, 0)),
        param('4. april 2014', datetime(2014, 4, 4, 0, 0)),
        param('8. mai 2016', datetime(2016, 5, 8, 0, 0)),
        param('11. juni 2012', datetime(2012, 6, 11, 0, 0)),
        param('29. juli 2012', datetime(2012, 7, 29, 0, 0)),
        param('18. august 2012', datetime(2012, 8, 18, 0, 0)),
        param('1. september 2012', datetime(2012, 9, 1, 0, 0)),
        param('6. oktober 2014', datetime(2014, 10, 6, 0, 0)),
        param('28. desember 2014', datetime(2014, 12, 28, 0, 0)),
    ])
    def test_dates_parsing(self, date_string, expected):
        # utcnow is frozen at 2012-11-13 with a zero local offset and
        # NORMALIZE switched off; each case must come back with period 'day'.
        frozen_now = datetime(2012, 11, 13)  # Tuesday
        parser_settings = {'NORMALIZE': False}

        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm',
              datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14,
                                                  40)),  #wrong
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52',
              datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11,
                                                 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20,
                                                56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM',
              datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14',
              datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін',
              datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 6, 14, 4)),
        # Filipino dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
    ])
    def test_dates_parsing_with_normalization(self, date_string, expected):
        # utcnow is frozen at 2012-11-13 with a zero local offset; NORMALIZE
        # is on and the input goes through normalize_unicode() before parsing.
        frozen_now = datetime(2012, 11, 13)  # Tuesday
        parser_settings = {'NORMALIZE': True}

        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser(settings=parser_settings)

        normalized_input = normalize_unicode(date_string)
        self.when_date_is_parsed(normalized_input)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)),
        param('17th October, 2034 @ 01:08 am PDT',
              datetime(2034, 10, 17, 8, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        # Local offset is patched to +1 hour; the parser uses its defaults.
        local_offset_hours = +1

        self.given_local_tz_offset(local_offset_hours)
        self.given_parser()

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700',
              datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000',
              datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        # Local offset is patched to zero; the parser uses its defaults.
        local_offset_hours = 0

        self.given_local_tz_offset(local_offset_hours)
        self.given_parser()

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        # Feeds '' straight to DateParser and hands the expected error type
        # and message list to then_error_was_raised (defined outside this
        # chunk).
        expected_messages = ["Empty string"]
        self.when_date_is_parsed_by_date_parser('')
        self.then_error_was_raised(ValueError, expected_messages)

    @parameterized.expand([
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
        param('24h ago'),
    ])
    def test_dates_not_parsed(self, date_string):
        # Feeds the string straight to DateParser and hands the expected
        # error type and message list to then_error_was_raised (defined
        # outside this chunk).
        expected_messages = ["unknown string format"]
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, expected_messages)

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        # utcnow is frozen at 2015-02-15 15:30 with a zero local offset and
        # PREFER_DATES_FROM set to 'past'.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday
        parser_settings = {'PREFER_DATES_FROM': 'past'}

        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        # utcnow is frozen at 2015-02-15 15:30 with a zero local offset and
        # PREFER_DATES_FROM set to 'future'.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday
        parser_settings = {'PREFER_DATES_FROM': 'future'}

        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        # utcnow is frozen at 2015-02-15 15:30 with a zero local offset and
        # PREFER_DATES_FROM set to 'current_period'.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday
        parser_settings = {'PREFER_DATES_FROM': 'current_period'}

        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 31),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 25)),
        param('April 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 28)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(
            self, date_string, today=None, expected=None):
        # utcnow is frozen at ``today``; PREFER_DAY_OF_MONTH is 'current'.
        parser_settings = {'PREFER_DAY_OF_MONTH': 'current'}

        self.given_utcnow(today)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 1),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 1),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 31)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 30)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(
            self, date_string, today=None, expected=None):
        # utcnow is frozen at ``today``; PREFER_DAY_OF_MONTH is 'last'.
        parser_settings = {'PREFER_DAY_OF_MONTH': 'last'}

        self.given_utcnow(today)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 8),
              expected=datetime(2015, 2, 1)),
        param('February 2012',
              today=datetime(2015, 1, 7),
              expected=datetime(2012, 2, 1)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 1)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 1)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 1)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(
            self, date_string, today=None, expected=None):
        # utcnow is frozen at ``today``; PREFER_DAY_OF_MONTH is 'first'.
        parser_settings = {'PREFER_DAY_OF_MONTH': 'first'}

        self.given_utcnow(today)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(
            self, prefer_day_of_month=None):
        # Whatever PREFER_DAY_OF_MONTH is set to, a string that names the day
        # ('24 April 2012') must come back as exactly 2012-04-24.
        frozen_now = datetime(2015, 2, 12)
        parser_settings = {'PREFER_DAY_OF_MONTH': prefer_day_of_month}
        explicit_date = datetime(2012, 4, 24)

        self.given_utcnow(frozen_now)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed('24 April 2012')

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(explicit_date)

    def test_date_is_parsed_when_skip_tokens_are_supplied(self):
        # With SKIP_TOKENS=['de'], the trailing 'de' in the input must not
        # stop '24 April 2012 de' from resolving to 2012-04-24.
        frozen_now = datetime(2015, 2, 12)
        parser_settings = {'SKIP_TOKENS': ['de']}
        expected_date = datetime(2012, 4, 24)

        self.given_utcnow(frozen_now)
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed('24 April 2012 de')

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected_date)

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
            self, date_string):
        # Feeds the string straight to DateParser and hands the expected
        # error type and message list to then_error_was_raised (defined
        # outside this chunk).
        expected_messages = ['Day not in range for month']
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, expected_messages)

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000',
              languages=['fr'],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000',
              languages=['en'],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000',
              languages=[],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(
            self, date_string, languages, expected):
        # The parser is built with the given ``languages`` list and a zero
        # local offset; the ISO timestamp must still yield ``expected``.
        local_offset_hours = 0

        self.given_local_tz_offset(local_offset_hours)
        self.given_parser(languages=languages)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        # utcnow is frozen at 2015-02-15 15:30 with a zero local offset and
        # default settings; both the date and the reported period are checked.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday

        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser()

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    def given_utcnow(self, now):
        """Make ``datetime.utcnow()`` in ``dateparser.date_parser`` return ``now``.

        The module's ``datetime`` name is patched with a ``Mock`` that wraps
        the real ``datetime`` and whose ``utcnow`` returns ``now``; the patch
        is handed to ``self.add_patch``.
        """
        frozen_clock = Mock(wraps=datetime)
        frozen_clock.utcnow = Mock(return_value=now)
        clock_patch = patch(
            'dateparser.date_parser.datetime', new=frozen_clock)
        self.add_patch(clock_patch)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset``.

        The attribute is replaced by a ``timedelta`` of ``offset`` hours
        (``3600 * offset`` seconds) and the patch is handed to
        ``self.add_patch``.
        """
        fake_offset = timedelta(seconds=3600 * offset)
        tz_patch = patch.object(
            dateparser.timezone_parser, 'local_tz_offset', new=fake_offset)
        self.add_patch(tz_patch)

    def given_parser(self, *args, **kwds):
        """Create ``self.parser`` with ``date_parser.parse`` instrumented.

        ``date_parser.parse`` is patched with a pass-through wrapper that
        stores each return value in ``self.date_result``.
        ``dateparser.date.date_parser`` is then patched with a ``Mock``
        wrapping ``date_parser`` (also kept as ``self.date_parser``).
        Finally ``DateDataParser(*args, **kwds)`` is built.
        """
        original_parse = date_parser.parse

        @wraps(original_parse)
        def recording_parse(*call_args, **call_kwargs):
            # Keep the latest output so assertions can compare against it.
            outcome = original_parse(*call_args, **call_kwargs)
            self.date_result = outcome
            return outcome

        self.add_patch(patch.object(date_parser, 'parse', recording_parse))

        spy = Mock(wraps=date_parser)
        self.date_parser = spy
        self.add_patch(patch('dateparser.date.date_parser', new=spy))
        self.parser = DateDataParser(*args, **kwds)

    def when_date_is_parsed(self, date_string):
        """Run ``self.parser.get_date_data`` on ``date_string``.

        The returned value is stored in ``self.result``.
        """
        parsed = self.parser.get_date_data(date_string)
        self.result = parsed

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse ``date_string`` with a fresh ``DateParser``, capturing errors.

        On success the return value is stored in ``self.result``.  Any
        ``Exception`` raised is stored in ``self.error`` instead of
        propagating, so a later step can inspect it.
        """
        try:
            outcome = DateParser().parse(date_string)
        except Exception as error:
            self.error = error
        else:
            self.result = outcome

    def then_period_is(self, period):
        """Assert that ``self.result['period']`` equals ``period``."""
        actual_period = self.result['period']
        self.assertEqual(period, actual_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that ``self.result['date_obj']`` equals ``expected``."""
        actual_date = self.result['date_obj']
        self.assertEqual(expected, actual_date)

    def then_date_was_parsed_by_date_parser(self):
        """Assert the result came from the instrumented ``date_parser.parse``.

        First checks that ``self.date_result`` is no longer the
        ``NotImplemented`` placeholder, then that its first element equals
        ``self.result['date_obj']``.
        """
        collected = self.date_result
        self.assertNotEqual(NotImplemented, collected, "Date was not parsed")
        self.assertEqual(self.result['date_obj'], collected[0])
Exemple #24
0
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Reset per-test state; ``NotImplemented`` marks "not set yet"."""
        super(TestDateParser, self).setUp()
        for attribute in ('parser', 'result', 'date_parser', 'date_result'):
            setattr(self, attribute, NotImplemented)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm',
              datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52',
              datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11,
                                                 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20,
                                                56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM',
              datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14',
              datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін',
              datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
    ])
    def test_dates_parsing(self, date_string, expected):
        # Each date string must be handled by date_parser, yield a 'day'
        # period and produce exactly the expected datetime.
        frozen_now = datetime(2012, 11, 13)  # Tuesday
        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT',
              datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 1, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        # Runs with the local timezone offset patched to +1 hour.
        local_offset_hours = +1
        self.given_local_tz_offset(local_offset_hours)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700',
              datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000',
              datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        # Runs with the local timezone offset patched to zero.
        local_offset_hours = 0
        self.given_local_tz_offset(local_offset_hours)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        # An empty string goes straight to DateParser and must fail.
        expected_messages = ["Empty string"]
        self.when_date_is_parsed_by_date_parser('')
        self.then_error_was_raised(ValueError, expected_messages)

    @parameterized.expand([
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
        param('24h ago'),
    ])
    def test_dates_not_parsed(self, date_string):
        # Unparseable input goes straight to DateParser and must fail.
        expected_messages = ["unknown string format"]
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, expected_messages)

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        # Incomplete dates are parsed with PREFER_DATES_FROM set to 'past'.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        # Incomplete dates are parsed with PREFER_DATES_FROM set to 'future'.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        # Incomplete dates are parsed with PREFER_DATES_FROM set to
        # 'current_period'.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 31),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 25)),
        param('April 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 28)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year strings are parsed with PREFER_DAY_OF_MONTH set to
        # 'current' while utcnow() is frozen at `today`.
        day_preference = 'current'
        self.given_configuration('PREFER_DAY_OF_MONTH', day_preference)
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 1),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 1),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 31)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 30)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year strings are parsed with PREFER_DAY_OF_MONTH set to
        # 'last' while utcnow() is frozen at `today`.
        day_preference = 'last'
        self.given_configuration('PREFER_DAY_OF_MONTH', day_preference)
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 8),
              expected=datetime(2015, 2, 1)),
        param('February 2012',
              today=datetime(2015, 1, 7),
              expected=datetime(2012, 2, 1)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 1)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 1)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 1)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year strings are parsed with PREFER_DAY_OF_MONTH set to
        # 'first' while utcnow() is frozen at `today`.
        day_preference = 'first'
        self.given_configuration('PREFER_DAY_OF_MONTH', day_preference)
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(
            self, prefer_day_of_month=None):
        # A date string that names its day must parse to that day whatever
        # PREFER_DAY_OF_MONTH is set to.
        explicit_date_string = '24 April 2012'
        explicit_date = datetime(2012, 4, 24)
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(datetime(2015, 2, 12))
        self.given_parser()
        self.when_date_is_parsed(explicit_date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(explicit_date)

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
            self, date_string):
        # A day number beyond the end of the month must be rejected.
        expected_messages = ['Day not in range for month']
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, expected_messages)

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000',
              languages=['fr'],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000',
              languages=['en'],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000',
              languages=[],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string,
                                                      languages, expected):
        # The ISO datestamp must parse whichever `languages` list the
        # parser is built with, including an empty one.
        parser_options = {'languages': languages}
        self.given_local_tz_offset(0)
        self.given_parser(**parser_options)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        # Checks both the parsed datetime and the reported period.
        frozen_now = datetime(2015, 2, 15, 15, 30)  # Sunday
        self.given_utcnow(frozen_now)
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    def given_utcnow(self, now):
        """Freeze ``utcnow()`` for code in ``dateparser.date_parser``.

        The ``datetime`` name inside ``dateparser.date_parser`` is replaced
        by a mock that wraps ``datetime`` and whose ``utcnow`` always
        returns ``now``. The patch is registered through ``self.add_patch``.
        """
        frozen_datetime = Mock(wraps=datetime)
        frozen_datetime.utcnow = Mock(return_value=now)
        datetime_patch = patch('dateparser.date_parser.datetime',
                               new=frozen_datetime)
        self.add_patch(datetime_patch)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset``.

        Args:
            offset: multiplied by 3600 and stored as a ``timedelta`` in
                seconds, i.e. the offset is expressed in hours.
        """
        fake_offset = timedelta(seconds=3600 * offset)
        offset_patch = patch.object(dateparser.timezone_parser,
                                    'local_tz_offset',
                                    new=fake_offset)
        self.add_patch(offset_patch)

    def given_parser(self, *args, **kwds):
        """Create ``self.parser`` with ``date_parser.parse`` instrumented.

        ``date_parser.parse`` is patched with a wrapper that records every
        return value in ``self.date_result``. ``dateparser.date.date_parser``
        is then replaced by a ``Mock`` wrapping ``date_parser`` (kept in
        ``self.date_parser``), and a ``DateDataParser`` is built from
        ``*args`` / ``**kwds``.
        """
        def collecting_get_date_data(parse):
            @wraps(parse)
            def wrapped(*parse_args, **parse_kwargs):
                # Forward whatever the caller passes: a wrapper fixed to a
                # single positional argument would raise TypeError as soon
                # as parse() is called with extra or keyword arguments.
                self.date_result = parse(*parse_args, **parse_kwargs)
                return self.date_result

            return wrapped

        self.add_patch(
            patch.object(date_parser, 'parse',
                         collecting_get_date_data(date_parser.parse)))

        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(
            patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def given_configuration(self, key, value):
        """Patch attribute ``key`` of ``settings`` to ``value`` for this test."""
        setting_patch = patch.object(settings, key, new=value)
        self.add_patch(setting_patch)

    def when_date_is_parsed(self, date_string):
        """Parse ``date_string`` with ``self.parser``; keep it in ``self.result``."""
        parsed = self.parser.get_date_data(date_string)
        self.result = parsed

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse ``date_string`` with a fresh ``DateParser``.

        The outcome is stored in ``self.result``. Any ``Exception`` raised
        while parsing is captured in ``self.error`` instead of propagating.
        """
        try:
            outcome = DateParser().parse(date_string)
        except Exception as error:
            self.error = error
        else:
            self.result = outcome

    def then_period_is(self, period):
        """Assert that ``self.result['period']`` equals ``period``."""
        actual_period = self.result['period']
        self.assertEqual(period, actual_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that ``self.result['date_obj']`` equals ``expected``."""
        actual_date = self.result['date_obj']
        self.assertEqual(expected, actual_date)

    def then_date_was_parsed_by_date_parser(self):
        """Assert the final date came from the instrumented ``parse`` call.

        ``self.date_result`` must no longer be ``NotImplemented`` and its
        first element must equal ``self.result['date_obj']``.
        """
        collected = self.date_result
        self.assertNotEqual(NotImplemented, collected,
                            "Date was not parsed")
        final_date = self.result['date_obj']
        self.assertEqual(final_date, collected[0])
class RelevancePeriodExtractor(Task):

    """A Task runner that extracts the period a source's content refers to
        (is relevant for).
    """

    def __init__(self, config):
        """Read the ``timeliness`` options and prepare the date parser.

        Args:
            config: passed to the parent initializer; ``self.config`` must
                afterwards contain a ``'timeliness'`` mapping.

        Raises:
            ValueError: if ``timeliness_strategy`` is missing or empty.
        """
        super(RelevancePeriodExtractor, self).__init__(config)
        params = self.config['timeliness']
        self.extract_period = params.get('extract_period', False)
        self.timeliness_strategy = params.get('timeliness_strategy', [])
        self.date_order = params.get('date_order', 'DMY')
        self.max_empty_relevance_period = params.get(
            'max_empty_relevance_period', 10)
        if not self.timeliness_strategy:
            raise ValueError('You need to provide values for "timeliness_strategy."')
        checker = DataPackageChecker(self.config)
        checker.check_database_completeness([self.source_file])
        parser_settings = {
            'RETURN_AS_TIMEZONE_AWARE': False,
            'PREFER_DAY_OF_MONTH': 'last',
            'PREFER_DATES_FROM': 'past',
            'SKIP_TOKENS': ['to'],
            'DATE_ORDER': self.date_order,
        }
        self.date_parser = DateDataParser(allow_redetect_language=True,
                                          settings=parser_settings)

    def run(self):
        """Try to identify the relevance period of sources.

        Every source gets a ``period_id`` of the form ``start/end`` with
        both dates formatted as ``%d-%m-%Y``. Sources whose period could not
        be identified fall back to their ``created_at`` date for both ends.
        The updated sources are then written back with
        ``update_sources_period``.

        Raises:
            exceptions.UnableToAssessTimeliness: if the rounded percentage
                of sources without an identified period is greater than
                ``max_empty_relevance_period``.
        """

        sources = self.extract_period_from_sources()
        empty_period_sources = [source for source in sources
                                if source['period_id'] is None]
        if sources:
            empty_period_percent = (len(empty_period_sources) * 100) / len(sources)
        else:
            # No rows at all: nothing is unidentified, and dividing by
            # len(sources) would raise ZeroDivisionError.
            empty_period_percent = 0
        empty_period_percent = round(empty_period_percent)
        if empty_period_percent > int(self.max_empty_relevance_period):
            msg = ('The relevance period couldn\'t be identified for'
                   ' {0}% of sources therefore timeliness cannot be'
                   ' assessed. Please provide more fields for "timeliness_'
                   'strategy", set "assess_timeliness" to false or increase'
                   ' "max_empty_relevance_period".').format(empty_period_percent)
            raise exceptions.UnableToAssessTimeliness(msg)

        for source in sources:
            if source['period_id'] is None:
                creation_date = utilities.date_from_string(source['created_at'])
                dates = [creation_date, creation_date]
            else:
                period_start, period_end = source['period_id']
                dates = [period_start.date(), period_end.date()]
            # Anything that is not a date (e.g. an unparseable creation
            # date) is written as an empty string.
            dates = [date.strftime('%d-%m-%Y') if isinstance(date, datetime.date)
                     else '' for date in dates]
            source['period_id'] = '/'.join(dates)
        self.update_sources_period(sources)

    def extract_period_from_sources(self):
        """Read ``source_file`` and attach an extracted period to every row.

        Returns:
            A list of the rows read from ``self.source_file``, each with a
            ``'period_id'`` entry holding the result of ``identify_period``.

        Raises:
            ValueError: if none of the ``timeliness_strategy`` fields is in
                the file header.
        """

        collected = []
        with compat.UnicodeDictReader(self.source_file) as reader:
            wanted = set(self.timeliness_strategy)
            present = wanted.intersection(set(reader.header))
            if not present:
                raise ValueError(('At least one of the "timeliness_strategy" '
                                  'fields must be present in your "source_file".'))
            if not present.issuperset(wanted):
                # Some strategy fields are absent: report them and carry on.
                absent = wanted.difference(present)
                print(('Fields "{0}" from "timeliness_strategy" were not found '
                       'in your `source_file`').format(absent))

            for row in reader:
                relevant = {}
                for name, content in row.items():
                    if name in self.timeliness_strategy:
                        relevant[name] = content
                row['period_id'] = self.identify_period(relevant)
                collected.append(row)
        return collected

    def identify_period(self, source=None):
        """Try to identify the period of a source based on timeliness strategy.

        Fields are tried in ``timeliness_strategy`` order. If the dates of a
        field do not resolve to a period on their own, they are narrowed to
        the dates that also occur in each of the other fields, and the first
        period that resolves is returned.

        Args:
            source: a dict corresponding to a source_file row

        Returns:
            The first truthy result of ``resolve_period``, or ``None`` if no
            field yields a period.
        """

        # Avoid a mutable default argument shared between calls.
        source = {} if source is None else source

        field_dates = {}
        for field in self.timeliness_strategy:
            value = source.get(field, '')
            if value:
                field_dates[field] = self.extract_dates(value)

        for field in self.timeliness_strategy:
            dates = field_dates.get(field, [])
            if not dates:
                continue
            period = resolve_period(dates)
            if period:
                return period

            # The field alone was not conclusive: keep only the dates that
            # another field confirms.
            other_fields = list(self.timeliness_strategy)
            other_fields.remove(field)
            date_objects = set(date['date_obj'] for date in dates)
            for other_field in other_fields:
                common_values = [date
                                 for date in field_dates.get(other_field, [])
                                 if date['date_obj'] in date_objects]
                period = resolve_period(common_values)
                if period:
                    # Stop at the first match so a later, inconclusive
                    # field cannot overwrite it.
                    return period
        return None

    def extract_dates(self, line=""):
        """Try to extract dates from a line.

        Candidates are collected from purely numeric ``N?N?N`` patterns and
        from the words surrounding every year reported by ``filter_years``;
        they are then parsed and de-duplicated by their ``date_obj``.

        Args:
            line: a string that could contain a date or time range

        Returns:
            A list of date dicts whose ``'date_obj'`` is not ``None``, at
            most one per distinct ``date_obj``.
        """

        candidates = re.findall(r'[0-9]+[\W_][0-9]+[\W_][0-9]+', line)
        words = re.sub(r'[\W_]+', ' ', line).split()
        for year in filter_years(words):
            if re.search(r'[a-zA-Z]', year):
                # A "year" containing letters is kept verbatim and ends the
                # scan of the remaining years.
                candidates.append(year)
                break
            for position, token in enumerate(words):
                if token != year:
                    continue
                found = self.scan_for_date(words, position)
                if not found:
                    continue
                candidates.append(found)
                # Only dates more precise than a bare year can end a range.
                if found['period'] == 'year' or not found['date_obj']:
                    continue
                start = self.scan_for_range(words, position, found)
                if start and start['date_obj'] < found['date_obj']:
                    candidates.append(start)

        parsed = []
        for candidate in candidates:
            try:
                parsed.append(self.date_parser.get_date_data(candidate))
            except TypeError:
                # Candidates coming from the scans are already date dicts.
                if isinstance(candidate, dict):
                    parsed.append(candidate)
            except ValueError:
                pass

        unique = {}
        for entry in parsed:
            if entry['date_obj'] is not None:
                unique[entry['date_obj']] = entry
        return list(unique.values())

    def scan_for_date(self, line_words, year_index):
        """Scan around the year for a date as complete as possible.

        The year and up to two words before it are tried first. If that
        yields nothing, or only a year, the year and up to two words after
        it are tried in reversed order.

        Args:
            line_words: a list of words (strings)
            year_index: index of a string from line_word that contains a year

        Returns:
            The date dict built by ``create_date_from_parts``, or ``None``.
        """

        # Clamp the start: a negative slice start wraps around to the end
        # of the list, which on short inputs (e.g. ['March', '2014'] with
        # the year at index 1) silently dropped the words before the year.
        window_start = max(year_index - 2, 0)
        date_parts = line_words[window_start:year_index + 1]
        potential_date = self.create_date_from_parts(date_parts)
        if not potential_date or potential_date['period'] == 'year':
            new_parts = list(reversed(line_words[year_index:year_index+3]))
            new_potential_date = self.create_date_from_parts(new_parts)
            if new_potential_date:
                potential_date = new_potential_date
        return potential_date

    def scan_for_range(self, line_words, year_index, range_end):
        """Scan to the left of the year whose corresponding date has
            been extracted to see if there is a range.

          Args:
            line_words: a list of words (strings)
            year_index: index of a string from line_word that contains a year
            range_end: date that has already been extracted from the year at
                        year_index, potentially end of range

          Returns:
            A date dict with the same period as ``range_end``, or ``None``
            if no range start could be built.
        """

        if range_end['period'] == 'month':
            scan_start = year_index-2
            scan_end = year_index-4
        else:
            scan_start = year_index-3
            scan_end = year_index-5
        if scan_start < 0:
            # Nothing lies to the left of the extracted date. Slicing with
            # negative bounds would wrap around and pick up words from the
            # end of the line instead.
            return None
        range_start_parts = line_words[max(scan_end, 0):scan_start+1]
        # Keep only the words that parse as a date on their own.
        range_start_parts = [part for part in range_start_parts
                             if self.create_date_from_parts([part]) is not None]
        years = filter_years(range_start_parts)
        if years:
            # A year inside the window means it is not a range start.
            range_start_parts = []
        if range_start_parts:
            # Complete the start with the month and/or year of the end.
            if len(range_start_parts) == 1 and range_end['period'] == 'day':
                range_start_parts.append(compat.str(range_end['date_obj'].month))
            range_start_parts.append(compat.str(range_end['date_obj'].year))
        range_start = self.create_date_from_parts(range_start_parts)
        if range_start and range_start['period'] != range_end['period']:
            range_start = None
        return range_start

    def create_date_from_parts(self, date_parts=None):
        """Try to create a date object with date_parser or return None.

        The parts are joined with spaces and parsed; on failure the leading
        part is dropped and the remainder is tried again, until a candidate
        yields a ``date_obj``.

        Args:
            date_parts: a list of words (strings); it is not modified.

        Returns:
            The first date dict whose ``'date_obj'`` is not ``None``, or
            ``None`` if there are no parts or none of the candidates parses.
        """

        if not date_parts:
            return None
        # Work on a copy: the original inserted into the caller's list
        # while iterating over it.
        parts = list(date_parts)
        if len(parts) == 2 and False not in [el.isdigit() for el in parts]:
            # Two purely numeric parts: try them with a leading '31' first
            # (presumably a month/year pair completed with a day -- confirm).
            parts.insert(0, '31')
        for start in range(len(parts)):
            potential_date = ' '.join(parts[start:])
            try:
                date = self.date_parser.get_date_data(potential_date)
            except (ValueError, TypeError):
                date = None
            if date and date.get('date_obj') is not None:
                return date
        return None

    def update_sources_period(self, new_sources):
        """Overwrite source_file with the identified period_id.

        The schema of the source resource in ``self.datapackage`` is updated
        with a ``period_id`` string field, then the header and every row of
        ``new_sources`` are written to ``self.source_file``.

        Args:
            new_sources: the source rows to write, converted with
                ``utilities.dicts_to_schema_rows``.
        """

        resource = utilities.get_datapackage_resource(self.source_file,
                                                      self.datapackage)
        position = self.datapackage.resources.index(resource)
        schema_dict = self.datapackage.resources[position].descriptor['schema']
        period_field = {'name': 'period_id', 'type': 'string',
                        'title': 'The period source data is relevant for.'}
        utilities.deep_update_dict(schema_dict, {'fields': [period_field]})
        schema = SchemaModel(schema_dict)

        with compat.UnicodeWriter(self.source_file) as writer:
            writer.writerow(schema.headers)
            rows = utilities.dicts_to_schema_rows(new_sources, schema)
            for row in rows:
                writer.writerow(row)
Exemple #26
0
 def _parser_get_date(self, date_string, date_formats, languages):
     """Parse ``date_string`` with a new ``DateDataParser(languages)``."""
     return DateDataParser(languages).get_date_data(date_string, date_formats)
class TestFreshnessDateDataParser(BaseTestCase):
    def setUp(self):
        """Fix the reference "now" and reset the per-test state slots."""
        super(TestFreshnessDateDataParser, self).setUp()
        # Fixed instant that relative-date expectations are measured against.
        self.now = datetime(2014, 9, 1, 10, 30)
        # NotImplemented marks slots the given_*/when_* steps have not filled yet.
        self.date_string = self.parser = self.result = NotImplemented
        self.freshness_parser = self.freshness_result = NotImplemented
        self.date = self.time = NotImplemented

    @parameterized.expand([
        # English dates
        param('yesterday', ago={'days': 1}, period='day'),
        param('the day before yesterday', ago={'days': 2}, period='day'),
        param('today', ago={'days': 0}, period='day'),
        param('an hour ago', ago={'hours': 1}, period='day'),
        param('about an hour ago', ago={'hours': 1}, period='day'),
        param('a day ago', ago={'days': 1}, period='day'),
        param('a week ago', ago={'weeks': 1}, period='week'),
        param('one week ago', ago={'weeks': 1}, period='week'),
        param('2 hours ago', ago={'hours': 2}, period='day'),
        param('about 23 hours ago', ago={'hours': 23}, period='day'),
        param('1 year 2 months', ago={'years': 1, 'months': 2}, period='month'),
        param('1 year, 09 months,01 weeks', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 year 11 months', ago={'years': 1, 'months': 11}, period='month'),
        param('1 year 12 months', ago={'years': 1, 'months': 12}, period='month'),
        param('15 hr', ago={'hours': 15}, period='day'),
        param('15 hrs', ago={'hours': 15}, period='day'),
        param('2 min', ago={'minutes': 2}, period='day'),
        param('2 mins', ago={'minutes': 2}, period='day'),
        param('3 sec', ago={'seconds': 3}, period='day'),
        param('1000 years ago', ago={'years': 1000}, period='year'),
        param('2013 years ago', ago={'years': 2013}, period='year'),  # We've fixed .now in setUp
        param('5000 months ago', ago={'years': 416, 'months': 8}, period='month'),
        param('{} months ago'.format(2013 * 12 + 8), ago={'years': 2013, 'months': 8}, period='month'),
        param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('just now', ago={'seconds': 0}, period='day'),

        # French dates
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd’hui", ago={'days': 0}, period='day'),
        param("Aujourdʼhui", ago={'days': 0}, period='day'),
        param("Aujourdʻhui", ago={'days': 0}, period='day'),
        param("Aujourd՚hui", ago={'days': 0}, period='day'),
        param("Aujourdꞌhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd′hui", ago={'days': 0}, period='day'),
        param("Aujourd‵hui", ago={'days': 0}, period='day'),
        param("Aujourdʹhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Hier", ago={'days': 1}, period='day'),
        param("Avant-hier", ago={'days': 2}, period='day'),
        param('Il ya un jour', ago={'days': 1}, period='day'),
        param('Il ya une heure', ago={'hours': 1}, period='day'),
        param('Il ya 2 heures', ago={'hours': 2}, period='day'),
        param('Il ya environ 23 heures', ago={'hours': 23}, period='day'),
        param('1 an 2 mois', ago={'years': 1, 'months': 2}, period='month'),
        param('1 année, 09 mois, 01 semaines', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 an 11 mois', ago={'years': 1, 'months': 11}, period='month'),
        param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('Il y a 40 min', ago={'minutes': 40}, period='day'),

        # German dates
        param('Heute', ago={'days': 0}, period='day'),
        param('Gestern', ago={'days': 1}, period='day'),
        param('vorgestern', ago={'days': 2}, period='day'),
        param('vor einem Tag', ago={'days': 1}, period='day'),
        param('vor einer Stunden', ago={'hours': 1}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'),
        param('1 Jahr 2 Monate', ago={'years': 1, 'months': 2}, period='month'),
        param('1 Jahr, 09 Monate, 01 Wochen', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 Jahr 11 Monate', ago={'years': 1, 'months': 11}, period='month'),
        param('vor 29h', ago={'hours': 29}, period='day'),
        param('vor 29m', ago={'minutes': 29}, period='day'),
        param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Italian dates
        param('oggi', ago={'days': 0}, period='day'),
        param('ieri', ago={'days': 1}, period='day'),
        param('2 ore fa', ago={'hours': 2}, period='day'),
        param('circa 23 ore fa', ago={'hours': 23}, period='day'),
        param('1 anno 2 mesi', ago={'years': 1, 'months': 2}, period='month'),
        param('1 anno, 09 mesi, 01 settimane', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 anno 11 mesi', ago={'years': 1, 'months': 11}, period='month'),
        param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Portuguese dates
        param('ontem', ago={'days': 1}, period='day'),
        param('anteontem', ago={'days': 2}, period='day'),
        param('hoje', ago={'days': 0}, period='day'),
        param('uma hora atrás', ago={'hours': 1}, period='day'),
        param('1 segundo atrás', ago={'seconds': 1}, period='day'),
        param('um dia atrás', ago={'days': 1}, period='day'),
        param('uma semana atrás', ago={'weeks': 1}, period='week'),
        param('2 horas atrás', ago={'hours': 2}, period='day'),
        param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'),
        param('1 ano 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 ano, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 ano 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Turkish dates
        param('Dün', ago={'days': 1}, period='day'),
        param('Bugün', ago={'days': 0}, period='day'),
        param('2 saat önce', ago={'hours': 2}, period='day'),
        param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'),
        param('1 yıl 2 ay', ago={'years': 1, 'months': 2}, period='month'),
        param('1 yıl, 09 ay, 01 hafta', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 yıl 11 ay', ago={'years': 1, 'months': 11}, period='month'),
        param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Russian dates
        param('сегодня', ago={'days': 0}, period='day'),
        param('Вчера в', ago={'days': 1}, period='day'),
        param('вчера', ago={'days': 1}, period='day'),
        param('2 часа назад', ago={'hours': 2}, period='day'),
        param('час назад', ago={'hours': 1}, period='day'),
        param('минуту назад', ago={'minutes': 1}, period='day'),
        param('2 ч. 21 мин. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('около 23 часов назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяца', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцев, 01 недель', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 год 11 месяцев', ago={'years': 1, 'months': 11}, period='month'),
        param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Czech dates
        param('Dnes', ago={'days': 0}, period='day'),
        param('Včera', ago={'days': 1}, period='day'),
        param('Předevčírem', ago={'days': 2}, period='day'),
        param('Před 2 hodinami', ago={'hours': 2}, period='day'),
        param('před přibližně 23 hodin', ago={'hours': 23}, period='day'),
        param('1 rok 2 měsíce', ago={'years': 1, 'months': 2}, period='month'),
        param('1 rok, 09 měsíců, 01 týdnů', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 rok 11 měsíců', ago={'years': 1, 'months': 11}, period='month'),
        param('3 dny', ago={'days': 3}, period='day'),
        param('3 hodiny', ago={'hours': 3}, period='day'),
        param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Spanish dates
        param('anteayer', ago={'days': 2}, period='day'),
        param('ayer', ago={'days': 1}, period='day'),
        param('hoy', ago={'days': 0}, period='day'),
        param('hace una hora', ago={'hours': 1}, period='day'),
        param('Hace un día', ago={'days': 1}, period='day'),
        param('Hace una semana', ago={'weeks': 1}, period='week'),
        param('Hace 2 horas', ago={'hours': 2}, period='day'),
        param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'),
        param('1 año 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 año, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 año 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Chinese dates
        param('昨天', ago={'days': 1}, period='day'),
        param('前天', ago={'days': 2}, period='day'),
        param('2小时前', ago={'hours': 2}, period='day'),
        param('约23小时前', ago={'hours': 23}, period='day'),
        param('1年2个月', ago={'years': 1, 'months': 2}, period='month'),
        param('1年09月,01周', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1年11个月', ago={'years': 1, 'months': 11}, period='month'),
        param('1年,1月,1周,1天,1小时,1分钟前',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Arabic dates
        param('اليوم', ago={'days': 0}, period='day'),
        param('يوم أمس', ago={'days': 1}, period='day'),
        param('منذ يومين', ago={'days': 2}, period='day'),
        param('منذ 3 أيام', ago={'days': 3}, period='day'),
        param('منذ 21 أيام', ago={'days': 21}, period='day'),
        param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Thai dates
        param('วันนี้', ago={'days': 0}, period='day'),
        param('เมื่อวานนี้', ago={'days': 1}, period='day'),
        param('2 วัน', ago={'days': 2}, period='day'),
        param('2 ชั่วโมง', ago={'hours': 2}, period='day'),
        param('23 ชม.', ago={'hours': 23}, period='day'),
        param('2 สัปดาห์ 3 วัน', ago={'weeks': 2, 'days': 3}, period='day'),
        param('1 ปี 9 เดือน 1 สัปดาห์', ago={'years': 1, 'months': 9, 'weeks': 1},
              period='week'),
        param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Vietnamese dates
        param('Hôm nay', ago={'days': 0}, period='day'),
        param('Hôm qua', ago={'days': 1}, period='day'),
        param('2 giờ', ago={'hours': 2}, period='day'),
        param('2 tuần 3 ngày', ago={'weeks': 2, 'days': 3}, period='day'),
        # following test unsupported, refer to discussion at:
        # http://github.com/scrapinghub/dateparser/issues/33
        #param('1 năm 1 tháng 1 tuần 1 ngày 1 giờ 1 chút',
        #      ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
        #      period='day'),

        # Belarusian dates
        param('сёння', ago={'days': 0}, period='day'),
        param('учора ў', ago={'days': 1}, period='day'),
        param('ўчора', ago={'days': 1}, period='day'),
        param('пазаўчора', ago={'days': 2}, period='day'),
        param('2 гадзіны таму назад', ago={'hours': 2}, period='day'),
        param('2 гадзіны таму', ago={'hours': 2}, period='day'),
        param('гадзіну назад', ago={'hours': 1}, period='day'),
        param('хвіліну таму', ago={'minutes': 1}, period='day'),
        param('2 гадзіны 21 хвіл. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('каля 23 гадзін назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяцы', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцаў, 01 тыдзень', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('2 гады 3 месяцы', ago={'years': 2, 'months': 3}, period='month'),
        param('5 гадоў, 1 месяц, 6 тыдняў, 3 дні, 5 гадзін 1 хвіліну і 3 секунды таму назад',
              ago={'years': 5, 'months': 1, 'weeks': 6, 'days': 3, 'hours': 5, 'minutes': 1, 'seconds': 3},
              period='day'),

        # Polish dates
        param("wczoraj", ago={'days': 1}, period='day'),
        param("1 godz. 2 minuty temu", ago={'hours': 1, 'minutes': 2}, period='day'),
        param("2 lata, 3 miesiące, 1 tydzień, 2 dni, 4 godziny, 15 minut i 25 sekund temu",
              ago={'years': 2, 'months': 3, 'weeks': 1, 'days': 2, 'hours': 4, 'minutes': 15, 'seconds': 25},
              period='day'),
        param("2 minuty temu", ago={'minutes': 2}, period='day'),
        param("15 minut temu", ago={'minutes': 15}, period='day'),
    ])
    def test_relative_dates(self, date_string, ago, period):
        # Each parameterized case supplies a relative date expression, the
        # relativedelta keyword arguments it should resolve to (counted back
        # from self.now) and the expected 'period' value of the result.
        # The parser is built with the NORMALIZE setting switched off.
        self.given_parser(settings={'NORMALIZE': False})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_date_obj_is_exactly_this_time_ago(ago)
        self.then_period_is(period)

    @parameterized.expand([
        # English dates
        param('yesterday', ago={'days': 1}, period='day'),
        param('the day before yesterday', ago={'days': 2}, period='day'),
        param('today', ago={'days': 0}, period='day'),
        param('an hour ago', ago={'hours': 1}, period='day'),
        param('about an hour ago', ago={'hours': 1}, period='day'),
        param('a day ago', ago={'days': 1}, period='day'),
        param('a week ago', ago={'weeks': 1}, period='week'),
        param('one week ago', ago={'weeks': 1}, period='week'),
        param('2 hours ago', ago={'hours': 2}, period='day'),
        param('about 23 hours ago', ago={'hours': 23}, period='day'),
        param('1 year 2 months', ago={'years': 1, 'months': 2}, period='month'),
        param('1 year, 09 months,01 weeks', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 year 11 months', ago={'years': 1, 'months': 11}, period='month'),
        param('1 year 12 months', ago={'years': 1, 'months': 12}, period='month'),
        param('15 hr', ago={'hours': 15}, period='day'),
        param('15 hrs', ago={'hours': 15}, period='day'),
        param('2 min', ago={'minutes': 2}, period='day'),
        param('2 mins', ago={'minutes': 2}, period='day'),
        param('3 sec', ago={'seconds': 3}, period='day'),
        param('1000 years ago', ago={'years': 1000}, period='year'),
        param('2013 years ago', ago={'years': 2013}, period='year'),  # We've fixed .now in setUp
        param('5000 months ago', ago={'years': 416, 'months': 8}, period='month'),
        param('{} months ago'.format(2013 * 12 + 8), ago={'years': 2013, 'months': 8}, period='month'),
        param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('just now', ago={'seconds': 0}, period='day'),

        # French dates
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd’hui", ago={'days': 0}, period='day'),
        param("Aujourdʼhui", ago={'days': 0}, period='day'),
        param("Aujourdʻhui", ago={'days': 0}, period='day'),
        param("Aujourd՚hui", ago={'days': 0}, period='day'),
        param("Aujourdꞌhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd′hui", ago={'days': 0}, period='day'),
        param("Aujourd‵hui", ago={'days': 0}, period='day'),
        param("Aujourdʹhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Hier", ago={'days': 1}, period='day'),
        param("Avant-hier", ago={'days': 2}, period='day'),
        param('Il ya un jour', ago={'days': 1}, period='day'),
        param('Il ya une heure', ago={'hours': 1}, period='day'),
        param('Il ya 2 heures', ago={'hours': 2}, period='day'),
        param('Il ya environ 23 heures', ago={'hours': 23}, period='day'),
        param('1 an 2 mois', ago={'years': 1, 'months': 2}, period='month'),
        param('1 année, 09 mois, 01 semaines', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 an 11 mois', ago={'years': 1, 'months': 11}, period='month'),
        param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('Il y a 40 min', ago={'minutes': 40}, period='day'),

        # German dates
        param('Heute', ago={'days': 0}, period='day'),
        param('Gestern', ago={'days': 1}, period='day'),
        param('vorgestern', ago={'days': 2}, period='day'),
        param('vor einem Tag', ago={'days': 1}, period='day'),
        param('vor einer Stunden', ago={'hours': 1}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'),
        param('1 Jahr 2 Monate', ago={'years': 1, 'months': 2}, period='month'),
        param('1 Jahr, 09 Monate, 01 Wochen', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 Jahr 11 Monate', ago={'years': 1, 'months': 11}, period='month'),
        param('vor 29h', ago={'hours': 29}, period='day'),
        param('vor 29m', ago={'minutes': 29}, period='day'),
        param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Italian dates
        param('oggi', ago={'days': 0}, period='day'),
        param('ieri', ago={'days': 1}, period='day'),
        param('2 ore fa', ago={'hours': 2}, period='day'),
        param('circa 23 ore fa', ago={'hours': 23}, period='day'),
        param('1 anno 2 mesi', ago={'years': 1, 'months': 2}, period='month'),
        param('1 anno, 09 mesi, 01 settimane', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 anno 11 mesi', ago={'years': 1, 'months': 11}, period='month'),
        param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Portuguese dates
        param('ontem', ago={'days': 1}, period='day'),
        param('anteontem', ago={'days': 2}, period='day'),
        param('hoje', ago={'days': 0}, period='day'),
        param('uma hora atrás', ago={'hours': 1}, period='day'),
        param('1 segundo atrás', ago={'seconds': 1}, period='day'),
        param('um dia atrás', ago={'days': 1}, period='day'),
        param('uma semana atrás', ago={'weeks': 1}, period='week'),
        param('2 horas atrás', ago={'hours': 2}, period='day'),
        param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'),
        param('1 ano 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 ano, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 ano 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Turkish dates
        param('Dün', ago={'days': 1}, period='day'),
        param('Bugün', ago={'days': 0}, period='day'),
        param('2 saat önce', ago={'hours': 2}, period='day'),
        param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'),
        param('1 yıl 2 ay', ago={'years': 1, 'months': 2}, period='month'),
        param('1 yıl, 09 ay, 01 hafta', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 yıl 11 ay', ago={'years': 1, 'months': 11}, period='month'),
        param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Russian dates
        param('сегодня', ago={'days': 0}, period='day'),
        param('Вчера в', ago={'days': 1}, period='day'),
        param('вчера', ago={'days': 1}, period='day'),
        param('2 часа назад', ago={'hours': 2}, period='day'),
        param('час назад', ago={'hours': 1}, period='day'),
        param('минуту назад', ago={'minutes': 1}, period='day'),
        param('2 ч. 21 мин. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('около 23 часов назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяца', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцев, 01 недель', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 год 11 месяцев', ago={'years': 1, 'months': 11}, period='month'),
        param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Czech dates
        param('Dnes', ago={'days': 0}, period='day'),
        param('Včera', ago={'days': 1}, period='day'),
        param('Předevčírem', ago={'days': 2}, period='day'),
        param('Před 2 hodinami', ago={'hours': 2}, period='day'),
        param('před přibližně 23 hodin', ago={'hours': 23}, period='day'),
        param('1 rok 2 měsíce', ago={'years': 1, 'months': 2}, period='month'),
        param('1 rok, 09 měsíců, 01 týdnů', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 rok 11 měsíců', ago={'years': 1, 'months': 11}, period='month'),
        param('3 dny', ago={'days': 3}, period='day'),
        param('3 hodiny', ago={'hours': 3}, period='day'),
        param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Spanish dates
        param('anteayer', ago={'days': 2}, period='day'),
        param('ayer', ago={'days': 1}, period='day'),
        param('hoy', ago={'days': 0}, period='day'),
        param('hace una hora', ago={'hours': 1}, period='day'),
        param('Hace un día', ago={'days': 1}, period='day'),
        param('Hace una semana', ago={'weeks': 1}, period='week'),
        param('Hace 2 horas', ago={'hours': 2}, period='day'),
        param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'),
        param('1 año 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 año, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 año 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Chinese dates
        param('昨天', ago={'days': 1}, period='day'),
        param('前天', ago={'days': 2}, period='day'),
        param('2小时前', ago={'hours': 2}, period='day'),
        param('约23小时前', ago={'hours': 23}, period='day'),
        param('1年2个月', ago={'years': 1, 'months': 2}, period='month'),
        param('1年09月,01周', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1年11个月', ago={'years': 1, 'months': 11}, period='month'),
        param('1年,1月,1周,1天,1小时,1分钟前',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Arabic dates
        param('اليوم', ago={'days': 0}, period='day'),
        param('يوم أمس', ago={'days': 1}, period='day'),
        param('منذ يومين', ago={'days': 2}, period='day'),
        param('منذ 3 أيام', ago={'days': 3}, period='day'),
        param('منذ 21 أيام', ago={'days': 21}, period='day'),
        param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Thai dates
        param('วันนี้', ago={'days': 0}, period='day'),
        param('เมื่อวานนี้', ago={'days': 1}, period='day'),
        param('2 วัน', ago={'days': 2}, period='day'),
        param('2 ชั่วโมง', ago={'hours': 2}, period='day'),
        param('23 ชม.', ago={'hours': 23}, period='day'),
        param('2 สัปดาห์ 3 วัน', ago={'weeks': 2, 'days': 3}, period='day'),
        param('1 ปี 9 เดือน 1 สัปดาห์', ago={'years': 1, 'months': 9, 'weeks': 1},
              period='week'),
        param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Vietnamese dates
        param('Hôm nay', ago={'days': 0}, period='day'),
        param('Hôm qua', ago={'days': 1}, period='day'),
        param('2 tuần 3 ngày', ago={'weeks': 2, 'days': 3}, period='day'),

        # Belarusian dates
        param('сёння', ago={'days': 0}, period='day'),
        param('учора ў', ago={'days': 1}, period='day'),
        param('ўчора', ago={'days': 1}, period='day'),
        param('пазаўчора', ago={'days': 2}, period='day'),
        param('2 гадзіны таму назад', ago={'hours': 2}, period='day'),
        param('2 гадзіны таму', ago={'hours': 2}, period='day'),
        param('гадзіну назад', ago={'hours': 1}, period='day'),
        param('хвіліну таму', ago={'minutes': 1}, period='day'),
        param('2 гадзіны 21 хвіл. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('каля 23 гадзін назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяцы', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцаў, 01 тыдзень', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('2 гады 3 месяцы', ago={'years': 2, 'months': 3}, period='month'),
        param('5 гадоў, 1 месяц, 6 тыдняў, 3 дні, 5 гадзін 1 хвіліну і 3 секунды таму назад',
              ago={'years': 5, 'months': 1, 'weeks': 6, 'days': 3, 'hours': 5, 'minutes': 1, 'seconds': 3},
              period='day'),

        # Polish dates
        param("wczoraj", ago={'days': 1}, period='day'),
        param("1 godz. 2 minuty temu", ago={'hours': 1, 'minutes': 2}, period='day'),
        param("2 lata, 3 miesiące, 1 tydzień, 2 dni, 4 godziny, 15 minut i 25 sekund temu",
              ago={'years': 2, 'months': 3, 'weeks': 1, 'days': 2, 'hours': 4, 'minutes': 15, 'seconds': 25},
              period='day'),
        param("2 minuty temu", ago={'minutes': 2}, period='day'),
        param("15 minut temu", ago={'minutes': 15}, period='day'),
    ])
    def test_normalized_relative_dates(self, date_string, ago, period):
        # Same checks as the non-normalized variant, except that the input is
        # first passed through normalize_unicode and the parser is built with
        # the NORMALIZE setting switched on.
        date_string = normalize_unicode(date_string)
        self.given_parser(settings={'NORMALIZE': True})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_date_obj_is_exactly_this_time_ago(ago)
        self.then_period_is(period)

    @parameterized.expand([
        param('15th of Aug, 2014 Diane Bennett'),
    ])
    def test_insane_dates(self, date_string):
        # The input must be handled without an exception, and the result's
        # 'date_obj' must be None (i.e. the string is rejected, not parsed).
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('5000 years ago'),
        param('2014 years ago'),  # We've fixed .now in setUp
        param('{} months ago'.format(2013 * 12 + 9)),
    ])
    def test_dates_not_supported_by_date_time(self, date_string):
        # These inputs are expected to make parsing fail with a ValueError.
        # then_error_was_raised is defined outside this chunk; the list
        # presumably holds acceptable substrings of the error message --
        # confirm against the helper.
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_raised(ValueError, ['year is out of range',
                                                "('year must be in 1..9999'"])

    @parameterized.expand([
        param('несколько секунд назад', boundary={'seconds': 45}, period='day'),
        param('há alguns segundos', boundary={'seconds': 45}, period='day'),
    ])
    def test_inexplicit_dates(self, date_string, boundary, period):
        # Vague expressions have no exact expected value, so the parsed date
        # is only required to fall strictly between (now - boundary) and now.
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_period_is(period)
        earliest_allowed = self.now - timedelta(**boundary)
        self.then_date_obj_is_between(earliest_allowed, self.now)

    @parameterized.expand([
        param('Today at 9 pm', date(2014, 9, 1), time(21, 0)),
        param('Today at 11:20 am', date(2014, 9, 1), time(11, 20)),
        param('Yesterday 1:20 pm', date(2014, 8, 31), time(13, 20)),
        param('the day before yesterday 16:50', date(2014, 8, 30), time(16, 50)),
        param('2 Tage 18:50', date(2014, 8, 30), time(18, 50)),
        param('1 day ago at 2 PM', date(2014, 8, 31), time(14, 0)),
        param('Dnes v 12:40', date(2014, 9, 1), time(12, 40)),
        param('1 week ago at 12:00 am', date(2014, 8, 25), time(0, 0)),
    ])
    def test_freshness_date_with_time(self, date_string, date, time):
        # Checks the date part and the time part of the parsed datetime
        # separately.  NOTE: inside this method the `date` and `time`
        # parameters shadow the outer names of the same spelling that the
        # decorator arguments call.
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', 'Asia/Karachi', date(2014, 9, 1), time(13, 30)),
        param('3 hours ago', 'Europe/Paris', date(2014, 9, 1), time(9, 30)),
        param('5 hours ago', 'US/Eastern', date(2014, 9, 1), time(1, 30)), # date in DST range
        param('Today at 9 pm', 'Asia/Karachi', date(2014, 9, 1), time(21, 0)), # time given, hence, no shift applies
    ])
    def test_freshness_date_with_pytz_timezones(self, date_string, timezone, date, time):
        # The parser is built with the TIMEZONE setting set to `timezone`
        # (a zone name such as 'Asia/Karachi' in the cases above); the date
        # and time parts of the result are then compared separately.
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', 'PKT', date(2014, 9, 1), time(13, 30)),
        param('5 hours ago', 'EST', date(2014, 9, 1), time(0, 30)),
        param('3 hours ago', 'MET', date(2014, 9, 1), time(8, 30)),
    ])
    def test_freshness_date_with_timezone_abbreviations(self, date_string, timezone, date, time):
        # The parser is built with the TIMEZONE setting set to `timezone`
        # (an abbreviation such as 'PKT' in the cases above); the date and
        # time parts of the result are then compared separately.
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', '+05:00', date(2014, 9, 1), time(13, 30)),
        param('5 hours ago', '-05:00', date(2014, 9, 1), time(0, 30)),
        param('3 hours ago', '+01:00', date(2014, 9, 1), time(8, 30)),
    ])
    def test_freshness_date_with_timezone_utc_offset(self, date_string, timezone, date, time):
        # The parser is built with the TIMEZONE setting set to `timezone`
        # (a UTC offset such as '+05:00' in the cases above); the date and
        # time parts of the result are then compared separately.
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_is(date)
        self.then_time_is(time)

    def given_date_string(self, date_string):
        """Store the input that ``when_date_is_parsed`` will feed to the parser."""
        self.date_string = date_string

    def given_parser(self, settings=None):
        """Build ``self.parser`` with the freshness parser instrumented.

        Installs patches so that the freshness parser's results are recorded
        in ``self.freshness_result`` and its notion of "now" is ``self.now``,
        then creates ``DateDataParser(settings=settings)``.

        ``add_patch`` is defined outside this chunk; presumably it starts the
        patch and undoes it at teardown -- confirm against the base class.
        """

        def collecting_get_date_data(get_date_data):
            # Wrap get_date_data so every return value is also stored on the
            # test instance for later comparison with the final result.
            @wraps(get_date_data)
            def wrapped(*args, **kwargs):
                self.freshness_result = get_date_data(*args, **kwargs)
                return self.freshness_result
            return wrapped
        self.add_patch(patch.object(freshness_date_parser,
                                    'get_date_data',
                                    collecting_get_date_data(freshness_date_parser.get_date_data)))

        # Mock that delegates to the real freshness parser, with `now`
        # replaced by the fixed instant from setUp.
        self.freshness_parser = Mock(wraps=freshness_date_parser)
        self.add_patch(patch.object(self.freshness_parser, 'now', self.now))

        # Stand-in for the module's `datetime` whose utcnow() returns self.now;
        # other attributes are delegated to the wrapped original.
        dt_mock = Mock(wraps=dateparser.freshness_date_parser.datetime)
        dt_mock.utcnow = Mock(return_value=self.now)
        self.add_patch(patch('dateparser.freshness_date_parser.datetime', new=dt_mock))
        # Make dateparser.date use the instrumented freshness parser.
        self.add_patch(patch('dateparser.date.freshness_date_parser', new=self.freshness_parser))
        self.parser = DateDataParser(settings=settings)

    def when_date_is_parsed(self):
        """Parse ``self.date_string``; store the outcome instead of raising.

        On success the parser's return value is stored in ``self.result``.
        Any exception is caught and stored in ``self.error`` so that later
        ``then_*`` steps can assert on it; ``self.result`` is left untouched
        in that case.
        """
        try:
            self.result = self.parser.get_date_data(self.date_string)
        except Exception as error:
            # Deliberately broad: the test steps inspect the captured error.
            self.error = error

    def then_date_is(self, date):
        """Assert that the date part of the parsed ``date_obj`` equals ``date``."""
        parsed_date = self.result['date_obj'].date()
        self.assertEqual(date, parsed_date)

    def then_time_is(self, time):
        """Assert that the time part of the parsed ``date_obj`` equals ``time``."""
        parsed_time = self.result['date_obj'].time()
        self.assertEqual(time, parsed_time)

    def then_period_is(self, period):
        """Assert that the result's ``'period'`` entry equals ``period``."""
        parsed_period = self.result['period']
        self.assertEqual(period, parsed_period)

    def then_date_obj_is_between(self, low_boundary, high_boundary):
        """Assert ``low_boundary < date_obj < high_boundary`` (both strict)."""
        parsed = self.result['date_obj']
        self.assertGreater(parsed, low_boundary)
        self.assertLess(parsed, high_boundary)

    def then_date_obj_is_exactly_this_time_ago(self, ago):
        """Assert that ``date_obj`` equals ``self.now`` minus ``relativedelta(**ago)``."""
        expected = self.now - relativedelta(**ago)
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_not_parsed(self):
        """Assert that the result carries no ``date_obj`` (it is ``None``)."""
        parsed = self.result['date_obj']
        failure_message = '"%s" should not be parsed' % self.date_string
        self.assertIsNone(parsed, failure_message)

    def then_date_was_parsed_by_freshness_parser(self):
        """Assert that the final result equals what the freshness parser returned.

        ``self.freshness_result`` is recorded by the wrapper that
        ``given_parser`` installs around ``get_date_data``.
        """
        self.assertEqual(self.result, self.freshness_result)

    def then_error_was_not_raised(self):
        """Assert that ``self.error`` still holds the ``NotImplemented`` placeholder.

        NOTE(review): ``self.error`` is not initialised in this class's
        ``setUp``; presumably the base class sets it to ``NotImplemented`` --
        confirm.
        """
        self.assertEqual(NotImplemented, self.error)
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Reset per-test state; every slot starts as the ``NotImplemented`` placeholder."""
        super(TestDateParser, self).setUp()
        for slot in ('parser', 'result', 'date_parser', 'date_result'):
            setattr(self, slot, NotImplemented)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
    ])
    def test_dates_parsing(self, date_string, expected):
        # Every parameterized string must be handled by date_parser and yield
        # exactly `expected` with a 'day' period.
        # Pin "now": expectations for inputs without a year or a date
        # (e.g. '10 мая', 'Friday', '10:04am EDT') are relative to this moment.
        self.given_utcnow(datetime(2012, 11, 13))  # Tuesday
        # Zero local offset: expectations for inputs carrying a timezone
        # abbreviation (EDT, EST) are the corresponding UTC values.
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 1, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        # Local offset is patched to +1 hour: expectations for strings naming
        # a timezone (EDT, PDT) are the UTC value plus one hour, while the
        # string without a timezone ('15 May 2004') is expected unchanged.
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        # Inputs end with a numeric UTC offset (-0400, +0500, ...); with a zero
        # local offset the expected values are the same instants in UTC.
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        # Calls DateParser().parse directly (not DateDataParser); the helper
        # captures any exception in self.error instead of letting it propagate.
        self.when_date_is_parsed_by_date_parser('')
        # then_error_was_raised is defined outside this class body; presumably
        # the list holds text expected in the error message -- verify there.
        self.then_error_was_raised(ValueError, ["Empty string"])

    @parameterized.expand([
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
        param('24h ago'),
    ])
    def test_dates_not_parsed(self, date_string):
        # Strings that are not absolute dates (garbage, two dates glued
        # together, a relative '24h ago') must make DateParser().parse raise
        # ValueError; the helper stores the exception in self.error.
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, ["unknown string format"])

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        # With PREFER_DATES_FROM='past', incomplete inputs are expected to
        # resolve to an occurrence before the pinned "now"
        # (e.g. '10 December' -> 2014, '16:10' -> the previous day).
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        # With PREFER_DATES_FROM='future', incomplete inputs are expected to
        # resolve to an occurrence after the pinned "now"
        # (e.g. 'Friday' -> the coming Friday, '14:05' -> the next day).
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)


    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        # With PREFER_DATES_FROM='current_period', the parameterized
        # expectations keep month-only inputs in the current year (2015) and
        # time-only inputs on the current day (15 Feb).
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        # Month+year inputs with PREFER_DAY_OF_MONTH='current': the expected
        # day is today's day number, capped at the length of the target month
        # (e.g. today=31 Jan with 'February 2015' -> 28 Feb).
        self.given_configuration('PREFER_DAY_OF_MONTH', 'current')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        # Month+year inputs with PREFER_DAY_OF_MONTH='last': the expected day
        # is the last day of the parsed month, whatever `today` is
        # (29 Feb for the leap year 2012).
        self.given_configuration('PREFER_DAY_OF_MONTH', 'last')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        # Month+year inputs with PREFER_DAY_OF_MONTH='first': the expected day
        # is always the 1st of the parsed month, whatever `today` is.
        self.given_configuration('PREFER_DAY_OF_MONTH', 'first')
        self.given_utcnow(today)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        # Whatever PREFER_DAY_OF_MONTH is set to, a string that spells out the
        # day ('24 April 2012') must come back with that exact day.
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(datetime(2015, 2, 12))
        self.given_parser()
        self.when_date_is_parsed('24 April 2012')
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string):
        # Day numbers that do not exist in the named month (29 Feb 2015,
        # 31 April, ...) must make DateParser().parse raise ValueError; the
        # helper stores the exception in self.error.
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, ['Day not in range for month'])

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        # The same ISO 8601 stamp must parse regardless of the `languages`
        # restriction (['fr'], ['en'] or an empty list).
        self.given_local_tz_offset(0)
        # given_parser forwards its keyword arguments to DateDataParser.
        self.given_parser(languages=languages)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        # Checks the 'period' reported alongside the date: per the parameter
        # list, month-only inputs give 'month', year-only inputs give 'year',
        # and everything else (day, weekday or time inputs) gives 'day'.
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    def given_utcnow(self, now):
        """Pin ``datetime.utcnow()`` as seen by ``dateparser.date_parser`` to ``now``.

        The replacement is a ``Mock`` wrapping the real ``datetime``, so only
        ``utcnow`` is overridden; the patch is registered via ``add_patch``.
        """
        frozen_datetime = Mock(wraps=datetime)
        frozen_datetime.utcnow = Mock(return_value=now)
        datetime_patch = patch('dateparser.date_parser.datetime', new=frozen_datetime)
        self.add_patch(datetime_patch)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset`` to ``offset`` hours.

        ``offset`` is converted to a ``timedelta`` of ``3600 * offset`` seconds.
        """
        offset_patch = patch.object(
            dateparser.timezone_parser,
            'local_tz_offset',
            new=timedelta(seconds=3600 * offset),
        )
        self.add_patch(offset_patch)

    def given_parser(self, *args, **kwds):
        """Create the ``DateDataParser`` under test with ``date_parser`` instrumented.

        ``date_parser.parse`` is patched with a wrapper that stores each return
        value in ``self.date_result``.  ``dateparser.date.date_parser`` is then
        replaced by a ``Mock`` wrapping the imported ``date_parser`` object
        (kept in ``self.date_parser``).  All positional and keyword arguments
        are forwarded to ``DateDataParser``; the new instance is stored in
        ``self.parser``.
        """
        def collecting_get_date_data(parse):
            # Forward whatever arguments the caller supplies rather than
            # hard-coding a single ``date_string`` parameter, so the collector
            # does not raise TypeError if ``parse`` is called with more.
            @wraps(parse)
            def wrapped(*parse_args, **parse_kwds):
                self.date_result = parse(*parse_args, **parse_kwds)
                return self.date_result
            return wrapped

        self.add_patch(patch.object(date_parser,
                                    'parse',
                                    collecting_get_date_data(date_parser.parse)))

        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def given_configuration(self, key, value):
        """Override attribute ``key`` of ``settings`` with ``value`` via a registered patch."""
        setting_patch = patch.object(settings, key, new=value)
        self.add_patch(setting_patch)

    def when_date_is_parsed(self, date_string):
        """Parse ``date_string`` with ``self.parser`` and keep the outcome in ``self.result``.

        Exceptions are not caught here; they propagate to the test.
        """
        parsed = self.parser.get_date_data(date_string)
        self.result = parsed

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse ``date_string`` with a fresh ``DateParser`` and record the outcome.

        The parse result goes to ``self.result``; any ``Exception`` raised is
        stored in ``self.error`` instead of propagating.
        """
        try:
            parsed = DateParser().parse(date_string)
        except Exception as caught:
            self.error = caught
        else:
            self.result = parsed

    def then_period_is(self, period):
        """Assert that the ``'period'`` entry of the parsed result equals ``period``."""
        reported_period = self.result['period']
        self.assertEqual(period, reported_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that the parsed ``date_obj`` equals ``expected``."""
        parsed_moment = self.result['date_obj']
        self.assertEqual(expected, parsed_moment)

    def then_date_was_parsed_by_date_parser(self):
        """Assert that ``date_parser.parse`` ran and produced the final ``date_obj``.

        ``self.date_result`` is filled by the collecting wrapper installed in
        ``given_parser``; while it still holds ``NotImplemented`` the wrapped
        ``parse`` was never called.  Its first element must equal the
        ``date_obj`` of the final result.
        """
        collected = self.date_result
        self.assertNotEqual(NotImplemented, collected, "Date was not parsed")
        self.assertEqual(self.result['date_obj'], collected[0])
class TestFreshnessDateDataParser(BaseTestCase):
    def setUp(self):
        """Fix ``self.now`` and reset every other slot to the ``NotImplemented`` placeholder."""
        super(TestFreshnessDateDataParser, self).setUp()
        self.now = datetime(2014, 9, 1, 10, 30)
        placeholder_slots = (
            'date_string',
            'parser',
            'result',
            'freshness_parser',
            'freshness_result',
            'exception',
            'date',
            'time',
        )
        for slot in placeholder_slots:
            setattr(self, slot, NotImplemented)

    @parameterized.expand([
        # English dates
        param('yesterday', ago={'days': 1}, period='day'),
        param('the day before yesterday', ago={'days': 2}, period='day'),
        param('today', ago={'days': 0}, period='day'),
        param('an hour ago', ago={'hours': 1}, period='day'),
        param('about an hour ago', ago={'hours': 1}, period='day'),
        param('a day ago', ago={'days': 1}, period='day'),
        param('a week ago', ago={'weeks': 1}, period='week'),
        param('one week ago', ago={'weeks': 1}, period='week'),
        param('2 hours ago', ago={'hours': 2}, period='day'),
        param('about 23 hours ago', ago={'hours': 23}, period='day'),
        param('1 year 2 months', ago={'years': 1, 'months': 2}, period='month'),
        param('1 year, 09 months,01 weeks', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 year 11 months', ago={'years': 1, 'months': 11}, period='month'),
        param('1 year 12 months', ago={'years': 1, 'months': 12}, period='month'),
        param('15 hr', ago={'hours': 15}, period='day'),
        param('15 hrs', ago={'hours': 15}, period='day'),
        param('2 min', ago={'minutes': 2}, period='day'),
        param('2 mins', ago={'minutes': 2}, period='day'),
        param('3 sec', ago={'seconds': 3}, period='day'),
        param('1000 years ago', ago={'years': 1000}, period='year'),
        param('2013 years ago', ago={'years': 2013}, period='year'),  # We've fixed .now in setUp
        param('5000 months ago', ago={'years': 416, 'months': 8}, period='month'),
        param('{} months ago'.format(2013 * 12 + 8), ago={'years': 2013, 'months': 8}, period='month'),
        param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # French dates
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Hier", ago={'days': 1}, period='day'),
        param("Avant-hier", ago={'days': 2}, period='day'),
        param('Il ya un jour', ago={'days': 1}, period='day'),
        param('Il ya une heure', ago={'hours': 1}, period='day'),
        param('Il ya 2 heures', ago={'hours': 2}, period='day'),
        param('Il ya environ 23 heures', ago={'hours': 23}, period='day'),
        param('1 an 2 mois', ago={'years': 1, 'months': 2}, period='month'),
        param('1 année, 09 mois, 01 semaines', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 an 11 mois', ago={'years': 1, 'months': 11}, period='month'),
        param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),
        param('Il y a 40 min', ago={'minutes': 40}, period='day'),

        # German dates
        param('Heute', ago={'days': 0}, period='day'),
        param('Gestern', ago={'days': 1}, period='day'),
        param('vorgestern', ago={'days': 2}, period='day'),
        param('vor einem Tag', ago={'days': 1}, period='day'),
        param('vor einer Stunden', ago={'hours': 1}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'),
        param('1 Jahr 2 Monate', ago={'years': 1, 'months': 2}, period='month'),
        param('1 Jahr, 09 Monate, 01 Wochen', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 Jahr 11 Monate', ago={'years': 1, 'months': 11}, period='month'),
        param('vor 29h', ago={'hours': 29}, period='day'),
        param('vor 29m', ago={'minutes': 29}, period='day'),
        param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Italian dates
        param('oggi', ago={'days': 0}, period='day'),
        param('ieri', ago={'days': 1}, period='day'),
        param('2 ore fa', ago={'hours': 2}, period='day'),
        param('circa 23 ore fa', ago={'hours': 23}, period='day'),
        param('1 anno 2 mesi', ago={'years': 1, 'months': 2}, period='month'),
        param('1 anno, 09 mesi, 01 settimane', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 anno 11 mesi', ago={'years': 1, 'months': 11}, period='month'),
        param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Portuguese dates
        param('ontem', ago={'days': 1}, period='day'),
        param('anteontem', ago={'days': 2}, period='day'),
        param('hoje', ago={'days': 0}, period='day'),
        param('uma hora atrás', ago={'hours': 1}, period='day'),
        param('um dia atrás', ago={'days': 1}, period='day'),
        param('uma semana atrás', ago={'weeks': 1}, period='week'),
        param('2 horas atrás', ago={'hours': 2}, period='day'),
        param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'),
        param('1 ano 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 ano, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 ano 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Turkish dates
        param('Dün', ago={'days': 1}, period='day'),
        param('2 saat önce', ago={'hours': 2}, period='day'),
        param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'),
        param('1 yıl 2 ay', ago={'years': 1, 'months': 2}, period='month'),
        param('1 yıl, 09 ay, 01 hafta', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 yıl 11 ay', ago={'years': 1, 'months': 11}, period='month'),
        param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Russian dates
        param('сегодня', ago={'days': 0}, period='day'),
        param('Вчера в', ago={'days': 1}, period='day'),
        param('вчера', ago={'days': 1}, period='day'),
        param('2 часа назад', ago={'hours': 2}, period='day'),
        param('час назад', ago={'hours': 1}, period='day'),
        param('минуту назад', ago={'minutes': 1}, period='day'),
        param('2 ч. 21 мин. назад', ago={'hours': 2, 'minutes': 21}, period='day'),
        param('около 23 часов назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяца', ago={'years': 1, 'months': 2}, period='month'),
        param('1 год, 09 месяцев, 01 недель', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 год 11 месяцев', ago={'years': 1, 'months': 11}, period='month'),
        param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Czech dates
        param('Před 2 hodinami', ago={'hours': 2}, period='day'),
        param('před přibližně 23 hodin', ago={'hours': 23}, period='day'),
        param('1 rok 2 měsíce', ago={'years': 1, 'months': 2}, period='month'),
        param('1 rok, 09 měsíců, 01 týdnů', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 rok 11 měsíců', ago={'years': 1, 'months': 11}, period='month'),
        param('3 dny', ago={'days': 3}, period='day'),
        param('3 hodiny', ago={'hours': 3}, period='day'),
        param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Spanish dates
        param('anteayer', ago={'days': 2}, period='day'),
        param('ayer', ago={'days': 1}, period='day'),
        param('hoy', ago={'days': 0}, period='day'),
        param('hace una hora', ago={'hours': 1}, period='day'),
        param('Hace un día', ago={'days': 1}, period='day'),
        param('Hace una semana', ago={'weeks': 1}, period='week'),
        param('Hace 2 horas', ago={'hours': 2}, period='day'),
        param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'),
        param('1 año 2 meses', ago={'years': 1, 'months': 2}, period='month'),
        param('1 año, 09 meses, 01 semanas', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1 año 11 meses', ago={'years': 1, 'months': 11}, period='month'),
        param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Chinese dates
        param('昨天', ago={'days': 1}, period='day'),
        param('前天', ago={'days': 2}, period='day'),
        param('2小时前', ago={'hours': 2}, period='day'),
        param('约23小时前', ago={'hours': 23}, period='day'),
        param('1年2个月', ago={'years': 1, 'months': 2}, period='month'),
        param('1年09月,01周', ago={'years': 1, 'months': 9, 'weeks': 1}, period='week'),
        param('1年11个月', ago={'years': 1, 'months': 11}, period='month'),
        param('1年,1月,1周,1天,1小时,1分钟前',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Arabic dates
        param('اليوم', ago={'days': 0}, period='day'),
        param('يوم أمس', ago={'days': 1}, period='day'),
        param('منذ يومين', ago={'days': 2}, period='day'),
        param('منذ 3 أيام', ago={'days': 3}, period='day'),
        param('منذ 21 أيام', ago={'days': 21}, period='day'),
        param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Thai dates
        param('วันนี้', ago={'days': 0}, period='day'),
        param('เมื่อวานนี้', ago={'days': 1}, period='day'),
        param('2 วัน', ago={'days': 2}, period='day'),
        param('2 ชั่วโมง', ago={'hours': 2}, period='day'),
        param('23 ชม.', ago={'hours': 23}, period='day'),
        param('2 สัปดาห์ 3 วัน', ago={'weeks': 2, 'days': 3}, period='day'),
        param('1 ปี 9 เดือน 1 สัปดาห์', ago={'years': 1, 'months': 9, 'weeks': 1},
              period='week'),
        param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที',
              ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
              period='day'),

        # Vietnamese dates
        param('Hôm nay', ago={'days': 0}, period='day'),
        param('Hôm qua', ago={'days': 1}, period='day'),
        param('2 giờ', ago={'hours': 2}, period='day'),
        param('2 tuần 3 ngày', ago={'weeks': 2, 'days': 3}, period='day'),
        # following test unsupported, refer to discussion at:
        # http://github.com/scrapinghub/dateparser/issues/33
        #param('1 năm 1 tháng 1 tuần 1 ngày 1 giờ 1 chút',
        #      ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
        #      period='day'),
    ])
    def test_relative_dates(self, date_string, ago, period):
        # Each relative expression must be handled by the freshness parser and
        # resolve to exactly self.now minus `ago` (relativedelta keyword
        # arguments), with the given period.
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # when_date_is_parsed() swallows exceptions, so check for one first.
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_date_obj_is_exactly_this_time_ago(ago)
        self.then_period_is(period)

    @parameterized.expand([
        param('15th of Aug, 2014 Diane Bennett'),
    ])
    def test_insane_dates(self, date_string):
        # A date followed by unrelated text must neither raise nor produce a
        # date: the result's 'date_obj' is expected to be None.
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('5000 years ago'),
        param('2014 years ago'),  # We've fixed .now in setUp
        param('{} months ago'.format(2013 * 12 + 9)),
    ])
    def test_dates_not_supported_by_date_time(self, date_string):
        # Offsets that would land before year 1 relative to self.now
        # (2014-09-01) are expected to surface as a ValueError.
        self.given_parser()
        self.given_date_string(date_string)
        # The exception is captured in self.exception, not propagated.
        self.when_date_is_parsed()
        self.then_error_was_raised(ValueError, 'year is out of range')

    @parameterized.expand([
        param('несколько секунд назад', boundary={'seconds': 45}, period='day'),
        param('há alguns segundos', boundary={'seconds': 45}, period='day'),
    ])
    def test_inexplicit_dates(self, date_string, boundary, period):
        # Vague expressions ("a few seconds ago") have no exact expected
        # value; the result only has to fall strictly between
        # self.now - boundary and self.now.
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_period_is(period)
        # `boundary` holds timedelta keyword arguments.
        self.then_date_obj_is_between(self.now - timedelta(**boundary), self.now)

    # Relative day expressions combined with an explicit clock time; the date
    # part and the time part of the parsed datetime are asserted separately.
    # NOTE(review): the expected dates imply `self.now` falls on 2014-09-01 —
    # confirm against setUp.
    @parameterized.expand([
        param('Today at 9 pm', date(2014, 9, 1), time(21, 0)),
        param('Today at 11:20 am', date(2014, 9, 1), time(11, 20)),
        param('Yesterday 1:20 pm', date(2014, 8, 31), time(13, 20)),
        param('the day before yesterday 16:50', date(2014, 8, 30), time(16, 50)),
        param('2 Tage 18:50', date(2014, 8, 30), time(18, 50)),
        param('1 day ago at 2 PM', date(2014, 8, 31), time(14, 0)),
    ])
    # The `date` and `time` parameters shadow the imported classes inside the body.
    def test_freshness_date_with_time(self, date_string, date, time):
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_is(date)
        self.then_time_is(time)

    def given_date_string(self, date_string):
        """Remember ``date_string`` as the input for ``when_date_is_parsed``."""
        self.date_string = date_string

    def given_parser(self):
        """Create ``self.parser`` with the freshness parser instrumented.

        Three patches are registered through ``self.add_patch``:

        * ``freshness_date_parser.now`` is replaced by ``self.now``;
        * ``freshness_date_parser.get_date_data`` is wrapped so that every
          result is also stored in ``self.freshness_result``;
        * ``dateparser.date.freshness_date_parser`` is replaced by a ``Mock``
          wrapping the real parser, kept in ``self.freshness_parser``.

        Finally ``self.parser`` is set to a new ``DateDataParser``.
        """
        self.add_patch(patch.object(freshness_date_parser, 'now', self.now))

        def collecting_get_date_data(get_date_data):
            # Forward the call unchanged, whatever its signature, so the
            # recorder keeps working if get_date_data gains extra arguments.
            @wraps(get_date_data)
            def wrapped(*args, **kwargs):
                self.freshness_result = get_date_data(*args, **kwargs)
                return self.freshness_result
            return wrapped
        self.add_patch(patch.object(freshness_date_parser,
                                    'get_date_data',
                                    collecting_get_date_data(freshness_date_parser.get_date_data)))

        # The Mock wraps the already-patched parser, so calls still reach the
        # recording wrapper installed above.
        self.freshness_parser = Mock(wraps=freshness_date_parser)
        self.add_patch(patch('dateparser.date.freshness_date_parser', new=self.freshness_parser))
        self.parser = DateDataParser()

    def when_date_is_parsed(self):
        """Parse ``self.date_string``, keeping either the result or the error.

        On success the parser output is stored in ``self.result``; any
        ``Exception`` is stored in ``self.exception`` instead of propagating,
        so that the ``then_*`` helpers can inspect it.
        """
        try:
            parsed = self.parser.get_date_data(self.date_string)
        except Exception as error:
            self.exception = error
        else:
            self.result = parsed

    def then_date_is(self, date):
        """Assert that the parsed datetime falls on the calendar day ``date``."""
        parsed = self.result['date_obj']
        self.assertEqual(date, parsed.date())

    def then_time_is(self, time):
        """Assert that the parsed datetime carries the clock time ``time``."""
        parsed = self.result['date_obj']
        self.assertEqual(time, parsed.time())

    def then_period_is(self, period):
        """Assert that the parse result reports ``period`` under its 'period' key."""
        reported = self.result['period']
        self.assertEqual(period, reported)

    def then_date_obj_is_between(self, low_boundary, high_boundary):
        """Assert ``low_boundary < date_obj < high_boundary`` (both bounds exclusive)."""
        parsed = self.result['date_obj']
        self.assertGreater(parsed, low_boundary)
        self.assertLess(parsed, high_boundary)

    def then_date_obj_is_exactly_this_time_ago(self, ago):
        """Assert that the parsed datetime equals ``self.now`` minus ``ago``.

        ``ago`` is a dict of keyword arguments for ``relativedelta``.
        """
        expected = self.now - relativedelta(**ago)
        self.assertEqual(expected, self.result['date_obj'])

    def then_date_was_not_parsed(self):
        """Assert that parsing produced no date, i.e. 'date_obj' is None."""
        parsed = self.result['date_obj']
        failure_note = '"%s" should not be parsed' % self.date_string
        self.assertIsNone(parsed, failure_note)

    def then_date_was_parsed_by_freshness_parser(self):
        """Assert that the overall result equals what the freshness parser returned."""
        overall, recorded = self.result, self.freshness_result
        self.assertEqual(overall, recorded)

    def then_error_was_not_raised(self):
        """Assert that ``self.exception`` still equals the ``NotImplemented`` sentinel."""
        captured = self.exception
        self.assertEqual(NotImplemented, captured)

    def then_error_was_raised(self, error_cls, expected_regexp=None):
        """Assert that parsing failed with ``error_cls`` and a matching message.

        Args:
            error_cls: Exception type ``self.exception`` must be an instance of.
            expected_regexp: Optional pattern, given either as a string or as a
                compiled regular expression, that must be found somewhere in
                ``str(self.exception)``. Skipped when ``None``.

        Raises:
            self.failureException: if the pattern does not match the message.
        """
        self.assertIsInstance(self.exception, error_cls)

        if expected_regexp is None:
            return

        # Duck-typed check: anything lacking .search() is treated as pattern
        # text. This avoids `basestring`, which does not exist on Python 3.
        if not hasattr(expected_regexp, 'search'):
            expected_regexp = re.compile(expected_regexp)

        message = str(self.exception)
        if not expected_regexp.search(message):
            raise self.failureException('"%s" does not match "%s"' % (expected_regexp.pattern, message))
Exemple #30
0
 def _parse_date(self, string):
     """Parse ``string`` with a fresh ``DateDataParser`` and return its ``date()``.

     Raises:
         RuntimeError: if the parser reports no 'date_obj' for ``string``.
     """
     parsed = DateDataParser().get_date_data(string)['date_obj']
     if parsed is not None:
         return parsed.date()
     raise RuntimeError('Unable to parse date: {!r}'.format(string))
Exemple #31
0
 def adapt(self, text, htmlpage=None):
     """Return the 'date_obj' that ``DateDataParser`` extracts from ``text``.

     ``htmlpage`` is accepted but not used here. A ``ValueError`` raised while
     parsing is swallowed and ``None`` is returned instead.
     """
     try:
         date_data = DateDataParser().get_date_data(text)
         return date_data['date_obj']
     except ValueError:
         return None
Exemple #32
0
 def date_to_datetime_range(text,
                            relative_base=None,
                            prefer_dates_from='past'):
     """Convert a textual date expression into a ``[start, end]`` datetime pair.

     The text is tried, in order, as a relative duration (anything that
     ``relative_duration_range_re`` strips a part from), an ordinal
     expression such as "the second month of 2006", a decade such as
     "1950s", and finally whatever ``DateDataParser`` recognises.

     Args:
         text: The date expression to interpret.
         relative_base: Datetime that relative expressions are resolved
             against; the externally defined ``doc_date`` when ``None``.
         prefer_dates_from: Forwarded to the parser as its
             ``PREFER_DATES_FROM`` setting.

     Returns:
         A two-element list ``[start, end]``, or ``None`` when the text cannot
         be interpreted (this includes parsed periods other than day, month
         or year, and ordinals that name no valid week or month).

     Raises:
         Exception: if ``ordinal_date_re`` yields a unit other than day,
             week or month.
     """
     if relative_base is None:
         relative_base = doc_date
     # Handle relative date ranges like "the past ___ days"
     relative_num_days = re.sub(relative_duration_range_re, "", text)
     if len(relative_num_days) < len(text):
         # Something was stripped, so the remainder names the start of a range
         # that ends at the relative base.
         num_days_datetime_range = date_to_datetime_range(
             relative_num_days)
         if not num_days_datetime_range:
             return None
         return [num_days_datetime_range[0], relative_base]
     text = clean_date_str(text)
     if len(text) < 3:
         return None
     # Handle ordinal dates like "the second month of 2006"
     match = ordinal_date_re.match(text)
     if match:
         match_dict = match.groupdict()
         if match_dict['ordinal']:
             ordinal_number = ORDINALS.index(match_dict['ordinal']) + 1
         else:
             ordinal_number = int(match_dict['ordinal_number'])
         unit = match_dict['unit']
         rest = match_dict['rest']
         if unit == 'day':
             return date_to_datetime_range(
                 str(ordinal_number) + " " + rest)
         elif unit == 'week':
             if ordinal_number > 4:
                 return
             parsed_remainder = date_to_datetime_range("1 " + rest)
             if not parsed_remainder:
                 return
             week_start = parsed_remainder[0]
             week_start = date_to_datetime_range(
                 "Sunday",
                 # A day is added because if the base date is on Sunday
                 # the prior sunday will be used.
                 relative_base=week_start + relativedelta(days=1))[0]
             for _ in range(ordinal_number - 1):
                 week_start = date_to_datetime_range(
                     "Sunday",
                     relative_base=week_start + relativedelta(days=1),
                     prefer_dates_from='future')[0]
             return [week_start, week_start + relativedelta(days=7)]
         elif unit == 'month':
             # An ordinal outside 1..12 names no calendar month. Without this
             # guard datetime.datetime() raises ValueError; return None instead,
             # as the week branch does for ordinals it cannot honour.
             if not 1 <= ordinal_number <= 12:
                 return
             month_name = datetime.datetime(2000, ordinal_number,
                                            1).strftime("%B ")
             return date_to_datetime_range(month_name + rest)
         else:
             raise Exception("Unknown time unit: " + unit)
     # handle dates like "1950s" since dateparser doesn't
     decade_match = re.match(r"(\d{4})s", text)
     if decade_match:
         decade = int(decade_match.groups()[0])
         return [
             datetime.datetime(decade, 1, 1),
             datetime.datetime(decade + 10, 1, 1)
         ]
     parser = DateDataParser(
         ['en'],
         settings={
             'RELATIVE_BASE': relative_base or datetime.datetime.now(),
             'PREFER_DATES_FROM': prefer_dates_from
         })
     try:
         # A trailing " year" is dropped before the text reaches the parser.
         text = re.sub(r" year$", "", text)
         date_data = parser.get_date_data(text)
     except (TypeError, ValueError):
         return
     if date_data['date_obj']:
         date = date_data['date_obj']
         # Widen the parsed instant to the full period the parser reported.
         if date_data['period'] == 'day':
             return [date, date + relativedelta(days=1)]
         elif date_data['period'] == 'month':
             date = datetime.datetime(date.year, date.month, 1)
             return [date, date + relativedelta(months=1)]
         elif date_data['period'] == 'year':
             date = datetime.datetime(date.year, 1, 1)
             return [date, date + relativedelta(years=1)]
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Run the base-class setup, then reset this class's four fixture slots.

        ``parser``, ``result``, ``date_parser`` and ``date_result`` all start
        out as the ``NotImplemented`` sentinel.
        """
        super(TestDateParser, self).setUp()
        self.parser = self.result = self.date_parser = self.date_result = NotImplemented

    # Table-driven check of absolute date strings across many languages.
    # Every case is parsed with NORMALIZE disabled and RELATIVE_BASE pinned to
    # 2012-11-13; strings that omit the year or give only a weekday/time have
    # expected values in that neighbourhood (e.g. "Friday" -> 2012-11-09).
    @parameterized.expand(
        [
            # English dates
            param("[Sept] 04, 2014.", datetime(2014, 9, 4)),
            param("Tuesday Jul 22, 2014", datetime(2014, 7, 22)),
            param("10:04am EDT", datetime(2012, 11, 13, 14, 4)),
            param("Friday", datetime(2012, 11, 9)),
            param("November 19, 2014 at noon", datetime(2014, 11, 19, 12, 0)),
            param("December 13, 2014 at midnight", datetime(2014, 12, 13, 0, 0)),
            param("Nov 25 2014 10:17 pm EST", datetime(2014, 11, 26, 3, 17)),
            param("Wed Aug 05 12:00:00 EDT 2015", datetime(2015, 8, 5, 16, 0)),
            param("April 9, 2013 at 6:11 a.m.", datetime(2013, 4, 9, 6, 11)),
            param("Aug. 9, 2012 at 2:57 p.m.", datetime(2012, 8, 9, 14, 57)),
            param("December 10, 2014, 11:02:21 pm", datetime(2014, 12, 10, 23, 2, 21)),
            param("8:25 a.m. Dec. 12, 2014", datetime(2014, 12, 12, 8, 25)),
            param("2:21 p.m., December 11, 2014", datetime(2014, 12, 11, 14, 21)),
            param("Fri, 12 Dec 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50)),
            param("20 Mar 2013 10h11", datetime(2013, 3, 20, 10, 11)),
            param("10:06am Dec 11, 2014", datetime(2014, 12, 11, 10, 6)),
            param("19 February 2013 year 09:10", datetime(2013, 2, 19, 9, 10)),
            # French dates
            param("11 Mai 2014", datetime(2014, 5, 11)),
            param("dimanche, 11 Mai 2014", datetime(2014, 5, 11)),
            param("22 janvier 2015 à 14h40", datetime(2015, 1, 22, 14, 40)),
            param("Dimanche 1er Février à 21:24", datetime(2012, 2, 1, 21, 24)),
            param("vendredi, décembre 5 2014.", datetime(2014, 12, 5, 0, 0)),
            param("le 08 Déc 2014 15:11", datetime(2014, 12, 8, 15, 11)),
            param("Le 11 Décembre 2014 à 09:00", datetime(2014, 12, 11, 9, 0)),
            param("fév 15, 2013", datetime(2013, 2, 15, 0, 0)),
            param("Jeu 15:12", datetime(2012, 11, 8, 15, 12)),
            # Spanish dates
            param("Martes 21 de Octubre de 2014", datetime(2014, 10, 21)),
            param("Miércoles 20 de Noviembre de 2013", datetime(2013, 11, 20)),
            param("12 de junio del 2012", datetime(2012, 6, 12)),
            param("13 Ago, 2014", datetime(2014, 8, 13)),
            param("13 Septiembre, 2014", datetime(2014, 9, 13)),
            param("11 Marzo, 2014", datetime(2014, 3, 11)),
            param("julio 5, 2015 en 1:04 pm", datetime(2015, 7, 5, 13, 4)),
            param("Vi 17:15", datetime(2012, 11, 9, 17, 15)),
            # Dutch dates
            param("11 augustus 2014", datetime(2014, 8, 11)),
            param("14 januari 2014", datetime(2014, 1, 14)),
            param("vr jan 24, 2014 12:49", datetime(2014, 1, 24, 12, 49)),
            # Italian dates
            param("16 giu 2014", datetime(2014, 6, 16)),
            param("26 gennaio 2014", datetime(2014, 1, 26)),
            param("Ven 18:23", datetime(2012, 11, 9, 18, 23)),
            # Portuguese dates
            param("sexta-feira, 10 de junho de 2014 14:52", datetime(2014, 6, 10, 14, 52)),
            param("13 Setembro, 2014", datetime(2014, 9, 13)),
            param("Sab 3:03", datetime(2012, 11, 10, 3, 3)),
            # Russian dates
            param("10 мая", datetime(2012, 5, 10)),  # forum.codenet.ru
            param("26 апреля", datetime(2012, 4, 26)),
            param("20 ноября 2013", datetime(2013, 11, 20)),
            param("28 октября 2014 в 07:54", datetime(2014, 10, 28, 7, 54)),
            param("13 января 2015 г. в 13:34", datetime(2015, 1, 13, 13, 34)),
            param("09 августа 2012", datetime(2012, 8, 9, 0, 0)),
            param("Авг 26, 2015 15:12", datetime(2015, 8, 26, 15, 12)),
            param("2 Декабрь 95 11:15", datetime(1995, 12, 2, 11, 15)),
            param("13 янв. 2005 19:13", datetime(2005, 1, 13, 19, 13)),
            param("13 авг. 2005 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005г. 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005 г. 19:13", datetime(2005, 8, 13, 19, 13)),
            # Turkish dates
            param("11 Ağustos, 2014", datetime(2014, 8, 11)),
            param("08.Haziran.2014, 11:07", datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
            param("17.Şubat.2014, 17:51", datetime(2014, 2, 17, 17, 51)),
            param("14-Aralık-2012, 20:56", datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
            # Romanian dates
            param("13 iunie 2013", datetime(2013, 6, 13)),
            param("14 aprilie 2014", datetime(2014, 4, 14)),
            param("18 martie 2012", datetime(2012, 3, 18)),
            param("S 14:14", datetime(2012, 11, 10, 14, 14)),
            param("12-Iun-2013", datetime(2013, 6, 12)),
            # German dates
            param("21. Dezember 2013", datetime(2013, 12, 21)),
            param("19. Februar 2012", datetime(2012, 2, 19)),
            param("26. Juli 2014", datetime(2014, 7, 26)),
            param("18.10.14 um 22:56 Uhr", datetime(2014, 10, 18, 22, 56)),
            param("12-Mär-2014", datetime(2014, 3, 12)),
            param("Mit 13:14", datetime(2012, 11, 7, 13, 14)),
            # Czech dates
            param("pon 16. čer 2014 10:07:43", datetime(2014, 6, 16, 10, 7, 43)),
            param("13 Srpen, 2014", datetime(2014, 8, 13)),
            param("čtv 14. lis 2013 12:38:43", datetime(2013, 11, 14, 12, 38, 43)),
            # Thai dates
            param("ธันวาคม 11, 2014, 08:55:08 PM", datetime(2014, 12, 11, 20, 55, 8)),
            param("22 พฤษภาคม 2012, 22:12", datetime(2012, 5, 22, 22, 12)),
            param("11 กุมภา 2020, 8:13 AM", datetime(2020, 2, 11, 8, 13)),
            param("1 เดือนตุลาคม 2005, 1:00 AM", datetime(2005, 10, 1, 1, 0)),
            param("11 ก.พ. 2020, 1:13 pm", datetime(2020, 2, 11, 13, 13)),
            # Vietnamese dates
            param("Thứ năm", datetime(2012, 11, 8)),  # Thursday
            param("Thứ sáu", datetime(2012, 11, 9)),  # Friday
            param("Tháng Mười Hai 29, 2013, 14:14", datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
            param("05 Tháng một 2015 - 03:54 AM", datetime(2015, 1, 5, 3, 54)),
            # Belarusian dates
            param("11 траўня", datetime(2012, 5, 11)),
            param("4 мая", datetime(2012, 5, 4)),
            param("Чацвер 06 жніўня 2015", datetime(2015, 8, 6)),
            param("Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін", datetime(2015, 3, 14, 7, 10)),
            param("5 жніўня 2015 года у 13:34", datetime(2015, 8, 5, 13, 34)),
            # Ukrainian dates
            param("2015-кві-12", datetime(2015, 4, 12)),
            param("21 чер 2013 3:13", datetime(2013, 6, 21, 3, 13)),
            param("12 лютого 2012, 13:12:23", datetime(2012, 2, 12, 13, 12, 23)),
            param("вів о 14:04", datetime(2012, 11, 6, 14, 4)),
            # Tagalog dates
            param("12 Hulyo 2003 13:01", datetime(2003, 7, 12, 13, 1)),
            param("1978, 1 Peb, 7:05 PM", datetime(1978, 2, 1, 19, 5)),
            param("2 hun", datetime(2012, 6, 2)),
            param("Lin 16:16", datetime(2012, 11, 11, 16, 16)),
            # Japanese dates
            param("2016年3月20日(日) 21時40分", datetime(2016, 3, 20, 21, 40)),
            param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
            # Numeric dates
            param("06-17-2014", datetime(2014, 6, 17)),
            param("13/03/2014", datetime(2014, 3, 13)),
            param("11. 12. 2014, 08:45:39", datetime(2014, 11, 12, 8, 45, 39)),
            # Miscellaneous dates
            param("1 Ni 2015", datetime(2015, 4, 1, 0, 0)),
            param("1 Mar 2015", datetime(2015, 3, 1, 0, 0)),
            param("1 Paz 2015", datetime(2015, 10, 1, 0, 0)),
            param("1 сер 2015", datetime(2015, 8, 1, 0, 0)),
            param("2016020417:10", datetime(2016, 2, 4, 17, 10)),
            # Chinese dates
            param("2015年04月08日10:05", datetime(2015, 4, 8, 10, 5)),
            param("2012年12月20日10:35", datetime(2012, 12, 20, 10, 35)),
            param("2016年 2月 5日", datetime(2016, 2, 5, 0, 0)),
        ]
    )
    def test_dates_parsing(self, date_string, expected):
        # Local tz offset is set to 0 before the parser is built.
        self.given_local_tz_offset(0)
        self.given_parser(settings={"NORMALIZE": False, "RELATIVE_BASE": datetime(2012, 11, 13)})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        # Every case in the table is expected to report the "day" period.
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    def test_stringified_datetime_should_parse_fine(self):
        # Round trip: str() of the datetime parsed from "today" (microseconds
        # included) must itself parse back to the same value.
        base = datetime(2012, 11, 13, 10, 15, 5, 330256)
        self.given_parser(settings={"RELATIVE_BASE": base})
        today_data = self.parser.get_date_data("today")
        stringified = str(today_data["date_obj"])
        self.when_date_is_parsed(stringified)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(base)

    # Same style of multilingual table as test_dates_parsing, but run with
    # NORMALIZE enabled and with each input passed through normalize_unicode()
    # before parsing. RELATIVE_BASE is again pinned to 2012-11-13.
    @parameterized.expand(
        [
            # English dates
            param("[Sept] 04, 2014.", datetime(2014, 9, 4)),
            param("Tuesday Jul 22, 2014", datetime(2014, 7, 22)),
            param("10:04am EDT", datetime(2012, 11, 13, 14, 4)),
            param("Friday", datetime(2012, 11, 9)),
            param("November 19, 2014 at noon", datetime(2014, 11, 19, 12, 0)),
            param("December 13, 2014 at midnight", datetime(2014, 12, 13, 0, 0)),
            param("Nov 25 2014 10:17 pm EST", datetime(2014, 11, 26, 3, 17)),
            param("Wed Aug 05 12:00:00 EDT 2015", datetime(2015, 8, 5, 16, 0)),
            param("April 9, 2013 at 6:11 a.m.", datetime(2013, 4, 9, 6, 11)),
            param("Aug. 9, 2012 at 2:57 p.m.", datetime(2012, 8, 9, 14, 57)),
            param("December 10, 2014, 11:02:21 pm", datetime(2014, 12, 10, 23, 2, 21)),
            param("8:25 a.m. Dec. 12, 2014", datetime(2014, 12, 12, 8, 25)),
            param("2:21 p.m., December 11, 2014", datetime(2014, 12, 11, 14, 21)),
            param("Fri, 12 Dec 2014 10:55:50", datetime(2014, 12, 12, 10, 55, 50)),
            param("20 Mar 2013 10h11", datetime(2013, 3, 20, 10, 11)),
            param("10:06am Dec 11, 2014", datetime(2014, 12, 11, 10, 6)),
            param("19 February 2013 year 09:10", datetime(2013, 2, 19, 9, 10)),
            # French dates
            param("11 Mai 2014", datetime(2014, 5, 11)),
            param("dimanche, 11 Mai 2014", datetime(2014, 5, 11)),
            # NOTE(review): flagged "wrong" by the original author; the reason is not recorded — confirm
            param("22 janvier 2015 à 14h40", datetime(2015, 1, 22, 14, 40)),
            param("Dimanche 1er Février à 21:24", datetime(2012, 2, 1, 21, 24)),
            param("vendredi, décembre 5 2014.", datetime(2014, 12, 5, 0, 0)),
            param("le 08 Déc 2014 15:11", datetime(2014, 12, 8, 15, 11)),
            param("Le 11 Décembre 2014 à 09:00", datetime(2014, 12, 11, 9, 0)),
            param("fév 15, 2013", datetime(2013, 2, 15, 0, 0)),
            param("Jeu 15:12", datetime(2012, 11, 8, 15, 12)),
            # Spanish dates
            param("Martes 21 de Octubre de 2014", datetime(2014, 10, 21)),
            param("Miércoles 20 de Noviembre de 2013", datetime(2013, 11, 20)),
            param("12 de junio del 2012", datetime(2012, 6, 12)),
            param("13 Ago, 2014", datetime(2014, 8, 13)),
            param("13 Septiembre, 2014", datetime(2014, 9, 13)),
            param("11 Marzo, 2014", datetime(2014, 3, 11)),
            param("julio 5, 2015 en 1:04 pm", datetime(2015, 7, 5, 13, 4)),
            param("Vi 17:15", datetime(2012, 11, 9, 17, 15)),
            # Dutch dates
            param("11 augustus 2014", datetime(2014, 8, 11)),
            param("14 januari 2014", datetime(2014, 1, 14)),
            param("vr jan 24, 2014 12:49", datetime(2014, 1, 24, 12, 49)),
            # Italian dates
            param("16 giu 2014", datetime(2014, 6, 16)),
            param("26 gennaio 2014", datetime(2014, 1, 26)),
            param("Ven 18:23", datetime(2012, 11, 9, 18, 23)),
            # Portuguese dates
            param("sexta-feira, 10 de junho de 2014 14:52", datetime(2014, 6, 10, 14, 52)),
            param("13 Setembro, 2014", datetime(2014, 9, 13)),
            param("Sab 3:03", datetime(2012, 11, 10, 3, 3)),
            # Russian dates
            param("10 мая", datetime(2012, 5, 10)),  # forum.codenet.ru
            param("26 апреля", datetime(2012, 4, 26)),
            param("20 ноября 2013", datetime(2013, 11, 20)),
            param("28 октября 2014 в 07:54", datetime(2014, 10, 28, 7, 54)),
            param("13 января 2015 г. в 13:34", datetime(2015, 1, 13, 13, 34)),
            param("09 августа 2012", datetime(2012, 8, 9, 0, 0)),
            param("Авг 26, 2015 15:12", datetime(2015, 8, 26, 15, 12)),
            param("2 Декабрь 95 11:15", datetime(1995, 12, 2, 11, 15)),
            param("13 янв. 2005 19:13", datetime(2005, 1, 13, 19, 13)),
            param("13 авг. 2005 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005г. 19:13", datetime(2005, 8, 13, 19, 13)),
            param("13 авг. 2005 г. 19:13", datetime(2005, 8, 13, 19, 13)),
            # Turkish dates
            param("11 Ağustos, 2014", datetime(2014, 8, 11)),
            param("08.Haziran.2014, 11:07", datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
            param("17.Şubat.2014, 17:51", datetime(2014, 2, 17, 17, 51)),
            param("14-Aralık-2012, 20:56", datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
            # Romanian dates
            param("13 iunie 2013", datetime(2013, 6, 13)),
            param("14 aprilie 2014", datetime(2014, 4, 14)),
            param("18 martie 2012", datetime(2012, 3, 18)),
            param("S 14:14", datetime(2012, 11, 10, 14, 14)),
            param("12-Iun-2013", datetime(2013, 6, 12)),
            # German dates
            param("21. Dezember 2013", datetime(2013, 12, 21)),
            param("19. Februar 2012", datetime(2012, 2, 19)),
            param("26. Juli 2014", datetime(2014, 7, 26)),
            param("18.10.14 um 22:56 Uhr", datetime(2014, 10, 18, 22, 56)),
            param("12-Mär-2014", datetime(2014, 3, 12)),
            param("Mit 13:14", datetime(2012, 11, 7, 13, 14)),
            # Czech dates
            param("pon 16. čer 2014 10:07:43", datetime(2014, 6, 16, 10, 7, 43)),
            param("13 Srpen, 2014", datetime(2014, 8, 13)),
            param("čtv 14. lis 2013 12:38:43", datetime(2013, 11, 14, 12, 38, 43)),
            # Thai dates
            param("ธันวาคม 11, 2014, 08:55:08 PM", datetime(2014, 12, 11, 20, 55, 8)),
            param("22 พฤษภาคม 2012, 22:12", datetime(2012, 5, 22, 22, 12)),
            param("11 กุมภา 2020, 8:13 AM", datetime(2020, 2, 11, 8, 13)),
            param("1 เดือนตุลาคม 2005, 1:00 AM", datetime(2005, 10, 1, 1, 0)),
            param("11 ก.พ. 2020, 1:13 pm", datetime(2020, 2, 11, 13, 13)),
            # Vietnamese dates
            param("Thứ năm", datetime(2012, 11, 8)),  # Thursday
            param("Thứ sáu", datetime(2012, 11, 9)),  # Friday
            param("Tháng Mười Hai 29, 2013, 14:14", datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
            param("05 Tháng một 2015 - 03:54 AM", datetime(2015, 1, 5, 3, 54)),
            # Belarusian dates
            param("11 траўня", datetime(2012, 5, 11)),
            param("4 мая", datetime(2012, 5, 4)),
            param("Чацвер 06 жніўня 2015", datetime(2015, 8, 6)),
            param("Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін", datetime(2015, 3, 14, 7, 10)),
            param("5 жніўня 2015 года у 13:34", datetime(2015, 8, 5, 13, 34)),
            # Ukrainian dates
            param("2015-кві-12", datetime(2015, 4, 12)),
            param("21 чер 2013 3:13", datetime(2013, 6, 21, 3, 13)),
            param("12 лютого 2012, 13:12:23", datetime(2012, 2, 12, 13, 12, 23)),
            param("вів о 14:04", datetime(2012, 11, 6, 14, 4)),
            # Filipino dates
            param("12 Hulyo 2003 13:01", datetime(2003, 7, 12, 13, 1)),
            param("1978, 1 Peb, 7:05 PM", datetime(1978, 2, 1, 19, 5)),
            param("2 hun", datetime(2012, 6, 2)),
            param("Lin 16:16", datetime(2012, 11, 11, 16, 16)),
            # Japanese dates
            param("2016年3月20日(日) 21時40分", datetime(2016, 3, 20, 21, 40)),
            param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
            # Numeric dates
            param("06-17-2014", datetime(2014, 6, 17)),
            param("13/03/2014", datetime(2014, 3, 13)),
            param("11. 12. 2014, 08:45:39", datetime(2014, 11, 12, 8, 45, 39)),
            # Miscellaneous dates
            param("1 Ni 2015", datetime(2015, 4, 1, 0, 0)),
            param("1 Mar 2015", datetime(2015, 3, 1, 0, 0)),
            param("1 Paz 2015", datetime(2015, 10, 1, 0, 0)),
            param("1 сер 2015", datetime(2015, 8, 1, 0, 0)),
        ]
    )
    def test_dates_parsing_with_normalization(self, date_string, expected):
        self.given_local_tz_offset(0)
        self.given_parser(settings={"NORMALIZE": True, "RELATIVE_BASE": datetime(2012, 11, 13)})
        # The input is normalized by the test itself before it reaches the parser.
        self.when_date_is_parsed(normalize_unicode(date_string))
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    # Date strings carrying a named time zone abbreviation (EDT, PDT), parsed
    # with the local tz offset set to +1 and default parser settings.
    # NOTE(review): the expected values look like UTC conversions of the input
    # (e.g. "4:32 pm EDT" -> 20:32), independent of the +1 local offset — confirm.
    @parameterized.expand(
        [
            param("Sep 03 2014 | 4:32 pm EDT", datetime(2014, 9, 3, 20, 32)),
            param("17th October, 2034 @ 01:08 am PDT", datetime(2034, 10, 17, 8, 8)),
            param("15 May 2004 23:24 EDT", datetime(2004, 5, 16, 3, 24)),
            param("15 May 2004", datetime(2004, 5, 15, 0, 0)),
            param("08/17/14 17:00 (PDT)", datetime(2014, 8, 18, 0, 0)),
        ]
    )
    def test_parsing_with_time_zones(self, date_string, expected):
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    # Date strings carrying a numeric UTC offset (+HHMM / -HHMM), parsed with
    # the local tz offset set to 0 and default parser settings.
    # NOTE(review): the expected values correspond to the input shifted to UTC
    # (e.g. "19:00:00 -0500" -> 00:00 the next day) — confirm this is the contract.
    @parameterized.expand(
        [
            param("15 May 2004 16:10 -0400", datetime(2004, 5, 15, 20, 10)),
            param("1999-12-31 19:00:00 -0500", datetime(2000, 1, 1, 0, 0)),
            param("1999-12-31 19:00:00 +0500", datetime(1999, 12, 31, 14, 0)),
            param("Fri, 09 Sep 2005 13:51:39 -0700", datetime(2005, 9, 9, 20, 51, 39)),
            param("Fri, 09 Sep 2005 13:51:39 +0000", datetime(2005, 9, 9, 13, 51, 39)),
        ]
    )
    def test_parsing_with_utc_offsets(self, date_string, expected):
        self.given_local_tz_offset(0)
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is("day")
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        # An empty input must be rejected with ValueError.
        accepted_messages = ["Empty string"]
        self.when_date_is_parsed_by_date_parser("")
        self.then_error_was_raised(ValueError, accepted_messages)

    # Inputs the date parser must reject with ValueError, paired with the
    # message expected for each.
    # NOTE(review): sibling tests pass a list of accepted messages to
    # then_error_was_raised, while this one passes a bare string — confirm the
    # helper treats both the same way.
    @parameterized.expand(
        [
            param("invalid date string", "Unable to parse: h"),
            param("Aug 7, 2014Aug 7, 2014", "Unable to parse: Aug"),
            param("24h ago", "Unable to parse: h"),
            # Was "Unable to parser: ..." — a typo that did not match the
            # "Unable to parse: <token>" wording of every other case here.
            param("2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00", "Unable to parse: 00:002015"),
        ]
    )
    def test_dates_not_parsed(self, date_string, message):
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, message)

    @parameterized.expand(
        [
            param("10 December", datetime(2014, 12, 10)),
            param("March", datetime(2014, 3, 15)),
            param("Friday", datetime(2015, 2, 13)),
            param("Monday", datetime(2015, 2, 9)),
            param("10:00PM", datetime(2015, 2, 14, 22, 0)),
            param("16:10", datetime(2015, 2, 14, 16, 10)),
            param("14:05", datetime(2015, 2, 15, 14, 5)),
            param("15 february 15:00", datetime(2015, 2, 15, 15, 0)),
        ]
    )
    def test_preferably_past_dates(self, date_string, expected):
        # Incomplete dates are parsed with PREFER_DATES_FROM set to "past",
        # relative to a fixed base of 2015-02-15 15:30.
        base = datetime(2015, 2, 15, 15, 30)
        settings = {"PREFER_DATES_FROM": "past", "RELATIVE_BASE": base}
        self.given_parser(settings=settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand(
        [
            param("10 December", datetime(2015, 12, 10)),
            param("March", datetime(2015, 3, 15)),
            param("Friday", datetime(2015, 2, 20)),
            param("Monday", datetime(2015, 2, 16)),
            param("10:00PM", datetime(2015, 2, 15, 22, 0)),
            param("16:10", datetime(2015, 2, 15, 16, 10)),
            param("14:05", datetime(2015, 2, 16, 14, 5)),
        ]
    )
    def test_preferably_future_dates(self, date_string, expected):
        # Incomplete dates are parsed with PREFER_DATES_FROM set to "future",
        # relative to a fixed base of 2015-02-15 15:30.
        base = datetime(2015, 2, 15, 15, 30)
        settings = {"PREFER_DATES_FROM": "future", "RELATIVE_BASE": base}
        self.given_local_tz_offset(0)
        self.given_parser(settings=settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand(
        [
            param("10 December", datetime(2015, 12, 10)),
            param("March", datetime(2015, 3, 15)),
            param("Friday", datetime(2015, 2, 13)),
            param("10:00PM", datetime(2015, 2, 15, 22, 0)),
            param("16:10", datetime(2015, 2, 15, 16, 10)),
            param("14:05", datetime(2015, 2, 15, 14, 5)),
        ]
    )
    def test_dates_without_preference(self, date_string, expected):
        # Incomplete dates are parsed with PREFER_DATES_FROM set to
        # "current_period", relative to a fixed base of 2015-02-15 15:30.
        base = datetime(2015, 2, 15, 15, 30)
        settings = {"PREFER_DATES_FROM": "current_period", "RELATIVE_BASE": base}
        self.given_local_tz_offset(0)
        self.given_parser(settings=settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand(
        [
            param("February 2015", today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
            param("February 2012", today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
            param("March 2015", today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
            param("April 2015", today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
            param("April 2015", today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
            param("December 2014", today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
        ]
    )
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        # Month/year strings parsed with PREFER_DAY_OF_MONTH set to "current";
        # `today` supplies the RELATIVE_BASE for each case.
        settings = {"PREFER_DAY_OF_MONTH": "current", "RELATIVE_BASE": today}
        self.given_parser(settings=settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand(
        [
            param("February 2015", today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
            param("February 2012", today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
            param("March 2015", today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
            param("April 2015", today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
            param("April 2015", today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
            param("December 2014", today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
        ]
    )
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        # Month/year strings parsed with PREFER_DAY_OF_MONTH set to "last";
        # `today` supplies the RELATIVE_BASE for each case.
        settings = {"PREFER_DAY_OF_MONTH": "last", "RELATIVE_BASE": today}
        self.given_parser(settings=settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand(
        [
            param("February 2015", today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
            param("February 2012", today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
            param("March 2015", today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
            param("April 2015", today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
            param("April 2015", today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
            param("December 2014", today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
        ]
    )
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        # Month/year strings parsed with PREFER_DAY_OF_MONTH set to "first";
        # `today` supplies the RELATIVE_BASE for each case.
        settings = {"PREFER_DAY_OF_MONTH": "first", "RELATIVE_BASE": today}
        self.given_parser(settings=settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand(
        [param(prefer_day_of_month="current"), param(prefer_day_of_month="last"), param(prefer_day_of_month="first")]
    )
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        # Whatever PREFER_DAY_OF_MONTH says, a day written in the input string
        # must be the day that comes back.
        parser_settings = {
            "PREFER_DAY_OF_MONTH": prefer_day_of_month,
            "RELATIVE_BASE": datetime(2015, 2, 12),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed("24 April 2012")
        self.then_date_was_parsed_by_date_parser()
        expected_date = datetime(2012, 4, 24)
        self.then_date_obj_exactly_is(expected_date)

    def test_date_is_parsed_when_skip_tokens_are_supplied(self):
        # The trailing "de" is listed in SKIP_TOKENS, so the rest of the string
        # is still expected to parse as a plain date.
        parser_settings = {
            "SKIP_TOKENS": ["de"],
            "RELATIVE_BASE": datetime(2015, 2, 12),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed("24 April 2012 de")
        self.then_date_was_parsed_by_date_parser()
        expected_date = datetime(2012, 4, 24)
        self.then_date_obj_exactly_is(expected_date)

    @parameterized.expand(
        [
            param("29 February 2015", "day must be in 1..28"),
            param("32 January 2015", "day must be in 1..31"),
            param("31 April 2015", "day must be in 1..30"),
            param("31 June 2015", "day must be in 1..30"),
            param("31 September 2015", "day must be in 1..30"),
        ]
    )
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string, message):
        # Two message wordings are passed on as acceptable: the generic one and
        # the month-specific one supplied by the parameter table.
        accepted_messages = ["day is out of range for month", message]
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, accepted_messages)

    @parameterized.expand(
        [
            param("2015-05-02T10:20:19+0000", languages=["fr"], expected=datetime(2015, 5, 2, 10, 20, 19)),
            param("2015-05-02T10:20:19+0000", languages=["en"], expected=datetime(2015, 5, 2, 10, 20, 19)),
            param("2015-05-02T10:20:19+0000", languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
        ]
    )
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        # The same ISO timestamp is parsed under each supplied language list
        # (including an empty one) and must give the same datetime.
        self.given_local_tz_offset(0)
        parser_settings = {"PREFER_LANGUAGE_DATE_ORDER": False}
        self.given_parser(languages=languages, settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand(
        [
            param("10 December", expected=datetime(2015, 12, 10), period="day"),
            param("March", expected=datetime(2015, 3, 15), period="month"),
            param("April", expected=datetime(2015, 4, 15), period="month"),
            param("December", expected=datetime(2015, 12, 15), period="month"),
            param("Friday", expected=datetime(2015, 2, 13), period="day"),
            param("Monday", expected=datetime(2015, 2, 9), period="day"),
            param("10:00PM", expected=datetime(2015, 2, 15, 22, 00), period="day"),
            param("16:10", expected=datetime(2015, 2, 15, 16, 10), period="day"),
            param("2014", expected=datetime(2014, 2, 15), period="year"),
            param("2008", expected=datetime(2008, 2, 15), period="year"),
        ]
    )
    def test_extracted_period(self, date_string, expected=None, period=None):
        # Checks both the resolved datetime and the reported "period" for
        # partial inputs, relative to a fixed base of 2015-02-15 15:30.
        self.given_local_tz_offset(0)
        relative_base = datetime(2015, 2, 15, 15, 30)
        self.given_parser(settings={"RELATIVE_BASE": relative_base})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand(
        [
            param("15-12-18 06:00", expected=datetime(2015, 12, 18, 6, 0), order="YMD"),
            param("15-18-12 06:00", expected=datetime(2015, 12, 18, 6, 0), order="YDM"),
            param("10-11-12 06:00", expected=datetime(2012, 10, 11, 6, 0), order="MDY"),
            param("10-11-12 06:00", expected=datetime(2011, 10, 12, 6, 0), order="MYD"),
            param("10-11-12 06:00", expected=datetime(2011, 12, 10, 6, 0), order="DYM"),
            param("15-12-18 06:00", expected=datetime(2018, 12, 15, 6, 0), order="DMY"),
        ]
    )
    def test_order(self, date_string, expected=None, order=None):
        # The DATE_ORDER setting decides how the three numeric fields of an
        # ambiguous date string are assigned to year, month and day.
        parser_settings = {"DATE_ORDER": order}
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset`` to ``offset`` hours."""
        fake_offset = timedelta(seconds=3600 * offset)
        tz_patch = patch.object(dateparser.timezone_parser, "local_tz_offset", new=fake_offset)
        self.add_patch(tz_patch)

    def given_parser(self, *args, **kwds):
        """Build the ``DateDataParser`` under test with a recording ``date_parser``.

        ``date_parser.parse`` is patched with a wrapper that stores each return
        value on ``self.date_result``. A ``Mock`` wrapping ``date_parser`` is
        kept on ``self.date_parser`` and patched in at
        ``dateparser.date.date_parser``. All positional and keyword arguments
        are forwarded unchanged to ``DateDataParser``.
        """
        real_parse = date_parser.parse

        @wraps(real_parse)
        def recording_parse(*call_args, **call_kwargs):
            # Remember what the underlying parser produced so the then_*
            # helpers can compare it with the final result.
            outcome = real_parse(*call_args, **call_kwargs)
            self.date_result = outcome
            return outcome

        self.add_patch(patch.object(date_parser, "parse", recording_parse))

        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(patch("dateparser.date.date_parser", new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def when_date_is_parsed(self, date_string):
        """Run ``self.parser.get_date_data`` on ``date_string`` and keep the result."""
        date_data = self.parser.get_date_data(date_string)
        self.result = date_data

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse with a fresh ``DateParser``; store the result or the raised error.

        On success the return value goes to ``self.result``. Any ``Exception``
        is captured on ``self.error`` instead of propagating.
        """
        try:
            parsed = DateParser().parse(date_string)
        except Exception as caught:
            self.error = caught
        else:
            self.result = parsed

    def then_period_is(self, period):
        """Assert that the stored result reports ``period`` under its "period" key."""
        actual_period = self.result["period"]
        self.assertEqual(period, actual_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that the stored result's "date_obj" equals ``expected``."""
        actual_date = self.result["date_obj"]
        self.assertEqual(expected, actual_date)

    def then_date_was_parsed_by_date_parser(self):
        """Assert a parse result was recorded and matches the final date.

        ``self.date_result`` must no longer be ``NotImplemented``, and its first
        element must equal ``self.result["date_obj"]``.
        """
        recorded = self.date_result
        self.assertNotEqual(NotImplemented, recorded, "Date was not parsed")
        reported_date = self.result["date_obj"]
        date_from_parser = recorded[0]
        self.assertEqual(reported_date, date_from_parser)
Exemple #34
0
    def annotate(self, doc):
        """Find date expressions in ``doc`` and resolve them to datetime ranges.

        Candidate spans come from the 'DATE' labelled spans of the 'spacy.nes'
        tier plus a regex for formatted dates. Adjacent candidates are grouped,
        and ranges are recognised through joiner words (``DATE_RANGE_JOINERS``)
        or a hyphen, as well as "since <date>" constructs. The
        'structured_data' and 'spacy.nes' tiers are added to ``doc`` when they
        are missing.

        If ``doc.date`` is None, the most recent past date that the strict
        parser finds in the text is used as the document date; if none is
        found, the current time is used.

        Args:
            doc: the document to annotate; read through ``doc.date``,
                ``doc.tiers``, ``doc.add_tiers`` and ``doc.create_regex_tier``.

        Returns:
            A dict with two tiers: ``'dates'`` holds a ``DateSpan`` for every
            span that resolved to a ``[start, end]`` range with
            ``start <= end``; ``'dates.all'`` holds every selected candidate
            span, including those that could not be resolved.
        """
        # If no date is associated with the document, the document's date will
        # be treated as the most recent date explicitly mentioned in the
        # document.
        detect_date = doc.date is None
        doc_date = doc.date or datetime.datetime.now()
        strict_parser = DateDataParser(['en'],
                                       settings={'STRICT_PARSING': True})

        def date_to_datetime_range(text,
                                   relative_base=None,
                                   prefer_dates_from='past'):
            # Resolve a single date expression to [start, end) datetimes, or
            # return None when it cannot be interpreted.
            if relative_base is None:
                relative_base = doc_date
            # Handle relative date ranges like "the past ___ days"
            relative_num_days = re.sub(relative_duration_range_re, "", text)
            if len(relative_num_days) < len(text):
                num_days_datetime_range = date_to_datetime_range(
                    relative_num_days)
                if not num_days_datetime_range:
                    return None
                return [num_days_datetime_range[0], relative_base]
            text = clean_date_str(text)
            if len(text) < 3:
                return None
            # Handle ordinal dates like "the second month of 2006"
            match = ordinal_date_re.match(text)
            if match:
                match_dict = match.groupdict()
                if match_dict['ordinal']:
                    ordinal_number = ORDINALS.index(match_dict['ordinal']) + 1
                else:
                    ordinal_number = int(match_dict['ordinal_number'])
                unit = match_dict['unit']
                rest = match_dict['rest']
                if unit == 'day':
                    return date_to_datetime_range(
                        str(ordinal_number) + " " + rest)
                elif unit == 'week':
                    if ordinal_number > 4:
                        return
                    parsed_remainder = date_to_datetime_range("1 " + rest)
                    if not parsed_remainder:
                        return
                    week_start = parsed_remainder[0]
                    week_start = date_to_datetime_range(
                        "Sunday",
                        # A day is added because if the base date is on Sunday
                        # the prior sunday will be used.
                        relative_base=week_start + relativedelta(days=1))[0]
                    for _ in range(ordinal_number - 1):
                        week_start = date_to_datetime_range(
                            "Sunday",
                            relative_base=week_start + relativedelta(days=1),
                            prefer_dates_from='future')[0]
                    return [week_start, week_start + relativedelta(days=7)]
                elif unit == 'month':
                    # datetime() only accepts months 1..12. An ordinal outside
                    # that range cannot name a month, so treat the text as
                    # unparsable instead of letting ValueError escape.
                    if not 1 <= ordinal_number <= 12:
                        return
                    month_name = datetime.datetime(2000, ordinal_number,
                                                   1).strftime("%B ")
                    return date_to_datetime_range(month_name + rest)
                else:
                    raise Exception("Unknown time unit: " + unit)
            # handle dates like "1950s" since dateparser doesn't
            decade_match = re.match(r"(\d{4})s", text)
            if decade_match:
                decade = int(decade_match.groups()[0])
                return [
                    datetime.datetime(decade, 1, 1),
                    datetime.datetime(decade + 10, 1, 1)
                ]
            parser = DateDataParser(
                ['en'],
                settings={
                    'RELATIVE_BASE': relative_base or datetime.datetime.now(),
                    'PREFER_DATES_FROM': prefer_dates_from
                })
            try:
                text = re.sub(r" year$", "", text)
                date_data = parser.get_date_data(text)
            except (TypeError, ValueError):
                return
            if date_data['date_obj']:
                date = date_data['date_obj']
                if date_data['period'] == 'day':
                    return [date, date + relativedelta(days=1)]
                elif date_data['period'] == 'month':
                    date = datetime.datetime(date.year, date.month, 1)
                    return [date, date + relativedelta(months=1)]
                elif date_data['period'] == 'year':
                    date = datetime.datetime(date.year, 1, 1)
                    return [date, date + relativedelta(years=1)]

        def parse_non_relative_date(text):
            # Parse against an implausible base year (900) so that dates which
            # borrowed their year from the base can be told apart.
            result = date_to_datetime_range(text,
                                            relative_base=datetime.datetime(
                                                900, 1, 1))
            if result and result[0].year > 1000:
                # If the year is less than 1000 assume the year 900
                # base date was used when parsing so the date is relative.
                return result[0]

        if 'structured_data' not in doc.tiers:
            doc.add_tiers(StructuredDataAnnotator())
        if 'spacy.nes' not in doc.tiers:
            doc.add_tiers(SpacyAnnotator())
        # Create a combined tier of nes and regex dates
        date_span_tier = doc.tiers['spacy.nes'].with_label('DATE')
        # Regex for formatted dates
        regex = re.compile(
            r"\b("
            # parenthetical year
            r"((?<=[\[\(])[1-2]\d{3}(?=[\]\)]))|"
            # date MonthName yyyy
            r"(\d{1,2} [a-zA-Z]{3,} \[?\d{4})|"
            # dd-mm-yyyy
            r"(\d{1,2} ?[\/\-] ?\d{1,2} ?[\/\-] ?\d{1,4})|"
            # yyyy-MMM-dd
            r"(\d{1,4} ?[\/\-] ?[a-z]{3,4} ?[\/\-] ?\d{1,4})|"
            # yyyy-mm-dd
            r"(\d{1,4} ?[\/\-] ?\d{1,2} ?[\/\-] ?\d{1,2})"
            r")\b",
            re.I)
        match_tier = doc.create_regex_tier(regex)
        date_span_tier += match_tier
        # Add year components individually in case the full spans are thrown
        # out. Sometimes extra text is added to dates that makes them invalid,
        # this allows some of the date to be recovered.
        date_span_tier += date_span_tier.match_subspans(r"([1-2]\d{3})")
        # Remove spans that are probably ages.
        date_span_tier = date_span_tier.without_overlaps(
            date_span_tier.match_subspans(r"\bage\b"))
        # Group adjacent date info in case it is parsed as separate chunks.
        # ex: Friday, October 7th 2010.
        adjacent_date_spans = date_span_tier.combined_adjacent_spans(
            max_dist=9)
        grouped_date_spans = []

        def can_combine(text):
            # A span may be merged with its neighbours when it starts with a
            # four digit year or the strict parser cannot resolve it alone.
            if re.match(r"\d{4}", text, re.I):
                # year only date
                return True
            try:
                return strict_parser.get_date_data(text)['date_obj'] is None
            except (TypeError, ValueError):
                return True

        for date_group in adjacent_date_spans:
            date_group_spans = list(date_group.iterate_leaf_base_spans())
            if any(can_combine(span.text) for span in date_group_spans):
                if date_to_datetime_range(date_group.text) is not None:
                    grouped_date_spans.append(date_group)
        # Find date ranges by looking for joiner words between dates.
        date_range_joiners = [
            t_span for t_span in doc.tiers['spacy.tokens']
            if re.match(r"(" + DATE_RANGE_JOINERS +
                        r"|\-)$", t_span.text, re.I)
        ]
        date_range_tier = date_span_tier.label_spans('start')\
            .with_following_spans_from(date_range_joiners, max_dist=3)\
            .with_following_spans_from(date_span_tier.label_spans('end'), max_dist=3)\
            .label_spans('date_range')
        since_tokens = AnnoTier([
            t_span for t_span in doc.tiers['spacy.tokens']
            if 'since' == t_span.token.lemma_
        ],
                                presorted=True).label_spans('since_token')
        since_date_tier = (
            since_tokens.with_following_spans_from(date_span_tier,
                                                   allow_overlap=True) +
            date_span_tier.with_contained_spans_from(since_tokens)
        ).label_spans('since_date')
        tier_spans = []
        all_date_spans = AnnoTier(date_range_tier.spans + grouped_date_spans +
                                  date_span_tier.spans + since_date_tier.spans)

        if detect_date:
            simple_date_spans = AnnoTier(
                grouped_date_spans +
                date_span_tier.spans).optimal_span_set(prefer='text_length')
            latest_date = None
            for span in simple_date_spans:
                if re.match(r"today|yesterday", span.text, re.I):
                    continue
                try:
                    span_date = strict_parser.get_date_data(
                        span.text)['date_obj']
                except (TypeError, ValueError):
                    continue
                if span_date and span_date < datetime.datetime.now():
                    if not latest_date or span_date > latest_date:
                        latest_date = span_date
            if latest_date:
                doc_date = latest_date

        date_spans_without_structured_data = all_date_spans.without_overlaps(
            doc.tiers['structured_data'])
        date_spans_in_structured_data = []
        dates_by_structured_value = doc.tiers['structured_data.values']\
            .group_spans_by_containing_span(all_date_spans, allow_partial_containment=False)
        for value_span, date_spans in dates_by_structured_value:
            date_spans_in_structured_data += date_spans
        all_date_spans = AnnoTier(date_spans_without_structured_data.spans +
                                  date_spans_in_structured_data
                                  ).optimal_span_set(prefer='text_length')
        for date_span in all_date_spans:
            # Parse the span text into one or two components depending on
            # whether it contains multiple dates for specifying a range.
            if date_span.label == 'date_range':
                range_component_dict = date_span.groupdict()
                range_components = [
                    range_component_dict['start'][0].text,
                    range_component_dict['end'][0].text
                ]
            else:
                # flags must be passed by keyword: the third positional
                # argument of re.split is maxsplit, not flags, so a positional
                # re.I silently disabled case-insensitive matching.
                range_components = re.split(
                    r"\b(?:" + DATE_RANGE_JOINERS + r")\b", date_span.text,
                    flags=re.I)
                if len(range_components) == 1:
                    hyphenated_components = date_span.text.split("-")
                    if len(hyphenated_components) == 2:
                        range_components = hyphenated_components
                    elif len(hyphenated_components) == 6:
                        # Handle date ranges like 2015-11-3 - 2015-11-6
                        range_components = [
                            '-'.join(hyphenated_components[:3]),
                            '-'.join(hyphenated_components[3:])
                        ]
            if ends_with_timeunit_re.match(
                    date_span.text) and not relative_duration_range_re.match(
                        date_span.text):
                # Prevent durations like "5 days" from being parsed as specific
                # dates like "5 days ago"
                continue
            elif len(range_components) == 1:
                if date_span.label == 'since_date':
                    date_str = [
                        span for span in date_span.base_spans[0].base_spans
                        if span.label != 'since_token'
                    ][0].text
                    datetime_range = date_to_datetime_range(date_str)
                    if datetime_range is None:
                        continue
                    datetime_range = [datetime_range[0], doc_date]
                else:
                    date_str = range_components[0]
                    datetime_range = date_to_datetime_range(date_str)
                    if datetime_range is None:
                        continue
            elif len(range_components) == 2:
                # Handle partial years (e.g.: 2001-12)
                if re.match(r"\d{1,2}$", range_components[1]):
                    if re.match(r".*\d{1,2}$", range_components[0]):
                        characters_to_sub = "1"
                        if len(range_components[1]) > 1:
                            characters_to_sub = "1,2"
                        range_components[1] = re.sub(
                            r"\d{" + characters_to_sub + "}$",
                            range_components[1], range_components[0])
                # Check for a non-relative date in the range that can be used as
                # a relative base date for the other date.
                # Example: March 3 to November 2 1984
                non_relative_dates = [
                    parse_non_relative_date(text) for text in range_components
                ]
                relative_base_date = next((x for x in non_relative_dates if x),
                                          doc_date)
                datetime_range_a = date_to_datetime_range(
                    range_components[0], relative_base=relative_base_date)
                datetime_range_b = date_to_datetime_range(
                    range_components[1], relative_base=relative_base_date)
                if datetime_range_a is None and datetime_range_b is None:
                    continue
                elif datetime_range_a is None:
                    datetime_range = datetime_range_b
                elif datetime_range_b is None:
                    datetime_range = datetime_range_a
                else:
                    # If include_end_date is False treat the span's daterange
                    # as ending at the start of the second date component unless
                    # a word like "through" is used in the second component.
                    if self.include_end_date or\
                       re.search(r"\bthrough\b", date_span.text) or\
                       re.search(r"\b(late|end of)\b", range_components[1]):
                        datetime_range = [
                            datetime_range_a[0], datetime_range_b[1]
                        ]
                    else:
                        datetime_range = [
                            datetime_range_a[0], datetime_range_b[0]
                        ]
            else:
                print("Bad date range split:", date_span.text,
                      range_components)
                continue
            # Omit reverse ranges because they usually come from something
            # being incorrectly parsed. The main exception is relative dates
            # like 2 to 3 weeks ago.
            if datetime_range[0] <= datetime_range[1]:
                tier_spans.append(DateSpan(date_span, datetime_range))
        return {
            'dates': AnnoTier(tier_spans, presorted=True),
            # Include unparsable and non-specific dates
            'dates.all': all_date_spans
        }
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Reset the per-test slots to ``NotImplemented`` after the base setUp."""
        super(TestDateParser, self).setUp()
        # NotImplemented marks "not produced yet" for each slot the
        # given/when/then helpers later fill in.
        for slot in ('parser', 'result', 'date_parser', 'date_result'):
            setattr(self, slot, NotImplemented)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 6, 14, 4)),
        # Tagalog dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
        param('2016020417:10', datetime(2016, 2, 4, 17, 10)),
        # Chinese dates
        param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)),
        param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)),
        param('2016年06月30日09时30分', datetime(2016, 6, 30, 9, 30)),
        param('2016年6月2911:30', datetime(2016, 6, 29, 11, 30)),
        param('2016年6月29', datetime(2016, 6, 29, 0, 0)),
        param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)),
    ])
    def test_dates_parsing(self, date_string, expected):
        # Every sample string must be handled by date_parser, report a 'day'
        # period and resolve to the expected datetime, with the relative base
        # fixed at 2012-11-13 and a zero local timezone offset.
        self.given_local_tz_offset(0)
        parser_settings = {
            'NORMALIZE': False,
            'RELATIVE_BASE': datetime(2012, 11, 13),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_stringified_datetime_should_parse_fine(self):
        # Round trip: resolve 'today' against a fixed base, turn that datetime
        # into a string with str(), and check that parsing the string gives the
        # same moment back, microseconds included.
        reference_moment = datetime(2012, 11, 13, 10, 15, 5, 330256)
        self.given_parser(settings={'RELATIVE_BASE': reference_moment})
        today_data = self.parser.get_date_data('today')
        stringified = str(today_data['date_obj'])
        self.when_date_is_parsed(stringified)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(reference_moment)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), #wrong
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 6, 14, 4)),
        # Filipino dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
    ])
    def test_dates_parsing_with_normalization(self, date_string, expected):
        """Parse a unicode-normalized sample with the NORMALIZE setting on.

        The parser is built with RELATIVE_BASE 2012-11-13 and a local tz
        offset of 0; the result must have period 'day' and a ``date_obj``
        equal to ``expected``.
        """
        self.given_local_tz_offset(0)
        parser_settings = {
            'NORMALIZE': True,
            'RELATIVE_BASE': datetime(2012, 11, 13),
        }
        self.given_parser(settings=parser_settings)
        normalized_input = normalize_unicode(date_string)
        self.when_date_is_parsed(normalized_input)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 8, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        """Parse a sample with the local tz offset patched to +1 hour.

        Uses a parser with default settings; the result must have period
        'day' and a ``date_obj`` equal to ``expected``.
        """
        # given: local offset of one hour and a default parser
        self.given_local_tz_offset(+1)
        self.given_parser()
        # when
        self.when_date_is_parsed(date_string)
        # then
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        """Parse a sample carrying a numeric UTC offset, local tz offset 0.

        Uses a parser with default settings; the result must have period
        'day' and a ``date_obj`` equal to ``expected``.
        """
        # given: no local offset and a default parser
        self.given_local_tz_offset(0)
        self.given_parser()
        # when
        self.when_date_is_parsed(date_string)
        # then
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        """Feed an empty string to DateParser and expect a ValueError
        whose message matches "Empty string"."""
        self.when_date_is_parsed_by_date_parser('')
        self.then_error_was_raised(ValueError, ["Empty string"])

    @parameterized.expand([
        param('invalid date string', 'Unable to parse: h'),
        param('Aug 7, 2014Aug 7, 2014', 'Unable to parse: Aug'),
        param('24h ago', 'Unable to parse: h'),
        param('2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00', 'Unable to parser: 00:002015')
    ])
    def test_dates_not_parsed(self, date_string, message):
        """Feed an unparseable string to DateParser and expect a ValueError
        checked against ``message``."""
        self.when_date_is_parsed_by_date_parser(date_string)
        # NOTE(review): the other callers of then_error_was_raised in this
        # class pass a *list* of allowed substrings, while this call passes a
        # bare string. If the helper iterates its second argument, the check
        # here degrades to a per-character match -- confirm against the helper
        # (and re-check the expected messages in the parameter table).
        self.then_error_was_raised(ValueError, message)

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('10:00PM', datetime(2015, 2, 14, 22, 0)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
        param('15 february 15:00', datetime(2015, 2, 15, 15, 0)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        """Parse with PREFER_DATES_FROM='past' relative to 2015-02-15 15:30.

        The date parser must have produced the result and ``date_obj`` must
        equal ``expected``.
        """
        parser_settings = {
            'PREFER_DATES_FROM': 'past',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 0)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        """Parse with PREFER_DATES_FROM='future' relative to 2015-02-15 15:30.

        The local tz offset is patched to 0; the date parser must have
        produced the result and ``date_obj`` must equal ``expected``.
        """
        self.given_local_tz_offset(0)
        parser_settings = {
            'PREFER_DATES_FROM': 'future',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        """Parse with PREFER_DATES_FROM='current_period' relative to
        2015-02-15 15:30.

        The local tz offset is patched to 0; the date parser must have
        produced the result and ``date_obj`` must equal ``expected``.
        """
        self.given_local_tz_offset(0)
        parser_settings = {
            'PREFER_DATES_FROM': 'current_period',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        """Parse a day-less date with PREFER_DAY_OF_MONTH='current'.

        ``today`` is passed as RELATIVE_BASE; ``date_obj`` must equal
        ``expected``.
        """
        parser_settings = {
            'PREFER_DAY_OF_MONTH': 'current',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        """Parse a day-less date with PREFER_DAY_OF_MONTH='last'.

        ``today`` is passed as RELATIVE_BASE; ``date_obj`` must equal
        ``expected``.
        """
        parser_settings = {
            'PREFER_DAY_OF_MONTH': 'last',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        """Parse a day-less date with PREFER_DAY_OF_MONTH='first'.

        ``today`` is passed as RELATIVE_BASE; ``date_obj`` must equal
        ``expected``.
        """
        parser_settings = {
            'PREFER_DAY_OF_MONTH': 'first',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        """'24 April 2012' must parse to 2012-04-24 whatever value
        PREFER_DAY_OF_MONTH is given (RELATIVE_BASE is 2015-02-12)."""
        parser_settings = {
            'PREFER_DAY_OF_MONTH': prefer_day_of_month,
            'RELATIVE_BASE': datetime(2015, 2, 12),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed('24 April 2012')
        self.then_date_was_parsed_by_date_parser()
        explicit_day = datetime(2012, 4, 24)
        self.then_date_obj_exactly_is(explicit_day)

    def test_date_is_parsed_when_skip_tokens_are_supplied(self):
        """'24 April 2012 de' must parse to 2012-04-24 when 'de' is listed
        in SKIP_TOKENS (RELATIVE_BASE is 2015-02-12)."""
        parser_settings = {
            'SKIP_TOKENS': ['de'],
            'RELATIVE_BASE': datetime(2015, 2, 12),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed('24 April 2012 de')
        self.then_date_was_parsed_by_date_parser()
        wanted = datetime(2012, 4, 24)
        self.then_date_obj_exactly_is(wanted)

    @parameterized.expand([
        param('29 February 2015', 'day must be in 1..28'),
        param('32 January 2015', 'day must be in 1..31'),
        param('31 April 2015', 'day must be in 1..30'),
        param('31 June 2015', 'day must be in 1..30'),
        param('31 September 2015', 'day must be in 1..30'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string, message):
        """A day number beyond the month's length must raise ValueError.

        Either the fixed 'day is out of range for month' text or the
        per-case ``message`` is accepted as the error text.
        """
        accepted_messages = ['day is out of range for month', message]
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, accepted_messages)

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        """An ISO timestamp must parse for every supplied ``languages`` list.

        The parser is built with PREFER_LANGUAGE_DATE_ORDER disabled and the
        local tz offset patched to 0.
        """
        self.given_local_tz_offset(0)
        parser_settings = {'PREFER_LANGUAGE_DATE_ORDER': False}
        self.given_parser(languages=languages, settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        """Check both the parsed ``date_obj`` and the reported ``period``.

        Parsing is relative to 2015-02-15 15:30 with the local tz offset
        patched to 0.
        """
        self.given_local_tz_offset(0)
        parser_settings = {'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('15-12-18 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YMD'),
        param('15-18-12 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YDM'),
        param('10-11-12 06:00', expected=datetime(2012, 10, 11, 6, 0), order='MDY'),
        param('10-11-12 06:00', expected=datetime(2011, 10, 12, 6, 0), order='MYD'),
        param('10-11-12 06:00', expected=datetime(2011, 12, 10, 6, 0), order='DYM'),
        param('15-12-18 06:00', expected=datetime(2018, 12, 15, 6, 0), order='DMY'),
        param('201508', expected=datetime(2015, 8, 20, 0, 0), order='DYM'),
        param('201508', expected=datetime(2020, 8, 15, 0, 0), order='YDM'),
        param('201108', expected=datetime(2008, 11, 20, 0, 0), order='DMY'),
    ])
    def test_order(self, date_string, expected=None, order=None):
        """Parse a numeric date with DATE_ORDER set to ``order`` and compare
        ``date_obj`` with ``expected``."""
        parser_settings = {'DATE_ORDER': order}
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    def given_local_tz_offset(self, offset):
        """Register a patch replacing ``dateparser.timezone_parser.local_tz_offset``
        with a timedelta of ``offset`` hours (expressed as 3600 * offset seconds)."""
        tz_patch = patch.object(
            dateparser.timezone_parser,
            'local_tz_offset',
            new=timedelta(seconds=3600 * offset),
        )
        self.add_patch(tz_patch)

    def given_parser(self, *args, **kwds):
        """Build ``self.parser`` with ``date_parser`` instrumented.

        ``date_parser.parse`` is patched with a wrapper that stores every
        return value in ``self.date_result``; ``dateparser.date.date_parser``
        is then patched with a ``Mock`` wrapping ``date_parser`` (kept in
        ``self.date_parser``). Positional and keyword arguments are forwarded
        unchanged to ``DateDataParser``.
        """
        original_parse = date_parser.parse

        @wraps(original_parse)
        def recording_parse(*call_args, **call_kwargs):
            # Remember the latest outcome so the then_* steps can inspect it.
            self.date_result = original_parse(*call_args, **call_kwargs)
            return self.date_result

        self.add_patch(patch.object(date_parser, 'parse', recording_parse))

        self.date_parser = Mock(wraps=date_parser)
        module_patch = patch('dateparser.date.date_parser', new=self.date_parser)
        self.add_patch(module_patch)
        self.parser = DateDataParser(*args, **kwds)

    def when_date_is_parsed(self, date_string):
        """Run ``self.parser.get_date_data`` on ``date_string`` and keep the
        outcome in ``self.result``."""
        date_data = self.parser.get_date_data(date_string)
        self.result = date_data

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse ``date_string`` with a fresh ``DateParser``.

        On success the outcome is stored in ``self.result``; any exception is
        captured in ``self.error`` instead of propagating.
        """
        try:
            parsed = DateParser().parse(date_string)
        except Exception as error:
            self.error = error
        else:
            self.result = parsed

    def then_period_is(self, period):
        """Assert that the ``'period'`` entry of ``self.result`` equals ``period``."""
        actual_period = self.result['period']
        self.assertEqual(period, actual_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that the ``'date_obj'`` entry of ``self.result`` equals ``expected``."""
        actual_date = self.result['date_obj']
        self.assertEqual(expected, actual_date)

    def then_date_was_parsed_by_date_parser(self):
        """Assert that the instrumented date parser ran and supplied the result.

        ``self.date_result`` must no longer be ``NotImplemented`` and its
        first element must equal ``self.result['date_obj']``.
        """
        recorded = self.date_result
        self.assertNotEqual(NotImplemented, recorded, "Date was not parsed")
        reported_date = self.result['date_obj']
        self.assertEqual(reported_date, recorded[0])
Exemple #36
0
class Reminder():
    '''
    Handler for the "!remindme" command: parses the requested time out
    of a message, sleeps until then and sends the user a reminder.
    '''

    def __init__(self, config=None):
        # `config` is accepted but not used by this class.
        self.active_reminder = {}
        # Captures an optional reminder text wrapped in square brackets.
        self.regex = r'\[(.*)\]'
        self.settings = {'PREFER_DATES_FROM': 'future', 'DATE_ORDER': 'DMY'}
        self.parser = DateDataParser(languages=['en'],
                                     allow_redetect_language=False,
                                     settings=self.settings)

    async def send_reminder_start_msg(self, user, channel, client, time):
        '''
        Gives an acknowledgement that the reminder has been set.
        The time is shown without microseconds and labelled as UTC.
        '''
        time = time.replace(microsecond=0)
        msg = ":+1: %s I'll remind you at %s UTC." % (user, str(time))
        await client.send_message(channel, msg)

    async def send_reminder_end_msg(self, user, channel, client, text):
        '''
        Sends the message when the reminder finishes with the text
        if it was passed in.
        '''
        if text:
            msg = 'Hello %s, you asked me to remind you of **%s**.' % (user,
                                                                       text)
        else:
            msg = 'Hello %s, you asked me to remind you at this time.' % user
        await client.send_message(channel, msg)

    async def start_reminder_sleep(self, delta, user, channel, client, text,
                                   time):
        '''
        Asynchronously sleeps for the reminder length (`delta`, a
        timedelta), acknowledging the reminder first and sending the
        reminder message afterwards.
        '''
        # Send a message that the reminder is going to be set.
        await self.send_reminder_start_msg(user, channel, client, time)
        await asyncio.sleep(delta.total_seconds())
        await self.send_reminder_end_msg(user, channel, client, text)

    def apply_regex(self, msg):
        '''
        Applies the regex to check if the user passed
        in an optional string in square brackets.
        Returns a (message, captured_text) tuple with the bracketed
        part removed from the message, or False when there is no
        bracketed string.
        '''
        regex_result = re.search(self.regex, msg)
        if regex_result:
            msg = re.sub(self.regex, '', msg).strip()
            return msg, regex_result.group(1)
        else:
            return False

    def parse_msg(self, msg, user):
        '''
        Parses the message passed along with the !remind command.
        Uses the dateparser library to check if the time string
        is valid.
        Format: !remindme <time period> [optional string]

        Returns (False, error_msg) when the time cannot be parsed or
        lies in the past, otherwise (True, time_until_reminder,
        reminder_time). reminder_time is always a naive datetime and is
        compared against (and later displayed as) UTC.
        '''
        parsed_time = self.parser.get_date_data(msg)['date_obj']
        if not parsed_time:
            error_msg = ('I could not interept your message %s, try specifing '
                         'the time period in a different format.') % user
            return (False, error_msg)
        # A message that names a time zone or UTC offset can come back as a
        # timezone-aware datetime. Comparing or subtracting that with the
        # naive utcnow() below raises TypeError, so convert it to naive UTC.
        utc_offset = parsed_time.utcoffset()
        if utc_offset is not None:
            parsed_time = (parsed_time - utc_offset).replace(tzinfo=None)
        now = datetime.utcnow()
        if parsed_time < now:
            error_msg = ("Dont waste my time %s, you can't expect "
                         "me to remind you of an event in the past.") % user
            return (False, error_msg)
        difference = parsed_time - now
        return (True, difference, parsed_time)

    @register('!remindme')
    async def set_reminder(self, msg, user, channel, client, *args, **kwargs):
        '''
        Main function that is called to set a reminder. Calls the
        helper functions to parse and to check if it is valid.

        If the message is valid, the asynchronous sleep function
        is called; otherwise the error message is returned.

        Currently loses state on restart ;_; could write/load
        to a file.
        '''
        reminder_txt = None
        optional_string = self.apply_regex(msg)
        if optional_string:
            msg, reminder_txt = optional_string

        parsed_msg = self.parse_msg(msg, user)
        if not parsed_msg[0]:
            return parsed_msg[1]

        _, delta, reminder_time = parsed_msg
        await self.start_reminder_sleep(delta, user, channel, client,
                                        reminder_txt, reminder_time)
Exemple #37
0
 def _parse_date(self, string):
     """Return the calendar date parsed from ``string``.

     Raises RuntimeError when the parser yields no ``date_obj``.
     """
     parsed = DateDataParser().get_date_data(string)['date_obj']
     if parsed is not None:
         return parsed.date()
     raise RuntimeError('Unable to parse date: {!r}'.format(string))
Exemple #38
0
 def _parser_get_date(self, date_string, date_formats, languages):
     """Build a ``DateDataParser`` for ``languages`` and return its
     ``get_date_data`` result for ``date_string`` and ``date_formats``."""
     return DateDataParser(languages).get_date_data(date_string, date_formats)
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Reset every per-test attribute to the ``NotImplemented`` sentinel
        after running the base class ``setUp``."""
        super(TestDateParser, self).setUp()
        # Inputs and outputs of the parsing step.
        self.date_string = self.parser = self.result = NotImplemented
        # Instrumentation around the date parser.
        self.date_parser = self.date_result = NotImplemented

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 \xe0 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er F\xe9vrier \xe0 21:24', datetime(2012, 2, 1, 21, 24)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        # Turkish dates
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        # Miscellaneous
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('vendredi, d\xe9cembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 D\xe9c 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 D\xe9cembre 2014 \xe0 09:00', datetime(2014, 12, 11, 9, 0)),
        param('f\xe9v 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
    ])
    def test_dates_parsing(self, date_string, expected):
        """Parse one sample with utcnow given as 2012-11-13 and local tz
        offset 0; the result must have period 'day' and a ``date_obj`` equal
        to ``expected``."""
        # given
        self.given_utcnow(datetime(2012, 11, 13))  # Tuesday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        # when
        self.when_date_is_parsed()
        # then
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 21, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 9, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 4, 24)),
        param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
        ])
    def test_parsing_with_time_zones(self, date_string, expected):
        """Parse one sample with the local tz offset given as +1; the result
        must have period 'day' and a ``date_obj`` equal to ``expected``."""
        # given
        self.given_local_tz_offset(+1)
        self.given_parser()
        self.given_date_string(date_string)
        # when
        self.when_date_is_parsed()
        # then
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(''),
        param('invalid date string'),
        param('Aug 7, 2014Aug 7, 2014'),
    ])
    def test_dates_not_parsed(self, date_string):
        """Run the parse step on an invalid sample and assert, via
        ``then_date_was_not_parsed``, that no date came out."""
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 14, 22, 00)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        """Parse with PREFER_DATES_FROM configured as 'past', utcnow given as
        2015-02-15 15:30 and local tz offset 0; the result must have period
        'day' and a ``date_obj`` equal to ``expected``."""
        # given
        self.given_configuration('PREFER_DATES_FROM', 'past')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        # when
        self.when_date_is_parsed()
        # then
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        """Parse with PREFER_DATES_FROM configured as 'future', utcnow given
        as 2015-02-15 15:30 and local tz offset 0; the result must have
        period 'day' and a ``date_obj`` equal to ``expected``."""
        # given
        self.given_configuration('PREFER_DATES_FROM', 'future')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        # when
        self.when_date_is_parsed()
        # then
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        """Parse with PREFER_DATES_FROM configured as 'current_period', utcnow
        given as 2015-02-15 15:30 and local tz offset 0; the result must have
        period 'day' and a ``date_obj`` equal to ``expected``."""
        # given
        self.given_configuration('PREFER_DATES_FROM', 'current_period')
        self.given_utcnow(datetime(2015, 2, 15, 15, 30))  # Sunday
        self.given_local_tz_offset(0)
        self.given_parser()
        self.given_date_string(date_string)
        # when
        self.when_date_is_parsed()
        # then
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        """Check day-less dates with ``PREFER_DAY_OF_MONTH`` set to 'current'.

        ``today`` is installed as the frozen "now"; the parsed date must be
        exactly ``expected``.
        """
        setting_name, setting_value = 'PREFER_DAY_OF_MONTH', 'current'

        # Given
        self.given_configuration(setting_name, setting_value)
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)

        # When
        self.when_date_is_parsed()

        # Then
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        """Check day-less dates with ``PREFER_DAY_OF_MONTH`` set to 'last'.

        ``today`` is installed as the frozen "now"; the parsed date must be
        exactly ``expected``.
        """
        setting_name, setting_value = 'PREFER_DAY_OF_MONTH', 'last'

        # Given
        self.given_configuration(setting_name, setting_value)
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)

        # When
        self.when_date_is_parsed()

        # Then
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        """Check day-less dates with ``PREFER_DAY_OF_MONTH`` set to 'first'.

        ``today`` is installed as the frozen "now"; the parsed date must be
        exactly ``expected``.
        """
        setting_name, setting_value = 'PREFER_DAY_OF_MONTH', 'first'

        # Given
        self.given_configuration(setting_name, setting_value)
        self.given_utcnow(today)
        self.given_parser()
        self.given_date_string(date_string)

        # When
        self.when_date_is_parsed()

        # Then
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        """A fully specified date must parse as written for any day preference.

        With "now" pinned to 2015-02-12 and ``PREFER_DAY_OF_MONTH`` set to
        ``prefer_day_of_month``, '24 April 2012' must parse to 2012-04-24.
        """
        frozen_now = datetime(2015, 2, 12)
        explicit_date_string = '24 April 2012'
        explicit_date = datetime(2012, 4, 24)

        # Given
        self.given_configuration('PREFER_DAY_OF_MONTH', prefer_day_of_month)
        self.given_utcnow(frozen_now)
        self.given_parser()
        self.given_date_string(explicit_date_string)

        # When
        self.when_date_is_parsed()

        # Then
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(explicit_date)

    @parameterized.expand([
        param('29 February 2015'),
        param('32 January 2015'),
        param('31 April 2015'),
        param('31 June 2015'),
        param('31 September 2015'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string):
        """Parsing a date whose day exceeds the month length must raise ValueError.

        The error message is expected to match 'Day not in range for month'.
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12, while ``assertRaisesRegex`` does not exist on Python 2.
        # Resolve whichever spelling this interpreter provides.
        assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
        if assert_raises_regex is None:
            assert_raises_regex = self.assertRaisesRegexp

        with assert_raises_regex(ValueError, 'Day not in range for month'):
            DateParser().parse(date_string)

    def given_utcnow(self, now):
        """Register a patch that freezes ``utcnow()`` for ``dateparser.date_parser``.

        The module's ``datetime`` name is replaced by a mock that wraps the
        real ``datetime`` but whose ``utcnow()`` returns ``now``.
        """
        frozen_datetime = Mock(wraps=datetime)
        frozen_datetime.utcnow = Mock(return_value=now)
        datetime_patch = patch('dateparser.date_parser.datetime', new=frozen_datetime)
        self.add_patch(datetime_patch)

    def given_local_tz_offset(self, offset):
        """Register a patch fixing ``dateparser.timezone_parser.local_tz_offset``.

        ``offset`` is given in hours and is converted to a ``timedelta``.
        """
        tz_patch = patch.object(
            dateparser.timezone_parser,
            'local_tz_offset',
            new=timedelta(seconds=3600 * offset),
        )
        self.add_patch(tz_patch)

    def given_date_string(self, date_string):
        """Remember the date string that ``when_date_is_parsed`` will parse."""
        self.date_string = date_string

    def given_parser(self):
        """Build the ``DateDataParser`` under test with an instrumented date parser.

        ``date_parser.parse`` is wrapped so that its return value is stored
        on ``self.date_result``; a ``Mock`` wrapping ``date_parser`` is
        registered as a patch for ``dateparser.date.date_parser`` and kept on
        ``self.date_parser``.
        """
        original_parse = date_parser.parse

        @wraps(original_parse)
        def recording_parse(date_string):
            # Keep the raw parser output so assertions can compare against it.
            parsed = original_parse(date_string)
            self.date_result = parsed
            return parsed

        self.add_patch(patch.object(date_parser, 'parse', recording_parse))

        wrapping_mock = Mock(wraps=date_parser)
        self.date_parser = wrapping_mock
        self.add_patch(patch('dateparser.date.date_parser', new=wrapping_mock))
        self.parser = DateDataParser()

    def given_configuration(self, key, value):
        """Register a patch that sets attribute ``key`` of ``settings`` to ``value``."""
        setting_patch = patch.object(settings, key, new=value)
        self.add_patch(setting_patch)

    def when_date_is_parsed(self):
        """Parse ``self.date_string`` with ``self.parser`` and store the result."""
        date_data = self.parser.get_date_data(self.date_string)
        self.result = date_data

    def then_period_is(self, period):
        """Assert that the stored result reports ``period``."""
        actual_period = self.result['period']
        self.assertEqual(period, actual_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that the stored result's ``date_obj`` equals ``expected``."""
        actual_date = self.result['date_obj']
        self.assertEqual(expected, actual_date)

    def then_date_was_not_parsed(self):
        """Assert that the stored result has no ``date_obj``."""
        date_obj = self.result['date_obj']
        failure_message = '"%s" should not be parsed' % self.date_string
        self.assertIsNone(date_obj, failure_message)

    def then_date_was_parsed_by_date_parser(self):
        """Assert that the result's ``date_obj`` equals the recorded ``date_result``."""
        returned_date = self.result['date_obj']
        recorded_date = self.date_result
        self.assertEqual(returned_date, recorded_date)
Exemple #40
0
class Reminder():
    '''
    Chat plugin that schedules reminders through the !remindme command.

    The time expression is parsed with dateparser (English only,
    preferring future dates, day-month-year order). An optional
    reminder text may be passed in square brackets.
    '''

    def __init__(self, config=None):
        '''
        Sets up the regex for the optional text and the date parser.
        The config argument is accepted but not used here.
        '''
        self.active_reminder = {}
        # Captures the optional reminder text given in square brackets.
        self.regex = r'\[(.*)\]'
        self.settings = {'PREFER_DATES_FROM': 'future',
                         'DATE_ORDER': 'DMY'}
        self.parser = DateDataParser(languages=['en'],
                                     allow_redetect_language=False,
                                     settings=self.settings)

    async def send_reminder_start_msg(self, user, channel, client, time):
        '''
        Gives an acknowledgement that the reminder has been set.
        The time is shown without microseconds.
        '''
        time = time.replace(microsecond=0)
        msg = ":+1: %s I'll remind you at %s UTC." % (user, str(time))
        await client.send_message(channel, msg)

    async def send_reminder_end_msg(self, user, channel, client, text):
        '''
        Sends the message when the reminder finishes, including the
        reminder text if one was passed in.
        '''
        if text:
            msg = 'Hello %s, you asked me to remind you of **%s**.' % (user,
                                                                      text)
        else:
            msg = 'Hello %s, you asked me to remind you at this time.' % user
        await client.send_message(channel, msg)

    async def start_reminder_sleep(self, delta, user, channel, client, text, time):
        '''
        Asynchronously sleeps for the reminder length (delta, a
        timedelta), acknowledging the reminder before the sleep and
        delivering it afterwards.
        '''
        # Send a message that the reminder is going to be set.
        await self.send_reminder_start_msg(user, channel, client, time)
        await asyncio.sleep(delta.total_seconds())
        await self.send_reminder_end_msg(user, channel, client, text)

    def apply_regex(self, msg):
        '''
        Applies the regex to check if the user passed in an optional
        string in square brackets.

        Returns a tuple of the original message with the bracketed
        part removed and the captured text, or False when the message
        contains no bracketed text.
        '''
        regex_result = re.search(self.regex, msg)
        if regex_result:
            msg = re.sub(self.regex, '', msg).strip()
            return msg, regex_result.group(1)
        else:
            return False

    def parse_msg(self, msg, user):
        '''
        Parses the message passed along with the !remind command.
        Uses the dateparser library to check if the time string
        is valid.
        Format: !remindme <time period> [optional string]

        Returns (False, error_msg) when the time cannot be parsed or
        lies in the past, otherwise (True, timedelta until the
        reminder, reminder time as a naive UTC datetime).
        '''
        parsed_time = self.parser.get_date_data(msg)['date_obj']
        if not parsed_time:
            error_msg = ('I could not interept your message %s, try specifing '
                         'the time period in a different format.') % user
            return (False, error_msg)

        # The parser may hand back a timezone-aware datetime (e.g. when the
        # message names a zone or UTC offset). Comparing or subtracting that
        # against the naive utcnow() below raises TypeError, so convert aware
        # values to naive UTC first. Naive values are left untouched.
        utc_offset = parsed_time.utcoffset()
        if utc_offset is not None:
            parsed_time = (parsed_time - utc_offset).replace(tzinfo=None)

        now = datetime.utcnow()
        if parsed_time < now:
            error_msg = ("Dont waste my time %s, you can't expect "
                         "me to remind you of an event in the past.") % user
            return (False, error_msg)
        difference = parsed_time - now
        return (True, difference, parsed_time)

    @register('!remindme')
    async def set_reminder(self, msg, user, channel, client, *args, **kwargs):
        '''
        Main function that is called to set a reminder. Calls the
        helper functions to parse the message and check it is valid.

        If the message is valid, the asynchronous sleep function
        is called; otherwise the error message is returned.

        Currently loses state on restart ;_; could write/load
        to a file.
        '''
        reminder_txt = None
        optional_string = self.apply_regex(msg)
        if optional_string:
            msg, reminder_txt = optional_string

        parsed_msg = self.parse_msg(msg, user)
        if not parsed_msg[0]:
            return parsed_msg[1]
        else:
            await self.start_reminder_sleep(parsed_msg[1], user,
                                            channel, client, reminder_txt,
                                            parsed_msg[2])
Exemple #41
0
 def __init__(self, languages=('en',)):
     """Create the ``DateDataParser`` used by this object.

     ``languages`` is forwarded to ``DateDataParser``; it defaults to
     English only. An explicit ``None`` is passed through unchanged.
     """
     # The default is an immutable tuple rather than a list literal so that
     # one list object is not shared by every call; the parser still
     # receives a fresh list each time.
     if isinstance(languages, tuple):
         languages = list(languages)
     self.parser = DateDataParser(languages=languages)
class TestDateParser(BaseTestCase):
    def setUp(self):
        """Reset per-test state to ``NotImplemented`` placeholders."""
        super(TestDateParser, self).setUp()
        # Each of these is filled in later by the given_/when_ helpers.
        for attribute in ('parser', 'result', 'date_parser', 'date_result'):
            setattr(self, attribute, NotImplemented)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('Tues 9th Aug, 2015', datetime(2015, 8, 9)),
        param('10:04am EDT', datetime(2012, 11, 13, 10, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 25, 22, 17)),
        param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 12, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('13 января 2015 г в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 6, 14, 4)),
        # Tagalog dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
        param('2016020417:10', datetime(2016, 2, 4, 17, 10)),
        # Chinese dates
        param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)),
        param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)),
        param('2016年06月30日09时30分', datetime(2016, 6, 30, 9, 30)),
        param('2016年6月2911:30', datetime(2016, 6, 29, 11, 30)),
        param('2016年6月29', datetime(2016, 6, 29, 0, 0)),
        param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)),
        param('2016年9月14日晚8:00', datetime(2016, 9, 14, 20, 0)),
    ])
    def test_dates_parsing(self, date_string, expected):
        """Parse ``date_string`` with normalization off and compare to ``expected``.

        Relative inputs are resolved against a base of 2012-11-13, and the
        reported period must be 'day'.
        """
        parser_settings = {
            'NORMALIZE': False,
            'RELATIVE_BASE': datetime(2012, 11, 13),
        }
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_stringified_datetime_should_parse_fine(self):
        """``str()`` of a parsed datetime must parse back to the same value.

        'today' is parsed against a fixed relative base (with microseconds),
        stringified, and parsed again; the result must equal that base.
        """
        relative_base = datetime(2012, 11, 13, 10, 15, 5, 330256)
        self.given_parser(settings={'RELATIVE_BASE': relative_base})

        today_data = self.parser.get_date_data('today')
        stringified = str(today_data['date_obj'])
        self.when_date_is_parsed(stringified)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(relative_base)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am EDT', datetime(2012, 11, 13, 10, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 25, 22, 17)),
        param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 12, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)), #wrong
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52', datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11, 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20, 56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM', datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14', datetime(2013, 12, 29, 14, 14)),  # bpsosrcs.wordpress.com
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін', datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 6, 14, 4)),
        # Filipino dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 Paz 2015', datetime(2015, 10, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
    ])
    def test_dates_parsing_with_normalization(self, date_string, expected):
        """Parse the unicode-normalized ``date_string`` with normalization on.

        The local timezone offset is fixed at zero and relative inputs are
        resolved against 2012-11-13; the reported period must be 'day'.
        """
        self.given_local_tz_offset(0)
        parser_settings = {
            'NORMALIZE': True,
            'RELATIVE_BASE': datetime(2012, 11, 13),
        }
        self.given_parser(settings=parser_settings)

        normalized_input = normalize_unicode(date_string)
        self.when_date_is_parsed(normalized_input)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)),
        param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 8, 8)),
        param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)),
        param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)),
    ])
    def test_parsing_with_time_zones(self, date_string, expected):
        """Parse strings naming a timezone with ``TO_TIMEZONE`` set to 'UTC'.

        The parsed date must equal ``expected`` and the period must be 'day'.
        """
        parser_settings = {'TO_TIMEZONE': 'UTC'}
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        """Parse strings carrying a numeric offset with ``TO_TIMEZONE`` set to 'utc'.

        The parsed date must equal ``expected`` and the period must be 'day'.
        """
        parser_settings = {'TO_TIMEZONE': 'utc'}
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        """Parsing an empty string must fail with a ValueError ("Empty string")."""
        empty_input = ''
        allowed_messages = ["Empty string"]

        self.when_date_is_parsed_by_date_parser(empty_input)

        self.then_error_was_raised(ValueError, allowed_messages)

    @parameterized.expand([
        param('invalid date string', 'Unable to parse: h'),
        param('Aug 7, 2014Aug 7, 2014', 'Unable to parse: Aug'),
        param('24h ago', 'Unable to parse: h'),
        param('2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00', 'Unable to parser: 00:002015'),
        param('8 enero 2013 martes 7:03 AM EST 8 enero 2013 martes 7:03 AM EST', 'Unable to parse: 8')
    ])
    def test_dates_not_parsed(self, date_string, message):
        """Unparseable input must produce a ValueError described by ``message``."""
        expected_error = ValueError

        self.when_date_is_parsed_by_date_parser(date_string)

        # NOTE(review): test_empty_dates_string_is_not_parsed passes a list of
        # messages to then_error_was_raised, while a bare string is passed
        # here -- confirm the helper treats both the same way.
        self.then_error_was_raised(expected_error, message)

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('10:00PM', datetime(2015, 2, 14, 22, 0)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
        param('15 february 15:00', datetime(2015, 2, 15, 15, 0)),
        param('3/3/50', datetime(1950, 3, 3)),
        param('3/3/94', datetime(1994, 3, 3)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        """Check partial dates with ``PREFER_DATES_FROM`` set to 'past'.

        Inputs are resolved against a relative base of 2015-02-15 15:30 and
        must parse to exactly ``expected``.
        """
        parser_settings = {
            'PREFER_DATES_FROM': 'past',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 0)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
        param('3/3/50', datetime(2050, 3, 3)),
        param('3/3/94', datetime(2094, 3, 3)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        """Check partial dates with ``PREFER_DATES_FROM`` set to 'future'.

        The local timezone offset is fixed at zero; inputs are resolved
        against a relative base of 2015-02-15 15:30 and must parse to exactly
        ``expected``.
        """
        self.given_local_tz_offset(0)
        parser_settings = {
            'PREFER_DATES_FROM': 'future',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        """Check partial dates with ``PREFER_DATES_FROM`` set to 'current_period'.

        The local timezone offset is fixed at zero; inputs are resolved
        against a relative base of 2015-02-15 15:30 and must parse to exactly
        ``expected``.
        """
        self.given_local_tz_offset(0)
        parser_settings = {
            'PREFER_DATES_FROM': 'current_period',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=parser_settings)

        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 31), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 25)),
        param('April 2015', today=datetime(2015, 1, 31), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 28)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_prefering_current_day_of_month(self, date_string, today=None, expected=None):
        """Check day-less dates when ``PREFER_DAY_OF_MONTH`` is ``'current'``.

        ``today`` is passed to the parser as ``RELATIVE_BASE``; ``expected`` is
        the datetime the parsed ``date_obj`` must equal.
        """
        parser_settings = {
            'PREFER_DAY_OF_MONTH': 'current',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 1), expected=datetime(2015, 2, 28)),
        param('February 2012', today=datetime(2015, 1, 1), expected=datetime(2012, 2, 29)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 31)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 30)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 30)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_prefering_last_day_of_month(self, date_string, today=None, expected=None):
        """Check day-less dates when ``PREFER_DAY_OF_MONTH`` is ``'last'``.

        ``today`` is passed to the parser as ``RELATIVE_BASE``; ``expected`` is
        the datetime the parsed ``date_obj`` must equal.
        """
        parser_settings = {
            'PREFER_DAY_OF_MONTH': 'last',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015', today=datetime(2015, 1, 8), expected=datetime(2015, 2, 1)),
        param('February 2012', today=datetime(2015, 1, 7), expected=datetime(2012, 2, 1)),
        param('March 2015', today=datetime(2015, 1, 25), expected=datetime(2015, 3, 1)),
        param('April 2015', today=datetime(2015, 1, 15), expected=datetime(2015, 4, 1)),
        param('April 2015', today=datetime(2015, 2, 28), expected=datetime(2015, 4, 1)),
        param('December 2014', today=datetime(2015, 2, 15), expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_prefering_first_day_of_month(self, date_string, today=None, expected=None):
        """Check day-less dates when ``PREFER_DAY_OF_MONTH`` is ``'first'``.

        ``today`` is passed to the parser as ``RELATIVE_BASE``; ``expected`` is
        the datetime the parsed ``date_obj`` must equal.
        """
        parser_settings = {
            'PREFER_DAY_OF_MONTH': 'first',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(self, prefer_day_of_month=None):
        """Check that a date with an explicit day ignores ``PREFER_DAY_OF_MONTH``.

        '24 April 2012' must parse to 2012-04-24 for whichever preference
        value is supplied in ``prefer_day_of_month``.
        """
        parser_settings = {
            'PREFER_DAY_OF_MONTH': prefer_day_of_month,
            'RELATIVE_BASE': datetime(2015, 2, 12),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed('24 April 2012')
        self.then_date_was_parsed_by_date_parser()
        expected_date = datetime(2012, 4, 24)
        self.then_date_obj_exactly_is(expected_date)

    def test_date_is_parsed_when_skip_tokens_are_supplied(self):
        """Check that a trailing token listed in ``SKIP_TOKENS`` does not block parsing.

        '24 April 2012 de' must parse to 2012-04-24 when 'de' is a skip token.
        """
        parser_settings = {
            'SKIP_TOKENS': ['de'],
            'RELATIVE_BASE': datetime(2015, 2, 12),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed('24 April 2012 de')
        self.then_date_was_parsed_by_date_parser()
        expected_date = datetime(2012, 4, 24)
        self.then_date_obj_exactly_is(expected_date)

    @parameterized.expand([
        param('29 February 2015', 'day must be in 1..28'),
        param('32 January 2015', 'day must be in 1..31'),
        param('31 April 2015', 'day must be in 1..30'),
        param('31 June 2015', 'day must be in 1..30'),
        param('31 September 2015', 'day must be in 1..30'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(self, date_string, message):
        """Check that a day number beyond the month's length yields a ValueError.

        Two message substrings are passed to ``then_error_was_raised``: the
        fixed 'day is out of range for month' text and the per-case
        ``message``.
        """
        accepted_messages = ['day is out of range for month', message]
        self.when_date_is_parsed_by_date_parser(date_string)
        self.then_error_was_raised(ValueError, accepted_messages)

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000', languages=['fr'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=['en'], expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000', languages=[], expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string, languages, expected):
        """Check that an ISO datestamp parses for each supplied ``languages`` list.

        The parser is built with ``PREFER_LANGUAGE_DATE_ORDER`` disabled;
        ``expected`` is the datetime the parsed ``date_obj`` must equal.
        """
        parser_settings = {'PREFER_LANGUAGE_DATE_ORDER': False}
        self.given_local_tz_offset(0)
        self.given_parser(languages=languages, settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        """Check both the parsed datetime and the ``period`` reported with it.

        The parser uses a relative base of 2015-02-15 15:30; ``expected`` is
        compared with ``date_obj`` and ``period`` with the result's
        ``'period'`` entry.
        """
        parser_settings = {'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30)}
        self.given_local_tz_offset(0)
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('15-12-18 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YMD'),
        param('15-18-12 06:00', expected=datetime(2015, 12, 18, 6, 0), order='YDM'),
        param('10-11-12 06:00', expected=datetime(2012, 10, 11, 6, 0), order='MDY'),
        param('10-11-12 06:00', expected=datetime(2011, 10, 12, 6, 0), order='MYD'),
        param('10-11-12 06:00', expected=datetime(2011, 12, 10, 6, 0), order='DYM'),
        param('15-12-18 06:00', expected=datetime(2018, 12, 15, 6, 0), order='DMY'),
        param('201508', expected=datetime(2015, 8, 20, 0, 0), order='DYM'),
        param('201508', expected=datetime(2020, 8, 15, 0, 0), order='YDM'),
        param('201108', expected=datetime(2008, 11, 20, 0, 0), order='DMY'),
        param('2016 july 13.', expected=datetime(2016, 7, 13, 0, 0), order='YMD'),
        param('16 july 13.', expected=datetime(2016, 7, 13, 0, 0), order='YMD'),
    ])
    def test_order(self, date_string, expected=None, order=None):
        """Check that ``DATE_ORDER`` controls how ambiguous numeric dates are read.

        ``order`` is passed as the ``DATE_ORDER`` setting; ``expected`` is the
        datetime the parsed ``date_obj`` must equal.
        """
        parser_settings = {'DATE_ORDER': order}
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset`` for this test.

        :param offset: offset in hours; it is converted to a ``timedelta``
            of ``3600 * offset`` seconds.
        """
        fake_offset = timedelta(seconds=3600 * offset)
        tz_patch = patch.object(dateparser.timezone_parser,
                                'local_tz_offset',
                                new=fake_offset)
        self.add_patch(tz_patch)

    def given_parser(self, *args, **kwds):
        """Build ``self.parser`` with an instrumented ``date_parser``.

        All positional and keyword arguments are forwarded unchanged to
        ``DateDataParser``.
        """
        def collecting_get_date_data(parse):
            # Decorator: calls the real ``parse`` and keeps a copy of its
            # return value on the test case as ``self.date_result``.
            @wraps(parse)
            def wrapped(*args, **kwargs):
                self.date_result = parse(*args, **kwargs)
                return self.date_result
            return wrapped

        # Replace date_parser.parse with the recording wrapper.
        # NOTE(review): add_patch is defined outside this view; presumably it
        # starts the patch and undoes it at teardown -- confirm in the base class.
        self.add_patch(patch.object(date_parser,
                                    'parse',
                                    collecting_get_date_data(date_parser.parse)))

        # Mock that delegates to the (already patched) date_parser object; it
        # is substituted for the name 'dateparser.date.date_parser'.
        self.date_parser = Mock(wraps=date_parser)
        self.add_patch(patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def when_date_is_parsed(self, date_string):
        """Parse ``date_string`` with ``self.parser`` and keep the outcome in ``self.result``."""
        date_data = self.parser.get_date_data(date_string)
        self.result = date_data

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse ``date_string`` with a fresh ``DateParser``.

        The parsed value is stored in ``self.result``; any exception is
        stored in ``self.error`` instead of propagating.
        """
        try:
            parsed = DateParser().parse(date_string)
        except Exception as caught:
            self.error = caught
        else:
            self.result = parsed

    def then_period_is(self, period):
        """Assert that the ``'period'`` entry of ``self.result`` equals ``period``."""
        actual_period = self.result['period']
        self.assertEqual(period, actual_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that the ``'date_obj'`` entry of ``self.result`` equals ``expected``."""
        actual_date = self.result['date_obj']
        self.assertEqual(expected, actual_date)

    def then_date_was_parsed_by_date_parser(self):
        """Assert that ``date_parser`` produced the date held in ``self.result``.

        ``self.date_result`` must no longer be the ``NotImplemented`` marker,
        and its first element must equal ``self.result['date_obj']``.
        """
        recorded = self.date_result
        self.assertNotEqual(NotImplemented, recorded, "Date was not parsed")
        final_date = self.result['date_obj']
        self.assertEqual(final_date, self.date_result[0])
Exemple #43
0
# -*- coding: utf-8 -*-
import scrapy
import html2text
from dateparser.date import DateDataParser
import dateparser
import logging

# Module-level date parser, restricted to English.
dparser = DateDataParser(try_previous_locales=False, languages=['en'])

# html2text configuration (module-wide settings).
# NOTE(review): BODY_WIDTH = 0 presumably disables line wrapping -- confirm
# against the html2text documentation.
html2text.config.BODY_WIDTH = 0
html2text.config.IGNORE_EMPHASIS = True
html2text.config.IGNORE_IMAGES = True
html2text.config.IGNORE_ANCHORS = True


class EastAfrican(scrapy.Spider):
    """
    Spider for the local news site EastAfrican. Works the same for

    Business Daily (africa) --> contextsIds=539444 -->  74196 artikelen
    The citizen (Tanzania) --> contextsIds=1765046 --> 71839 artikelen
    Daily Nation kenya --> contextsIds=1148 --> 474712  artikelen
    The east african Kenya -->contextsIds=2456  52513 artikelen (vanaf deze site werken)
    Daily Monitor Uganda --> contextsIds=691150 --> 174375 artikelen
    """

    name = "Kenya_EastAfrican_spider"
    download_delay = 2

    def start_requests(self):
class TestFreshnessDateDataParser(BaseTestCase):
    def setUp(self):
        """Reset the per-test state and pin the reference time."""
        super(TestFreshnessDateDataParser, self).setUp()
        # Fixed "now" that the patched parser and the assertions share.
        self.now = datetime(2014, 9, 1, 10, 30)
        # NotImplemented marks every slot that a later step has yet to fill.
        for slot in ('date_string', 'parser', 'result', 'freshness_parser',
                     'freshness_result', 'date', 'time'):
            setattr(self, slot, NotImplemented)

    @parameterized.expand([
        # English dates
        param('yesterday', ago={'days': 1}, period='day'),
        param('the day before yesterday', ago={'days': 2}, period='day'),
        param('today', ago={'days': 0}, period='day'),
        param('an hour ago', ago={'hours': 1}, period='day'),
        param('about an hour ago', ago={'hours': 1}, period='day'),
        param('a day ago', ago={'days': 1}, period='day'),
        param('a week ago', ago={'weeks': 1}, period='week'),
        param('one week ago', ago={'weeks': 1}, period='week'),
        param('2 hours ago', ago={'hours': 2}, period='day'),
        param('about 23 hours ago', ago={'hours': 23}, period='day'),
        param('1 year 2 months', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 year, 09 months,01 weeks',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 year 11 months',
              ago={
                  'years': 1,
                  'months': 11
              },
              period='month'),
        param('1 year 12 months',
              ago={
                  'years': 1,
                  'months': 12
              },
              period='month'),
        param('15 hr', ago={'hours': 15}, period='day'),
        param('15 hrs', ago={'hours': 15}, period='day'),
        param('2 min', ago={'minutes': 2}, period='day'),
        param('2 mins', ago={'minutes': 2}, period='day'),
        param('3 sec', ago={'seconds': 3}, period='day'),
        param('1000 years ago', ago={'years': 1000}, period='year'),
        param('2013 years ago', ago={'years': 2013},
              period='year'),  # We've fixed .now in setUp
        param('5000 months ago',
              ago={
                  'years': 416,
                  'months': 8
              },
              period='month'),
        param('{} months ago'.format(2013 * 12 + 8),
              ago={
                  'years': 2013,
                  'months': 8
              },
              period='month'),
        param('1 year, 1 month, 1 week, 1 day, 1 hour and 1 minute ago',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),
        param('just now', ago={'seconds': 0}, period='day'),

        # French dates
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd’hui", ago={'days': 0}, period='day'),
        param("Aujourdʼhui", ago={'days': 0}, period='day'),
        param("Aujourdʻhui", ago={'days': 0}, period='day'),
        param("Aujourd՚hui", ago={'days': 0}, period='day'),
        param("Aujourdꞌhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Aujourd′hui", ago={'days': 0}, period='day'),
        param("Aujourd‵hui", ago={'days': 0}, period='day'),
        param("Aujourdʹhui", ago={'days': 0}, period='day'),
        param("Aujourd'hui", ago={'days': 0}, period='day'),
        param("Hier", ago={'days': 1}, period='day'),
        param("Avant-hier", ago={'days': 2}, period='day'),
        param('Il ya un jour', ago={'days': 1}, period='day'),
        param('Il ya une heure', ago={'hours': 1}, period='day'),
        param('Il ya 2 heures', ago={'hours': 2}, period='day'),
        param('Il ya environ 23 heures', ago={'hours': 23}, period='day'),
        param('1 an 2 mois', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 année, 09 mois, 01 semaines',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 an 11 mois', ago={
            'years': 1,
            'months': 11
        }, period='month'),
        param('Il ya 1 an, 1 mois, 1 semaine, 1 jour, 1 heure et 1 minute',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),
        param('Il y a 40 min', ago={'minutes': 40}, period='day'),

        # German dates
        param('Heute', ago={'days': 0}, period='day'),
        param('Gestern', ago={'days': 1}, period='day'),
        param('vorgestern', ago={'days': 2}, period='day'),
        param('vor einem Tag', ago={'days': 1}, period='day'),
        param('vor einer Stunden', ago={'hours': 1}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('Vor 2 Stunden', ago={'hours': 2}, period='day'),
        param('vor etwa 23 Stunden', ago={'hours': 23}, period='day'),
        param('1 Jahr 2 Monate', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 Jahr, 09 Monate, 01 Wochen',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 Jahr 11 Monate',
              ago={
                  'years': 1,
                  'months': 11
              },
              period='month'),
        param('vor 29h', ago={'hours': 29}, period='day'),
        param('vor 29m', ago={'minutes': 29}, period='day'),
        param('1 Jahr, 1 Monat, 1 Woche, 1 Tag, 1 Stunde und 1 Minute',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Italian dates
        param('oggi', ago={'days': 0}, period='day'),
        param('ieri', ago={'days': 1}, period='day'),
        param('2 ore fa', ago={'hours': 2}, period='day'),
        param('circa 23 ore fa', ago={'hours': 23}, period='day'),
        param('1 anno 2 mesi', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 anno, 09 mesi, 01 settimane',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 anno 11 mesi', ago={
            'years': 1,
            'months': 11
        }, period='month'),
        param('1 anno, 1 mese, 1 settimana, 1 giorno, 1 ora e 1 minuto fa',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Portuguese dates
        param('ontem', ago={'days': 1}, period='day'),
        param('anteontem', ago={'days': 2}, period='day'),
        param('hoje', ago={'days': 0}, period='day'),
        param('uma hora atrás', ago={'hours': 1}, period='day'),
        param('1 segundo atrás', ago={'seconds': 1}, period='day'),
        param('um dia atrás', ago={'days': 1}, period='day'),
        param('uma semana atrás', ago={'weeks': 1}, period='week'),
        param('2 horas atrás', ago={'hours': 2}, period='day'),
        param('cerca de 23 horas atrás', ago={'hours': 23}, period='day'),
        param('1 ano 2 meses', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 ano, 09 meses, 01 semanas',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 ano 11 meses', ago={
            'years': 1,
            'months': 11
        }, period='month'),
        param('1 ano, 1 mês, 1 semana, 1 dia, 1 hora e 1 minuto atrás',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Turkish dates
        param('Dün', ago={'days': 1}, period='day'),
        param('Bugün', ago={'days': 0}, period='day'),
        param('2 saat önce', ago={'hours': 2}, period='day'),
        param('yaklaşık 23 saat önce', ago={'hours': 23}, period='day'),
        param('1 yıl 2 ay', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 yıl, 09 ay, 01 hafta',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 yıl 11 ay', ago={
            'years': 1,
            'months': 11
        }, period='month'),
        param('1 yıl, 1 ay, 1 hafta, 1 gün, 1 saat ve 1 dakika önce',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Russian dates
        param('сегодня', ago={'days': 0}, period='day'),
        param('Вчера в', ago={'days': 1}, period='day'),
        param('вчера', ago={'days': 1}, period='day'),
        param('2 часа назад', ago={'hours': 2}, period='day'),
        param('час назад', ago={'hours': 1}, period='day'),
        param('минуту назад', ago={'minutes': 1}, period='day'),
        param('2 ч. 21 мин. назад',
              ago={
                  'hours': 2,
                  'minutes': 21
              },
              period='day'),
        param('около 23 часов назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяца', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 год, 09 месяцев, 01 недель',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 год 11 месяцев',
              ago={
                  'years': 1,
                  'months': 11
              },
              period='month'),
        param('1 год, 1 месяц, 1 неделя, 1 день, 1 час и 1 минуту назад',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Czech dates
        param('Dnes', ago={'days': 0}, period='day'),
        param('Včera', ago={'days': 1}, period='day'),
        param('Předevčírem', ago={'days': 2}, period='day'),
        param('Před 2 hodinami', ago={'hours': 2}, period='day'),
        param('před přibližně 23 hodin', ago={'hours': 23}, period='day'),
        param('1 rok 2 měsíce', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 rok, 09 měsíců, 01 týdnů',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 rok 11 měsíců',
              ago={
                  'years': 1,
                  'months': 11
              },
              period='month'),
        param('3 dny', ago={'days': 3}, period='day'),
        param('3 hodiny', ago={'hours': 3}, period='day'),
        param('1 rok, 1 měsíc, 1 týden, 1 den, 1 hodina, 1 minuta před',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Spanish dates
        param('anteayer', ago={'days': 2}, period='day'),
        param('ayer', ago={'days': 1}, period='day'),
        param('hoy', ago={'days': 0}, period='day'),
        param('hace una hora', ago={'hours': 1}, period='day'),
        param('Hace un día', ago={'days': 1}, period='day'),
        param('Hace una semana', ago={'weeks': 1}, period='week'),
        param('Hace 2 horas', ago={'hours': 2}, period='day'),
        param('Hace cerca de 23 horas', ago={'hours': 23}, period='day'),
        param('1 año 2 meses', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 año, 09 meses, 01 semanas',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 año 11 meses', ago={
            'years': 1,
            'months': 11
        }, period='month'),
        param('Hace 1 año, 1 mes, 1 semana, 1 día, 1 hora y 1 minuto',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Chinese dates
        param('昨天', ago={'days': 1}, period='day'),
        param('前天', ago={'days': 2}, period='day'),
        param('2小时前', ago={'hours': 2}, period='day'),
        param('约23小时前', ago={'hours': 23}, period='day'),
        param('1年2个月', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1年09月,01周',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1年11个月', ago={
            'years': 1,
            'months': 11
        }, period='month'),
        param('1年,1月,1周,1天,1小时,1分钟前',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Arabic dates
        param('اليوم', ago={'days': 0}, period='day'),
        param('يوم أمس', ago={'days': 1}, period='day'),
        param('منذ يومين', ago={'days': 2}, period='day'),
        param('منذ 3 أيام', ago={'days': 3}, period='day'),
        param('منذ 21 أيام', ago={'days': 21}, period='day'),
        param('1 عام, 1 شهر, 1 أسبوع, 1 يوم, 1 ساعة, 1 دقيقة',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Thai dates
        param('วันนี้', ago={'days': 0}, period='day'),
        param('เมื่อวานนี้', ago={'days': 1}, period='day'),
        param('2 วัน', ago={'days': 2}, period='day'),
        param('2 ชั่วโมง', ago={'hours': 2}, period='day'),
        param('23 ชม.', ago={'hours': 23}, period='day'),
        param('2 สัปดาห์ 3 วัน', ago={
            'weeks': 2,
            'days': 3
        }, period='day'),
        param('1 ปี 9 เดือน 1 สัปดาห์',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('1 ปี 1 เดือน 1 สัปดาห์ 1 วัน 1 ชั่วโมง 1 นาที',
              ago={
                  'years': 1,
                  'months': 1,
                  'weeks': 1,
                  'days': 1,
                  'hours': 1,
                  'minutes': 1
              },
              period='day'),

        # Vietnamese dates
        param('Hôm nay', ago={'days': 0}, period='day'),
        param('Hôm qua', ago={'days': 1}, period='day'),
        param('2 giờ', ago={'hours': 2}, period='day'),
        param('2 tuần 3 ngày', ago={
            'weeks': 2,
            'days': 3
        }, period='day'),
        # following test unsupported, refer to discussion at:
        # http://github.com/scrapinghub/dateparser/issues/33
        #param('1 năm 1 tháng 1 tuần 1 ngày 1 giờ 1 chút',
        #      ago={'years': 1, 'months': 1, 'weeks': 1, 'days': 1, 'hours': 1, 'minutes': 1},
        #      period='day'),

        # Belarusian dates
        param('сёння', ago={'days': 0}, period='day'),
        param('учора ў', ago={'days': 1}, period='day'),
        param('ўчора', ago={'days': 1}, period='day'),
        param('пазаўчора', ago={'days': 2}, period='day'),
        param('2 гадзіны таму назад', ago={'hours': 2}, period='day'),
        param('2 гадзіны таму', ago={'hours': 2}, period='day'),
        param('гадзіну назад', ago={'hours': 1}, period='day'),
        param('хвіліну таму', ago={'minutes': 1}, period='day'),
        param('2 гадзіны 21 хвіл. назад',
              ago={
                  'hours': 2,
                  'minutes': 21
              },
              period='day'),
        param('каля 23 гадзін назад', ago={'hours': 23}, period='day'),
        param('1 год 2 месяцы', ago={
            'years': 1,
            'months': 2
        }, period='month'),
        param('1 год, 09 месяцаў, 01 тыдзень',
              ago={
                  'years': 1,
                  'months': 9,
                  'weeks': 1
              },
              period='week'),
        param('2 гады 3 месяцы', ago={
            'years': 2,
            'months': 3
        }, period='month'),
        param(
            '5 гадоў, 1 месяц, 6 тыдняў, 3 дні, 5 гадзін 1 хвіліну і 3 секунды таму назад',
            ago={
                'years': 5,
                'months': 1,
                'weeks': 6,
                'days': 3,
                'hours': 5,
                'minutes': 1,
                'seconds': 3
            },
            period='day'),

        # Polish dates
        param("wczoraj", ago={'days': 1}, period='day'),
        param("1 godz. 2 minuty temu",
              ago={
                  'hours': 1,
                  'minutes': 2
              },
              period='day'),
        param(
            "2 lata, 3 miesiące, 1 tydzień, 2 dni, 4 godziny, 15 minut i 25 sekund temu",
            ago={
                'years': 2,
                'months': 3,
                'weeks': 1,
                'days': 2,
                'hours': 4,
                'minutes': 15,
                'seconds': 25
            },
            period='day'),
        param("2 minuty temu", ago={'minutes': 2}, period='day'),
        param("15 minut temu", ago={'minutes': 15}, period='day'),
    ])
    def test_relative_dates(self, date_string, ago, period):
        """Check relative expressions against the fixed ``self.now``.

        :param date_string: text handed to the parser.
        :param ago: ``relativedelta`` keyword arguments; the parsed date must
            equal ``self.now`` minus that delta.
        :param period: expected ``'period'`` entry of the result.
        """
        # The two "given" steps are independent of each other.
        self.given_date_string(date_string)
        self.given_parser()
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_date_obj_is_exactly_this_time_ago(ago)
        self.then_period_is(period)

    @parameterized.expand([
        param('15th of Aug, 2014 Diane Bennett'),
    ])
    def test_insane_dates(self, date_string):
        """Check that the given text raises no error and yields no date."""
        # The two "given" steps are independent of each other.
        self.given_date_string(date_string)
        self.given_parser()
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_not_parsed()

    @parameterized.expand([
        param('5000 years ago'),
        param('2014 years ago'),  # We've fixed .now in setUp
        param('{} months ago'.format(2013 * 12 + 9)),
    ])
    def test_dates_not_supported_by_date_time(self, date_string):
        """Check that offsets reaching past the supported year range raise ValueError.

        Two message substrings are passed to ``then_error_was_raised``.
        """
        accepted_messages = ['year is out of range',
                             "('year must be in 1..9999'"]
        # The two "given" steps are independent of each other.
        self.given_date_string(date_string)
        self.given_parser()
        self.when_date_is_parsed()
        self.then_error_was_raised(ValueError, accepted_messages)

    @parameterized.expand([
        param('несколько секунд назад', boundary={'seconds': 45},
              period='day'),
        param('há alguns segundos', boundary={'seconds': 45}, period='day'),
    ])
    def test_inexplicit_dates(self, date_string, boundary, period):
        """Check vague expressions that only pin the date to a time window.

        :param date_string: text handed to the parser.
        :param boundary: ``timedelta`` keyword arguments; the parsed date must
            lie strictly between ``self.now`` minus that delta and
            ``self.now``.
        :param period: expected ``'period'`` entry of the result.
        """
        # The two "given" steps are independent of each other.
        self.given_date_string(date_string)
        self.given_parser()
        self.when_date_is_parsed()
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_period_is(period)
        earliest_allowed = self.now - timedelta(**boundary)
        latest_allowed = self.now
        self.then_date_obj_is_between(earliest_allowed, latest_allowed)

    @parameterized.expand([
        param('Today at 9 pm', date(2014, 9, 1), time(21, 0)),
        param('Today at 11:20 am', date(2014, 9, 1), time(11, 20)),
        param('Yesterday 1:20 pm', date(2014, 8, 31), time(13, 20)),
        param('the day before yesterday 16:50', date(2014, 8, 30),
              time(16, 50)),
        param('2 Tage 18:50', date(2014, 8, 30), time(18, 50)),
        param('1 day ago at 2 PM', date(2014, 8, 31), time(14, 0)),
        param('Dnes v 12:40', date(2014, 9, 1), time(12, 40)),
        param('1 week ago at 12:00 am', date(2014, 8, 25), time(0, 0)),
    ])
    def test_freshness_date_with_time(self, date_string, date, time):
        """Check relative dates that also carry an explicit time of day.

        :param date_string: text handed to the parser.
        :param date: expected ``date()`` part of the parsed ``date_obj``.
        :param time: expected ``time()`` part of the parsed ``date_obj``.
        """
        self.given_parser()
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # when_date_is_parsed() stores exceptions instead of raising them.
        # Check for one here so a parser failure is reported as such, not as
        # a TypeError from subscripting the still-unset self.result.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', 'Asia/Karachi', date(2014, 9, 1), time(13, 30)),
        param('3 hours ago', 'Europe/Paris', date(2014, 9, 1), time(9, 30)),
        param('5 hours ago', 'US/Eastern', date(2014, 9, 1),
              time(1, 30)),  # date in DST range
        param('Today at 9 pm', 'Asia/Karachi', date(2014, 9, 1),
              time(21, 0)),  # time given, hence, no shift applies
    ])
    def test_freshness_date_with_pytz_timezones(self, date_string, timezone,
                                                date, time):
        """Check relative dates when ``TIMEZONE`` is a zone name such as 'Asia/Karachi'.

        :param date_string: text handed to the parser.
        :param timezone: value passed as the ``TIMEZONE`` setting.
        :param date: expected ``date()`` part of the parsed ``date_obj``.
        :param time: expected ``time()`` part of the parsed ``date_obj``.
        """
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # when_date_is_parsed() stores exceptions instead of raising them.
        # Check for one here so a parser failure is reported as such, not as
        # a TypeError from subscripting the still-unset self.result.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', 'PKT', date(2014, 9, 1), time(13, 30)),
        param('5 hours ago', 'EST', date(2014, 9, 1), time(0, 30)),
        param('3 hours ago', 'MET', date(2014, 9, 1), time(8, 30)),
    ])
    def test_freshness_date_with_timezone_abbreviations(
            self, date_string, timezone, date, time):
        """Check relative dates when ``TIMEZONE`` is an abbreviation such as 'PKT'.

        :param date_string: text handed to the parser.
        :param timezone: value passed as the ``TIMEZONE`` setting.
        :param date: expected ``date()`` part of the parsed ``date_obj``.
        :param time: expected ``time()`` part of the parsed ``date_obj``.
        """
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # when_date_is_parsed() stores exceptions instead of raising them.
        # Check for one here so a parser failure is reported as such, not as
        # a TypeError from subscripting the still-unset self.result.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    @parameterized.expand([
        param('2 hours ago', '+05:00', date(2014, 9, 1), time(13, 30)),
        param('5 hours ago', '-05:00', date(2014, 9, 1), time(0, 30)),
        param('3 hours ago', '+01:00', date(2014, 9, 1), time(8, 30)),
    ])
    def test_freshness_date_with_timezone_utc_offset(self, date_string,
                                                     timezone, date, time):
        """Check relative dates when ``TIMEZONE`` is a UTC offset such as '+05:00'.

        :param date_string: text handed to the parser.
        :param timezone: value passed as the ``TIMEZONE`` setting.
        :param date: expected ``date()`` part of the parsed ``date_obj``.
        :param time: expected ``time()`` part of the parsed ``date_obj``.
        """
        self.given_parser(settings={'TIMEZONE': timezone})
        self.given_date_string(date_string)
        self.when_date_is_parsed()
        # when_date_is_parsed() stores exceptions instead of raising them.
        # Check for one here so a parser failure is reported as such, not as
        # a TypeError from subscripting the still-unset self.result.
        self.then_error_was_not_raised()
        self.then_date_is(date)
        self.then_time_is(time)

    def given_date_string(self, date_string):
        """Store ``date_string`` on the test case for ``when_date_is_parsed`` to read."""
        self.date_string = date_string

    def given_parser(self, settings=None):
        """Build ``self.parser`` with an instrumented freshness parser and a fixed clock.

        :param settings: forwarded as ``DateDataParser(settings=...)``.
        """
        def collecting_get_date_data(get_date_data):
            # Decorator: calls the real ``get_date_data`` and keeps a copy of
            # its return value on the test case as ``self.freshness_result``.
            @wraps(get_date_data)
            def wrapped(*args, **kwargs):
                self.freshness_result = get_date_data(*args, **kwargs)
                return self.freshness_result

            return wrapped

        # Replace freshness_date_parser.get_date_data with the recording wrapper.
        # NOTE(review): add_patch is defined outside this view; presumably it
        # starts the patch and undoes it at teardown -- confirm in the base class.
        self.add_patch(
            patch.object(
                freshness_date_parser, 'get_date_data',
                collecting_get_date_data(freshness_date_parser.get_date_data)))

        # Mock that delegates to freshness_date_parser; its ``now`` attribute
        # is patched to the fixed self.now.
        self.freshness_parser = Mock(wraps=freshness_date_parser)
        self.add_patch(patch.object(self.freshness_parser, 'now', self.now))

        # Stand-in for the datetime name used in dateparser.freshness_date_parser:
        # it delegates to the real one except that utcnow() returns self.now.
        dt_mock = Mock(wraps=dateparser.freshness_date_parser.datetime)
        dt_mock.utcnow = Mock(return_value=self.now)
        self.add_patch(
            patch('dateparser.freshness_date_parser.datetime', new=dt_mock))
        # Substitute the mock for the name 'dateparser.date.freshness_date_parser'.
        self.add_patch(
            patch('dateparser.date.freshness_date_parser',
                  new=self.freshness_parser))
        self.parser = DateDataParser(settings=settings)

    def when_date_is_parsed(self):
        """Parse ``self.date_string`` with ``self.parser``.

        The parsed value is stored in ``self.result``; any exception is
        stored in ``self.error`` instead of propagating.
        """
        try:
            date_data = self.parser.get_date_data(self.date_string)
        except Exception as caught:
            self.error = caught
        else:
            self.result = date_data

    def then_date_is(self, date):
        """Assert that the ``date()`` part of the parsed ``date_obj`` equals ``date``."""
        parsed_date = self.result['date_obj'].date()
        self.assertEqual(date, parsed_date)

    def then_time_is(self, time):
        """Assert that the ``time()`` part of the parsed ``date_obj`` equals ``time``."""
        parsed_time = self.result['date_obj'].time()
        self.assertEqual(time, parsed_time)

    def then_period_is(self, period):
        """Assert that the ``'period'`` entry of ``self.result`` equals ``period``."""
        actual_period = self.result['period']
        self.assertEqual(period, actual_period)

    def then_date_obj_is_between(self, low_boundary, high_boundary):
        """Assert that the parsed ``date_obj`` lies strictly between the two boundaries."""
        lower_checked = self.result['date_obj']
        self.assertGreater(lower_checked, low_boundary)
        upper_checked = self.result['date_obj']
        self.assertLess(upper_checked, high_boundary)

    def then_date_obj_is_exactly_this_time_ago(self, ago):
        """Assert that the parsed ``date_obj`` equals ``self.now`` minus ``ago``.

        :param ago: keyword arguments for ``relativedelta``.
        """
        expected_moment = self.now - relativedelta(**ago)
        parsed_moment = self.result['date_obj']
        self.assertEqual(expected_moment, parsed_moment)

    def then_date_was_not_parsed(self):
        """Assert that the ``'date_obj'`` entry of ``self.result`` is ``None``."""
        parsed = self.result['date_obj']
        failure_note = '"%s" should not be parsed' % self.date_string
        self.assertIsNone(parsed, failure_note)

    def then_date_was_parsed_by_freshness_parser(self):
        """Assert that ``self.result`` equals what the freshness parser returned."""
        final_result = self.result
        recorded_result = self.freshness_result
        self.assertEqual(final_result, recorded_result)

    def then_error_was_not_raised(self):
        """Assert that ``self.error`` still holds the ``NotImplemented`` marker."""
        recorded_error = self.error
        self.assertEqual(NotImplemented, recorded_error)
Exemple #45
0
class TestDateParser(BaseTestCase):
    def setUp(self):
        # Run the base-class setup first, then reset every per-test slot to
        # the NotImplemented placeholder (assigned in the order parser,
        # result, date_parser, date_result).
        super(TestDateParser, self).setUp()
        self.parser = self.result = NotImplemented
        self.date_parser = self.date_result = NotImplemented

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('Tues 9th Aug, 2015', datetime(2015, 8, 9)),
        param('10:04am', datetime(2012, 11, 13, 10, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm', datetime(2014, 11, 25, 22, 17)),
        param('Wed Aug 05 12:00:00 2015', datetime(2015, 8, 5, 12, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm',
              datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        param('21 January 2012 13:11:23.678',
              datetime(2012, 1, 21, 13, 11, 23, 678000)),
        param('1/1/16 9:02:43.1', datetime(2016, 1, 1, 9, 2, 43, 100000)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14, 40)),
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52',
              datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11,
                                                 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20,
                                                56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM',
              datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14',
              datetime(2013, 12, 29, 14,
                       14)),  # bpsosrcs.wordpress.com  # NOQA
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін',
              datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 13, 14, 4)),
        # Tagalog dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
        param('2016020417:10', datetime(2016, 2, 4, 17, 10)),
        # Chinese dates
        param('2015年04月08日10:05', datetime(2015, 4, 8, 10, 5)),
        param('2012年12月20日10:35', datetime(2012, 12, 20, 10, 35)),
        param('2016年06月30日09时30分', datetime(2016, 6, 30, 9, 30)),
        param('2016年6月2911:30', datetime(2016, 6, 29, 11, 30)),
        param('2016年6月29', datetime(2016, 6, 29, 0, 0)),
        param('2016年 2月 5日', datetime(2016, 2, 5, 0, 0)),
        param('2016年9月14日晚8:00', datetime(2016, 9, 14, 20, 0)),
        # Bulgarian
        param('25 ян 2016', datetime(2016, 1, 25, 0, 0)),
        param('23 декември 2013 15:10:01', datetime(2013, 12, 23, 15, 10, 1)),
        # Bangla dates
        param('[সেপ্টেম্বর] 04, 2014.', datetime(2014, 9, 4)),
        param('মঙ্গলবার জুলাই 22, 2014', datetime(2014, 7, 22)),
        param('শুক্রবার', datetime(2012, 11, 9)),
        param('শুক্র, 12 ডিসেম্বর 2014 10:55:50',
              datetime(2014, 12, 12, 10, 55, 50)),
        param('1লা জানুয়ারী 2015', datetime(2015, 1, 1)),
        param('25শে মার্চ 1971', datetime(1971, 3, 25)),
        param('8ই মে 2002', datetime(2002, 5, 8)),
        param('10:06am ডিসেম্বর 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 ফেব্রুয়ারী 2013 সাল 09:10', datetime(2013, 2, 19, 9, 10)),
        # Hindi dates
        param('11 जुलाई 1994, 11:12', datetime(1994, 7, 11, 11, 12)),
        param('१७ अक्टूबर २०१८', datetime(2018, 10, 17, 0, 0)),
        param('12 जनवरी  1997 11:08 अपराह्न', datetime(1997, 1, 12, 23, 8)),
        # Georgian dates
        param('2011 წლის 17 მარტი, ოთხშაბათი', datetime(2011, 3, 17, 0, 0)),
        param('2015 წ. 12 ივნ, 15:34', datetime(2015, 6, 12, 15, 34))
    ])
    def test_dates_parsing(self, date_string, expected):
        # Each case pairs a date string (grouped by language above) with the
        # datetime it is expected to parse to.  The parser is built with
        # NORMALIZE disabled and RELATIVE_BASE fixed at 2012-11-13; the
        # expectations for partial inputs are stated relative to that date
        # (e.g. '10:04am' -> 2012-11-13 10:04, 'Friday' -> 2012-11-09).
        # Every case is also expected to report a period of 'day'.
        self.given_parser(settings={
            'NORMALIZE': False,
            'RELATIVE_BASE': datetime(2012, 11, 13)
        })
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    def test_stringified_datetime_should_parse_fine(self):
        # Round trip: parse 'today' against a fixed RELATIVE_BASE, render the
        # resulting datetime with str(), then parse that text again.  The
        # second parse is expected to give back the base moment itself,
        # microseconds included.
        base_moment = datetime(2012, 11, 13, 10, 15, 5, 330256)
        self.given_parser(settings={'RELATIVE_BASE': base_moment})

        today_data = self.parser.get_date_data('today')
        self.when_date_is_parsed(str(today_data['date_obj']))

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(base_moment)

    @parameterized.expand([
        # English dates
        param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
        param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
        param('10:04am', datetime(2012, 11, 13, 10, 4)),
        param('Friday', datetime(2012, 11, 9)),
        param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
        param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
        param('Nov 25 2014 10:17 pm', datetime(2014, 11, 25, 22, 17)),
        param('Wed Aug 05 12:00:00 2015', datetime(2015, 8, 5, 12, 0)),
        param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
        param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
        param('December 10, 2014, 11:02:21 pm',
              datetime(2014, 12, 10, 23, 2, 21)),
        param('8:25 a.m. Dec. 12, 2014', datetime(2014, 12, 12, 8, 25)),
        param('2:21 p.m., December 11, 2014', datetime(2014, 12, 11, 14, 21)),
        param('Fri, 12 Dec 2014 10:55:50', datetime(2014, 12, 12, 10, 55, 50)),
        param('20 Mar 2013 10h11', datetime(2013, 3, 20, 10, 11)),
        param('10:06am Dec 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 February 2013 year 09:10', datetime(2013, 2, 19, 9, 10)),
        # French dates
        param('11 Mai 2014', datetime(2014, 5, 11)),
        param('dimanche, 11 Mai 2014', datetime(2014, 5, 11)),
        param('22 janvier 2015 à 14h40', datetime(2015, 1, 22, 14,
                                                  40)),  # wrong
        param('Dimanche 1er Février à 21:24', datetime(2012, 2, 1, 21, 24)),
        param('vendredi, décembre 5 2014.', datetime(2014, 12, 5, 0, 0)),
        param('le 08 Déc 2014 15:11', datetime(2014, 12, 8, 15, 11)),
        param('Le 11 Décembre 2014 à 09:00', datetime(2014, 12, 11, 9, 0)),
        param('fév 15, 2013', datetime(2013, 2, 15, 0, 0)),
        param('Jeu 15:12', datetime(2012, 11, 8, 15, 12)),
        # Spanish dates
        param('Martes 21 de Octubre de 2014', datetime(2014, 10, 21)),
        param('Miércoles 20 de Noviembre de 2013', datetime(2013, 11, 20)),
        param('12 de junio del 2012', datetime(2012, 6, 12)),
        param('13 Ago, 2014', datetime(2014, 8, 13)),
        param('13 Septiembre, 2014', datetime(2014, 9, 13)),
        param('11 Marzo, 2014', datetime(2014, 3, 11)),
        param('julio 5, 2015 en 1:04 pm', datetime(2015, 7, 5, 13, 4)),
        param('Vi 17:15', datetime(2012, 11, 9, 17, 15)),
        # Dutch dates
        param('11 augustus 2014', datetime(2014, 8, 11)),
        param('14 januari 2014', datetime(2014, 1, 14)),
        param('vr jan 24, 2014 12:49', datetime(2014, 1, 24, 12, 49)),
        # Italian dates
        param('16 giu 2014', datetime(2014, 6, 16)),
        param('26 gennaio 2014', datetime(2014, 1, 26)),
        param('Ven 18:23', datetime(2012, 11, 9, 18, 23)),
        # Portuguese dates
        param('sexta-feira, 10 de junho de 2014 14:52',
              datetime(2014, 6, 10, 14, 52)),
        param('13 Setembro, 2014', datetime(2014, 9, 13)),
        param('Sab 3:03', datetime(2012, 11, 10, 3, 3)),
        # Russian dates
        param('10 мая', datetime(2012, 5, 10)),  # forum.codenet.ru
        param('26 апреля', datetime(2012, 4, 26)),
        param('20 ноября 2013', datetime(2013, 11, 20)),
        param('28 октября 2014 в 07:54', datetime(2014, 10, 28, 7, 54)),
        param('13 января 2015 г. в 13:34', datetime(2015, 1, 13, 13, 34)),
        param('09 августа 2012', datetime(2012, 8, 9, 0, 0)),
        param('Авг 26, 2015 15:12', datetime(2015, 8, 26, 15, 12)),
        param('2 Декабрь 95 11:15', datetime(1995, 12, 2, 11, 15)),
        param('13 янв. 2005 19:13', datetime(2005, 1, 13, 19, 13)),
        param('13 авг. 2005 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005г. 19:13', datetime(2005, 8, 13, 19, 13)),
        param('13 авг. 2005 г. 19:13', datetime(2005, 8, 13, 19, 13)),
        # Turkish dates
        param('11 Ağustos, 2014', datetime(2014, 8, 11)),
        param('08.Haziran.2014, 11:07', datetime(2014, 6, 8, 11,
                                                 7)),  # forum.andronova.net
        param('17.Şubat.2014, 17:51', datetime(2014, 2, 17, 17, 51)),
        param('14-Aralık-2012, 20:56', datetime(2012, 12, 14, 20,
                                                56)),  # forum.ceviz.net
        # Romanian dates
        param('13 iunie 2013', datetime(2013, 6, 13)),
        param('14 aprilie 2014', datetime(2014, 4, 14)),
        param('18 martie 2012', datetime(2012, 3, 18)),
        param('S 14:14', datetime(2012, 11, 10, 14, 14)),
        param('12-Iun-2013', datetime(2013, 6, 12)),
        # German dates
        param('21. Dezember 2013', datetime(2013, 12, 21)),
        param('19. Februar 2012', datetime(2012, 2, 19)),
        param('26. Juli 2014', datetime(2014, 7, 26)),
        param('18.10.14 um 22:56 Uhr', datetime(2014, 10, 18, 22, 56)),
        param('12-Mär-2014', datetime(2014, 3, 12)),
        param('Mit 13:14', datetime(2012, 11, 7, 13, 14)),
        # Czech dates
        param('pon 16. čer 2014 10:07:43', datetime(2014, 6, 16, 10, 7, 43)),
        param('13 Srpen, 2014', datetime(2014, 8, 13)),
        param('čtv 14. lis 2013 12:38:43', datetime(2013, 11, 14, 12, 38, 43)),
        # Thai dates
        param('ธันวาคม 11, 2014, 08:55:08 PM',
              datetime(2014, 12, 11, 20, 55, 8)),
        param('22 พฤษภาคม 2012, 22:12', datetime(2012, 5, 22, 22, 12)),
        param('11 กุมภา 2020, 8:13 AM', datetime(2020, 2, 11, 8, 13)),
        param('1 เดือนตุลาคม 2005, 1:00 AM', datetime(2005, 10, 1, 1, 0)),
        param('11 ก.พ. 2020, 1:13 pm', datetime(2020, 2, 11, 13, 13)),
        # Vietnamese dates
        param('Thứ năm', datetime(2012, 11, 8)),  # Thursday
        param('Thứ sáu', datetime(2012, 11, 9)),  # Friday
        param('Tháng Mười Hai 29, 2013, 14:14',
              datetime(2013, 12, 29, 14,
                       14)),  # bpsosrcs.wordpress.com  # NOQA
        param('05 Tháng một 2015 - 03:54 AM', datetime(2015, 1, 5, 3, 54)),
        # Belarusian dates
        param('11 траўня', datetime(2012, 5, 11)),
        param('4 мая', datetime(2012, 5, 4)),
        param('Чацвер 06 жніўня 2015', datetime(2015, 8, 6)),
        param('Нд 14 сакавіка 2015 у 7 гадзін 10 хвілін',
              datetime(2015, 3, 14, 7, 10)),
        param('5 жніўня 2015 года у 13:34', datetime(2015, 8, 5, 13, 34)),
        # Ukrainian dates
        param('2015-кві-12', datetime(2015, 4, 12)),
        param('21 чер 2013 3:13', datetime(2013, 6, 21, 3, 13)),
        param('12 лютого 2012, 13:12:23', datetime(2012, 2, 12, 13, 12, 23)),
        param('вів о 14:04', datetime(2012, 11, 13, 14, 4)),
        # Filipino dates
        param('12 Hulyo 2003 13:01', datetime(2003, 7, 12, 13, 1)),
        param('1978, 1 Peb, 7:05 PM', datetime(1978, 2, 1, 19, 5)),
        param('2 hun', datetime(2012, 6, 2)),
        param('Lin 16:16', datetime(2012, 11, 11, 16, 16)),
        # Japanese dates
        param('2016年3月20日(日) 21時40分', datetime(2016, 3, 20, 21, 40)),
        param("2016年3月20日 21時40分", datetime(2016, 3, 20, 21, 40)),
        # Bangla dates
        param('[সেপ্টেম্বর] 04, 2014.', datetime(2014, 9, 4)),
        param('মঙ্গলবার জুলাই 22, 2014', datetime(2014, 7, 22)),
        param('শুক্রবার', datetime(2012, 11, 9)),
        param('শুক্র, 12 ডিসেম্বর 2014 10:55:50',
              datetime(2014, 12, 12, 10, 55, 50)),
        param('1লা জানুয়ারী 2015', datetime(2015, 1, 1)),
        param('25শে মার্চ 1971', datetime(1971, 3, 25)),
        param('8ই মে 2002', datetime(2002, 5, 8)),
        param('10:06am ডিসেম্বর 11, 2014', datetime(2014, 12, 11, 10, 6)),
        param('19 ফেব্রুয়ারী 2013 সাল 09:10', datetime(2013, 2, 19, 9, 10)),
        # Numeric dates
        param('06-17-2014', datetime(2014, 6, 17)),
        param('13/03/2014', datetime(2014, 3, 13)),
        param('11. 12. 2014, 08:45:39', datetime(2014, 11, 12, 8, 45, 39)),
        # Miscellaneous dates
        param('1 Ni 2015', datetime(2015, 4, 1, 0, 0)),
        param('1 Mar 2015', datetime(2015, 3, 1, 0, 0)),
        param('1 сер 2015', datetime(2015, 8, 1, 0, 0)),
        # Bulgarian
        param('24 ян 2015г.', datetime(2015, 1, 24, 0, 0)),
        # Hindi dates
        param('बुधवार 24 मई 1997 12:09', datetime(1997, 5, 24, 12, 9)),
        param('28 दिसम्बर 2000 , 01:09:08', datetime(2000, 12, 28, 1, 9, 8)),
        param('१६ दिसम्बर १९७१', datetime(1971, 12, 16, 0, 0)),
        param('सन् 1989 11 फ़रवरी 09:43', datetime(1989, 2, 11, 9, 43)),
    ])
    def test_dates_parsing_with_normalization(self, date_string, expected):
        # Same style of table as the non-normalizing test, but the parser is
        # built with NORMALIZE enabled and the input is first passed through
        # normalize_unicode().  RELATIVE_BASE is again fixed at 2012-11-13,
        # and every case is expected to report a period of 'day'.
        # NOTE(review): given_local_tz_offset(0) presumably pins the local
        # timezone offset to zero for the duration of the test -- confirm
        # against the helper's definition.
        self.given_local_tz_offset(0)
        self.given_parser(settings={
            'NORMALIZE': True,
            'RELATIVE_BASE': datetime(2012, 11, 13)
        })
        self.when_date_is_parsed(normalize_unicode(date_string))
        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT',
              datetime(2014, 9, 3, 20, 32)),
        param('17th October, 2034 @ 01:08 am PDT',
              datetime(2034, 10, 17, 8, 8)),
        param('15 May 2004 23:24 EDT',
              datetime(2004, 5, 16, 3, 24)),
        param('08/17/14 17:00 (PDT)',
              datetime(2014, 8, 18, 0, 0)),
    ])
    def test_parsing_with_time_zones_and_converting_to_UTC(
            self, date_string, expected):
        # Every input names a timezone abbreviation (EDT/PDT); with
        # TO_TIMEZONE set to 'UTC' the expected value is the corresponding
        # UTC wall-clock time and the reported timezone is 'UTC'.
        conversion_settings = {'TO_TIMEZONE': 'UTC'}
        self.given_parser(settings=conversion_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is('UTC')
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('Sep 03 2014 | 4:32 pm EDT', 'EDT',
              datetime(2014, 9, 3, 16, 32)),
        param('17th October, 2034 @ 01:08 am PDT', 'PDT',
              datetime(2034, 10, 17, 1, 8)),
        param('15 May 2004 23:24 EDT', 'EDT',
              datetime(2004, 5, 15, 23, 24)),
        param('08/17/14 17:00 (PDT)', 'PDT',
              datetime(2014, 8, 17, 17, 0)),
        param('15 May 2004 16:10 -0400', '-04:00',
              datetime(2004, 5, 15, 16, 10)),
        param('1999-12-31 19:00:00 -0500', '-05:00',
              datetime(1999, 12, 31, 19, 0)),
        param('1999-12-31 19:00:00 +0500', '+05:00',
              datetime(1999, 12, 31, 19, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', '-07:00',
              datetime(2005, 9, 9, 13, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', '+00:00',
              datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_dateparser_should_return_tzaware_date_when_tz_info_present_in_date_string(
            self, date_string, timezone_str, expected):
        # The parser is built without settings, so no conversion is
        # requested: the expected datetime keeps the wall-clock time written
        # in the string, and the expected timezone is the abbreviation or
        # the +HH:MM / -HH:MM form of the offset found in the string.
        self.given_parser()
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is(timezone_str)
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400', 'UTC',
              datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500', 'UTC',
              datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500', 'UTC',
              datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700', 'GMT',
              datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000', 'GMT',
              datetime(2005, 9, 9, 13, 51, 39)),
    ])
    def test_dateparser_should_return_date_in_setting_timezone_if_timezone_info_present_both_in_datestring_and_given_in_settings(
            self, date_string, setting_timezone, expected):
        # The string carries a UTC offset and the TIMEZONE setting names a
        # zone as well; the expected datetime and the expected timezone are
        # both expressed in the zone given by the setting.
        zone_settings = {'TIMEZONE': setting_timezone}
        self.given_parser(settings=zone_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is(setting_timezone)
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('15 May 2004 16:10 -0400',
              datetime(2004, 5, 15, 20, 10)),
        param('1999-12-31 19:00:00 -0500',
              datetime(2000, 1, 1, 0, 0)),
        param('1999-12-31 19:00:00 +0500',
              datetime(1999, 12, 31, 14, 0)),
        param('Fri, 09 Sep 2005 13:51:39 -0700',
              datetime(2005, 9, 9, 20, 51, 39)),
        param('Fri, 09 Sep 2005 13:51:39 +0000',
              datetime(2005, 9, 9, 13, 51, 39)),
        param('Fri Sep 23 2016 10:34:51 GMT+0800 (CST)',
              datetime(2016, 9, 23, 2, 34, 51)),
    ])
    def test_parsing_with_utc_offsets(self, date_string, expected):
        # Inputs carry numeric UTC offsets.  TO_TIMEZONE is given in lower
        # case ('utc') while the timezone expected on the result is the
        # upper-case 'UTC'.
        conversion_settings = {'TO_TIMEZONE': 'utc'}
        self.given_parser(settings=conversion_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_period_is('day')
        self.then_timezone_parsed_is('UTC')
        self.then_date_obj_exactly_is(expected)

    def test_empty_dates_string_is_not_parsed(self):
        # An empty input is expected to produce a ValueError whose message
        # matches one of the allowed substrings listed here.
        empty_input = ''
        allowed_messages = ["Empty string"]
        self.when_date_is_parsed_by_date_parser(empty_input)
        self.then_error_was_raised(ValueError, allowed_messages)

    @parameterized.expand([
        param('invalid date string',
              'Unable to parse: invalid'),
        param('Aug 7, 2014Aug 7, 2014',
              'Unable to parse: Aug'),
        param('24h ago',
              'Unable to parse: h'),
        param('2015-03-17t16:37:51+00:002015-03-17t15:24:37+00:00',
              'Unable to parse: 00:002015'),
        param('8 enero 2013 martes 7:03 AM EST 8 enero 2013 martes 7:03 AM EST',
              'Unable to parse: 8'),
        param('12/09/18567',
              'Unable to parse: 18567'),
    ])
    def test_dates_not_parsed(self, date_string, message):
        # Malformed or duplicated date strings are expected to make the date
        # parser raise ValueError.
        # NOTE(review): the sibling tests hand then_error_was_raised a *list*
        # of allowed substrings, whereas a bare string is passed here --
        # confirm the helper treats both forms the way this test intends.
        self.when_date_is_parsed_by_date_parser(date_string)
        expected_error = ValueError
        self.then_error_was_raised(expected_error, message)

    @parameterized.expand([
        param('10 December', datetime(2014, 12, 10)),
        param('March', datetime(2014, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Monday', datetime(2015, 2, 9)),
        param('Sunday', datetime(2015, 2, 8)),  # current day
        param('10:00PM', datetime(2015, 2, 14, 22, 0)),
        param('16:10', datetime(2015, 2, 14, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
        param('15 february 15:00', datetime(2015, 2, 15, 15, 0)),
        param('3/3/50', datetime(1950, 3, 3)),
        param('3/3/94', datetime(1994, 3, 3)),
    ])
    def test_preferably_past_dates(self, date_string, expected):
        # With PREFER_DATES_FROM='past' and a base of 2015-02-15 15:30, the
        # expectations for incomplete inputs all lie at or before the base
        # moment (e.g. '16:10' -> the previous day, '3/3/50' -> 1950).
        # NOTE(review): unlike the 'future' and 'current_period' variants,
        # this test does not call given_local_tz_offset(0) first -- confirm
        # that is intentional.
        past_settings = {
            'PREFER_DATES_FROM': 'past',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=past_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 20)),
        param('Sunday', datetime(2015, 2, 22)),  # current day
        param('Monday', datetime(2015, 2, 16)),
        param('10:00PM', datetime(2015, 2, 15, 22, 0)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 16, 14, 5)),
        param('3/3/50', datetime(2050, 3, 3)),
        param('3/3/94', datetime(2094, 3, 3)),
    ])
    def test_preferably_future_dates(self, date_string, expected):
        # With PREFER_DATES_FROM='future' and a base of 2015-02-15 15:30, the
        # expectations for incomplete inputs all lie after the base moment
        # (e.g. '14:05' -> the next day, '3/3/50' -> 2050).
        self.given_local_tz_offset(0)
        future_settings = {
            'PREFER_DATES_FROM': 'future',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=future_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', datetime(2015, 12, 10)),
        param('March', datetime(2015, 3, 15)),
        param('Friday', datetime(2015, 2, 13)),
        param('Sunday', datetime(2015, 2, 15)),  # current weekday
        param('10:00PM', datetime(2015, 2, 15, 22, 00)),
        param('16:10', datetime(2015, 2, 15, 16, 10)),
        param('14:05', datetime(2015, 2, 15, 14, 5)),
    ])
    def test_dates_without_preference(self, date_string, expected):
        # With PREFER_DATES_FROM='current_period' and a base of
        # 2015-02-15 15:30, bare times are expected on the base day itself
        # and bare months/days in the base year.
        self.given_local_tz_offset(0)
        neutral_settings = {
            'PREFER_DATES_FROM': 'current_period',
            'RELATIVE_BASE': datetime(2015, 2, 15, 15, 30),
        }
        self.given_parser(settings=neutral_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 31),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 25)),
        param('April 2015',
              today=datetime(2015, 1, 31),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 28)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 15)),
    ])
    def test_dates_with_day_missing_preferring_current_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year inputs with no day: with PREFER_DAY_OF_MONTH='current'
        # the expected day is the day-of-month of `today` (used as
        # RELATIVE_BASE), capped at the target month's last day (e.g. base
        # Jan 31 -> Feb 28 / Feb 29 / Apr 30).
        day_settings = {
            'PREFER_DAY_OF_MONTH': 'current',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=day_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 1),
              expected=datetime(2015, 2, 28)),
        param('February 2012',
              today=datetime(2015, 1, 1),
              expected=datetime(2012, 2, 29)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 31)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 30)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 30)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 31)),
    ])
    def test_dates_with_day_missing_preferring_last_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year inputs with no day: with PREFER_DAY_OF_MONTH='last' the
        # expected day is the last day of the target month, whatever day
        # `today` (used as RELATIVE_BASE) falls on.
        day_settings = {
            'PREFER_DAY_OF_MONTH': 'last',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=day_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('February 2015',
              today=datetime(2015, 1, 8),
              expected=datetime(2015, 2, 1)),
        param('February 2012',
              today=datetime(2015, 1, 7),
              expected=datetime(2012, 2, 1)),
        param('March 2015',
              today=datetime(2015, 1, 25),
              expected=datetime(2015, 3, 1)),
        param('April 2015',
              today=datetime(2015, 1, 15),
              expected=datetime(2015, 4, 1)),
        param('April 2015',
              today=datetime(2015, 2, 28),
              expected=datetime(2015, 4, 1)),
        param('December 2014',
              today=datetime(2015, 2, 15),
              expected=datetime(2014, 12, 1)),
    ])
    def test_dates_with_day_missing_preferring_first_day_of_month(
            self, date_string, today=None, expected=None):
        # Month/year inputs with no day: with PREFER_DAY_OF_MONTH='first' the
        # expected day is always the 1st of the target month, whatever day
        # `today` (used as RELATIVE_BASE) falls on.
        day_settings = {
            'PREFER_DAY_OF_MONTH': 'first',
            'RELATIVE_BASE': today,
        }
        self.given_parser(settings=day_settings)
        self.when_date_is_parsed(date_string)

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param(prefer_day_of_month='current'),
        param(prefer_day_of_month='last'),
        param(prefer_day_of_month='first'),
    ])
    def test_that_day_preference_does_not_affect_dates_with_explicit_day(
            self, prefer_day_of_month=None):
        # The input spells out its day, so the same result (2012-04-24) is
        # expected for every PREFER_DAY_OF_MONTH value.
        day_settings = {
            'PREFER_DAY_OF_MONTH': prefer_day_of_month,
            'RELATIVE_BASE': datetime(2015, 2, 12),
        }
        self.given_parser(settings=day_settings)
        self.when_date_is_parsed('24 April 2012')

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    def test_date_is_parsed_when_skip_tokens_are_supplied(self):
        # The trailing 'de' in the input is listed in SKIP_TOKENS; the date
        # is still expected to parse to 2012-04-24.
        skip_settings = {
            'SKIP_TOKENS': ['de'],
            'RELATIVE_BASE': datetime(2015, 2, 12),
        }
        self.given_parser(settings=skip_settings)
        self.when_date_is_parsed('24 April 2012 de')

        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(datetime(2012, 4, 24))

    @parameterized.expand([
        param('29 February 2015', 'day must be in 1..28'),
        param('32 January 2015', 'day must be in 1..31'),
        param('31 April 2015', 'day must be in 1..30'),
        param('31 June 2015', 'day must be in 1..30'),
        param('31 September 2015', 'day must be in 1..30'),
    ])
    def test_error_should_be_raised_for_invalid_dates_with_too_large_day_number(
            self, date_string, message):
        # A day number beyond the month's length is expected to raise
        # ValueError.  Two message wordings are accepted: the fixed
        # 'day is out of range for month' text or the per-case `message`.
        self.when_date_is_parsed_by_date_parser(date_string)
        accepted_messages = ['day is out of range for month', message]
        self.then_error_was_raised(ValueError, accepted_messages)

    @parameterized.expand([
        param('2015-05-02T10:20:19+0000',
              languages=['fr'],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
        param('2015-05-02T10:20:19+0000',
              languages=['en'],
              expected=datetime(2015, 5, 2, 10, 20, 19)),
    ])
    def test_iso_datestamp_format_should_always_parse(self, date_string,
                                                      languages, expected):
        """Parse an ISO datestamp under the given ``languages``.

        The local timezone offset is patched to zero and
        PREFER_LOCALE_DATE_ORDER is disabled. The parsed value is compared
        with ``expected`` after its tzinfo has been removed.
        """
        self.given_local_tz_offset(0)
        self.given_parser(
            languages=languages,
            settings={'PREFER_LOCALE_DATE_ORDER': False},
        )
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        # Strip tzinfo only after the date_parser check, then compare naively.
        naive_date = self.result['date_obj'].replace(tzinfo=None)
        self.result['date_obj'] = naive_date
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        # Epoch timestamps.
        param('1484823450', expected=datetime(2017, 1, 19, 10, 57, 30)),
        param('1436745600000', expected=datetime(2015, 7, 13, 0, 0)),
        param('1015673450', expected=datetime(2002, 3, 9, 11, 30, 50)),
        param('2016-09-23T02:54:32.845Z',
              expected=datetime(2016,
                                9,
                                23,
                                2,
                                54,
                                32,
                                845000,
                                tzinfo=StaticTzInfo('Z', timedelta(0))))
    ])
    def test_parse_timestamp(self, date_string, expected):
        """Parse ``date_string`` with TO_TIMEZONE set to 'UTC'.

        The local timezone offset is patched to zero first; the resulting
        ``date_obj`` must equal ``expected``.
        """
        utc_settings = {'TO_TIMEZONE': 'UTC'}
        self.given_local_tz_offset(0)
        self.given_parser(settings=utc_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10 December', expected=datetime(2015, 12, 10), period='day'),
        param('March', expected=datetime(2015, 3, 15), period='month'),
        param('April', expected=datetime(2015, 4, 15), period='month'),
        param('December', expected=datetime(2015, 12, 15), period='month'),
        param('Friday', expected=datetime(2015, 2, 13), period='day'),
        param('Monday', expected=datetime(2015, 2, 9), period='day'),
        param('10:00PM', expected=datetime(2015, 2, 15, 22, 00), period='day'),
        param('16:10', expected=datetime(2015, 2, 15, 16, 10), period='day'),
        param('2014', expected=datetime(2014, 2, 15), period='year'),
        param('2008', expected=datetime(2008, 2, 15), period='year'),
    ])
    def test_extracted_period(self, date_string, expected=None, period=None):
        """Check both the parsed date and the reported period.

        Parsing runs against a fixed RELATIVE_BASE of 2015-02-15 15:30 with
        the local timezone offset patched to zero.
        """
        relative_base = datetime(2015, 2, 15, 15, 30)
        self.given_local_tz_offset(0)
        self.given_parser(settings={'RELATIVE_BASE': relative_base})
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('12th December 2019 19:00',
              expected=datetime(2019, 12, 12, 19, 0),
              period='time'),
        param('9 Jan 11 0:00',
              expected=datetime(2011, 1, 9, 0, 0),
              period='time'),
    ])
    def test_period_is_time_if_return_time_as_period_setting_applied_and_time_component_present(
            self, date_string, expected=None, period=None):
        """Parse with RETURN_TIME_AS_PERIOD enabled and check date and period.

        The parametrized cases shown with this test all expect period 'time'.
        """
        parser_settings = {'RETURN_TIME_AS_PERIOD': True}
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('16:00', expected=datetime(2018, 12, 13, 16, 0), period='time'),
        param('Monday 7:15 AM',
              expected=datetime(2018, 12, 10, 7, 15),
              period='time'),
    ])
    def test_period_is_time_if_return_time_as_period_and_relative_base_settings_applied_and_time_component_present(
            self, date_string, expected=None, period=None):
        """Parse with RETURN_TIME_AS_PERIOD and a fixed RELATIVE_BASE.

        RELATIVE_BASE is 2018-12-13 15:15; both the parsed date and the
        reported period are checked.
        """
        parser_settings = {
            'RETURN_TIME_AS_PERIOD': True,
            'RELATIVE_BASE': datetime(2018, 12, 13, 15, 15),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('12th March 2010',
              expected=datetime(2010, 3, 12, 0, 0),
              period='day'),
        param('21-12-19', expected=datetime(2019, 12, 21, 0, 0), period='day'),
    ])
    def test_period_is_day_if_return_time_as_period_setting_applied_and_time_component_is_not_present(
            self, date_string, expected=None, period=None):
        """Parse a date-only string with RETURN_TIME_AS_PERIOD enabled.

        Both the parsed date and the reported period are checked; the
        parametrized cases shown with this test expect period 'day'.
        """
        parser_settings = {'RETURN_TIME_AS_PERIOD': True}
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('16:00', expected=datetime(2017, 1, 10, 16, 0), period='day'),
        param('Monday 7:15 AM',
              expected=datetime(2017, 1, 9, 7, 15),
              period='day'),
    ])
    def test_period_is_day_if_return_time_as_period_setting_not_applied(
            self, date_string, expected=None, period=None):
        """Parse with RETURN_TIME_AS_PERIOD disabled and check date and period.

        RELATIVE_BASE is fixed at 2017-01-10 15:15.
        """
        parser_settings = {
            'RETURN_TIME_AS_PERIOD': False,
            'RELATIVE_BASE': datetime(2017, 1, 10, 15, 15),
        }
        self.given_parser(settings=parser_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)
        self.then_period_is(period)

    @parameterized.expand([
        param('15-12-18 06:00',
              expected=datetime(2015, 12, 18, 6, 0),
              order='YMD'),
        param('15-18-12 06:00',
              expected=datetime(2015, 12, 18, 6, 0),
              order='YDM'),
        param('10-11-12 06:00',
              expected=datetime(2012, 10, 11, 6, 0),
              order='MDY'),
        param('10-11-12 06:00',
              expected=datetime(2011, 10, 12, 6, 0),
              order='MYD'),
        param('10-11-12 06:00',
              expected=datetime(2011, 12, 10, 6, 0),
              order='DYM'),
        param('15-12-18 06:00',
              expected=datetime(2018, 12, 15, 6, 0),
              order='DMY'),
        param('12/09/08 04:23:15.567',
              expected=datetime(2008, 9, 12, 4, 23, 15, 567000),
              order='DMY'),
        param('10/9/1914 03:07:09.788888 pm',
              expected=datetime(1914, 10, 9, 15, 7, 9, 788888),
              order='MDY'),
        param('1-8-09 07:12:49 AM',
              expected=datetime(2009, 1, 8, 7, 12, 49),
              order='MDY'),
        param('201508', expected=datetime(2015, 8, 20, 0, 0), order='DYM'),
        param('201508', expected=datetime(2020, 8, 15, 0, 0), order='YDM'),
        param('201108', expected=datetime(2008, 11, 20, 0, 0), order='DMY'),
        param('2016 july 13.',
              expected=datetime(2016, 7, 13, 0, 0),
              order='YMD'),
        param('16 july 13.', expected=datetime(2016, 7, 13, 0, 0),
              order='YMD'),
        param('Sunday 23 May 1856 12:09:08 AM',
              expected=datetime(1856, 5, 23, 0, 9, 8),
              order='DMY'),
    ])
    def test_order(self, date_string, expected=None, order=None):
        """Parse ``date_string`` with DATE_ORDER set to ``order``.

        The result must come from date_parser and equal ``expected``.
        """
        order_settings = dict(DATE_ORDER=order)
        self.given_parser(settings=order_settings)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('10.1.2019',
              expected=datetime(2019, 1, 10, 0, 0),
              languages=['de'],
              settings={'PREFER_DAY_OF_MONTH': 'first'}),
        param('10.1.2019',
              expected=datetime(2019, 1, 10, 0, 0),
              languages=['de']),
        param('10.1.2019',
              expected=datetime(2019, 10, 1, 0, 0),
              settings={'DATE_ORDER': 'MDY'}),
        param('03/11/2559 05:13',
              datetime(2559, 3, 11, 5, 13),
              languages=["th"],
              settings={"DATE_ORDER": "MDY"}),
        param('03/15/2559 05:13',
              datetime(2559, 3, 15, 5, 13),
              languages=["th"],
              settings={"DATE_ORDER": "MDY"})
    ])
    def test_if_settings_provided_date_order_is_retained(
            self, date_string, expected=None, languages=None, settings=None):
        """Parse with the given ``languages`` and ``settings`` combination.

        Both values are forwarded as keyword arguments to the parser fixture;
        the result must come from date_parser and equal ``expected``.
        """
        parser_kwargs = {'languages': languages, 'settings': settings}
        self.given_parser(**parser_kwargs)
        self.when_date_is_parsed(date_string)
        self.then_date_was_parsed_by_date_parser()
        self.then_date_obj_exactly_is(expected)

    @parameterized.expand([
        param('::', None),
        param('..', None),
        param('  ', None),
        param('--', None),
        param('//', None),
        param('++', None),
    ])
    def test_parsing_strings_containing_only_separator_tokens(
            self, date_string, expected):
        """Parse a separator-only string with a default parser.

        The result must report period 'day' and a ``date_obj`` equal to
        ``expected`` (``None`` in every parametrized case shown here).
        """
        expected_period = 'day'
        self.given_parser()
        self.when_date_is_parsed(date_string)
        self.then_period_is(expected_period)
        self.then_date_obj_exactly_is(expected)

    def given_local_tz_offset(self, offset):
        """Patch ``dateparser.timezone_parser.local_tz_offset``.

        The replacement is a ``timedelta`` of ``3600 * offset`` seconds and
        is registered through ``self.add_patch``.
        """
        tz_patch = patch.object(
            dateparser.timezone_parser,
            'local_tz_offset',
            new=timedelta(seconds=3600 * offset),
        )
        self.add_patch(tz_patch)

    def given_parser(self, *args, **kwds):
        """Create ``self.parser`` with ``date_parser`` instrumented.

        ``date_parser.parse`` is patched with a wrapper that stores every
        return value on ``self.date_result``. ``dateparser.date.date_parser``
        is then replaced by a ``Mock`` wrapping ``date_parser`` (kept on
        ``self.date_parser``). All arguments are forwarded unchanged to
        ``DateDataParser``.
        """
        original_parse = date_parser.parse

        @wraps(original_parse)
        def recording_parse(*parse_args, **parse_kwargs):
            # Keep the latest outcome around so assertions can inspect it.
            self.date_result = original_parse(*parse_args, **parse_kwargs)
            return self.date_result

        # The parse wrapper must be installed before the module is mocked.
        self.add_patch(patch.object(date_parser, 'parse', recording_parse))

        wrapped_module = Mock(wraps=date_parser)
        self.date_parser = wrapped_module
        self.add_patch(
            patch('dateparser.date.date_parser', new=self.date_parser))
        self.parser = DateDataParser(*args, **kwds)

    def when_date_is_parsed(self, date_string):
        """Run ``self.parser.get_date_data`` and keep the outcome in ``self.result``."""
        date_data = self.parser.get_date_data(date_string)
        self.result = date_data

    def when_date_is_parsed_by_date_parser(self, date_string):
        """Parse with a fresh ``DateParser``, capturing any failure.

        On success the return value is stored on ``self.result``. Any
        ``Exception`` is stored on ``self.error`` instead of propagating, so
        that it can be asserted on afterwards.
        """
        try:
            outcome = DateParser().parse(date_string)
            self.result = outcome
        except Exception as caught:
            self.error = caught

    def then_period_is(self, period):
        """Assert that ``self.result['period']`` equals ``period``."""
        actual_period = self.result['period']
        self.assertEqual(period, actual_period)

    def then_date_obj_exactly_is(self, expected):
        """Assert that ``self.result['date_obj']`` equals ``expected``."""
        actual_date = self.result['date_obj']
        self.assertEqual(expected, actual_date)

    def then_date_was_parsed_by_date_parser(self):
        """Assert that the recorded date_parser outcome produced the result.

        ``self.date_result`` must not be ``NotImplemented``, and its first
        element must equal ``self.result['date_obj']``.
        """
        failure_note = "Date was not parsed"
        self.assertNotEqual(NotImplemented, self.date_result, failure_note)
        # Index into date_result only after the check above has passed.
        reported_date = self.result['date_obj']
        self.assertEqual(reported_date, self.date_result[0])

    def then_timezone_parsed_is(self, tzstr):
        """Assert ``tzstr`` occurs in the repr of the parsed date's tzinfo.

        After the check, ``self.result['date_obj']`` is replaced by a copy
        with ``tzinfo=None``.
        """
        tz_repr = repr(self.result['date_obj'].tzinfo)
        # assertIn reports both operands on failure, whereas
        # assertTrue(a in b) only says "False is not true".
        self.assertIn(tzstr, tz_repr)
        self.result['date_obj'] = self.result['date_obj'].replace(tzinfo=None)
Exemple #46
0
    fmt = None
    try:
        fmt = params[0]['value']
    except Exception, e:
        pass

    if fmt:
        try:
            d = parse(value)
            value = arrow.get(d, 'Asia/Shanghai').format(fmt)
        except Exception, e:
            try:
                value = arrow.get(value, 'Asia/Shanghai').format(fmt)
            except Exception, e:
                try:
                    ddp = DateDataParser()
                    ret = ddp.get_date_data(value)
                    dateobj = ret['date_obj']
                    ts = arrow.get(dateobj).timestamp + 8 * 60 * 60
                    value = arrow.get(ts).format(fmt)
                except Exception, e:
                    pass

    return value


def substrFilter(value, params=None):
    p = p1 = None
    try:
        p = params[0]['value']
        p1 = params[1]['value']