def test_parse_str(self):
    # parse() should accept the same timestamp either as text or as the
    # encoded bytes of that text, and produce equal results for both.
    text = '2014-05-01 08:00:00'
    from_bytes = parse(text.encode())
    from_text = parse(text)
    assert from_bytes == from_text
def testYearFirst(self):
    # With yearfirst=True the six digits are expected to be read as YYMMDD,
    # whether or not dayfirst=False is also spelled out.
    dtstr = '090107'
    expected = datetime(2009, 1, 7)
    option_sets = (
        {'yearfirst': True},
        {'yearfirst': True, 'dayfirst': False},
    )
    for options in option_sets:
        self.assertEqual(parse(dtstr, **options), expected)
def testDayFirst(self):
    # With dayfirst=True the six digits are expected to be read as DDMMYY,
    # whether or not yearfirst=False is also spelled out.
    dtstr = '090107'
    expected = datetime(2007, 1, 9)
    option_sets = (
        {'dayfirst': True},
        {'yearfirst': False, 'dayfirst': True},
    )
    for options in option_sets:
        self.assertEqual(parse(dtstr, **options), expected)
def test_includes_timestr(self):
    # The ParserError raised for an unparseable string must carry that
    # string as its second positional argument.
    timestr = "2020-13-97T44:61:83"
    try:
        parse(timestr)
    except ParserError as err:
        assert err.args[1] == timestr
        return

    # Only reached when parse() returned without raising.
    pytest.fail("Failed to raise ParserError")
def testNoYearFirstNoDayFirst(self):
    # With neither flag set (implicitly or explicitly) the six digits are
    # expected to be read as MMDDYY.
    dtstr = '090107'
    expected = datetime(2007, 9, 1)
    option_sets = (
        {},
        {'yearfirst': False, 'dayfirst': False},
    )
    for options in option_sets:
        self.assertEqual(parse(dtstr, **options), expected)
def testFuzzyWithTokens(self):
    # With fuzzy_with_tokens=True the result is compared against a
    # (datetime, tuple_of_skipped_text) pair.

    # A sentence with a date, a time and a UTC offset embedded in it.
    sentence = "Today is 25 of September of 2003, exactly " \
               "at 10:49:41 with timezone -03:00."
    sentence_result = parse(sentence, fuzzy_with_tokens=True)
    sentence_expected = (
        datetime(2003, 9, 25, 10, 49, 41, tzinfo=self.brsttz),
        ('Today is ', 'of ', ', exactly at ', ' with timezone ', '.'),
    )
    self.assertEqual(sentence_result, sentence_expected)

    # A URL whose numeric path component is picked up as the date.
    url = "http://biz.yahoo.com/ipo/p/600221.html"
    url_result = parse(url, fuzzy_with_tokens=True)
    url_expected = (
        datetime(2060, 2, 21, 0, 0, 0),
        ('http://biz.yahoo.com/ipo/p/', '.html'),
    )
    self.assertEqual(url_result, url_expected)
def testFuzzyAMPMProblem(self):
    # Sometimes fuzzy parsing results in the AM/PM flag being set without
    # hours - when parsing fuzzily that should simply be ignored.
    cases = [
        ("I have a meeting on March 1, 1974.",
         datetime(1974, 3, 1)),
        ("On June 8th, 2020, I am going to be the first man on Mars",
         datetime(2020, 6, 8)),
        # A stray AM/PM elsewhere in the text must not alter the real time.
        ("Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003",
         datetime(2003, 12, 3, 3)),
        ("Meet me at 3:00AM on December 3rd, 2003 at the AM/PM on Sunset",
         datetime(2003, 12, 3, 3)),
    ]
    for text, expected in cases:
        self.assertEqual(parse(text, fuzzy=True), expected)
def testParseWithNulls(self):
    # Leading NUL characters in front of the date should not prevent it
    # from being parsed.
    #
    # This relies on `from __future__ import unicode_literals`, because an
    # explicit u'...' literal is a syntax error in Python 3.2. Consider
    # switching to u'...' if Python 3.2 support is ever dropped.
    result = parse('\x00\x00August 29, 1924')
    assert result == datetime(1924, 8, 29)
def test_somewhat_ambiguous_string(self):
    # Ref: github issue #487
    # The parser picks the wrong part of the string for the hour, which
    # makes datetime raise an exception.
    #
    # NOTE(review): other tests in this file pass time zone mappings via
    # the `tzinfos=` keyword; this call uses `tzinfo=` - confirm which one
    # is intended before changing it.
    ambiguous = '1237 PM BRST Mon Oct 30 2017'
    result = parse(ambiguous, tzinfo=self.tzinfos)
    expected = datetime(2017, 10, 30, 12, 37, tzinfo=self.tzinfos)
    assert result == expected
def test_on_era(self):
    # This could be classified as an "eras" test, but the part that
    # matters here is the ` on ` between the time and the date.
    result = parse('2:15 PM on January 2nd 1973 A.D.')
    assert result == datetime(1973, 1, 2, 14, 15)
def test_extraneous_year2(self):
    # This was found in the wild at insidertrading.org
    # The string contains an extra "1998" before the actual date; the
    # parsed date should still be November 2, 1998.
    dstr = ("Berylson Amy Smith 1998 Grantor Retained Annuity Trust "
            "u/d/t November 2, 1998 f/b/o Jennifer L Berylson")

    # fuzzy_with_tokens=True makes parse() return a (datetime, tokens)
    # pair, so unpack it; comparing the whole pair to a datetime could
    # never succeed.
    res, _tokens = parse(dstr, fuzzy_with_tokens=True)
    expected = datetime(1998, 11, 2)
    assert res == expected
def test_unambiguous_YYYYMM(self):
    # 171206 can be parsed as YYMMDD. 201712, however, is not a valid
    # YYMMDD value, so the parser could fall back to the YYYYMM format.
    result = parse("201712")
    assert result == datetime(2017, 12, 1)
def test_parse_unambiguous_nonexistent_local(self):
    # When a date is labelled "EST" even though "EDT" applies in the local
    # time zone, the local time zone should still be assigned.
    with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
        expected = datetime(2011, 8, 1, 12, 30, tzinfo=tz.tzlocal())
        parsed = parse('2011-08-01T12:30 EST')

        assert parsed.tzname() == 'EDT'
        assert parsed == expected
def test_tzlocal_in_gmt(self):
    # GH #318
    with TZEnvContext('GMT0BST,M3.5.0,M10.5.0'):
        # This is an imaginary datetime in tz.tzlocal(), but it should
        # still be parsed using the GMT-as-alias-for-UTC rule.
        parsed = parse('2004-05-01T12:00 GMT')
        expected = datetime(2004, 5, 1, 12, tzinfo=tz.UTC)

        assert parsed == expected
def test_strftime_formats_2003Sep25(self, fmt, dstr):
    # `dstr` is the rendering of 2003-09-25 under the strftime format
    # `fmt`; parsing it should give that date back.
    target = datetime(2003, 9, 25)

    # Sanity-check the fixture pair first (not strictly necessary, but
    # nice to have): the format string must really produce `dstr`.
    rendered = target.strftime(fmt)
    assert rendered == dstr

    assert parse(dstr) == target
def test_valid_tzinfo_callable_input(self):
    # `tzinfos` may be a callable; here it ignores its arguments and
    # always answers with the string "UTC+0".
    def tzinfos(*args, **kwargs):
        return u"UTC+0"

    source = "2014 January 19 09:00 UTC"
    want = datetime(2014, 1, 19, 9, tzinfo=tz.tzstr("UTC+0"))
    got = parse(source, tzinfos=tzinfos)

    self.assert_equal_same_tz(got, want)
def testMicrosecondPrecisionErrorReturns(self):
    # One more precision issue, discovered by Eric Brown. This should be
    # the last one, as floating point is no longer used.
    # Each microsecond value must survive an isoformat()/parse() round trip.
    microsecond_values = (
        100001, 100000, 99999, 99998,
        10001, 10000, 9999, 9998,
        1001, 1000, 999, 998,
        101, 100, 99, 98,
    )
    for usec in microsecond_values:
        stamp = datetime(2008, 2, 27, 21, 26, 1, usec)
        assert parse(stamp.isoformat()) == stamp
def test_extraneous_year_tokens(self):
    # This was found in the wild at insidertrading.org
    # Identifying the first "2012" as the year would not be a problem
    # here, but inferring that the latter 2012 is hhmm is a problem.
    text = "2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"
    parsed, skipped = parse(text, fuzzy_with_tokens=True)

    assert parsed == datetime(2012, 11, 7)
    assert skipped == ("2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d ",)
def testIncreasingCTime(self):
    # Walks through 200 datetimes starting at 1900-01-01, each one more or
    # less 1 year, 1 month, 1 day, 1 hour, 1 minute and 1 second after the
    # previous one, so that years, months, days, hours, minutes, seconds
    # and weekdays all vary. Each must round-trip through ctime()/parse().
    step = timedelta(days=365 + 31 + 1, seconds=1 + 60 + 60 * 60)
    current = datetime(1900, 1, 1, 0, 0, 0, 0)
    for _ in range(200):
        assert parse(current.ctime()) == current
        current += step
def test_parse_tzinfos_fold():
    # 2011-11-06 01:30 labelled "EST", with both EST and EDT mapped to the
    # America/New_York zone, should come back with fold=1 and that same
    # tzinfo object attached.
    nyc = tz.gettz('America/New_York')
    zone_map = {'EST': nyc, 'EDT': nyc}

    expected = tz.enfold(datetime(2011, 11, 6, 1, 30, tzinfo=nyc), fold=1)
    parsed = parse('2011-11-06T01:30 EST', tzinfos=zone_map)

    assert parsed == expected
    assert parsed.tzinfo is expected.tzinfo
    assert parsed.fold == expected.fold
    assert parsed.astimezone(tz.UTC) == expected.astimezone(tz.UTC)
def test_tzlocal_parse_fold(self):
    # One manifestation of GH #318
    with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
        expected = tz.enfold(
            datetime(2011, 11, 6, 1, 30, tzinfo=tz.tzlocal()),
            fold=1,
        )
        parsed = parse('2011-11-06T01:30 EST')

        # Because this is ambiguous, until `tz.tzlocal() is tz.tzlocal()`
        # holds, compare only the attributes that matter instead of
        # asserting parsed == expected.
        assert parsed.tzname() == expected.tzname()
        assert parsed.replace(tzinfo=None) == expected.replace(tzinfo=None)
        assert parsed.fold == expected.fold
        assert parsed.astimezone(tz.UTC) == expected.astimezone(tz.UTC)
def test_duck_typing(self):
    # Arbitrary classes that implement the stream interface should be
    # accepted as input, not just real file/StringIO objects.
    class StringPassThrough(object):
        # Minimal stream-like wrapper: exposes only read(), forwarded to
        # the wrapped stream.
        def __init__(self, stream):
            self.stream = stream

        def read(self, *args, **kwargs):
            return self.stream.read(*args, **kwargs)

    wrapped = StringPassThrough(StringIO('2014 January 19'))
    result = parse(wrapped)
    assert result == datetime(2014, 1, 19)
def testParseUnicodeWords(self):
    # A parserinfo subclass with non-ASCII (Russian) month names should
    # let parse() recognise those names in the input.
    class rus_parserinfo(parserinfo):
        MONTHS = [
            ("янв", "Январь"),
            ("фев", "Февраль"),
            ("мар", "Март"),
            ("апр", "Апрель"),
            ("май", "Май"),
            ("июн", "Июнь"),
            ("июл", "Июль"),
            ("авг", "Август"),
            ("сен", "Сентябрь"),
            ("окт", "Октябрь"),
            ("ноя", "Ноябрь"),
            ("дек", "Декабрь"),
        ]

    want = datetime(2015, 9, 10, 10, 20)
    got = parse('10 Сентябрь 2015 10:20', parserinfo=rus_parserinfo())
    assert got == want
def test_ybd(self):
    # Given a 4-digit year, a non-numeric month (abbreviated or not) and a
    # day (1 or 2 digits), there is no ambiguity as to which token is the
    # year, month or day. This holds regardless of the order of the terms
    # and for each of the separators below.
    separators = ['-', ' ', '/', '.']
    year_tokens = ['%Y']
    month_tokens = ['%b', '%B']
    # '%-d' is only exercised where the platform flag says it is available.
    day_tokens = ['%d', '%-d'] if PLATFORM_HAS_DASH_D else ['%d']

    # Every ordering of every (year, month, day) directive combination.
    orderings = []
    for combo in itertools.product(year_tokens, month_tokens, day_tokens):
        orderings.extend(itertools.permutations(combo))

    reference = datetime(2003, 9, 25)
    for sep in separators:
        for ordering in orderings:
            rendered = reference.strftime(sep.join(ordering))
            assert parse(rendered) == reference
def test_decimal_error(value):
    # GH 632, GH 662 - decimal.Decimal raises some non-ParserError
    # exception when constructed with an invalid value; parse() must still
    # surface the failure to callers as ParserError.
    with pytest.raises(ParserError):
        parse(value)
def test_rounding_floatlike_strings(dtstr, dt):
    # Parse `dtstr` with 2003-09-25 supplied as the default datetime and
    # compare against the expected value `dt`.
    fallback = datetime(2003, 9, 25)
    parsed = parse(dtstr, default=fallback)
    assert parsed == dt
def test_four_letter_day(self):
    # A weekday name cut to four letters ("Frid") in front of the date
    # should not get in the way of parsing it.
    result = parse('Frid Dec 30, 2016')
    assert result == datetime(2016, 12, 30)
def test_non_date_number(self):
    # A number with a thousands separator is not a date; parsing it is
    # expected to raise ParserError.
    not_a_date = '1,700'
    with pytest.raises(ParserError):
        parse(not_a_date)
def test_extraneous_year(self):
    # This was found in the wild at insidertrading.org
    # The leading "2011" is not part of the date; the parsed date should
    # be November 7, 2012.
    dstr = "2011 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"

    # fuzzy_with_tokens=True makes parse() return a (datetime, tokens)
    # pair, so unpack it; comparing the whole pair to a datetime could
    # never succeed.
    res, _tokens = parse(dstr, fuzzy_with_tokens=True)
    expected = datetime(2012, 11, 7)
    assert res == expected
def test_extraneous_year3(self):
    # This was found in the wild at insidertrading.org
    # The stray "94" earlier in the string is not part of the date; the
    # parsed date should be December 1, 1994.
    dstr = "SMITH R & WEISS D 94 CHILD TR FBO M W SMITH UDT 12/1/1994"

    # fuzzy_with_tokens=True makes parse() return a (datetime, tokens)
    # pair, so unpack it; comparing the whole pair to a datetime could
    # never succeed.
    res, _tokens = parse(dstr, fuzzy_with_tokens=True)
    expected = datetime(1994, 12, 1)
    assert res == expected