def testMonthRange(self):
    # A "YYYY-MM/YYYY-MM" range must expose year and month for both ends.
    components = ISODateParser("2010-01/2012-05").components
    expectations = (
        ("start", "year", 2010),
        ("start", "month", 1),
        ("end", "year", 2012),
        ("end", "month", 5),
    )
    for side, field, expected in expectations:
        self.assertEqual(components[side][field], expected)
def check_date(date_string):
    """Build the QC bit mask for a date expressed in the DwCA format.

    No check against the current date is made; only parseability and the
    ordering of start and end are evaluated, plus the presence of time
    information on the start.

    Bits set (accumulated in this order):
        qc_mask_7:  the string was accepted by ISODateParser.
        qc_mask_12: the start is not later than the end.
        qc_mask_13: the start carries a timezone, or carries both hours
                    and minutes.

    A ValueError or TypeError raised at any step stops the evaluation; the
    bits collected up to that point are returned.
    """
    mask = 0
    try:
        parsed = ISODateParser(date_string)
        # Getting here means the parser accepted the string.
        mask |= qc_mask_7

        start_ms = misc.date_to_millis(parsed.dates["start"])
        end_ms = misc.date_to_millis(parsed.dates["end"])
        # Start and end are in a consistent order.
        if start_ms <= end_ms:
            mask |= qc_mask_12

        start_parts = parsed.components["start"]
        # A timezone is present on the start.
        if start_parts["timezone"] is not None:
            mask |= qc_mask_13
        # A time of day (hours and minutes) is present on the start.
        if start_parts["hours"] is not None and start_parts["minutes"] is not None:
            mask |= qc_mask_13
    except (ValueError, TypeError):
        # Keep whatever bits were earned before the failure.
        pass
    return mask
def testImpossibleTimeValues(self):
    # Hour 25, minute 65 and second 65: each input must end in a ValueError
    # when the parser is built and its dates are read.
    out_of_range = (
        "2018-01-01T25:01:01",
        "2018-01-01T01:65:01",
        "2018-01-01T01:01:65",
    )
    for candidate in out_of_range:
        with self.assertRaises(ValueError):
            ISODateParser(candidate).dates
def testDateOmitted2(self):
    # The end of the range is time-only ("T07:08"); the start keeps its full
    # date and the end exposes its hours and minutes.
    components = ISODateParser("2018-03-01T05:06/T07:08").components
    expectations = (
        ("start", "year", 2018),
        ("start", "month", 3),
        ("start", "day", 1),
        ("end", "hours", 7),
        ("end", "minutes", 8),
    )
    for side, field, expected in expectations:
        self.assertEqual(components[side][field], expected)
def testYearOmitted(self):
    # The end of the range omits the year ("03-14"); month and day of the end
    # must still be parsed.
    components = ISODateParser("2008-02-15/03-14").components
    expectations = (
        ("start", "year", 2008),
        ("start", "month", 2),
        ("start", "day", 15),
        ("end", "month", 3),
        ("end", "day", 14),
    )
    for side, field, expected in expectations:
        self.assertEqual(components[side][field], expected)
def testTimeZoneWithoutSeparator(self):
    # An offset written without a colon ("+0630") is expected as 6.5.
    start = ISODateParser("2010-01-02T03:04+0630").components["start"]
    expectations = (
        ("year", 2010),
        ("month", 1),
        ("day", 2),
        ("hours", 3),
        ("minutes", 4),
        ("timezone", 6.5),
    )
    for field, expected in expectations:
        self.assertEqual(start[field], expected)
def testSimpleDateTimeSpace(self):
    # Date and time separated by a space instead of "T"; no timezone given,
    # so the timezone component is expected to be None.
    start = ISODateParser("1990-01-02 12:13:14").components["start"]
    expectations = (
        ("year", 1990),
        ("month", 1),
        ("day", 2),
        ("hours", 12),
        ("minutes", 13),
        ("seconds", 14),
        ("timezone", None),
    )
    for field, expected in expectations:
        self.assertEqual(start[field], expected)
def testDateTimeUTC(self):
    # A trailing "Z" is expected to yield a timezone component of 0.
    start = ISODateParser("2017-08-24T14:51:57Z").components["start"]
    expectations = (
        ("year", 2017),
        ("month", 8),
        ("day", 24),
        ("hours", 14),
        ("minutes", 51),
        ("seconds", 57),
        ("timezone", 0),
    )
    for field, expected in expectations:
        self.assertEqual(start[field], expected)
def testEndDay(self):
    # The end of the range is a bare day ("26"); the resolved end date is
    # expected to be 1973-06-26.
    parser = ISODateParser("1973-06-18/26")

    components = parser.components
    expectations = (
        ("start", "year", 1973),
        ("start", "month", 6),
        ("start", "day", 18),
        ("end", "day", 26),
    )
    for side, field, expected in expectations:
        self.assertEqual(components[side][field], expected)

    resolved = parser.dates
    self.assertEqual(resolved["start"], datetime.date(1973, 6, 18))
    self.assertEqual(resolved["end"], datetime.date(1973, 6, 26))
def testDateRangeTimeZone(self):
    # Only the end of the range carries an offset ("+0900"): the start
    # timezone stays None, the end timezone is 9 and its seconds are None.
    components = ISODateParser("1990-01-02T03:04:05/2014-05-06 07:08+0900").components
    expectations = (
        ("start", "year", 1990),
        ("start", "month", 1),
        ("start", "day", 2),
        ("start", "hours", 3),
        ("start", "minutes", 4),
        ("start", "seconds", 5),
        ("start", "timezone", None),
        ("end", "year", 2014),
        ("end", "month", 5),
        ("end", "day", 6),
        ("end", "hours", 7),
        ("end", "minutes", 8),
        ("end", "seconds", None),
        ("end", "timezone", 9),
    )
    for side, field, expected in expectations:
        self.assertEqual(components[side][field], expected)
def check_record(record, min_year=0):
    """Check the eventDate of a record.

    Args:
        record: Mapping that may contain an "eventDate" entry.
        min_year: Start dates whose year is below this value are flagged
            with Flag.DATE_BEFORE_MIN and reported as invalid.

    Returns:
        A dict with the keys "missing", "invalid", "flags", "annotations"
        and "dropped". When the date is accepted, "annotations" receives
        "date_start", "date_mid", "date_end" (milliseconds as returned by
        date_to_millis) and "date_year". An absent or None eventDate is
        listed under "missing"; a rejected one under "invalid".

    Raises:
        Any exception other than ValueError raised while processing the
        date is logged and re-raised unchanged.
    """
    result = {
        "missing": [],
        "invalid": [],
        "flags": [],
        "annotations": {},
        "dropped": False,
    }

    if "eventDate" not in record or record["eventDate"] is None:
        result["missing"].append("eventDate")
        return result

    try:
        parser = ISODateParser(record["eventDate"])

        if parser.dates["start"].year < min_year:
            # Year precedes the minimum year in settings; the ValueError
            # routes the record to the "invalid" handler below.
            result["flags"].append(Flag.DATE_BEFORE_MIN.value)
            raise ValueError

        ms_start = date_to_millis(parser.dates["start"])
        ms_mid = date_to_millis(parser.dates["mid"])
        ms_end = date_to_millis(parser.dates["end"])
        year = datetime.datetime.fromtimestamp(ms_mid / 1000).year

        if ms_end > date_to_millis(datetime.date.today()):
            # Date in the future: flagged and reported as invalid.
            result["flags"].append(Flag.DATE_IN_FUTURE.value)
            raise ValueError

        result["annotations"]["date_start"] = ms_start
        result["annotations"]["date_mid"] = ms_mid
        result["annotations"]["date_end"] = ms_end
        result["annotations"]["date_year"] = year
    except ValueError:
        result["invalid"].append("eventDate")
    except Exception:
        # Lazy %-formatting keeps the log call safe when eventDate is not a
        # string; concatenating it would raise TypeError here and lose the
        # log line for the original error.
        logger.error("Error processing date %s", record["eventDate"])
        raise

    return result
def check_record(record, min_year=0):
    """Compute the QC bit mask for the eventDate of a record.

    Args:
        record: Mapping that may contain an "eventDate" entry.
        min_year: Smallest acceptable year for the mid date.

    Returns:
        An integer bit mask; 0 when eventDate is absent or None. Bits:
            qc_mask_7:  ISODateParser accepted the value.
            qc_mask_11: the mid-date year is not below min_year AND the
                        start is not later than today.
            qc_mask_12: the start is not later than the end.
            qc_mask_13: the start carries a timezone, or carries both
                        hours and minutes.
        A ValueError or TypeError raised along the way stops the
        evaluation; the bits collected so far are returned.
    """
    qc_mask = 0

    if "eventDate" not in record or record["eventDate"] is None:
        return qc_mask

    try:
        parser = ISODateParser(record["eventDate"])
        # Date parsed, means format is OK.
        qc_mask |= qc_mask_7

        dates = parser.dates
        # Year does not precede the minimum year in settings.
        year_ok = dates["mid"].year >= min_year

        ms_start = misc.date_to_millis(dates["start"])
        ms_end = misc.date_to_millis(dates["end"])
        ms_today = misc.date_to_millis(datetime.date.today())

        # Date not in the future: the start must not lie after today.
        not_in_future = ms_start <= ms_today

        # Both range checks have to pass for the bit to be granted;
        # OR-ing them would let a future date through whenever its year
        # is at or above min_year.
        if year_ok and not_in_future:
            qc_mask |= qc_mask_11

        # Min and max consistent.
        if ms_start <= ms_end:
            qc_mask |= qc_mask_12

        start_parts = parser.components["start"]
        # Timezone filled in (only the start is inspected).
        if start_parts["timezone"] is not None:
            qc_mask |= qc_mask_13
        # Time filled in (limited to hours:minutes).
        if start_parts["hours"] is not None and start_parts["minutes"] is not None:
            qc_mask |= qc_mask_13
    except (ValueError, TypeError):
        # Keep whatever bits were earned before the failure.
        pass

    return qc_mask
def testTimeMissing(self):
    # A UTC offset directly after the date, with no time of day, must be
    # rejected with ValueError.
    malformed = "1981-06-01+00:00"
    self.assertRaises(ValueError, lambda: ISODateParser(malformed).dates)
def testDates(self):
    # A month-to-month range is expected to resolve to the first day of the
    # start month and the last day of the end month.
    resolved = ISODateParser("1990-01/2014-05").dates
    expectations = (
        ("start", datetime.date(1990, 1, 1)),
        ("end", datetime.date(2014, 5, 31)),
    )
    for side, expected in expectations:
        self.assertEqual(resolved[side], expected)
def testDatesOnlyStart(self):
    # A single full date: start, end and mid are all expected to be that day.
    resolved = ISODateParser("1990-01-02").dates
    for side in ("start", "end", "mid"):
        self.assertEqual(resolved[side], datetime.date(1990, 1, 2))
def testDashes(self):
    # A string made only of dashes must be rejected with ValueError.
    dashes_only = "--"
    self.assertRaises(ValueError, lambda: ISODateParser(dashes_only).dates)
def testBackSlash(self):
    # A backslash between the two timestamps (instead of "/") must be
    # rejected with ValueError.
    with self.assertRaises(ValueError):
        ISODateParser("2003-04-30T12:00\\2003-04-30T17:30").dates
def testDatesOnlyMonth(self):
    # A bare year-month is expected to span the first to the last day of
    # that month.
    resolved = ISODateParser("1990-01").dates
    expectations = (
        ("start", datetime.date(1990, 1, 1)),
        ("end", datetime.date(1990, 1, 31)),
    )
    for side, expected in expectations:
        self.assertEqual(resolved[side], expected)