def testRangeChange(self):
    # "from 1-2" sits at characters 15-23 of the text.  Both corrections
    # name that span as the old range: one for the "1" (20-21) and one for
    # the "2" (22-23).  Each should come back as its own parsed range.
    replaced_span = (15, 23)
    parser = self.tp("the event runs from 1-2")
    parser.saveCorrection((20, 21), "13:00", old_range=replaced_span)
    parser.saveCorrection((22, 23), "14:00", old_range=replaced_span)
    actual = parser.parsedRanges()
    expected = [
        ((20, 21), PartialTime(hour=13)),
        ((22, 23), PartialTime(hour=14)),
    ]
    self.assertEqual(actual, expected)
def testTwoCorrections(self):
    # Correct "30" (characters 0-2) and "45" (characters 6-8) separately and
    # check that each parsed range carries its own corrected time.
    parser = self.tp("30 or 45 minutes past noon")
    parser.saveCorrection((0, 2), "12:30")
    parser.saveCorrection((6, 8), "12:45")
    ranges = parser.parsedRanges()
    # each range is checked exactly once (the first range used to be
    # asserted a second time with the same expected value)
    self.assertEqual(ranges[0][1], PartialTime(hour=12, minute=30))
    self.assertEqual(ranges[1][1], PartialTime(hour=12, minute=45))
def valid_parses(self, filter_incomplete=True):
    """Return the distinct valid parses of this segment with their scores.

    The parses of every node in this segment are combined with
    cartesianproduct() (self.all_compatible is handed to it, presumably as
    a compatibility filter) and each combination is scored against the
    context: the parse object's context, or the current time if that is
    false.  A combination is dropped when its score is not positive, when
    its result is false, or -- if filter_incomplete is true -- when its
    result is not interpretable.  Each distinct result keeps the highest
    score seen for it.

    Returns whatever sortedtimes() makes of the (parse, score) pairs:
    ((parse1, score1), (parse2, score2), ...)"""
    context = self.parseobj.context or PartialTime.now()

    best_score = {} # result : highest score seen so far
    node_parses = [node.parses for node in self]
    for combination in cartesianproduct(node_parses, self.all_compatible):
        interpretation = SegmentInterpretation(combination)
        interpretation.segment = self
        score, result = interpretation.score(context)
        if not (score > 0 and result):
            continue
        if filter_incomplete and not result.is_interpretable():
            continue
        if result not in best_score or score > best_score[result]:
            best_score[result] = score

    return sortedtimes(best_score.items(), context)
def testSave(self):
    # A correction saved through one parser should show up in a second
    # parser created afterwards for the same text.
    text = "noonish"
    first = self.tp(text)
    first.saveCorrection((0, 7), "noon")
    second = self.tp(text)
    ranges = second.parsedRanges()
    self.assertEqual(ranges[0], ((0, 7), PartialTime(hour=12)))
def expand_day_relatives(self, context):
    """Resolve a 'yesterday', 'today', 'tonight', 'tomorrow' or 'now'
    marker stored under the 'relative' key of self.parsedict.

    'now' returns context itself.  'tonight' is treated as 'today'.  For
    the day markers, the date of context (context.as_date()) is shifted by
    -1, 0 or +1 days and combined with the PartialTime built from
    self.parsedict.

    Returns None when there is no 'relative' entry, when the marker is not
    one of the words above, or when a ValueError is raised while building
    the result."""
    if 'relative' not in self.parsedict:
        return None

    marker = self.parsedict['relative']
    try:
        if marker == 'now':
            return context
        if marker == 'tonight':
            marker = 'today'
        # the position in this list, minus one, is the day offset;
        # index() raises ValueError for any other marker
        days_ahead = ['yesterday', 'today', 'tomorrow'].index(marker) - 1
        shift = datetime.timedelta(days=days_ahead)
        parsed = PartialTime.from_object(self.parsedict)
        shifted_date = PartialTime.from_object(context.as_date() + shift)
        return parsed.combine(shifted_date)
    except ValueError:
        return None
def is_date(text):
    """Try to read text as a date made of month, day and/or year fields.

    Text containing an ordinal (ordinals_re) is rejected.  Otherwise the
    text is trimmed with nonint_end / nonint_start and matched against
    date_re, whose three groups are tried in several field orders.

    Returns a list of candidates, or None if there are none.  The list may
    contain:
      - datetime.date objects for month + day + valid year
      - PartialTime(month, day) when the year field is missing
      - PartialTime(year, month) for month + valid year

    A candidate whose year field is present but invalid is dropped
    entirely, as is a full date that does not exist on the calendar
    (e.g. February 30)."""
    if ordinals_re.search(text):
        return None

    # remove trailing and leading non-numbers
    text = nonint_end.sub('', text)
    text = nonint_start.sub('', text)

    match = date_re.match(text)
    if not match:
        return None

    results = []
    a, b, c = match.groups()
    # each tuple is (month, day, year): month-day-year, year-month-day,
    # and month-year-day
    for month, day, year in ((a, b, c), (b, c, a), (a, c, b)):
        m = d = y = None
        if month:
            m = is_month(month)
        if day:
            d = is_day(day)
        if year is not None:
            y = is_year(year)

        if not m:
            continue # every kind of result needs a valid month

        # is_year may return a single year or a list of possible years
        years = []
        if year and y:
            if isinstance(y, list):
                years = y
            else:
                years = [y]

        if d:
            if years: # valid year portion
                for candidate in years:
                    try:
                        results.append(datetime.date(candidate, m, d))
                    except ValueError:
                        # day/month/year are individually plausible but
                        # do not form a real calendar date
                        pass
            elif not year: # missing year portion
                results.append(PartialTime(month=m, day=d))
            # otherwise the year portion is invalid: the date is no good

        # month-and-year reading; only offered for a valid year so that an
        # invalid year never yields PartialTime(year=None, ...)
        for candidate in years:
            results.append(PartialTime(year=candidate, month=m))

    return results or None
def is_year(text):
    """Interpret text as a two- or four-digit year.

    Text is first trimmed with punc_start_re and punc_end_re.  Returns
    None unless the trimmed text is 2 or 4 characters long, contains no
    ordinal, is an integer according to is_int(), and is less than 200
    years past the current year.

    A four-character year is returned as is.  A two-character year is
    resolved with closest(2000, (2000 + n, 1900 + n)) -- presumably
    whichever of the two centuries is nearer to 2000."""
    text = punc_end_re.sub('', punc_start_re.sub('', text))
    if len(text) not in (2, 4) or ordinals_re.search(text):
        return None

    value = is_int(text)
    # we'll arbitrarily decide that we're not referring to years
    # more than 200 years in the future
    cutoff = PartialTime.now().year + 200
    if value is None or value >= cutoff:
        return None

    if len(text) == 2:
        return closest(2000, (2000 + value, 1900 + value))
    return value
def expand_dayofweek_with_relatives(self, context):
    """Expand a day of week, optionally qualified by 'last', 'this' or
    'next' ("this monday", "next tuesday", etc.), into a concrete day.

    Only applies when self.parsedict has a true 'dayofweek' and no true
    'year', 'month' or 'day'.  A missing 'relative' entry counts as
    'this'.  The day is found with PartialTime.relative_day_of_week() and
    combined with the PartialTime built from the remaining entries.

    Returns None when the expansion does not apply, when 'relative' is
    some other word, or when a ValueError is raised along the way."""
    remaining = self.parsedict.copy()
    weekday = remaining.pop('dayofweek', None)
    if not weekday:
        return None
    # any explicit date element means there is nothing to expand
    if remaining.get('year') or remaining.get('month') or remaining.get('day'):
        return None

    relative = self.parsedict.get('relative', 'this')
    try:
        # last -> -1, this -> 0, next -> +1; anything else is a ValueError
        offset = ['last', 'this', 'next'].index(relative) - 1
        weekday_pt = PartialTime(dayofweek=self.parsedict['dayofweek'])
        remaining_pt = PartialTime.from_object(remaining)
        target_day = weekday_pt.relative_day_of_week(offset=offset,
                                                     context=context)
        return remaining_pt.combine(target_day)
    except ValueError:
        return None
def expand_ordinal_dow_month(self, context):
    """Expand "the Nth <day of week> of <month>" into a concrete date.

    Needs true 'month' and 'dayofweek' entries in self.parsedict, plus an
    'ordinal' and/or a 'relative' of 'last'.  The year comes from
    self.parsedict if present, otherwise from context.year.

    Returns a PartialTime for the selected day, or None when the
    expansion does not apply or the month has no such day."""
    # adapted from Mark Pettit's "Finding the x'th day in a month"
    # cookbook recipe, seen at
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/425607
    month = self.parsedict.get('month')
    if not (month and self.parsedict.get('dayofweek')):
        return None

    # zero-based position within the month's matching days
    position = self.parsedict.get('ordinal')
    if position is not None:
        position -= 1
    if self.parsedict.get('relative') == 'last':
        # last means go from 0 (first) to -1, from 1 (second) to -2
        # (second to last), etc.
        position = -((position or 0) + 1)
    if position is None:
        return None

    # if year is present, we use it, otherwise, we get it from context
    year = self.parsedict.get('year', context.year)
    # parsedict counts days of the week from 1, date.weekday() from 0
    target_weekday = self.parsedict['dayofweek'] - 1
    one_day = datetime.timedelta(days=1)
    one_week = datetime.timedelta(days=7)

    # walk to the first matching weekday, then hop a week at a time
    day = datetime.date(year, month, 1)
    while day.weekday() != target_weekday:
        day = day + one_day
    matching_days = []
    while day.month == month:
        matching_days.append(day)
        day = day + one_week

    try:
        return PartialTime.from_object(matching_days[position])
    except IndexError:
        return None
def convert_parse_to_date_and_time(self, context):
    """Turn this SegmentInterpretation into a PartialTime.

    Fields found on the individual results are gathered into
    self.parsedict (existing entries are never overwritten), context
    supplies a missing year when a month is known, and the minute defaults
    to 0 when an hour is known.  Relative markers are then expanded: the
    first of the expand_* methods to return something other than None
    provides the result.  If none does, the result is built directly from
    self.parsedict."""
    # copy every date/time attribute the results offer; date, datetime and
    # time objects (or anything with the right attributes) all qualify
    for tokenparser, result in self:
        for attr in partialtime_attrs:
            try:
                if attr in self.parsedict:
                    continue
                value = getattr(result, attr)
                if value is not None:
                    self.parsedict[attr] = value
            except AttributeError:
                pass

    # fill in the current year if we don't have it but have part of a date
    if (self.parsedict.get('month') is not None
            and self.parsedict.get('year') is None):
        self.parsedict['year'] = context.year

    # fill in 0 for the minute if we have part of a time
    if 'hour' in self.parsedict and 'minute' not in self.parsedict:
        self.parsedict['minute'] = 0

    expanders = (self.expand_day_relatives,
                 self.expand_dayofweek_with_relatives,
                 self.expand_ordinal_dow_month)
    for expand in expanders:
        expanded = expand(context)
        if expanded is not None:
            return expanded

    return PartialTime.from_object(self.parsedict)
def run_tests(opts, test_cases):
    """Parse every test case, compare against the expected results and
    print a summary.

    opts -- options object; the attributes read here are summary_only and
            verbose
    test_cases -- dict mapping input text to the expected result(s):
            a single value or a list/tuple of values accepted by
            PartialTime.from_object, or None (no parses expected), or
            NotImplementedError (skip this test)

    A "test" is one input text; a "segment test" is one expected result
    (or the single expectation of no result).  For every expected result
    that is found, its rank -- the index within the first 20 parses of
    the segment where it was found -- is recorded.  Returns None."""
    ranks = []
    num_tests_failed = 0
    num_tests_run = 0
    num_segments_failed = 0
    num_segments_run = 0

    # sorted so the test cases have a consistent ordering
    for test_case, expected_results in sorted(test_cases.items()):
        # we don't have a result for this yet (maybe because the type of the
        # result object hasn't been built yet, e.g. ranges)
        if expected_results is NotImplementedError:
            continue

        parse = Parse(test_case, context=now)
        segments = parse.segments
        num_tests_run += 1

        if expected_results is None:
            # nothing should parse: any segment with a valid parse fails
            num_segments_run += 1
            if some(lambda s: s.valid_parses(True), segments):
                num_tests_failed += 1
                num_segments_failed += 1
                if not opts.summary_only:
                    report_test(test_case, segments, [], [],
                                msg="Parse found %s segment(s) but there "
                                    "should have been none." % len(segments),
                                filter_incomplete=True)
            continue

        # listify the test case if we haven't already
        if not isinstance(expected_results, (list, tuple)):
            expected_results = [expected_results]
        expected_results = [PartialTime.from_object(res)
                            for res in expected_results]
        unmatched_results = list(expected_results) # a copy that we'll modify
        num_segments_run += len(expected_results)

        for segment in segments:
            # we don't filter out incomplete parses since some test cases
            # have incomplete answers
            seg_results = segment.valid_parses(filter_incomplete=False)
            if seg_results:
                for rank, (result, score) in enumerate(seg_results[:20]):
                    if result in unmatched_results:
                        unmatched_results.remove(result)
                        ranks.append(rank)

        num_segments_failed += len(unmatched_results)
        if unmatched_results or opts.verbose:
            if unmatched_results:
                num_tests_failed += 1
            if not opts.summary_only:
                report_test(test_case, segments, expected_results,
                            unmatched_results)

    # each print takes a single parenthesized argument, which prints the
    # same text whether print is a statement or a function
    print("Summary:")
    print("Ran %d tests, %d failed." % (num_tests_run, num_tests_failed))
    print("Ran %d segment tests, %d failed."
          % (num_segments_run, num_segments_failed))

    # this tells us how well it is doing for the answers that it did find;
    # with no answers found there is nothing to average (and dividing by
    # len(ranks) would raise ZeroDivisionError)
    if not ranks:
        print("No expected results were found, so there are no ranks "
              "to report.")
        return
    average = sum(ranks) / float(len(ranks))
    print('Distribution: '
          + ', '.join([str(pair) for pair in histogram(ranks)]))
    print("Average rank: %.5f, stddev %.5f"
          % (average, stddev(ranks, meanval=average)))
def testCorrection(self):
    # The bare "3" at characters 12-13 is corrected to "3:00am"; the first
    # parsed range should then be that span with hour 3.
    parser = self.tp("the time is 3")
    parser.saveCorrection((12, 13), "3:00am")
    ranges = parser.parsedRanges()
    first_range = ranges[0]
    self.assertEqual(first_range, ((12, 13), PartialTime(hour=3)))
def testNoCorrections(self):
    # With no corrections saved, "3pm" (characters 12-15) should be
    # reported with hour 15.
    parser = self.tp("the time is 3pm")
    first_range = parser.parsedRanges()[0]
    self.assertEqual(first_range, ((12, 15), PartialTime(hour=15)))
def run_tests(opts, test_cases):
    """Parse every test case, compare against the expected results and
    print a summary.

    opts -- options object; the attributes read here are summary_only and
            verbose
    test_cases -- dict mapping input text to the expected result(s):
            a single value or a list/tuple of values accepted by
            PartialTime.from_object, or None (no parses expected), or
            NotImplementedError (skip this test)

    A "test" is one input text; a "segment test" is one expected result
    (or the single expectation of no result).  For every expected result
    that is found, its rank -- the index within the first 20 parses of
    the segment where it was found -- is recorded.  Returns None."""
    ranks = []
    num_tests_failed = 0
    num_tests_run = 0
    num_segments_failed = 0
    num_segments_run = 0

    # sorted so the test cases have a consistent ordering
    for test_case, expected_results in sorted(test_cases.items()):
        # we don't have a result for this yet (maybe because the type of the
        # result object hasn't been built yet, e.g. ranges)
        if expected_results is NotImplementedError:
            continue

        parse = Parse(test_case, context=now)
        segments = parse.segments
        num_tests_run += 1

        if expected_results is None:
            # nothing should parse: any segment with a valid parse fails
            num_segments_run += 1
            if some(lambda s: s.valid_parses(True), segments):
                num_tests_failed += 1
                num_segments_failed += 1
                if not opts.summary_only:
                    report_test(test_case, segments, [], [],
                                msg="Parse found %s segment(s) but there "
                                    "should have been none." % len(segments),
                                filter_incomplete=True)
            continue

        # listify the test case if we haven't already
        if not isinstance(expected_results, (list, tuple)):
            expected_results = [expected_results]
        expected_results = [PartialTime.from_object(res)
                            for res in expected_results]
        unmatched_results = list(expected_results) # a copy that we'll modify
        num_segments_run += len(expected_results)

        for segment in segments:
            # we don't filter out incomplete parses since some test cases
            # have incomplete answers
            seg_results = segment.valid_parses(filter_incomplete=False)
            if seg_results:
                for rank, (result, score) in enumerate(seg_results[:20]):
                    if result in unmatched_results:
                        unmatched_results.remove(result)
                        ranks.append(rank)

        num_segments_failed += len(unmatched_results)
        if unmatched_results or opts.verbose:
            if unmatched_results:
                num_tests_failed += 1
            if not opts.summary_only:
                report_test(test_case, segments, expected_results,
                            unmatched_results)

    # each print takes a single parenthesized argument, which prints the
    # same text whether print is a statement or a function
    print("Summary:")
    print("Ran %d tests, %d failed." % (num_tests_run, num_tests_failed))
    print("Ran %d segment tests, %d failed."
          % (num_segments_run, num_segments_failed))

    # this tells us how well it is doing for the answers that it did find;
    # with no answers found there is nothing to average (and dividing by
    # len(ranks) would raise ZeroDivisionError)
    if not ranks:
        print("No expected results were found, so there are no ranks "
              "to report.")
        return
    average = sum(ranks) / float(len(ranks))
    print('Distribution: '
          + ', '.join([str(pair) for pair in histogram(ranks)]))
    print("Average rank: %.5f, stddev %.5f"
          % (average, stddev(ranks, meanval=average)))
# we try to use the AIMA library, which provides some py2.4 support for # earlier Pythons try: from AIMA import * except ImportError: pass def find_next_day_of_week(dayofweek, start): while start.weekday() != dayofweek: start += datetime.timedelta(days=1) return start now = PartialTime.now() # string : list of required results # or None (no matches expected) # or NotImplementedError (skip this test) test_cases = { "Jan 3rd": datetime.date(2005, 1, 3), """This year's Campus Dance will be held on Friday, May 27 from 9:00 pm to 1:00 am with Duke Belaire's swing band on the College Green, student bands on Lincoln Field, and jazz music at Carrie Tower.""": datetime.datetime(2005, 5, 27, 21, 0, 0), "FRI JUL 19 2002": datetime.date(2002, 7, 19), # XXX we don't do ranges yet
def test(self):
    # "the time is 3pm" should produce exactly one parsed range: "3pm"
    # (characters 12-15) with hour 15.
    trained = TrainedParse(None, "the time is 3pm")
    ranges = list(trained.parsedRanges())
    self.assertEqual(len(ranges), 1)
    only_range = ranges[0]
    self.assertEqual(only_range, ((12, 15), PartialTime(hour=15)))
from sets import Set
import datetime, calendar, optparse

# we try to use the AIMA library, which provides some py2.4 support for
# earlier Pythons
try:
    from AIMA import *
except ImportError:
    pass

def find_next_day_of_week(dayofweek, start):
    """Return the first day on or after start whose weekday() equals
    dayofweek.

    start is advanced one day at a time, so it must support weekday() and
    the addition of a datetime.timedelta (presumably a datetime.date or
    datetime.datetime).  dayofweek is compared against weekday(), i.e.
    Monday is 0 and Sunday is 6."""
    while start.weekday() != dayofweek:
        start += datetime.timedelta(days=1)
    return start

# the current time, captured once when the module is loaded
now = PartialTime.now()

# string : list of required results
#          or None (no matches expected)
#          or NotImplementedError (skip this test)
test_cases = {
    "Jan 3rd" : datetime.date(2005, 1, 3),
    """This year's Campus Dance will be held on Friday, May 27 from 9:00 pm to 1:00 am with Duke Belaire's swing band on the College Green, student bands on Lincoln Field, and jazz music at Carrie Tower.""" : datetime.datetime(2005, 5, 27, 21, 0, 0),
    "FRI JUL 19 2002" : datetime.date(2002, 7, 19),
    # XXX we don't do ranges yet