예제 #1
0
 def testRangeChange(self):
     # Two corrections are saved for different spans, both naming
     # (15, 23) as their old_range; each must come back as its own
     # parsed range.
     parse = self.tp("the event runs from 1-2")
     for span, corrected in (((20, 21), "13:00"), ((22, 23), "14:00")):
         parse.saveCorrection(span, corrected, old_range=(15, 23))
     ranges = parse.parsedRanges()
     expected = [
         ((20, 21), PartialTime(hour=13)),
         ((22, 23), PartialTime(hour=14)),
     ]
     self.assertEqual(ranges, expected)
예제 #2
0
 def testTwoCorrections(self):
     # Corrections saved for two separate spans of the same text must
     # both appear in the parsed ranges, first span first.
     parse = self.tp("30 or 45 minutes past noon")
     parse.saveCorrection((0, 2), "12:30")
     parse.saveCorrection((6, 8), "12:45")
     ranges = parse.parsedRanges()
     first_value = ranges[0][1]
     self.assertEqual(first_value, PartialTime(hour=12, minute=30))
     second_value = ranges[1][1]
     self.assertEqual(second_value, PartialTime(hour=12, minute=45))
     # the first range is compared once more after the second one
     self.assertEqual(ranges[0][1], PartialTime(hour=12, minute=30))
예제 #3
0
파일: NLTime.py 프로젝트: drewp/eventually
    def valid_parses(self, filter_incomplete=True):
        """Returns all valid parses and their score, ordered by score.
        ((parse1, score1), (parse2, score2), ...)

        Every combination of the per-node parses (as produced by
        cartesianproduct with self.all_compatible) is wrapped in a
        SegmentInterpretation and scored against the parse context, or
        against PartialTime.now() when that context is falsy.  A combination
        is dropped when its score is not positive or its result is falsy,
        and, if filter_incomplete is true, when result.is_interpretable()
        is false.  When the same result shows up more than once only its
        highest score is kept.  The final ordering is whatever sortedtimes()
        produces."""
        context = self.parseobj.context or PartialTime.now()

        node_parses = [node.parses for node in self]
        best_scores = {}  # result : highest score seen for it
        for combination in cartesianproduct(node_parses, self.all_compatible):
            candidate = SegmentInterpretation(combination)
            candidate.segment = self
            score, result = candidate.score(context)
            if not (score > 0 and result):
                continue
            if filter_incomplete and not result.is_interpretable():
                continue
            if result not in best_scores or score > best_scores[result]:
                best_scores[result] = score

        return sortedtimes(best_scores.items(), context)
예제 #4
0
    def valid_parses(self, filter_incomplete=True):
        """Returns all valid parses and their score, ordered by score.
        ((parse1, score1), (parse2, score2), ...)

        Every combination of the per-node parses (as produced by
        cartesianproduct with self.all_compatible) is wrapped in a
        SegmentInterpretation and scored against the parse context, or
        against PartialTime.now() when that context is falsy.  A combination
        is dropped when its score is not positive or its result is falsy,
        and, if filter_incomplete is true, when result.is_interpretable()
        is false.  When the same result shows up more than once only its
        highest score is kept.  The final ordering is whatever sortedtimes()
        produces."""
        context = self.parseobj.context or PartialTime.now()

        node_parses = [node.parses for node in self]
        best_scores = {}  # result : highest score seen for it
        for combination in cartesianproduct(node_parses, self.all_compatible):
            candidate = SegmentInterpretation(combination)
            candidate.segment = self
            score, result = candidate.score(context)
            if not (score > 0 and result):
                continue
            if filter_incomplete and not result.is_interpretable():
                continue
            if result not in best_scores or score > best_scores[result]:
                best_scores[result] = score

        return sortedtimes(best_scores.items(), context)
예제 #5
0
    def testSave(self):
        # A correction saved through one parse object must be visible to a
        # second parse object created afterwards for the same text.
        original = self.tp("noonish")
        original.saveCorrection((0, 7), "noon")

        reparsed = self.tp("noonish")
        ranges = reparsed.parsedRanges()
        expected_first = ((0, 7), PartialTime(hour=12))
        self.assertEqual(ranges[0], expected_first)
예제 #6
0
파일: NLTime.py 프로젝트: drewp/eventually
    def expand_day_relatives(self, context):
        """Expands 'yesterday', 'today', 'tonight', 'tomorrow', and 'now'.

        Returns context itself for 'now'.  For the day words, returns the
        parsed fields combined with the context date shifted by -1, 0 or +1
        days.  Returns None when there is no 'relative' entry, when the
        entry is some other word, or when a ValueError is raised while
        building the result."""
        if 'relative' not in self.parsedict:
            return None

        word = self.parsedict['relative']
        try:
            if word == 'now':
                return context

            # 'tonight' lands on the same calendar day as 'today'
            if word == 'tonight':
                word = 'today'
            # position in the list minus one gives -1 / 0 / +1 days;
            # index() raises ValueError for any other word
            shift = ['yesterday', 'today', 'tomorrow'].index(word) - 1
            delta = datetime.timedelta(days=shift)
            parsed = PartialTime.from_object(self.parsedict)
            target_day = PartialTime.from_object(context.as_date() + delta)
            return parsed.combine(target_day)
        except ValueError:
            return None
예제 #7
0
def is_date(text):
    """Try to read `text` as a numeric date and collect every reading found.

    Returns None when `text` matches ordinals_re, when date_re does not
    match the trimmed text, or when no reading is produced.  Otherwise
    returns a list that can mix datetime.date objects (month, day and year
    all accepted) with PartialTime objects (month/day without a year, or
    year/month).
    """
    if ordinals_re.search(text):
        return None

    # strip whatever nonint_end and nonint_start match (going by their
    # names, non-numeric text at either end -- confirm against the regexes)
    text = nonint_end.sub('', text)
    text = nonint_start.sub('', text)
    match = date_re.match(text)
    if match:
        results = []
        a, b, c = match.groups()
        # try the three groups as month-day-year, year-month-day, and
        # month-year-day (the last presumably covers "month year" input
        # where the third group is empty -- confirm against date_re)
        for month, day, year in ((a, b, c), (b, c, a), (a, c, b)):
            if month:
                m = is_month(month)
            else:
                m = None

            if day:
                d = is_day(day)
            else:
                d = None

            # NOTE(review): y is only (re)assigned when a year group exists;
            # otherwise it keeps the previous iteration's value.  Every use
            # of y below is guarded by a truthiness test on `year`.
            if year is not None:
                y = is_year(year)

            if m and d:
                if year and y:  # valid year portion
                    # is_year may hand back one year or a list of them
                    if type(y) is not list:
                        y = [y]
                    # NOTE(review): the comprehension reuses the name `year`
                    # (under Python 2 this rebinds the loop variable).
                    # datetime.date raises ValueError for an impossible
                    # month/day pair unless is_month/is_day rule that out
                    # -- confirm.
                    results.extend([datetime.date(year, m, d) for year in y])
                elif year:  # invalid year portion
                    pass  # the date is no good
                else:  # missing year portion
                    results.append(PartialTime(month=m, day=d))

            # NOTE(review): this branch tests `year` but not `y`, so it also
            # runs after a full date was added above, and a year that
            # is_year() rejected reaches PartialTime as year=None -- confirm
            # both are intended.
            if m and year:
                if type(y) is not list:
                    y = [y]
                results.extend([PartialTime(year=year, month=m) for year in y])

        # an empty list is reported as None
        return results or None
예제 #8
0
    def expand_day_relatives(self, context):
        """Expands 'yesterday', 'today', 'tonight', 'tomorrow', and 'now'.

        Returns context itself for 'now'.  For the day words, returns the
        parsed fields combined with the context date shifted by -1, 0 or +1
        days.  Returns None when there is no 'relative' entry, when the
        entry is some other word, or when a ValueError is raised while
        building the result."""
        if 'relative' not in self.parsedict:
            return None

        word = self.parsedict['relative']
        try:
            if word == 'now':
                return context

            # 'tonight' lands on the same calendar day as 'today'
            if word == 'tonight':
                word = 'today'
            # position in the list minus one gives -1 / 0 / +1 days;
            # index() raises ValueError for any other word
            shift = ['yesterday', 'today', 'tomorrow'].index(word) - 1
            delta = datetime.timedelta(days=shift)
            parsed = PartialTime.from_object(self.parsedict)
            target_day = PartialTime.from_object(context.as_date() + delta)
            return parsed.combine(target_day)
        except ValueError:
            return None
예제 #9
0
def is_year(text):
    """Interpret `text` as a year, returning an int or None.

    Whatever punc_start_re / punc_end_re match is stripped first.  Only
    text that is then 2 or 4 characters long, does not match ordinals_re,
    and is accepted by is_int() is considered.  The value must be less than
    the current year plus 200.  Four-character values are returned as is;
    two-character values are resolved through
    closest(2000, (2000 + i, 1900 + i)) -- presumably whichever century
    puts the year nearer to 2000 (TODO confirm against closest()).
    """
    stripped = punc_end_re.sub('', punc_start_re.sub('', text))
    if len(stripped) not in (2, 4) or ordinals_re.search(stripped):
        return None

    value = is_int(stripped)
    # we'll arbitrarily decide that we're not referring to years
    # more than 200 years in the future
    cutoff = PartialTime.now().year + 200
    if value is None or not (value < cutoff):
        return None
    if len(stripped) == 2:
        return closest(2000, (2000 + value, 1900 + value))
    return value
예제 #10
0
파일: NLTime.py 프로젝트: drewp/eventually
def is_year(text):
    """Interpret `text` as a year, returning an int or None.

    Whatever punc_start_re / punc_end_re match is stripped first.  Only
    text that is then 2 or 4 characters long, does not match ordinals_re,
    and is accepted by is_int() is considered.  The value must be less than
    the current year plus 200.  Four-character values are returned as is;
    two-character values are resolved through
    closest(2000, (2000 + i, 1900 + i)) -- presumably whichever century
    puts the year nearer to 2000 (TODO confirm against closest()).
    """
    stripped = punc_end_re.sub('', punc_start_re.sub('', text))
    if len(stripped) not in (2, 4) or ordinals_re.search(stripped):
        return None

    value = is_int(stripped)
    # we'll arbitrarily decide that we're not referring to years
    # more than 200 years in the future
    cutoff = PartialTime.now().year + 200
    if value is None or not (value < cutoff):
        return None
    if len(stripped) == 2:
        return closest(2000, (2000 + value, 1900 + value))
    return value
예제 #11
0
파일: NLTime.py 프로젝트: drewp/eventually
    def expand_dayofweek_with_relatives(self, context):
        """Perform a day of week expansion given a relative ("this monday",
        "next tuesday", etc.)

        Applies only when a truthy 'dayofweek' is present and none of
        'year', 'month' or 'day' is set.  A missing 'relative' counts as
        'this'.  Returns the remaining parsed fields combined with the day
        found by relative_day_of_week(); returns None when the expansion
        does not apply, when the relative word is not 'last', 'this' or
        'next', or when a ValueError is raised along the way."""
        remainder = self.parsedict.copy()
        weekday = remainder.pop('dayofweek', None)
        if not weekday:
            return None
        # an explicit date element means the weekday needs no resolving here
        if (remainder.get('year') or remainder.get('month')
                or remainder.get('day')):
            return None

        which = self.parsedict.get('relative', 'this')
        try:
            # -1 / 0 / +1; index() raises ValueError for any other word
            offset = ['last', 'this', 'next'].index(which) - 1
            weekday_only = PartialTime(dayofweek=weekday)
            other_fields = PartialTime.from_object(remainder)
            resolved_day = weekday_only.relative_day_of_week(
                offset=offset, context=context)
            return other_fields.combine(resolved_day)
        except ValueError:
            return None
예제 #12
0
    def expand_dayofweek_with_relatives(self, context):
        """Perform a day of week expansion given a relative ("this monday",
        "next tuesday", etc.)

        Applies only when a truthy 'dayofweek' is present and none of
        'year', 'month' or 'day' is set.  A missing 'relative' counts as
        'this'.  Returns the remaining parsed fields combined with the day
        found by relative_day_of_week(); returns None when the expansion
        does not apply, when the relative word is not 'last', 'this' or
        'next', or when a ValueError is raised along the way."""
        remainder = self.parsedict.copy()
        weekday = remainder.pop('dayofweek', None)
        if not weekday:
            return None
        # an explicit date element means the weekday needs no resolving here
        if (remainder.get('year') or remainder.get('month')
                or remainder.get('day')):
            return None

        which = self.parsedict.get('relative', 'this')
        try:
            # -1 / 0 / +1; index() raises ValueError for any other word
            offset = ['last', 'this', 'next'].index(which) - 1
            weekday_only = PartialTime(dayofweek=weekday)
            other_fields = PartialTime.from_object(remainder)
            resolved_day = weekday_only.relative_day_of_week(
                offset=offset, context=context)
            return other_fields.combine(resolved_day)
        except ValueError:
            return None
예제 #13
0
파일: NLTime.py 프로젝트: drewp/eventually
    def expand_ordinal_dow_month(self, context):
        """Resolve "the n-th <weekday> of <month>" to a concrete day.

        Needs truthy 'month' and 'dayofweek' entries plus either an
        'ordinal' or a 'relative' of 'last' (which counts from the end of
        the month).  The year comes from the parsed fields when present,
        otherwise from context.year.  Returns a PartialTime built from the
        selected date, or None when the inputs are missing or the month has
        no such occurrence."""
        # adapted from Mark Pettit's "Finding the x'th day in a month"
        # cookbook recipe, seen at
        # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/425607
        fields = self.parsedict
        if not fields.get('month') or not fields.get('dayofweek'):
            return None

        position = fields.get('ordinal')
        if position is not None:
            position -= 1  # ordinals count from 1, list positions from 0

        if fields.get('relative') == 'last':
            # last means go from 0 (first) to -1, from 1 (second) to -2
            # (second to last), etc.
            position = -((position or 0) + 1)

        if position is None:
            return None

        # if year is present, we use it, otherwise, we get it from context
        year = fields.get('year', context.year)
        month = fields['month']
        # the stored weekday is one higher than date.weekday() numbering
        wanted_weekday = fields['dayofweek'] - 1

        one_day = datetime.timedelta(days=1)
        one_week = datetime.timedelta(days=7)

        # walk forward from the 1st to the first matching weekday ...
        cursor = datetime.date(year, month, 1)
        while cursor.weekday() != wanted_weekday:
            cursor = cursor + one_day
        # ... then collect that weekday week by week until the month ends
        occurrences = []
        while cursor.month == month:
            occurrences.append(cursor)
            cursor = cursor + one_week

        try:
            return PartialTime.from_object(occurrences[position])
        except IndexError:
            return None
예제 #14
0
    def expand_ordinal_dow_month(self, context):
        """Resolve "the n-th <weekday> of <month>" to a concrete day.

        Needs truthy 'month' and 'dayofweek' entries plus either an
        'ordinal' or a 'relative' of 'last' (which counts from the end of
        the month).  The year comes from the parsed fields when present,
        otherwise from context.year.  Returns a PartialTime built from the
        selected date, or None when the inputs are missing or the month has
        no such occurrence."""
        # adapted from Mark Pettit's "Finding the x'th day in a month"
        # cookbook recipe, seen at
        # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/425607
        fields = self.parsedict
        if not fields.get('month') or not fields.get('dayofweek'):
            return None

        position = fields.get('ordinal')
        if position is not None:
            position -= 1  # ordinals count from 1, list positions from 0

        if fields.get('relative') == 'last':
            # last means go from 0 (first) to -1, from 1 (second) to -2
            # (second to last), etc.
            position = -((position or 0) + 1)

        if position is None:
            return None

        # if year is present, we use it, otherwise, we get it from context
        year = fields.get('year', context.year)
        month = fields['month']
        # the stored weekday is one higher than date.weekday() numbering
        wanted_weekday = fields['dayofweek'] - 1

        one_day = datetime.timedelta(days=1)
        one_week = datetime.timedelta(days=7)

        # walk forward from the 1st to the first matching weekday ...
        cursor = datetime.date(year, month, 1)
        while cursor.weekday() != wanted_weekday:
            cursor = cursor + one_day
        # ... then collect that weekday week by week until the month ends
        occurrences = []
        while cursor.month == month:
            occurrences.append(cursor)
            cursor = cursor + one_week

        try:
            return PartialTime.from_object(occurrences[position])
        except IndexError:
            return None
예제 #15
0
    def convert_parse_to_date_and_time(self, context):
        """In this method, we attempt to convert this SegmentInterpretation
        into a PartialTime.  We use context to try to fill in missing
        information.  Relative markers are expanded here.

        self.parsedict is updated in place: attributes found on the token
        results are copied in when not already set, a missing year is taken
        from context when a month is known, and a missing minute becomes 0
        when an hour is known.  The first expansion helper that returns
        something other than None decides the result; otherwise the result
        is built directly from self.parsedict."""
        # copy over every PartialTime attribute the token results carry;
        # datetime.datetime is a child of datetime.date, so attribute access
        # covers dates, datetimes and times alike (or anything else with the
        # right attributes)
        for _parser, token_result in self:
            for attr in partialtime_attrs:
                try:
                    if attr in self.parsedict:
                        continue  # values already present are kept
                    value = getattr(token_result, attr)
                    if value is not None:
                        self.parsedict[attr] = value
                except AttributeError:
                    pass

        fields = self.parsedict
        # fill in the current year if we don't have it but have part of a date
        if fields.get('month') is not None and fields.get('year') is None:
            fields['year'] = context.year
        # fill in 0 for the minute if we have part of a time
        if 'hour' in fields and 'minute' not in fields:
            fields['minute'] = 0

        expanders = (self.expand_day_relatives,
                     self.expand_dayofweek_with_relatives,
                     self.expand_ordinal_dow_month)
        for expand in expanders:
            expanded = expand(context)
            if expanded is not None:
                return expanded

        # no relative expansion applied
        return PartialTime.from_object(self.parsedict)
예제 #16
0
파일: NLTime.py 프로젝트: drewp/eventually
    def convert_parse_to_date_and_time(self, context):
        """In this method, we attempt to convert this SegmentInterpretation
        into a PartialTime.  We use context to try to fill in missing
        information.  Relative markers are expanded here.

        self.parsedict is updated in place: attributes found on the token
        results are copied in when not already set, a missing year is taken
        from context when a month is known, and a missing minute becomes 0
        when an hour is known.  The first expansion helper that returns
        something other than None decides the result; otherwise the result
        is built directly from self.parsedict."""
        # copy over every PartialTime attribute the token results carry;
        # datetime.datetime is a child of datetime.date, so attribute access
        # covers dates, datetimes and times alike (or anything else with the
        # right attributes)
        for _parser, token_result in self:
            for attr in partialtime_attrs:
                try:
                    if attr in self.parsedict:
                        continue  # values already present are kept
                    value = getattr(token_result, attr)
                    if value is not None:
                        self.parsedict[attr] = value
                except AttributeError:
                    pass

        fields = self.parsedict
        # fill in the current year if we don't have it but have part of a date
        if fields.get('month') is not None and fields.get('year') is None:
            fields['year'] = context.year
        # fill in 0 for the minute if we have part of a time
        if 'hour' in fields and 'minute' not in fields:
            fields['minute'] = 0

        expanders = (self.expand_day_relatives,
                     self.expand_dayofweek_with_relatives,
                     self.expand_ordinal_dow_month)
        for expand in expanders:
            expanded = expand(context)
            if expanded is not None:
                return expanded

        # no relative expansion applied
        return PartialTime.from_object(self.parsedict)
예제 #17
0
def run_tests(opts, test_cases):
    # so the test cases have a consistent ordering
    test_cases = test_cases.items()
    test_cases.sort()

    ranks = []
    num_tests_failed = 0
    num_tests_run = 0
    num_segments_failed = 0
    num_segments_run = 0
    for test_case, expected_results in test_cases:
        # we don't have a result for this yet (maybe because the type of the
        # result object hasn't been built yet, e.g. ranges)
        if expected_results is NotImplementedError:
            continue

        parse = Parse(test_case, context=now)
        segments = parse.segments
        num_tests_run += 1

        if expected_results is None:
            num_segments_run += 1
            if some(lambda s: s.valid_parses(True), segments):
                num_tests_failed += 1
                num_segments_failed += 1
                if not opts.summary_only:
                    report_test(test_case,
                                segments, [], [],
                                msg="Parse found %s segment(s) but there "
                                "should have been none." % len(segments),
                                filter_incomplete=True)
            continue

        # listify the test case if we haven't already
        if not isinstance(expected_results, (list, tuple)):
            expected_results = [
                expected_results,
            ]

        expected_results = [
            PartialTime.from_object(res) for res in expected_results
        ]
        unmatched_results = list(expected_results)  # a copy that we'll modify

        num_segments_run += len(expected_results)

        for segment in segments:
            # we don't filter out incomplete parses since some test cases have
            # incomplete answers
            seg_results = segment.valid_parses(filter_incomplete=False)
            if seg_results:
                for rank, (result, score) in enumerate(seg_results[:20]):
                    if result in unmatched_results:
                        unmatched_results.remove(result)
                        ranks.append(rank)

        num_segments_failed += len(unmatched_results)

        if unmatched_results or opts.verbose:
            if unmatched_results:
                num_tests_failed += 1
            if not opts.summary_only:
                report_test(test_case, segments, expected_results,
                            unmatched_results)

    print "Summary:"
    print "Ran %d tests, %d failed." % (num_tests_run, num_tests_failed)
    print "Ran %d segment tests, %d failed." % (num_segments_run,
                                                num_segments_failed)
    # this tells us how well it is doing for the answers that it did find
    average = sum(ranks) / float(len(ranks))
    print 'Distribution:', ', '.join([str(pair) for pair in histogram(ranks)])
    print "Average rank: %.5f, stddev %.5f" % (average,
                                               stddev(ranks, meanval=average))
예제 #18
0
 def testCorrection(self):
     # After a correction is saved for the span (12, 13), the first
     # parsed range must be that span with PartialTime(hour=3).
     parse = self.tp("the time is 3")
     parse.saveCorrection((12, 13), "3:00am")
     ranges = parse.parsedRanges()
     first_range = ranges[0]
     expected = ((12, 13), PartialTime(hour=3))
     self.assertEqual(first_range, expected)
예제 #19
0
 def testNoCorrections(self):
     # With no correction saved, the first parsed range must be the span
     # (12, 15) with PartialTime(hour=15).
     parse = self.tp("the time is 3pm")
     ranges = parse.parsedRanges()
     first_range = ranges[0]
     self.assertEqual(first_range, ((12, 15), PartialTime(hour=15)))
예제 #20
0
파일: test.py 프로젝트: drewp/eventually
def run_tests(opts, test_cases):
    # so the test cases have a consistent ordering
    test_cases = test_cases.items()
    test_cases.sort()
    
    ranks = []
    num_tests_failed = 0
    num_tests_run = 0
    num_segments_failed = 0
    num_segments_run = 0
    for test_case, expected_results in test_cases:
        # we don't have a result for this yet (maybe because the type of the
        # result object hasn't been built yet, e.g. ranges)
        if expected_results is NotImplementedError:
            continue
        
        parse = Parse(test_case, context=now)
        segments = parse.segments
        num_tests_run += 1
        
        if expected_results is None:
            num_segments_run += 1
            if some(lambda s: s.valid_parses(True), segments):
                num_tests_failed += 1
                num_segments_failed += 1
                if not opts.summary_only:
                    report_test(test_case, segments, [], [],
                                msg="Parse found %s segment(s) but there "
                                "should have been none." % len(segments),
                                filter_incomplete=True)
            continue
        
        # listify the test case if we haven't already
        if not isinstance(expected_results, (list, tuple)):
            expected_results = [expected_results,]
        
        expected_results = [PartialTime.from_object(res)
            for res in expected_results]
        unmatched_results = list(expected_results) # a copy that we'll modify
        
        num_segments_run += len(expected_results)
        
        for segment in segments:
            # we don't filter out incomplete parses since some test cases have
            # incomplete answers
            seg_results = segment.valid_parses(filter_incomplete=False)
            if seg_results:
                for rank, (result, score) in enumerate(seg_results[:20]):
                    if result in unmatched_results:
                        unmatched_results.remove(result)
                        ranks.append(rank)
        
        num_segments_failed += len(unmatched_results)
        
        if unmatched_results or opts.verbose:
            if unmatched_results:
                num_tests_failed += 1
            if not opts.summary_only:
                report_test(test_case, segments,
                            expected_results, unmatched_results)
    
    print "Summary:"
    print "Ran %d tests, %d failed." % (num_tests_run, num_tests_failed)
    print "Ran %d segment tests, %d failed." % (num_segments_run,
                                                num_segments_failed)
    # this tells us how well it is doing for the answers that it did find
    average = sum(ranks) / float(len(ranks))
    print 'Distribution:', ', '.join([str(pair) for pair in histogram(ranks)])
    print "Average rank: %.5f, stddev %.5f" % (average,
                                               stddev(ranks, meanval=average))
예제 #21
0
# we try to use the AIMA library, which provides some py2.4 support for
# earlier Pythons
try:
    from AIMA import *
except ImportError:
    pass


def find_next_day_of_week(dayofweek, start):
    """Return the first day on or after `start` whose weekday() equals
    `dayofweek`; `start` itself is returned when it already matches."""
    one_day = datetime.timedelta(days=1)
    candidate = start
    while True:
        if candidate.weekday() == dayofweek:
            return candidate
        candidate += one_day


# Reference time, taken once when this module is loaded; run_tests passes it
# to Parse as the context.
now = PartialTime.now()

# string : list of required results
#          or None (no matches expected)
#          or NotImplementedError (skip this test)
test_cases = {
    "Jan 3rd":
    datetime.date(2005, 1, 3),
    """This year's Campus Dance will be held on Friday, May 27 from 9:00 pm
to 1:00 am with Duke Belaire's swing band on the College Green,
student bands on Lincoln Field, and jazz music at Carrie Tower.""":
    datetime.datetime(2005, 5, 27, 21, 0, 0),
    "FRI JUL 19 2002":
    datetime.date(2002, 7, 19),

    # XXX we don't do ranges yet
예제 #22
0
 def test(self):
     # A TrainedParse built with None as its first argument must yield
     # exactly one range for "3pm": the span (12, 15) with hour 15.
     parse = TrainedParse(None, "the time is 3pm")
     ranges = list(parse.parsedRanges())
     self.assertEqual(len(ranges), 1)
     only_range = ranges[0]
     self.assertEqual(only_range, ((12, 15), PartialTime(hour=15)))
예제 #23
0
파일: test.py 프로젝트: drewp/eventually
from sets import Set
import datetime, calendar, optparse

# we try to use the AIMA library, which provides some py2.4 support for
# earlier Pythons
try:
    from AIMA import *
except ImportError:
    pass

def find_next_day_of_week(dayofweek, start):
    """Return the first day on or after `start` whose weekday() equals
    `dayofweek`; `start` itself is returned when it already matches."""
    one_day = datetime.timedelta(days=1)
    candidate = start
    while True:
        if candidate.weekday() == dayofweek:
            return candidate
        candidate += one_day

# Reference time, taken once when this module is loaded; run_tests passes it
# to Parse as the context.
now = PartialTime.now()

# string : list of required results
#          or None (no matches expected)
#          or NotImplementedError (skip this test)
test_cases = {
    "Jan 3rd" : datetime.date(2005, 1, 3),

    """This year's Campus Dance will be held on Friday, May 27 from 9:00 pm
to 1:00 am with Duke Belaire's swing band on the College Green,
student bands on Lincoln Field, and jazz music at Carrie Tower.""" :
        datetime.datetime(2005, 5, 27, 21, 0, 0),

    "FRI JUL 19 2002" : datetime.date(2002, 7, 19),

    # XXX we don't do ranges yet