def test_atis_helper_methods(self):  # pylint: disable=no-self-use
        """Check AtisWorld date extraction and grammar helper methods.

        Covers date parsing from an utterance, numeric database lookups,
        and interleaving of whitespace ('ws') rules into a grammar sequence.
        """
        world = AtisWorld([("what is the earliest flight in morning "
                            "1993 june fourth from boston to pittsburgh")])
        assert world.dates == [datetime(1993, 6, 4, 0, 0)]
        assert world._get_numeric_database_values('time_range_end') == [
            '800', '1200'
        ]  # pylint: disable=protected-access
        # _get_sequence_with_spacing should insert a 'ws' rule after every
        # expression it is given.  Note: 'AND' and 'BETWEEN' are plain string
        # literals; the original f-string prefix had no placeholders (F541).
        assert world._get_sequence_with_spacing(world.grammar, # pylint: disable=protected-access
                                                [world.grammar['col_ref'],
                                                 Literal('BETWEEN'),
                                                 world.grammar['time_range_start'],
                                                 Literal('AND'),
                                                 world.grammar['time_range_end']]) == \
                                                Sequence(world.grammar['col_ref'],
                                                         world.grammar['ws'],
                                                         Literal('BETWEEN'),
                                                         world.grammar['ws'],
                                                         world.grammar['time_range_start'],
                                                         world.grammar['ws'],
                                                         Literal('AND'),
                                                         world.grammar['ws'],
                                                         world.grammar['time_range_end'],
                                                         world.grammar['ws'])

        world = AtisWorld(['i plan to travel on the tenth of 1993 july'])
        assert world.dates == [datetime(1993, 7, 10, 0, 0)]
Example #2
0
 def test_sequence(self):
     """A ``Sequence`` matches all of its members in order, or none."""
     seq = Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo'))
     # Full match: every member succeeds, total length is reported.
     len_eq(seq.match('hiiiilobingo1234'), 12)
     # A truncated tail makes the whole sequence fail.
     len_eq(seq.match('hiiiilobing'), None)
     # Matching may begin at a non-zero position.
     len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5)
Example #3
0
    def test_atis_helper_methods(self):
        """Check AtisWorld date extraction and grammar helper methods.

        Covers date parsing from an utterance, numeric database lookups,
        and interleaving of whitespace ("ws") rules into a grammar sequence.
        """
        world = AtisWorld(
            [
                (
                    "what is the earliest flight in morning "
                    "1993 june fourth from boston to pittsburgh"
                )
            ]
        )
        assert world.dates == [datetime(1993, 6, 4, 0, 0)]
        assert world._get_numeric_database_values("time_range_end") == ["800", "1200"]
        # "AND" is a plain string literal; the original f-string prefix had no
        # placeholders (F541) and has been removed.
        assert world._get_sequence_with_spacing(
            world.grammar,
            [
                world.grammar["col_ref"],
                Literal("BETWEEN"),
                world.grammar["time_range_start"],
                Literal("AND"),
                world.grammar["time_range_end"],
            ],
        ) == Sequence(
            world.grammar["col_ref"],
            world.grammar["ws"],
            Literal("BETWEEN"),
            world.grammar["ws"],
            world.grammar["time_range_start"],
            world.grammar["ws"],
            Literal("AND"),
            world.grammar["ws"],
            world.grammar["time_range_end"],
            world.grammar["ws"],
        )

        world = AtisWorld(["i plan to travel on the tenth of 1993 july"])
        assert world.dates == [datetime(1993, 7, 10, 0, 0)]
Example #4
0
 def test_sequence_nodes(self):
     """Assert that ``Sequence`` produces nodes with the right children."""
     dwarf = Sequence(Literal('heigh', name='greeting1'),
                      Literal('ho', name='greeting2'), name='dwarf')
     text = 'heighho'
     # The sequence node spans the whole text; each member contributes one
     # child node covering its own slice.
     expected = Node(dwarf, text, 0, 7, children=[
         Node(dwarf.members[0], text, 0, 5),
         Node(dwarf.members[1], text, 5, 7)])
     self.assertEqual(dwarf.match(text), expected)
Example #5
0
 def test_sequence(self):
     """A ``Sequence`` matches its members in order or raises ParseError."""
     expr = Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo'))
     # All three members match in order.
     len_eq(expr.match('hiiiilobingo1234'), 12)
     # The final member cannot match, so the whole sequence raises.
     assert_raises(ParseError, expr.match, 'hiiiilobing')
     # A match may begin at a non-zero position.
     len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5)
Example #6
0
    def test_zero_or_more(self):
        """``ZeroOrMore`` handles zero, many, and zero-length inner matches."""
        len_eq(ZeroOrMore(Literal('b')).match(''), 0)     # zero occurrences
        len_eq(ZeroOrMore(Literal('b')).match('bbb'), 3)  # several occurrences

        # Sanity check for the test below: '^' matches zero characters.
        len_eq(Regex('^').match(''), 0)

        # A zero-length inner expression must terminate, not loop forever.
        len_eq(ZeroOrMore(Regex('^')).match(''), 0)
Example #7
0
 def test_one_or_more(self):
     """``OneOrMore`` enforces its minimum and avoids infinite loops."""
     len_eq(OneOrMore(Literal('b')).match('b'), 1)    # exactly one
     len_eq(OneOrMore(Literal('b')).match('bbb'), 3)  # several
     # Custom minimum that is satisfied...
     len_eq(OneOrMore(Literal('b'), min=3).match('bbb'), 3)
     # ...and one that is not: the match raises.
     assert_raises(ParseError, OneOrMore(Literal('b'), min=3).match, 'bb')
     # A zero-length inner match must terminate rather than loop.
     len_eq(OneOrMore(Regex('^')).match('bb'), 0)
Example #8
0
    def _expressions_from_rules(self, rule_syntax):
        """Return the rules for parsing the grammar definition syntax.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # Hard-code enough of the rules to parse the grammar that describes the
        # grammar description language, to bootstrap:
        ws = Regex(r'\s+', name='ws')
        # '_' is horizontal whitespace only, so the rule syntax stays
        # line-oriented (a newline terminates a rule via 'eol' below).
        _ = Regex(r'[ \t]+', name='_')
        label = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*', name='label')
        quantifier = Regex(r'[*+?]', name='quantifier')
        # This pattern supports empty literals. TODO: A problem?
        literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                        ignore_case=True,
                        dot_all=True,
                        name='literal')
        # A regex term: '~' then a quoted pattern then optional regex flags.
        regex = Sequence(Literal('~'),
                         literal,
                         Regex('[ilmsux]*', ignore_case=True),
                         name='regex')
        atom = OneOf(label, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')
        term = OneOf(quantified, atom, name='term')
        another_term = Sequence(_, term, name='another_term')
        # Compound right-hand sides: whitespace-separated sequences,
        # '/'-separated alternatives, and '&'-separated lookahead conjunctions.
        sequence = Sequence(term, OneOrMore(another_term), name='sequence')
        or_term = Sequence(_, Literal('/'), another_term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        and_term = Sequence(_, Literal('&'), another_term, name='and_term')
        anded = Sequence(term, OneOrMore(and_term), name='anded')
        poly_term = OneOf(anded, ored, sequence, name='poly_term')
        rhs = OneOf(poly_term, term, name='rhs')
        eol = Regex(r'[\r\n$]', name='eol')  # TODO: Support $.
        # One rule per line: 'label = rhs', with optional surrounding spacing.
        rule = Sequence(Optional(ws),
                        label,
                        Optional(_),
                        Literal('='),
                        Optional(_),
                        rhs,
                        Optional(_),
                        eol,
                        name='rule')
        rules = Sequence(OneOrMore(rule), Optional(ws), name='rules')

        # Use those hard-coded rules to parse the (possibly more extensive)
        # rule syntax. (For example, unless I start using parentheses in the
        # rule language definition itself, I should never have to hard-code
        # expressions for those above.)
        rule_tree = rules.parse(rule_syntax)

        # Turn the parse tree into a map of expressions:
        return RuleVisitor().visit(rule_tree)
Example #9
0
    def _expressions_from_rules(self, rule_syntax, custom_rules):
        """Return the rules for parsing the grammar definition syntax.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # NOTE(review): ``custom_rules`` is not referenced anywhere in this
        # body — confirm whether it is consumed by an override or is dead.
        # Hard-code enough of the rules to parse the grammar that describes the
        # grammar description language, to bootstrap:
        comment = Regex(r'#[^\r\n]*', name='comment')
        # Whitespace and comments are interchangeable "meaningless" filler.
        meaninglessness = OneOf(Regex(r'\s+'), comment, name='meaninglessness')
        _ = ZeroOrMore(meaninglessness, name='_')
        equals = Sequence(Literal('='), _, name='equals')
        label = Sequence(Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), _, name='label')
        # A reference is a label NOT followed by '=' (that would start a rule).
        reference = Sequence(label, Not(equals), name='reference')
        quantifier = Sequence(Regex(r'[*+?]'), _, name='quantifier')
        # This pattern supports empty literals. TODO: A problem?
        spaceless_literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                                  ignore_case=True,
                                  dot_all=True,
                                  name='spaceless_literal')
        literal = Sequence(spaceless_literal, _, name='literal')
        regex = Sequence(Literal('~'),
                         literal,
                         Regex('[ilmsuxa]*', ignore_case=True),
                         _,
                         name='regex')
        atom = OneOf(reference, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')

        term = OneOf(quantified, atom, name='term')
        not_term = Sequence(Literal('!'), term, _, name='not_term')
        # Mutate term's members in place so '!'-negated terms can themselves
        # contain terms — closes the recursive cycle after construction.
        term.members = (not_term, ) + term.members

        sequence = Sequence(term, OneOrMore(term), name='sequence')
        or_term = Sequence(Literal('/'), _, term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        expression = OneOf(ored, sequence, term, name='expression')
        rule = Sequence(label, equals, expression, name='rule')
        rules = Sequence(_, OneOrMore(rule), name='rules')

        # Use those hard-coded rules to parse the (more extensive) rule syntax.
        # (For example, unless I start using parentheses in the rule language
        # definition itself, I should never have to hard-code expressions for
        # those above.)

        rule_tree = rules.parse(rule_syntax)

        # Turn the parse tree into a map of expressions:
        return RuleVisitor().visit(rule_tree)
Example #10
0
 def test_all_of(self):
     """``AllOf`` should return its own node, wrapping the last child."""
     expr = AllOf(Literal('a', name='lit_a'),
                  Regex('A', ignore_case=True, name='reg_a'), name='all_of')
     text = 'a'
     # Only the final member ('reg_a') appears as a child of the AllOf node.
     expected = Node('all_of', text, 0, 1,
                     children=[Node('reg_a', text, 0, 1)])
     eq_(expr.match(text), expected)
Example #11
0
 def visit_spaceless_literal(self, spaceless_literal, visited_children):
     """Turn a string literal into a ``Literal`` that recognizes it."""
     # Let Python's own literal parser handle backslash escapes (\n, \t,
     # etc.); ast.literal_eval understands the same escape syntax as source
     # code. string.decode('string_escape') would have been a lower-level
     # possibility.
     literal_value = ast.literal_eval(spaceless_literal.text)
     return Literal(literal_value)
Example #12
0
 def _add_numeric_nonterminal_to_grammar(self, nonterminal: str,
                                         new_grammar: Grammar) -> None:
     """Add a rule for ``nonterminal`` matching any of its database values.

     If the database holds no values for this nonterminal, the grammar is
     left untouched.
     """
     values = self._get_numeric_database_values(nonterminal)
     literals = [Literal(value) for value in values]
     if literals:
         new_grammar[nonterminal] = OneOf(*literals, name=nonterminal)
Example #13
0
    def test_lookahead(self):
        """A lookahead (&) should assert a prefix without consuming input."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        # Input that does not start with 'a' is rejected outright.
        self.assertRaises(ParseError, grammar.parse, 'burp')

        s = 'arp'
        expected = Node(grammar['starts_with_a'], s, 0, 3, children=[
            # The lookahead matches zero characters...
            Node(Lookahead(Literal('a')), s, 0, 0),
            # ...so the regex consumes the whole string from position 0.
            Node(Regex(r'[a-z]+'), s, 0, 3)])
        self.assertEqual(grammar.parse('arp'), expected)
Example #14
0
    def test_optional(self):
        """A '?' rule wraps the matched inner Literal in an Optional node."""
        tree = rule_grammar.parse('boy = "howdy"?\n')
        rules, default_rule = RuleVisitor().visit(tree)

        howdy = 'howdy'

        # Expect one node from the Optional plus a child node from the
        # Literal inside it.
        expected = Node(default_rule, howdy, 0, 5,
                        children=[Node(Literal("howdy"), howdy, 0, 5)])
        self.assertEqual(default_rule.parse(howdy), expected)
Example #15
0
 def test_regex(self):
     """Parsing a regex term should produce the expected node tree."""
     text = '~"[a-zA-Z_][a-zA-Z_0-9]*"LI'
     regex = rule_grammar['regex']
     spaceless = rule_grammar['spaceless_literal']
     expected = Node(regex, text, 0, len(text), children=[
         Node(Literal('~'), text, 0, 1),                 # leading tilde
         Node(spaceless, text, 1, 25, children=[         # quoted pattern
             Node(spaceless.members[0], text, 1, 25)]),
         Node(regex.members[2], text, 25, 27),           # 'LI' flags
         Node(rule_grammar['_'], text, 27, 27)])         # trailing whitespace
     self.assertEqual(rule_grammar['regex'].parse(text), expected)
Example #16
0
 def test_one_or_more_one(self):
     """Test the 1 case of ``OneOrMore``; it should return a node with a child."""
     expr = OneOrMore(Literal('a', name='lit'), name='one')
     text = 'a'
     expected = Node(expr, text, 0, 1,
                     children=[Node(expr.members[0], text, 0, 1)])
     eq_(expr.match(text), expected)
Example #17
0
    def test_parse_success(self):
        """Make sure ``parse()`` returns the tree on success.

        There's not much more than that to test that we haven't already vetted
        above.

        """
        expr = OneOrMore(Literal('a', name='lit'), name='more')
        text = 'aa'
        lit = expr.members[0]
        # Two repetitions of the inner literal yield two child nodes.
        expected = Node(expr, text, 0, 2, children=[
            Node(lit, text, 0, 1),
            Node(lit, text, 1, 2)])
        self.assertEqual(expr.parse(text), expected)
Example #18
0
    def test_optional(self):
        """``Optional`` should return its own node wrapping the succeeded child."""
        opt = Optional(Literal('a', name='lit'), name='opt')

        # Success: the Optional's node wraps the Literal's node.
        text = 'a'
        expected = Node('opt', text, 0, 1,
                        children=[Node('lit', text, 0, 1)])
        eq_(opt.match(text), expected)

        # Failure of the Literal inside the Optional still matches, with no
        # children; LengthTests.test_optional is ambiguous about that case.
        text = ''
        eq_(opt.match(text), Node('opt', text, 0, 0))
Example #19
0
    def test_expressions_from_rules(self):
        """Test the ``Grammar`` base class's ability to compile an expression
        tree from rules.

        That the correct ``Expression`` tree is built is already tested in
        ``RuleGrammarTests``. This tests only that the ``Grammar`` base class's
        ``_expressions_from_rules`` works.

        """
        greeting_grammar = Grammar('greeting = "hi" / "howdy"')
        tree = greeting_grammar.parse('hi')
        expected = Node(greeting_grammar['greeting'], 'hi', 0, 2,
                        children=[Node(Literal('hi'), 'hi', 0, 2)])
        self.assertEqual(tree, expected)
Example #20
0
 def test_one_of(self):
     """``OneOf`` tries alternatives in order and fails when none match."""
     one_of = OneOf(Literal('aaa'), Literal('bb'))
     len_eq(one_of.match('aaa'), 3)    # first alternative wins
     len_eq(one_of.match('bbaaa'), 2)  # falls through to the second
     assert_raises(ParseError, one_of.match, 'aa')  # nothing matches
def test_repr():
    """Test repr of ``Node``.

    The repr should be a reconstructable expression: a binding for the full
    matched text plus a ``Node(...)`` constructor call.
    """
    s = u'hai ö'
    boogie = u'böogie'
    n = Node(Literal(boogie), s, 0, 3, children=[
        Node(Literal(' '), s, 3, 4), Node(Literal(u'ö'), s, 4, 5)])
    # The redundant str() wrapper around the literal has been removed; a
    # triple-quoted literal is already a str in Python 3.
    eq_(repr(n),
        """s = {hai_o}\nNode({boogie}, s, 0, 3, children=[Node({space}, s, 3, 4), Node({o}, s, 4, 5)])""".format(
            hai_o=repr(s),
            boogie=repr(Literal(boogie)),
            space=repr(Literal(" ")),
            o=repr(Literal(u"ö")),
        ))
Example #22
0
 def test_simple_node(self):
     """Test that leaf expressions like ``Literal`` make the right nodes."""
     greeting = Literal('hello', name='greeting')
     # The node carries the expression's name, the text, and its span.
     eq_(greeting.match('hello'), Node('greeting', 'hello', 0, 5))
 def test_simple_node(self):
     """Test that leaf expressions like ``Literal`` make the right nodes."""
     expr = Literal("hello", name="greeting")
     node = expr.match("hello")
     eq_(node, Node("greeting", "hello", 0, 5))
Example #24
0
 def test_visitation_exception(self):
     """A visitor that raises should surface a ``VisitationError``."""
     node = Node(Literal(''), '', 0, 0)
     self.assertRaises(VisitationError, ExplosiveFormatter().visit, node)
Example #25
0
 def test_optional(self):
     """An ``Optional`` member succeeds whether or not its inner expr does."""
     maybe_a_then_b = Sequence(Optional(Literal('a')), Literal('b'))
     len_eq(maybe_a_then_b.match('b'), 1)   # inner expression fails
     len_eq(maybe_a_then_b.match('ab'), 2)  # inner expression succeeds
Example #26
0
 def test_regex(self):
     """Exercise ``Literal`` and ``Regex`` matching, flags included."""
     len_eq(Literal('hello').match('ehello', 1), 5)  # literal at an offset
     len_eq(Regex('hello*').match('hellooo'), 7)     # '*' quantifier
     # No match at all raises instead of returning a length.
     assert_raises(ParseError, Regex('hello*').match, 'goodbye')
     len_eq(Regex('hello', ignore_case=True).match('HELLO'), 5)
 def test_simple_node(self):
     """Test that leaf expressions like ``Literal`` make the right nodes."""
     hello = Literal('hello', name='greeting')
     expected = Node('greeting', 'hello', 0, 5)
     eq_(hello.match('hello'), expected)
Example #28
0
 def test_str(self):
     """Test str and unicode of ``Node``."""
     node = Node(Literal('something', name='text'), 'o hai', 0, 5)
     expected = '<Node called "text" matching "o hai">'
     self.assertEqual(str(node), expected)
Example #29
0
 def test_one_of(self):
     """``OneOf`` should return its own node, wrapping the child that succeeds."""
     expr = OneOf(Literal('a', name='lit'), name='one_of')
     text = 'aa'
     expected = Node('one_of', text, 0, 1,
                     children=[Node('lit', text, 0, 1)])
     eq_(expr.match(text), expected)
Example #30
0
    def _update_grammar(self):
        """
        We create a new ``Grammar`` object from the one in ``AtisSqlTableContext``, that also
        has the new entities that are extracted from the utterance. Stitching together the expressions
        to form the grammar is a little tedious here, but it is worth it because we don't have to create
        a new grammar from scratch. Creating a new grammar is expensive because we have many production
        rules that have all database values in the column on the right hand side. We update the expressions
        bottom up, since the higher level expressions may refer to the lower level ones. For example, the
        ternary expression will refer to the start and end times.
        """

        # This will give us a shallow copy. We have to be careful here because the ``Grammar`` object
        # contains ``Expression`` objects that have tuples containing the members of that expression.
        # We have to create new sub-expression objects so that original grammar is not mutated.
        new_grammar = copy(AtisWorld.sql_table_context.grammar)

        for numeric_nonterminal in NUMERIC_NONTERMINALS:
            self._add_numeric_nonterminal_to_grammar(numeric_nonterminal,
                                                     new_grammar)
        self._update_expression_reference(new_grammar, 'pos_value', 'number')

        # 'col_ref BETWEEN time_range_start AND time_range_end', plus the
        # negated variants with upper- and lower-case 'not'. The 'AND' tokens
        # are plain strings; the original f-string prefixes had no
        # placeholders (F541) and have been removed.
        ternary_expressions = [
            self._get_sequence_with_spacing(new_grammar, [
                new_grammar['col_ref'],
                Literal('BETWEEN'), new_grammar['time_range_start'],
                Literal('AND'), new_grammar['time_range_end']
            ]),
            self._get_sequence_with_spacing(new_grammar, [
                new_grammar['col_ref'],
                Literal('NOT'),
                Literal('BETWEEN'), new_grammar['time_range_start'],
                Literal('AND'), new_grammar['time_range_end']
            ]),
            self._get_sequence_with_spacing(new_grammar, [
                new_grammar['col_ref'],
                Literal('not'),
                Literal('BETWEEN'), new_grammar['time_range_start'],
                Literal('AND'), new_grammar['time_range_end']
            ])
        ]

        new_grammar['ternaryexpr'] = OneOf(*ternary_expressions,
                                           name='ternaryexpr')
        self._update_expression_reference(new_grammar, 'condition',
                                          'ternaryexpr')

        new_binary_expressions = []

        # Binary comparisons against utterance-specific numeric values, e.g.
        # 'fare . round_trip_cost <binaryop> <value>'.
        fare_round_trip_cost_expression = \
                    self._get_sequence_with_spacing(new_grammar,
                                                    [Literal('fare'),
                                                     Literal('.'),
                                                     Literal('round_trip_cost'),
                                                     new_grammar['binaryop'],
                                                     new_grammar['fare_round_trip_cost']])
        new_binary_expressions.append(fare_round_trip_cost_expression)

        fare_one_direction_cost_expression = \
                    self._get_sequence_with_spacing(new_grammar,
                                                    [Literal('fare'),
                                                     Literal('.'),
                                                     Literal('one_direction_cost'),
                                                     new_grammar['binaryop'],
                                                     new_grammar['fare_one_direction_cost']])

        new_binary_expressions.append(fare_one_direction_cost_expression)

        flight_number_expression = \
                    self._get_sequence_with_spacing(new_grammar,
                                                    [Literal('flight'),
                                                     Literal('.'),
                                                     Literal('flight_number'),
                                                     new_grammar['binaryop'],
                                                     new_grammar['flight_number']])
        new_binary_expressions.append(flight_number_expression)

        if self.dates:
            # Only add date comparisons when the utterance actually mentioned
            # a date.
            year_binary_expression = self._get_sequence_with_spacing(
                new_grammar, [
                    Literal('date_day'),
                    Literal('.'),
                    Literal('year'), new_grammar['binaryop'],
                    new_grammar['year_number']
                ])
            month_binary_expression = self._get_sequence_with_spacing(
                new_grammar, [
                    Literal('date_day'),
                    Literal('.'),
                    Literal('month_number'), new_grammar['binaryop'],
                    new_grammar['month_number']
                ])
            day_binary_expression = self._get_sequence_with_spacing(
                new_grammar, [
                    Literal('date_day'),
                    Literal('.'),
                    Literal('day_number'), new_grammar['binaryop'],
                    new_grammar['day_number']
                ])
            new_binary_expressions.extend([
                year_binary_expression, month_binary_expression,
                day_binary_expression
            ])

        # Keep the original biexpr alternatives; the new ones are prepended so
        # they are tried first.
        new_binary_expressions = new_binary_expressions + list(
            new_grammar['biexpr'].members)
        new_grammar['biexpr'] = OneOf(*new_binary_expressions, name='biexpr')
        self._update_expression_reference(new_grammar, 'condition', 'biexpr')
        return new_grammar
Example #31
0
 def visit_spaceless_literal(self, spaceless_literal, visited_children):
     """Turn a string literal into a ``Literal`` that recognizes it."""
     literal_value = evaluate_string(spaceless_literal.text)
     return Literal(literal_value)
Example #32
0
 def test_zero_or_more_zero(self):
     """Test the 0 case of ``ZeroOrMore``; it should still return a node."""
     expr = ZeroOrMore(Literal('a'), name='zero')
     empty = ''
     # Zero repetitions still produce a (childless) node spanning nothing.
     eq_(expr.match(empty), Node('zero', empty, 0, 0))