Example #1
 def test_sequence(self):
     len_eq(Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo')).match('hiiiilobingo1234'),
         12)  # succeed
     len_eq(Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo')).match('hiiiilobing'),
         None)  # don't
     len_eq(Sequence(Regex('hi*')).match('>hiiii', 1),
         5)  # non-0 pos
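Example #1 relies on the test suite's len_eq helper; the same check can be reproduced standalone. A minimal sketch, assuming a recent parsimonious release (where a failed match raises ParseError rather than returning None, as in Example #2):

from parsimonious.expressions import Literal, Regex, Sequence

seq = Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo'))
node = seq.match('hiiiilobingo1234')
print(node.end - node.start)  # 12: 'hiiii' (5) + 'lo' (2) + 'bingo' (5)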
Example #2
 def test_sequence(self):
     len_eq(
         Sequence(Regex('hi*'), Literal('lo'),
                  Regex('.ingo')).match('hiiiilobingo1234'), 12)  # succeed
     assert_raises(ParseError,
                   Sequence(Regex('hi*'), Literal('lo'),
                            Regex('.ingo')).match, 'hiiiilobing')  # don't
     len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5)  # non-0 pos
Example #3
    def _expressions_from_rules(self, rule_syntax):
        """Return the rules for parsing the grammar definition syntax.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # Hard-code enough of the rules to parse the grammar that describes the
        # grammar description language, to bootstrap:
        ws = Regex(r'\s+', name='ws')
        _ = Regex(r'[ \t]+', name='_')
        label = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*', name='label')
        quantifier = Regex(r'[*+?]', name='quantifier')
        # This pattern supports empty literals. TODO: A problem?
        literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                        ignore_case=True,
                        dot_all=True,
                        name='literal')
        regex = Sequence(Literal('~'),
                         literal,
                         Regex('[ilmsux]*', ignore_case=True),
                         name='regex')
        atom = OneOf(label, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')
        term = OneOf(quantified, atom, name='term')
        another_term = Sequence(_, term, name='another_term')
        sequence = Sequence(term, OneOrMore(another_term), name='sequence')
        or_term = Sequence(_, Literal('/'), another_term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        and_term = Sequence(_, Literal('&'), another_term, name='and_term')
        anded = Sequence(term, OneOrMore(and_term), name='anded')
        poly_term = OneOf(anded, ored, sequence, name='poly_term')
        rhs = OneOf(poly_term, term, name='rhs')
        eol = Regex(r'[\r\n$]', name='eol')  # TODO: Support $.
        rule = Sequence(Optional(ws),
                        label,
                        Optional(_),
                        Literal('='),
                        Optional(_),
                        rhs,
                        Optional(_),
                        eol,
                        name='rule')
        rules = Sequence(OneOrMore(rule), Optional(ws), name='rules')

        # Use those hard-coded rules to parse the (possibly more extensive)
        # rule syntax. (For example, unless I start using parentheses in the
        # rule language definition itself, I should never have to hard-code
        # expressions for those above.)
        rule_tree = rules.parse(rule_syntax)

        # Turn the parse tree into a map of expressions:
        return RuleVisitor().visit(rule_tree)
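The final call uses Expression.parse, which, unlike match, must consume the whole input. A minimal sketch with an illustrative rule (recent parsimonious assumed):

from parsimonious.expressions import Literal, Regex, Sequence

rule = Sequence(Regex(r'[a-z]+'), Literal('='), Regex(r'"[^"]*"'), name='rule')
print(rule.parse('greeting="hi"'))    # consumes the whole text and returns the node tree
print(rule.match('greeting="hi" x'))  # match tolerates trailing text; parse would raise here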
Example #4
    def test_atis_helper_methods(self):  # pylint: disable=no-self-use
        world = AtisWorld([("what is the earliest flight in morning "
                            "1993 june fourth from boston to pittsburgh")])
        assert world.dates == [datetime(1993, 6, 4, 0, 0)]
        assert world._get_numeric_database_values('time_range_end') == [
            '800', '1200'
        ]  # pylint: disable=protected-access
        assert world._get_sequence_with_spacing(world.grammar, # pylint: disable=protected-access
                                                [world.grammar['col_ref'],
                                                 Literal('BETWEEN'),
                                                 world.grammar['time_range_start'],
                                                 Literal('AND'),
                                                 world.grammar['time_range_end']]) == \
                                                Sequence(world.grammar['col_ref'],
                                                         world.grammar['ws'],
                                                         Literal('BETWEEN'),
                                                         world.grammar['ws'],
                                                         world.grammar['time_range_start'],
                                                         world.grammar['ws'],
                                                         Literal('AND'),
                                                         world.grammar['ws'],
                                                         world.grammar['time_range_end'],
                                                         world.grammar['ws'])

        world = AtisWorld(['i plan to travel on the tenth of 1993 july'])
        assert world.dates == [datetime(1993, 7, 10, 0, 0)]
Example #5
 def test_sequence_nodes(self):
     """Assert that ``Sequence`` produces nodes with the right children."""
     s = Sequence(Literal('heigh', name='greeting1'),
                  Literal('ho',    name='greeting2'), name='dwarf')
     text = 'heighho'
     self.assertEqual(s.match(text), Node(s, text, 0, 7, children=[Node(s.members[0], text, 0, 5),
                                                                   Node(s.members[1], text, 5, 7)]))
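To see the child nodes this test asserts against, here is a minimal sketch (assuming a recent parsimonious, where Node.text is the matched slice of the input):

from parsimonious.expressions import Literal, Sequence

s = Sequence(Literal('heigh', name='greeting1'),
             Literal('ho', name='greeting2'), name='dwarf')
for child in s.match('heighho').children:
    print(child.expr.name, repr(child.text))
# greeting1 'heigh'
# greeting2 'ho'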
Example #6
    def test_atis_helper_methods(self):
        world = AtisWorld(
            [
                (
                    "what is the earliest flight in morning "
                    "1993 june fourth from boston to pittsburgh"
                )
            ]
        )
        assert world.dates == [datetime(1993, 6, 4, 0, 0)]
        assert world._get_numeric_database_values("time_range_end") == ["800", "1200"]
        assert world._get_sequence_with_spacing(
            world.grammar,
            [
                world.grammar["col_ref"],
                Literal("BETWEEN"),
                world.grammar["time_range_start"],
                Literal("AND"),
                world.grammar["time_range_end"],
            ],
        ) == Sequence(
            world.grammar["col_ref"],
            world.grammar["ws"],
            Literal("BETWEEN"),
            world.grammar["ws"],
            world.grammar["time_range_start"],
            world.grammar["ws"],
            Literal("AND"),
            world.grammar["ws"],
            world.grammar["time_range_end"],
            world.grammar["ws"],
        )

        world = AtisWorld(["i plan to travel on the tenth of 1993 july"])
        assert world.dates == [datetime(1993, 7, 10, 0, 0)]
Example #7
    def test_lazy_custom_rules(self):
        """Make sure LazyReferences manually shoved into custom rules are
        resolved.

        Incidentally test passing full-on Expressions as custom rules and
        having a custom rule as the default one.

        """
        grammar = Grammar("""
            four = '4'
            five = '5'""",
                          forty_five=Sequence(
                              LazyReference('four'),
                              LazyReference('five'),
                              name='forty_five')).default('forty_five')
        s = '45'
        eq_(
            grammar.parse(s),
            Node(grammar['forty_five'],
                 s,
                 0,
                 2,
                 children=[
                     Node(grammar['four'], s, 0, 1),
                     Node(grammar['five'], s, 1, 2)
                 ]))
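For contrast, a minimal sketch (not from the source) of the "full-on Expression as a custom rule" case the docstring mentions, with no LazyReference involved:

from parsimonious.expressions import Literal, Sequence
from parsimonious.grammar import Grammar

grammar = Grammar('''
    four = "4"
    five = "5"''',
                  forty_five=Sequence(Literal('4'), Literal('5'),
                                      name='forty_five')).default('forty_five')
print(grammar.parse('45'))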
Example #8
 def _get_sequence_with_spacing(self, # pylint: disable=no-self-use
                                new_grammar,
                                expressions: List[Expression],
                                name: str = '') -> Sequence:
     """
     This is a helper method for generating sequences, since we often want a list of expressions
     with whitespaces between them.
     """
     expressions = [subexpression
                    for expression in expressions
                    for subexpression in (expression, new_grammar['ws'])]
     return Sequence(*expressions, name=name)
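The comprehension interleaves a whitespace expression after every element. A tiny sketch with plain strings (illustrative values only) shows the resulting shape:

expressions = ['col_ref', 'BETWEEN', 'time_range_start']
interleaved = [sub for expr in expressions for sub in (expr, 'ws')]
print(interleaved)  # ['col_ref', 'ws', 'BETWEEN', 'ws', 'time_range_start', 'ws']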
Example #9
 def visit_sequence(self, node, sequence):
     """A parsed Sequence looks like [term node, OneOrMore node of
     ``another_term``s]. Flatten it out."""
     term, other_terms = sequence
     return Sequence(term, *other_terms)
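Star-unpacking spreads the visited OneOrMore list into Sequence's positional arguments, so the result has one flat member tuple. A minimal sketch with illustrative literals (not the real parse results):

from parsimonious.expressions import Literal, Sequence

term = Literal('a')
other_terms = [Literal('b'), Literal('c')]
flat = Sequence(term, *other_terms)
print(len(flat.members))  # 3 -- no nested OneOrMore wrapper remains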
Example #10
    def _expressions_from_rules(self, rule_syntax, custom_rules):
        """Return the rules for parsing the grammar definition syntax.

        Return a 2-tuple: a dict of rule names pointing to their expressions,
        and then the top-level expression for the first rule.

        """
        # Hard-code enough of the rules to parse the grammar that describes the
        # grammar description language, to bootstrap:
        comment = Regex(r'#[^\r\n]*', name='comment')
        meaninglessness = OneOf(Regex(r'\s+'), comment, name='meaninglessness')
        _ = ZeroOrMore(meaninglessness, name='_')
        equals = Sequence(Literal('='), _, name='equals')
        label = Sequence(Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), _, name='label')
        reference = Sequence(label, Not(equals), name='reference')
        quantifier = Sequence(Regex(r'[*+?]'), _, name='quantifier')
        # This pattern supports empty literals. TODO: A problem?
        spaceless_literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"',
                                  ignore_case=True,
                                  dot_all=True,
                                  name='spaceless_literal')
        literal = Sequence(spaceless_literal, _, name='literal')
        regex = Sequence(Literal('~'),
                         literal,
                         Regex('[ilmsuxa]*', ignore_case=True),
                         _,
                         name='regex')
        atom = OneOf(reference, literal, regex, name='atom')
        quantified = Sequence(atom, quantifier, name='quantified')

        term = OneOf(quantified, atom, name='term')
        not_term = Sequence(Literal('!'), term, _, name='not_term')
        term.members = (not_term, ) + term.members

        sequence = Sequence(term, OneOrMore(term), name='sequence')
        or_term = Sequence(Literal('/'), _, term, name='or_term')
        ored = Sequence(term, OneOrMore(or_term), name='ored')
        expression = OneOf(ored, sequence, term, name='expression')
        rule = Sequence(label, equals, expression, name='rule')
        rules = Sequence(_, OneOrMore(rule), name='rules')

        # Use those hard-coded rules to parse the (more extensive) rule syntax.
        # (For example, unless I start using parentheses in the rule language
        # definition itself, I should never have to hard-code expressions for
        # those above.)

        rule_tree = rules.parse(rule_syntax)

        # Turn the parse tree into a map of expressions:
        return RuleVisitor().visit(rule_tree)
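For a sense of what the bootstrapped syntax covers, a minimal sketch (not from the source) exercising references, literals, ~ regexes, quantifiers, and / alternation through the public Grammar class:

from parsimonious.grammar import Grammar

grammar = Grammar(r'''
    greeting = hi ws name
    hi       = "hi" "i"*
    ws       = ~"\s+"
    name     = ~"[a-z]+" / ~"[A-Z]+"
    ''')
print(grammar.parse('hiii bob'))  # parses with the first rule, 'greeting'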
Example #11
 def test_optional(self):
     len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('b'),
            1)  # contained expr fails
     len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('ab'),
            2)  # contained expr succeeds
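A standalone sketch of the first assertion (recent parsimonious assumed, where match returns a Node spanning the consumed text):

from parsimonious.expressions import Literal, Optional, Sequence

seq = Sequence(Optional(Literal('a')), Literal('b'))
print(seq.match('b').end)   # 1 -- the Optional matched zero characters
print(seq.match('ab').end)  # 2 -- the Optional consumed the 'a'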
Example #12
    def visit_lookahead_term(self, lookahead_term, visited_children):
        ampersand, term, _ = visited_children
        return Lookahead(term)

    def visit_not_term(self, not_term, visited_children):
        exclamation, term, _ = visited_children
        return Not(term)

    def visit_rule(self, rule, visited_children):
        """Assign a name to the Expression and return it."""
        label, equals, expression = visited_children
        expression.name = label  # Assign a name to the expr.
        return expression

    def visit_sequence(self, sequence, visited_children):
        """A parsed Sequence looks like [term node, OneOrMore node of
        ``another_term``s]. Flatten it out."""
        term, other_terms = visited_children
        return Sequence(term, *other_terms)

    def visit_ored(self, ored, visited_children):
        first_term, other_terms = visited_children
        return OneOf(first_term, *other_terms)

    def visit_or_term(self, or_term, visited_children):
        """Return just the term from an ``or_term``.

        We already know it's going to be ored, from the containing ``ored``.

        """
        slash, _, term = visited_children
        return term

    def visit_label(self, label, visited_children):
        """Turn a label into a unicode string."""
        name, _ = visited_children
        return name.text
Example #13
 def visit_sequence(self, sequence, visited_children):
     """A parsed Sequence looks like [term node, OneOrMore node of
     ``another_term``s]. Flatten it out."""
     term, other_terms = visited_children
     return Sequence(term, *other_terms)
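Putting the visitor pattern together end to end, a small self-contained sketch (not the project's RuleVisitor) that flattens in the same star-unpacking style:

from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor

grammar = Grammar(r'''
    digits = digit digit*
    digit  = ~"[0-9]"
    ''')

class DigitCollector(NodeVisitor):
    def visit_digit(self, node, visited_children):
        return node.text

    def visit_digits(self, node, visited_children):
        first, rest = visited_children
        return [first, *rest]  # flatten, as visit_sequence does above

    def generic_visit(self, node, visited_children):
        # Anonymous nodes (e.g. the digit* quantifier) just pass their children through.
        return visited_children or node

print(DigitCollector().visit(grammar.parse('123')))  # ['1', '2', '3']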