def test_sequence(self):
    """``Sequence`` should match every member in order, or fail entirely."""
    seq = Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo'))
    # All three members match consecutively:
    len_eq(seq.match('hiiiilobingo1234'), 12)
    # The final member cannot match, so the whole sequence fails:
    len_eq(seq.match('hiiiilobing'), None)
    # Matching may begin at a non-zero position:
    len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5)
def test_sequence(self):
    """``Sequence`` should match every member in order or raise ParseError."""
    seq = Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo'))
    # All three members match consecutively:
    len_eq(seq.match('hiiiilobingo1234'), 12)
    # The final member cannot match, so matching raises:
    assert_raises(ParseError, seq.match, 'hiiiilobing')
    # Matching may begin at a non-zero position:
    len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5)
def _expressions_from_rules(self, rule_syntax): """Return the rules for parsing the grammar definition syntax. Return a 2-tuple: a dict of rule names pointing to their expressions, and then the top-level expression for the first rule. """ # Hard-code enough of the rules to parse the grammar that describes the # grammar description language, to bootstrap: ws = Regex(r'\s+', name='ws') _ = Regex(r'[ \t]+', name='_') label = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*', name='label') quantifier = Regex(r'[*+?]', name='quantifier') # This pattern supports empty literals. TODO: A problem? literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"', ignore_case=True, dot_all=True, name='literal') regex = Sequence(Literal('~'), literal, Regex('[ilmsux]*', ignore_case=True), name='regex') atom = OneOf(label, literal, regex, name='atom') quantified = Sequence(atom, quantifier, name='quantified') term = OneOf(quantified, atom, name='term') another_term = Sequence(_, term, name='another_term') sequence = Sequence(term, OneOrMore(another_term), name='sequence') or_term = Sequence(_, Literal('/'), another_term, name='or_term') ored = Sequence(term, OneOrMore(or_term), name='ored') and_term = Sequence(_, Literal('&'), another_term, name='and_term') anded = Sequence(term, OneOrMore(and_term), name='anded') poly_term = OneOf(anded, ored, sequence, name='poly_term') rhs = OneOf(poly_term, term, name='rhs') eol = Regex(r'[\r\n$]', name='eol') # TODO: Support $. rule = Sequence(Optional(ws), label, Optional(_), Literal('='), Optional(_), rhs, Optional(_), eol, name='rule') rules = Sequence(OneOrMore(rule), Optional(ws), name='rules') # Use those hard-coded rules to parse the (possibly more extensive) # rule syntax. (For example, unless I start using parentheses in the # rule language definition itself, I should never have to hard-code # expressions for those above.) rule_tree = rules.parse(rule_syntax) # Turn the parse tree into a map of expressions: return RuleVisitor().visit(rule_tree)
def test_atis_helper_methods(self): # pylint: disable=no-self-use
    """Spot-check AtisWorld helpers: date extraction, numeric database
    lookup, and whitespace-interleaved sequence construction.

    Fix: ``Literal(f'AND')`` used f-string prefixes with no placeholders
    (pointless, flagged by linters); replaced with plain string literals.
    The string values are unchanged.
    """
    world = AtisWorld([("what is the earliest flight in morning "
                        "1993 june fourth from boston to pittsburgh")])
    assert world.dates == [datetime(1993, 6, 4, 0, 0)]
    assert world._get_numeric_database_values('time_range_end') == \
            ['800', '1200'] # pylint: disable=protected-access
    # _get_sequence_with_spacing should interleave ``ws`` after each member:
    assert world._get_sequence_with_spacing(world.grammar, # pylint: disable=protected-access
                                            [world.grammar['col_ref'],
                                             Literal('BETWEEN'),
                                             world.grammar['time_range_start'],
                                             Literal('AND'),
                                             world.grammar['time_range_end']]) == \
            Sequence(world.grammar['col_ref'], world.grammar['ws'],
                     Literal('BETWEEN'), world.grammar['ws'],
                     world.grammar['time_range_start'], world.grammar['ws'],
                     Literal('AND'), world.grammar['ws'],
                     world.grammar['time_range_end'], world.grammar['ws'])
    world = AtisWorld(['i plan to travel on the tenth of 1993 july'])
    assert world.dates == [datetime(1993, 7, 10, 0, 0)]
def test_sequence_nodes(self):
    """Assert that ``Sequence`` produces nodes with the right children."""
    dwarf = Sequence(Literal('heigh', name='greeting1'),
                     Literal('ho', name='greeting2'),
                     name='dwarf')
    text = 'heighho'
    first, second = dwarf.members
    expected = Node(dwarf, text, 0, 7,
                    children=[Node(first, text, 0, 5),
                              Node(second, text, 5, 7)])
    self.assertEqual(dwarf.match(text), expected)
def test_atis_helper_methods(self):
    """Spot-check AtisWorld helpers: date extraction, numeric database
    lookup, and whitespace-interleaved sequence construction.

    Fix: ``Literal(f"AND")`` used f-string prefixes with no placeholders
    (pointless, flagged by linters); replaced with plain string literals.
    The string values are unchanged.
    """
    world = AtisWorld(
        [
            (
                "what is the earliest flight in morning "
                "1993 june fourth from boston to pittsburgh"
            )
        ]
    )
    assert world.dates == [datetime(1993, 6, 4, 0, 0)]
    assert world._get_numeric_database_values("time_range_end") == ["800", "1200"]
    # _get_sequence_with_spacing should interleave ``ws`` after each member:
    assert world._get_sequence_with_spacing(
        world.grammar,
        [
            world.grammar["col_ref"],
            Literal("BETWEEN"),
            world.grammar["time_range_start"],
            Literal("AND"),
            world.grammar["time_range_end"],
        ],
    ) == Sequence(
        world.grammar["col_ref"],
        world.grammar["ws"],
        Literal("BETWEEN"),
        world.grammar["ws"],
        world.grammar["time_range_start"],
        world.grammar["ws"],
        Literal("AND"),
        world.grammar["ws"],
        world.grammar["time_range_end"],
        world.grammar["ws"],
    )
    world = AtisWorld(["i plan to travel on the tenth of 1993 july"])
    assert world.dates == [datetime(1993, 7, 10, 0, 0)]
def test_lazy_custom_rules(self):
    """Make sure LazyReferences manually shoved into custom rules are
    resolved.

    Incidentally test passing full-on Expressions as custom rules and
    having a custom rule as the default one.

    """
    forty_five = Sequence(LazyReference('four'),
                          LazyReference('five'),
                          name='forty_five')
    grammar = Grammar("""
        four = '4'
        five = '5'""", forty_five=forty_five).default('forty_five')
    s = '45'
    expected = Node(grammar['forty_five'], s, 0, 2,
                    children=[Node(grammar['four'], s, 0, 1),
                              Node(grammar['five'], s, 1, 2)])
    eq_(grammar.parse(s), expected)
def _get_sequence_with_spacing(self, # pylint: disable=no-self-use
                               new_grammar,
                               expressions: List[Expression],
                               name: str = '') -> Sequence:
    """
    This is a helper method for generating sequences, since we often
    want a list of expressions with whitespaces between them.
    """
    # Follow every expression with the grammar's whitespace rule
    # (including after the final one), then build a single Sequence.
    interleaved = []
    for expression in expressions:
        interleaved.append(expression)
        interleaved.append(new_grammar['ws'])
    return Sequence(*interleaved, name=name)
def visit_sequence(self, node, sequence):
    """A parsed Sequence looks like [term node, OneOrMore node of
    ``another_term``s]. Flatten it out."""
    first, rest = sequence
    return Sequence(first, *rest)
def _expressions_from_rules(self, rule_syntax, custom_rules): """Return the rules for parsing the grammar definition syntax. Return a 2-tuple: a dict of rule names pointing to their expressions, and then the top-level expression for the first rule. """ # Hard-code enough of the rules to parse the grammar that describes the # grammar description language, to bootstrap: comment = Regex(r'#[^\r\n]*', name='comment') meaninglessness = OneOf(Regex(r'\s+'), comment, name='meaninglessness') _ = ZeroOrMore(meaninglessness, name='_') equals = Sequence(Literal('='), _, name='equals') label = Sequence(Regex(r'[a-zA-Z_][a-zA-Z_0-9]*'), _, name='label') reference = Sequence(label, Not(equals), name='reference') quantifier = Sequence(Regex(r'[*+?]'), _, name='quantifier') # This pattern supports empty literals. TODO: A problem? spaceless_literal = Regex(r'u?r?"[^"\\]*(?:\\.[^"\\]*)*"', ignore_case=True, dot_all=True, name='spaceless_literal') literal = Sequence(spaceless_literal, _, name='literal') regex = Sequence(Literal('~'), literal, Regex('[ilmsuxa]*', ignore_case=True), _, name='regex') atom = OneOf(reference, literal, regex, name='atom') quantified = Sequence(atom, quantifier, name='quantified') term = OneOf(quantified, atom, name='term') not_term = Sequence(Literal('!'), term, _, name='not_term') term.members = (not_term, ) + term.members sequence = Sequence(term, OneOrMore(term), name='sequence') or_term = Sequence(Literal('/'), _, term, name='or_term') ored = Sequence(term, OneOrMore(or_term), name='ored') expression = OneOf(ored, sequence, term, name='expression') rule = Sequence(label, equals, expression, name='rule') rules = Sequence(_, OneOrMore(rule), name='rules') # Use those hard-coded rules to parse the (more extensive) rule syntax. # (For example, unless I start using parentheses in the rule language # definition itself, I should never have to hard-code expressions for # those above.) 
rule_tree = rules.parse(rule_syntax) # Turn the parse tree into a map of expressions: return RuleVisitor().visit(rule_tree)
def test_optional(self):
    """``Optional`` should succeed whether or not its inner expression does."""
    optional_a_then_b = Sequence(Optional(Literal('a')), Literal('b'))
    # Contained expression fails; Optional matches zero-length anyway:
    len_eq(optional_a_then_b.match('b'), 1)
    # Contained expression succeeds:
    len_eq(optional_a_then_b.match('ab'), 2)
def visit_lookahead_term(self, lookahead_term, visited_children):
    """Turn a parsed lookahead term into a ``Lookahead`` expression."""
    # Fix: Python 2 tuple-parameter unpacking (removed by PEP 3113) is a
    # syntax error in Python 3; unpack inside the body instead.
    ampersand, term, _ = visited_children
    return Lookahead(term)

def visit_not_term(self, not_term, visited_children):
    """Turn a parsed negative-lookahead term into a ``Not`` expression."""
    exclamation, term, _ = visited_children
    return Not(term)

def visit_rule(self, rule, visited_children):
    """Assign a name to the Expression and return it."""
    label, equals, expression = visited_children
    expression.name = label  # Assign a name to the expr.
    return expression

def visit_sequence(self, sequence, visited_children):
    """A parsed Sequence looks like [term node, OneOrMore node of
    ``another_term``s]. Flatten it out."""
    term, other_terms = visited_children
    return Sequence(term, *other_terms)

def visit_ored(self, ored, visited_children):
    """Flatten an ``ored`` node into a single ``OneOf`` expression."""
    first_term, other_terms = visited_children
    return OneOf(first_term, *other_terms)

def visit_or_term(self, or_term, visited_children):
    """Return just the term from an ``or_term``.

    We already know it's going to be ored, from the containing ``ored``.

    """
    slash, _, term = visited_children
    return term

def visit_label(self, label, visited_children):
    """Turn a label into a unicode string."""
    name, _ = visited_children
    return name.text
def visit_sequence(self, sequence, visited_children):
    """A parsed Sequence looks like [term node, OneOrMore node of
    ``another_term``s]. Flatten it out."""
    # Fix: renamed the 2to3-generated placeholder ``xxx_todo_changeme6``
    # to a descriptive name; visitor callbacks are invoked positionally,
    # so the rename is caller-compatible.
    term, other_terms = visited_children
    return Sequence(term, *other_terms)