def test_keyword_matches_on_word_boundary():
    """
    Check how keyword-like string matches behave before and after an
    ID-like KEYWORD terminal is added to the grammar.
    """
    grammar_text = r"""
    S: "for" name=ID "=" from=INT "to" to=INT;

    terminals
    ID: /\w+/;
    INT: /\d+/;
    """

    # Without the KEYWORD rule this input does not raise an error.
    Parser(Grammar.from_string(grammar_text)).parse('forid=10 to20')

    # Add the KEYWORD rule to the grammar to match ID-like keywords.
    grammar_text += r"KEYWORD: /\w+/;"
    parser = Parser(Grammar.from_string(grammar_text))

    # With KEYWORD defined both of these inputs must be rejected.
    rejected = (
        ('forid=10 to20', 'forid=10 t" => Expected: for'),
        ('for id=10 to20', 'Expected: to'),
    )
    for text, message in rejected:
        with pytest.raises(ParseError, match=message):
            parser.parse(text)

    # Inputs where keywords are separated from what follows are accepted.
    for text in ('for id=10 to 20', 'for for=10 to 20'):
        parser.parse(text)
def test_cyclic_grammar_3():
    """
    Grammar with an indirect cycle.

    r:EMPTY->A ; r:A->S; r:EMPTY->A; r:SA->S; r:EMPTY->A; r:SA->S;...
    """
    g = Grammar.from_string("""
    S: S A | A;
    A: "a" | EMPTY;
    """)

    # This grammar has 3 S/R conflicts where each reduction is EMPTY.
    # With the "prefer shifts over empty" strategy switched off the LR
    # parser construction reports them.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts_over_empty=False)

    # The default strategy (prefer shifts over empty) yields no S/R conflict.
    Parser(g)

    forest = GLRParser(g).parse('aa')

    # Taking the length of the GLR result raises LoopError for this grammar.
    with pytest.raises(LoopError):
        len(forest)
def test_highly_ambiguous_grammar():
    """
    The grammar below has both Shift/Reduce and Reduce/Reduce conflicts, so
    deterministic LR parsing can't handle it. The Shift/Reduce conflicts can
    be resolved by the prefer_shifts strategy.
    """
    g = Grammar.from_string("""
    S: "b" | S S | S S S;
    """)

    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    # prefer_shifts resolves the S/R conflicts, but the R/R conflicts remain.
    with pytest.raises(RRConflicts):
        Parser(g, prefer_shifts=True)

    # The GLR parser handles this grammar fine.
    glr = GLRParser(g, build_tree=True)

    # 3 tokens -> 3 valid derivations/trees, 4 tokens -> 10 valid derivations.
    for tokens, derivations in (("bbb", 3), ("bbbb", 10)):
        results = glr.parse(tokens)
        assert len(results) == derivations
def test_partial_parse():
    """
    When EOF is not given at the end of the sequence, only the beginning
    of the input string needs to match.
    """
    open_ended = Grammar.from_string("""
    S: 'a' B;
    B: 'b';
    """)

    # The parser should successfully parse 'ab' at the beginning.
    Parser(open_ended).parse('abc')

    # With EOF in the rule the match is anchored at the end of the string,
    # so the whole string must be parsed for parsing to succeed.
    anchored = Grammar.from_string("""
    S: 'a' B EOF;
    B: 'b';
    """)
    parser = Parser(anchored)
    parser.parse('a b')
    with pytest.raises(ParseError):
        parser.parse('a b c')
def test_invalid_number_of_actions():
    """
    A parser error must be raised when a rule is given a list of actions
    that has fewer or more entries than the rule has productions.
    """
    g = Grammar.from_string('''
    S: A+ | B+;
    A: 'a';
    B: 'b';
    ''')

    def some_action(_, nodes):
        return nodes[0]

    # Two actions for S are accepted.
    Parser(g, actions={'S': [some_action] * 2})

    # One action (too few) and three actions (too many) are both rejected.
    for action_count in (1, 3):
        actions = {'S': [some_action] * action_count}
        with pytest.raises(
                ParserInitError,
                match=r'Length of list of actions must match.*'):
            Parser(g, actions=actions)
def test_layout_terminal():
    """
    A layout definition may be just a terminal rule.
    """
    # Layout given as a string match.
    string_layout = Grammar.from_string(r"""
    S: "a" "b";
    LAYOUT: "c";
    """)
    parser = Parser(string_layout)

    with pytest.raises(ParseError):
        parser.parse("a b")
    parser.parse("cacbc")

    # Layout given as a reference to a regex terminal.
    regex_layout = Grammar.from_string(r"""
    S: "a" "b";
    LAYOUT: DIGITS;

    terminals
    DIGITS: /\d*/;
    """)
    parser = Parser(regex_layout)

    with pytest.raises(ParseError):
        parser.parse("a b")
    assert parser.parse("4444a23b545") == ['a', 'b']
def test_action_override():
    """
    An action given explicitly through the `actions` parameter overrides
    the default action or the one provided in the grammar.
    """
    g = Grammar.from_string("""
    S: Foo Bar;
    @pass_nochange
    Bar: "1" a;

    terminals
    @pass_nochange
    Foo: 'foo';
    a: "a";
    """)
    input_str = "foo 1 a"

    # Only the actions referenced from the grammar are in effect.
    assert Parser(g).parse(input_str) == ["foo", ["1", "a"]]

    def foo_action(_, __):
        return "eggs"

    def bar_action(_, __):
        return "bar reduce"

    actions = {"Foo": foo_action, "Bar": bar_action}
    overridden = ["eggs", "bar reduce"]

    # Actions passed to the parser are used instead.
    assert Parser(g, actions=actions).parse(input_str) == overridden

    # The same holds when the action calls are postponed.
    tree_parser = Parser(g, build_tree=True, actions=actions)
    tree = tree_parser.parse(input_str)
    assert tree_parser.call_actions(tree) == overridden
def test_partial_parse():
    """
    Test the `consume_input` parser parameter.
    """
    grammar_text = """
    S: 'a' B;
    B: 'b';
    """

    # With consume_input=False the parser should successfully parse 'ab'
    # at the beginning of the input.
    lenient = Parser(Grammar.from_string(grammar_text), consume_input=False)
    lenient.parse('abc')

    # When `consume_input` is not given it should be `True` by default and
    # the parser will not accept partial parses.
    strict = Parser(Grammar.from_string(grammar_text))
    strict.parse('a b')
    with pytest.raises(ParseError):
        strict.parse('a b c')
def test_case_insensitive_parsing():
    """
    By default parglare is case sensitive. This tests parsing without case
    sensitivity.
    """
    # Raw string: the regex contains `\w`, which is an invalid escape
    # sequence in a regular string literal.
    grammar = r"""
    S: "one" "Two" Astart;

    terminals
    Astart: /Aa\w+/;
    """

    g = Grammar.from_string(grammar)

    # By default parsing is case sensitive for both string and regex matches.
    parser = Parser(g)

    with pytest.raises(ParseError):
        parser.parse('One Two Aaa')

    with pytest.raises(ParseError):
        parser.parse('one Two AAa')

    # With ignore_case=True both inputs are accepted.
    g = Grammar.from_string(grammar, ignore_case=True)
    parser = Parser(g)
    parser.parse('One Two Aaa')
    parser.parse('one Two AAa')
def test_save_load_table():
    """
    Test the basic table save/load cycle with table file creation.
    """
    grammar_path = os.path.join(this_folder, 'calc.pg')
    imported_path = os.path.join(this_folder, 'variable.pg')
    table_path = os.path.join(this_folder, 'calc.pgt')
    source = 'a = 5 1 + 2 * a - 7'
    expected = 1 + 2 * 5 - 7

    def table_mtime():
        return os.path.getmtime(table_path)

    def touch_imported():
        with open(imported_path, 'a'):
            os.utime(imported_path, None)

    grammar = Grammar.from_file(grammar_path)

    # Start without a table file; it is fine if there is none to remove.
    try:
        os.remove(table_path)
    except OSError:
        pass

    parser = Parser(grammar)
    assert parser.parse(source) == expected

    # Parser construction must have produced the table file.
    assert os.path.exists(table_path)

    last_mtime = table_mtime()
    time.sleep(1)

    parser = Parser(grammar)

    # The last generated table should be used during parser construction.
    # That is hard to check directly, so we only check that the table file
    # has not been regenerated.
    assert last_mtime == table_mtime()
    # A parser built from the persisted table should give the same result.
    assert parser.parse(source) == expected

    # Touching variable.pg should trigger table file regeneration.
    touch_imported()
    parser = Parser(grammar)
    assert parser.parse(source) == expected
    # The table file must now be newer.
    assert last_mtime < table_mtime()

    # force_load_table must load the table even if it is not newer than
    # the grammar.
    time.sleep(1)
    touch_imported()
    last_mtime = table_mtime()
    parser = Parser(grammar, force_load_table=True)
    assert last_mtime == table_mtime()
    parser = Parser(grammar)
    assert last_mtime < table_mtime()
def test_imported_actions_override_by_grammar_actions():
    """
    Actions loaded from `*_actions.py` files can override actions imported
    from other grammar files.
    """
    grammar_files = (
        'in_grammar_by_symbol_name/model.pg',
        'in_grammar_by_action_name/model.pg',
    )
    for grammar_file in grammar_files:
        g = Grammar.from_file(os.path.join(this_folder, grammar_file))
        model = Parser(g).parse(model_str)
        assert model.modelID == 43
def test_obj_position():
    """
    Test that object start/end position is set properly.
    """
    grammar = r"""
    S: "first" seconds=Second+;
    Second: value=digits;

    terminals
    digits:/\d+/;
    """
    g = Grammar.from_string(grammar)
    parser = Parser(g)

    # The positions asserted below are absolute character offsets into the
    # input, so the newlines and leading spaces are written out explicitly
    # instead of relying on the indentation of a triple-quoted literal.
    input_str = "\n    first 45 56\n    66 3434342\n    "
    result = parser.parse(input_str)

    # "56" occupies offsets 14-15.
    n = result.seconds[1]
    assert n._pg_start_position == 14
    assert n._pg_end_position == 16

    # "3434342" occupies offsets 24-30.
    n = result.seconds[3]
    assert n._pg_start_position == 24
    assert n._pg_end_position == 31
def test_multiple_assignment_with_repetitions():
    """
    Test a production with two assignments of repetitions: a plain one
    (one or more with separator) and a bool one (zero or more).
    """
    g = Grammar.from_string("""
    S: "1" first=some_match+[comma] second?=some_match* "3";

    terminals
    some_match: "2";
    comma: ",";
    """)
    for attr_name in ('first', 'second'):
        assert assignment_in_productions(g.productions, 'S', attr_name)

    invocations = []

    def act_s(_, nodes, first, second):
        invocations.append(True)
        assert first == ["2", "2"]
        assert second is True
        return nodes

    p = Parser(g, actions={"S": act_s})

    result = p.parse('1 2, 2 2 2 2 3')
    assert result == ["1", ["2", "2"], ["2", "2", "2"], "3"]
    # The action must have been called.
    assert invocations
def test_assignment_of_repetition():
    """
    Test assignment of a one-or-more repetition.
    """
    g = Grammar.from_string("""
    S: "1" first=some_match+ "3";

    terminals
    some_match: "2";
    """)
    assert assignment_in_productions(g.productions, 'S', 'first')

    invocations = []

    def act_s(_, nodes, first):
        invocations.append(True)
        assert first == ["2", "2"]
        return nodes

    p = Parser(g, actions={"S": act_s})

    result = p.parse('1 2 2 3')
    assert result == ["1", ["2", "2"], "3"]
    # The action must have been called.
    assert invocations
def test_object_children_order():
    """Children may depend on the concrete production that matched.

    Checks that the order given in `_pg_children` is the same as the order
    provided in the grammar (this may be important for tree traversal order).
    """
    g = Grammar.from_string(r'''
    S: a=A b=B
     | b=B a=A
     | b=B;
    A: val="a";
    B: val="b";
    ''')
    p = Parser(g)

    # One input per production of S, with the expected child values in order.
    cases = (
        ('a b', ['a', 'b']),
        ('b a', ['b', 'a']),
        ('b', ['b']),
    )
    for text, expected_vals in cases:
        children = p.parse(text)._pg_children
        assert len(expected_vals) == len(children)
        assert all(child.val == val
                   for child, val in zip(children, expected_vals))
def test_lr2_grammar():
    """
    A grammar that is not LR(1) raises SRConflicts in the LR parser but is
    parsed unambiguously by the GLR parser.
    """
    # Raw string: the ID regex contains `\w`, which is an invalid escape
    # sequence in a regular string literal.
    grammar = r"""
    Model: Prods EOF;
    Prods: Prod | Prods Prod;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g = Grammar.from_string(grammar)

    # This grammar is not LR(1) as it requires
    # at least two tokens of lookahead to decide
    # what to do on each ID from the right side.
    # If '=' is after ID then it should reduce "Prod"
    # else it should reduce ID as ProdRefs.
    with pytest.raises(SRConflicts):
        Parser(g)

    # But it can be parsed unambiguously by GLR.
    p = GLRParser(g)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    results = p.parse(txt)
    assert len(results) == 1
def refinementChecker(scriptFile):
    """
    Build the parser, parse `scriptFile` and execute the parsed script.

    Progress is reported on standard output after each of the three stages
    (parser generation, input parsing, script execution). If a stage raises,
    the exception and a failure message are printed and the process exits
    with status 1.

    Args:
        scriptFile: value passed to `parser.parse_file`.

    Raises:
        SystemExit: when any of the stages fails.
    """
    def abort(error, failure_message):
        # Report the failure and stop with a non-zero exit status so that
        # callers of the process can detect it.
        print(error)
        print(failure_message)
        print("Terminating.")
        sys.exit(1)

    # Single-argument print(...) calls behave the same under Python 2 and 3.
    try:
        g = Grammar.from_file("grammar")
        parser = Parser(g, actions=actions)
    except Exception as e:
        abort(e, "Parse generation: Failed.")
    print("Parser generation: Done.")

    try:
        script = parser.parse_file(scriptFile)
        print("Parse input: Done.")
    except Exception as e:
        abort(e, "Parse input: Failed.")

    try:
        execute(script)
    except Exception as e:
        abort(e, "Script execution: Failed.")
    print("Script execution: Done.")
    print("Terminating.")
def test_infinite_recursions():
    """
    If a rule has no recursion termination alternative, as for example:

    Elements: Elements Element;

    instead of:
    Elements: Elements Element | Element;

    the first set of "Elements" will be empty. GrammarError will be produced
    during parser construction.
    """
    grammar = """
    Elements: Elements Element;
    Element: "a" | "b";
    """

    g = Grammar.from_string(grammar)

    with pytest.raises(GrammarError) as e:
        Parser(g)

    # `e` is pytest's ExceptionInfo wrapper; the message is checked on the
    # raised exception itself, which is available as `e.value`.
    message = str(e.value)
    assert 'First set empty for grammar symbol "Elements"' in message
    assert 'infinite recursion' in message
def test_assignment_bool():
    """
    Test bool (`?=`) assignment.
    """
    g = Grammar.from_string("""
    S: "1" first?=some_match "3";
    some_match: "2";
    """)
    assert assignment_in_productions(g.productions, 'S', 'first')

    invocations = []

    def act_s(_, nodes, first):
        invocations.append(True)
        assert first is True
        return nodes

    p = Parser(g, actions={"S": act_s})

    result = p.parse('1 2 3')
    assert result == ["1", "2", "3"]
    # The action must have been called.
    assert invocations
def test_failed_disambiguation(cf):
    """
    A DisambiguationError naming the competing terminals is raised when
    lexical disambiguation can't pick a single match.
    """
    g = Grammar.from_string(r"""
    S: First | Second | Third;

    terminals
    First: /\d+\.\d+/ {15};
    Second: '14.7';
    Third: /\d+\.75/ {15};
    """)
    parser = Parser(g, actions=actions, debug=True)

    # All rules will match, but First and Third have higher priority.
    # Both are regexes, so the longest match will be used, and both
    # matches have the same length.
    with pytest.raises(DisambiguationError) as e:
        parser.parse('14.75')

    report = str(e.value)
    assert 'disambiguate' in report
    for terminal_name in ('First', 'Third'):
        assert terminal_name in report
    assert 'Second' not in report
def test_repeatable_zero_or_more_with_separator():
    """
    Tests the zero or more repeatable operator with a separator.
    """
    g = Grammar.from_string("""
    S: "2" b*[comma] "3";

    terminals
    b: "1";
    comma: ",";
    """)
    assert g.get_nonterminal('b_0_comma')

    p = Parser(g)

    # Several separated matches, then no matches at all.
    cases = (
        ('2 1, 1 , 1 3', ["2", ["1", "1", "1"], "3"]),
        ('2 3', ["2", [], "3"]),
    )
    for text, expected in cases:
        assert p.parse(text) == expected
def test_optional():
    """
    Tests the optional operator.
    """
    grammar = """
    S: "2" b? "3"? EOF;

    terminals
    b: "1";
    """

    g = Grammar.from_string(grammar)
    assert g.get_nonterminal('b_opt')

    p = Parser(g)

    # Both optionals present.
    input_str = '2 1 3'
    result = p.parse(input_str)
    assert result == ["2", "1", "3", None]

    # Optional `b` omitted.
    input_str = '2 3'
    result = p.parse(input_str)
    assert result == ["2", None, "3", None]

    # Optional "3" omitted.
    input_str = '2 1'
    result = p.parse(input_str)
    assert result == ["2", "1", None, None]

    # The leading "2" is not optional.
    input_str = ' 1 3'
    with pytest.raises(ParseError) as e:
        p.parse(input_str)
    # `e` is pytest's ExceptionInfo wrapper; the message is checked on the
    # raised exception itself, which is available as `e.value`.
    assert 'Expected: 2' in str(e.value)
def test_repeatable_zero_or_more():
    """
    Tests the zero or more repeatable operator.
    """
    g = Grammar.from_string("""
    S: "2" b* "3";

    terminals
    b: "1";
    """)
    for rule_name in ('b_0', 'b_1'):
        assert g.get_nonterminal(rule_name)

    p = Parser(g)

    # Several matches, then no matches at all.
    cases = (
        ('2 1 1 1 3', ["2", ["1", "1", "1"], "3"]),
        ('2 3', ["2", [], "3"]),
    )
    for text, expected in cases:
        assert p.parse(text) == expected
def test_error_recovery_complete():
    """
    Here parsing starts from the 'Result' rule, so parglare will require
    the input to end with 'EOF' for the parse to be successful.
    """
    parser = Parser(g, actions=actions, error_recovery=True)

    result = parser.parse("1 + 2 + * 3 & 89 - 5")

    # Both '*' and '& 89' should be dropped now as the parser expects EOF at
    # the end. Thus the parser should calculate '1 + 2 + 3 - 5'.
    assert result == 1

    assert len(parser.errors) == 2
    first_error, second_error = parser.errors

    assert first_error.location.start_position == 8
    assert first_error.location.end_position == 9

    # Characters of the second error should be packed as a single error
    # spanning the whole erroneous region. Whitespaces should be included too.
    assert second_error.location.start_position == 12
    assert second_error.location.end_position == 16
    expected_report = ('Error at 1:12:"+ 2 + * 3 *& 89 - 5" => '
                       'Expected: ) or * or + or - or / or EOF or ^')
    assert expected_report in str(second_error)
def test_custom_error_recovery():
    """
    A callable registered for error recovery must be called with the right
    parameters.
    """
    called = [False]

    def my_recovery(context, error):
        expected_symbols = context.state.actions.keys()
        called[0] = True
        assert isinstance(context.parser, Parser)
        assert context.input_str == '1 + 2 + * 3 - 5'
        assert context.position == 8
        # Both of these terminals must be among the expected symbols.
        for terminal_name in ('(', 'number'):
            assert g.get_terminal(terminal_name) in expected_symbols
        # Continue one character after the position of the error.
        return None, context.position + 1

    parser = Parser(g, actions=actions, error_recovery=my_recovery,
                    debug=True)

    assert parser.parse("1 + 2 + * 3 - 5") == 1

    # The recovery handler must have been called.
    assert called[0]
def test_repeatable_one_or_more_with_separator():
    """
    Tests the one or more repeatable operator with a separator.
    """
    grammar = """
    S: "2" b+[comma] "3";

    terminals
    b: "1";
    comma: ",";
    """

    g = Grammar.from_string(grammar)
    assert g.get_nonterminal('b_1_comma')

    p = Parser(g)

    input_str = '2 1, 1 , 1 3'
    result = p.parse(input_str)
    assert result == ["2", ["1", "1", "1"], "3"]

    # At least one `b` is required.
    input_str = '2 3'
    with pytest.raises(ParseError) as e:
        p.parse(input_str)
    # `e` is pytest's ExceptionInfo wrapper; the message is checked on the
    # raised exception itself, which is available as `e.value`.
    assert 'Expected: b' in str(e.value)
def test_cyclic_grammar_2():
    """
    Grammar taken from the paper "GLR Parsing for e-Grammars" by
    Rahman Nozohoor-Farshi.
    """
    g = Grammar.from_string("""
    S: S S;
    S: 'x';
    S: EMPTY;
    """)

    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    results = GLRParser(g).parse('xx')

    # There are 11 valid solutions.
    assert len(results) == 11

    expected = [
        ['x', 'x'],
        [[[], 'x'], 'x'],
        [[[], [[], 'x']], 'x'],
        ['x', [[], 'x']],
        [[[], 'x'], [[], 'x']],
        [[], ['x', 'x']],
        [[], [[], ['x', 'x']]],
        ['x', [[], 'x']],
        [[[], 'x'], [[], 'x']],
        [[[], [[], 'x']], [[], 'x']],
        [[], [[[], 'x'], 'x']],
    ]
    assert expected == results
def test_custom_error_recovery():
    """
    A callable registered for error recovery must be called with the right
    parameters.
    """
    called = [False]

    def my_recovery(parser, input, position, expected_symbols):
        called[0] = True
        assert isinstance(parser, Parser)
        assert input == '1 + 2 + * 3 - 5'
        assert position == 8
        assert type(expected_symbols) is set
        # Both of these terminals must be among the expected symbols.
        for terminal_name in ('(', 'number'):
            assert Terminal(terminal_name) in expected_symbols
        # Continue one character after the position of the error.
        return None, None, position + 1

    parser = Parser(g, actions=actions, error_recovery=my_recovery,
                    debug=True)

    assert parser.parse("1 + 2 + * 3 - 5") == 1

    # The recovery handler must have been called.
    assert called[0]
def test_nondeterministic_LR_raise_error():
    """Language of even length palindromes.

    This is a non-deterministic grammar and the language is non-ambiguous.

    If the string is an even length palindrome the parser should reduce
    EMPTY at the middle of the string and start to reduce by A and B.

    LR parsing is deterministic so this grammar can't parse the input as
    the EMPTY reduction will be tried only after consuming all the input by
    the implicit disambiguation strategy of favouring shifts over empty
    reductions.

    OTOH, the GLR parser can handle this by forking the parser at each step
    and trying both empty reductions and shifts. Only the parser that has
    reduced empty at the middle of the input will succeed.
    """
    palindrome = '0101000110001010'
    g = Grammar.from_string("""
    S: A | B | EMPTY;
    A: '1' S '1';
    B: '0' S '0';
    """)

    # Deterministic LR fails on this input.
    with pytest.raises(ParseError):
        lr_parser = Parser(g)
        lr_parser.parse(palindrome)

    # GLR finds the single valid parse.
    results = GLRParser(g).parse(palindrome)
    assert len(results) == 1
def test_error_recovery_uncomplete():
    """
    Test default recovery for a partial parse.

    parglare will try to parse as much as possible for the given grammar and
    input. If the current input can be reduced to the start rule the parse
    will succeed. In order to prevent a partial parse the first grammar rule
    should be ended with EOF, like in the case of the 'Result' rule.
    """
    parser = Parser(
        g,
        start_production=2,
        actions=actions,
        error_recovery=True,
        debug=True,
    )

    result = parser.parse("1 + 2 + * 3 & 89 - 5")

    # '*' after '+' will be dropped, but when the parser reaches '&' it has
    # a complete expression and will terminate successfully and report only
    # one error ('*' after '+').
    # The parser should thus calculate '1 + 2 + 3'.
    assert result == 6

    assert len(parser.errors) == 1
    (error,) = parser.errors

    assert error.position == 8
    assert error.length == 1
    assert 'Unexpected input at position (1, 8). Expected' in str(error)