def test_zero_or_more_with_optional_separator():
    """Check ZeroOrMore with a separator regex that may match nothing.

    The separator pattern ``,?`` can match the empty string, so items may be
    written with or without a comma between them.
    """
    def grammar():
        return ZeroOrMore("a", sep=RegExMatch(",?")), EOF

    parser = ParserPython(grammar)

    # The blanks in this input are significant: the offsets asserted in the
    # repr below ('a' [11], 'a' [16], EOF [22]) only hold with three blanks
    # before the fourth 'a' and two blanks before the fifth one.
    parsed = parser.parse("a, a , a   a ,  a,a, a")

    assert str(parsed) == \
        "a | , | a | , | a | a | , | a | , | a | , | a | "
    assert repr(parsed) == \
        "[ 'a' [0], ',' [1], 'a' [3], ',' [5], 'a' [7], "\
        "'a' [11], ',' [13], 'a' [16], ',' [17], 'a' [18], ',' [19],"\
        " 'a' [21], EOF [22] ]"

    # Empty input is valid for ZeroOrMore.
    parsed = parser.parse("")
    assert str(parsed) == ""
    assert repr(parsed) == "[ EOF [0] ]"

    # Items without any separator must parse without raising.
    parser.parse("aa a")

    with pytest.raises(NoMatch):
        parser.parse(",a,a ,a")

    with pytest.raises(NoMatch):
        parser.parse("a,a ,a,")

    with pytest.raises(NoMatch):
        parser.parse("bbb")
def test_non_optional_precedence():
    """
    Test that all tried match at position are reported.

    Both scenarios must end in NoMatch; a parse that unexpectedly succeeds
    fails the test instead of letting it pass silently.
    """
    def grammar():
        return Optional('a'), 'b'

    parser = ParserPython(grammar)

    try:
        parser.parse('c')
    except NoMatch as e:
        assert "Expected 'a' or 'b'" in str(e)
    else:
        raise AssertionError("NoMatch was not raised for the sequence grammar")

    def grammar():
        return ['b', Optional('a')]

    parser = ParserPython(grammar)

    try:
        parser.parse('c')
    except NoMatch as e:
        assert "Expected 'b'" in str(e)
    else:
        raise AssertionError("NoMatch was not raised for the choice grammar")
def try_parsing(self, parser: "ParserPython", text_to_parse: str):
    """Fail the current test when *parser* cannot parse *text_to_parse*.

    Any ordinary exception raised by ``parser.parse`` is turned into a test
    failure through ``self.fail``.  Interpreter-level signals such as
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught, so
    a test run can still be interrupted.
    """
    # noinspection PyBroadException
    try:
        parser.parse(text_to_parse)
    except Exception:
        self.fail("Could not parse {0} using the {1} parser!".format(
            text_to_parse, parser.__class__))
def test_reduce_tree():
    """Compare the parse tree shape with ``reduce_tree`` off and on."""
    source_text = "34 a 3 3 b 3 b"

    # Without reduction every rule keeps its own node.
    full_tree = ParserPython(grammar, reduce_tree=False).parse(source_text)
    first_node = full_tree[0]
    assert first_node.rule_name == 'first'
    assert isinstance(first_node, NonTerminal)
    assert full_tree[3].rule_name == 'first'
    assert first_node[0].rule_name == 'fourth'
    # Check reduction for direct OrderedChoice
    assert full_tree[2][0].rule_name == 'third'

    # With reduction the same positions hold the inner nodes directly.
    reduced_tree = ParserPython(grammar, reduce_tree=True).parse(source_text)
    first_node = reduced_tree[0]
    assert first_node.rule_name == 'fourth'
    assert isinstance(first_node, Terminal)
    assert reduced_tree[3].rule_name == 'fourth'
    # Check reduction for direct OrderedChoice
    assert reduced_tree[2][0].rule_name == 'third_str'
def test_nondeterministic_unordered_group():
    """Parsing through an UnorderedGroup must give the same outcome each run.

    The parser is rebuilt and the same input parsed 100 times; every single
    attempt is expected to end in NoMatch.
    """
    def root():
        return 'word1', UnorderedGroup(some_rule, 'word2', some_rule), EOF

    def some_rule():
        return Optional('word2'), Optional('word3')

    content = '''word1 word2 '''

    # If the 'word2' from the unordered group in `root` matched first the
    # input would parse, otherwise it fails.  Matching iterates from left
    # to right and repeats until all rules of the group succeed, so the
    # outcome must not vary between runs.
    outcomes = []
    for _ in range(100):
        try:
            ParserPython(root).parse(content)
        except NoMatch:
            outcomes.append(False)
        else:
            outcomes.append(True)

    assert outcomes.count(False) == 100
def test_combine_python():
    """Combine turns its sub-expression into one whitespace-sensitive Terminal."""
    # This will result in NonTerminal node
    def root():
        return my_rule(), "."

    # This will result in Terminal node
    def my_rule():
        return Combine(ZeroOrMore("a"), OneOrMore("b"))

    parser = ParserPython(root)

    compact_text = "abbb."
    # Whitespaces are preserved in lexical rules so the following input
    # should not be recognized.
    spaced_text = "a b bb."

    tree = parser.parse(compact_text)

    with pytest.raises(NoMatch):
        parser.parse(spaced_text)

    assert isinstance(tree, NonTerminal)
    combined = tree[0]
    assert isinstance(combined, Terminal)
    assert combined.value == "abbb"
def test_zeroormore_with_separator():
    """ZeroOrMore with a ',' separator rejects items that lack the separator."""
    def grammar():
        return ZeroOrMore(['a', 'b'], sep=','), EOF

    parser = ParserPython(grammar, reduce_tree=False)

    tree = parser.parse('a, b, b, b, a')
    assert tree

    # The last two items are not separated by a comma.
    with pytest.raises(NoMatch):
        parser.parse('a, b a')
def test_rrel_basic_parser1():
    """Each sample RREL expression parses into exactly two top-level nodes."""
    parser = ParserPython(rrel_standalone)

    expressions = (
        "^pkg*.cls",
        "obj.ref.~extension *.methods",
        "instance.(type.vals)*",
    )
    for expression in expressions:
        tree = parser.parse(expression)
        # always true (one path, one EOF)
        assert len(tree) == 2
def test_default_action_disabled():
    """Build the ASG with ``defaults=False`` and check the module-level flags.

    NOTE(review): ``p_removed``, ``number_str`` and ``parse_tree_node`` are
    defined outside this function; presumably semantic actions set them.
    """
    asg_parser = ParserPython(grammar)
    asg_parser.parse('(-34) strmatch')
    asg_parser.getASG(defaults=False)

    assert not p_removed
    assert not number_str
    assert parse_tree_node
class MultipassParser(AParser):
    """Parser that tries the statement grammar first, then the free-text one."""

    def __init__(self):
        self.stmtparser = ParserPython(grammar.statement,
                                       skipws=False, debug=False)
        self.textparser = ParserPython(grammar.freetext,
                                       skipws=False, debug=False)
        self.visitor = document.TreeVisitor(self)

    def parse(self, text):
        """Parse the beginning of *text* and return ``(node, remainder)``.

        The statement parser is tried first and the free-text parser second.
        When neither matches, the whole input is returned as one text node.
        The remainder is the unconsumed tail of *text*, or ``None`` when
        nothing is left to hand back.
        """
        # First pass: a statement, transformed through the tree visitor.
        try:
            parse_tree = self.stmtparser.parse(text)
            node = self.__statement_transform(parse_tree)
            node_length = parse_tree.position_end
            return node, text[node_length:]
        except arpeggio.NoMatch:
            pass

        # Second pass: free text up to whatever the free-text grammar accepts.
        try:
            parse_tree = self.textparser.parse(text)
            if parse_tree is None:
                return {
                    'type': 'skip',
                    'text': text
                }, None
            text_node = self.__free_text_transform(parse_tree)
            text_length = text_node["length"]
            return text_node, text[text_length:]
        except arpeggio.NoMatch:
            pass

        # Fallback: neither grammar matched, so keep everything as plain text.
        return {
            'type': 'text',
            'length': len(text),
            'text': text
        }, None

    def __statement_transform(self, parse_tree):
        """Run the tree visitor over a statement parse tree."""
        return arpeggio.visit_parse_tree(parse_tree, self.visitor)

    def __free_text_transform(self, parse_tree):
        """Wrap the string form of a free-text parse tree in a text node dict."""
        raw = str(parse_tree)
        node = dict(type="text", kind="text")
        node["text"] = raw
        node["length"] = len(raw)
        return node
def test_sequence_of_nots():
    """
    A failing Not inside a sequence of Not rules is reported as
    unexpected input.
    """
    def grammar():
        return Not('one'), Not('two'), _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as excinfo:
        parser.parse(' two ident')

    message = str(excinfo.value)
    assert "Not expected input" in message
def test_not_match_as_alternative():
    """
    A Not that is one branch of an OrderedChoice must not show up in the
    error message.
    """
    def grammar():
        return ['one', Not('two')], _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as excinfo:
        parser.parse(' three ident')

    message = str(excinfo.value)
    assert "Expected 'one' at " in message
def test_not_match_as_alternative():
    """
    Test that Not is not reported if a part of OrderedChoice.
    """
    def grammar():
        return ['one', Not('two')], _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('   three ident')

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    assert "Expected 'one' at " in str(e.value)
def test_optional_no_error():
    """
    Parsing 'c' against ``Optional('a'), 'b'`` must raise NoMatch whose
    message contains "Expected 'b'".
    """
    def grammar():
        return Optional('a'), 'b'

    parser = ParserPython(grammar)

    try:
        parser.parse('c')
    except NoMatch as error:
        assert "Expected 'b'" in str(error)
    else:
        # Reaching this branch means the parse wrongly succeeded.
        assert False
def test_not_match_at_beginning():
    """
    A Not predicate failing at the very start of the input is reported as
    unexpected input.
    """
    def grammar():
        return Not('one'), _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as excinfo:
        parser.parse(' one ident')

    message = str(excinfo.value)
    assert "Not expected input" in message
def test_file_name_reporting():
    """
    Test that if parser has file name set it will be reported.

    The parse must raise NoMatch; a parse that unexpectedly succeeds fails
    the test instead of letting it pass silently.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    parser = ParserPython(grammar)

    try:
        # Three leading blanks on the third line put 'c' at column 6, which
        # is the position asserted below.
        parser.parse("\n\n   a c", file_name="test_file.peg")
    except NoMatch as e:
        assert "Expected 'b' at test_file.peg:(3, 6)" in str(e)
    else:
        raise AssertionError("NoMatch was not raised")
def test_oneormore_with_ordered_choice_separator():
    """OneOrMore accepts an ordered choice (',' or ';') as its separator."""
    def grammar():
        return OneOrMore(['a', 'b'], sep=[',', ';']), EOF

    parser = ParserPython(grammar, reduce_tree=False)

    tree = parser.parse('a, a; a, b, a; a')
    assert tree

    # A missing separator and an unknown separator are both rejected.
    for rejected in ('a, b a', 'a, b: a'):
        with pytest.raises(NoMatch):
            parser.parse(rejected)
def test_file_name_reporting():
    """
    Test that if parser has file name set it will be reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        # Three leading blanks on the third line put 'c' at column 6, which
        # is the position asserted below.
        parser.parse("\n\n   a c", file_name="test_file.peg")

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    assert "Expected 'b' at position test_file.peg:(3, 6)" in str(e.value)
    assert (e.value.line, e.value.col) == (3, 6)
def test_sequence_of_nots():
    """
    Test that sequence of Not rules is handled properly.
    """
    def grammar():
        return Not('one'), Not('two'), _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('   two ident')

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    assert "Not expected input" in str(e.value)
def test_file_name_reporting():
    """
    Test that if parser has file name set it will be reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        # Three leading blanks on the third line put 'c' at column 6, which
        # is the position asserted below.
        parser.parse("\n\n   a c", file_name="test_file.peg")

    assert "Expected 'b' at position test_file.peg:(3, 6)" in str(e.value)
    assert (e.value.line, e.value.col) == (3, 6)
def test_alternative_added():
    """
    Test that matches from alternative branches at the same position are
    reported.
    """
    def grammar():
        return ['one', 'two'], _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        # Three leading blanks put 'three' at column 4, which is the
        # position asserted below.
        parser.parse('   three ident')

    assert "Expected 'one' or 'two'" in str(e.value)
    assert (e.value.line, e.value.col) == (1, 4)
def test_comment_matching_not_reported():
    """
    Test that matching of comments is not reported.

    The parse must raise NoMatch; a parse that unexpectedly succeeds fails
    the test instead of letting it pass silently.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    # Raw string: same pattern text as before, but without relying on the
    # invalid '\/' escape of a normal string literal.
    def comments():
        return _(r'\/\/.*$')

    parser = ParserPython(grammar, comments)

    try:
        parser.parse('\n\n a // This is a comment \n c')
    except NoMatch as e:
        assert "Expected 'b' at position (4, 2)" in str(e)
    else:
        raise AssertionError("NoMatch was not raised")
def test_optional_no_error():
    """
    A failed optional match must not be listed in the NoMatch error; only
    the mandatory 'b' is expected in the message.
    """
    def grammar():
        return Optional('a'), 'b'

    parser = ParserPython(grammar)

    try:
        parser.parse('c')
    except NoMatch as error:
        message = str(error)
        assert "Expected 'b'" in message
    else:
        # Reaching this branch means the parse wrongly succeeded.
        assert False
def test_comment_matching_not_reported():
    """
    Test that matching of comments is not reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    # Raw string: same pattern text as before, but without relying on the
    # invalid '\/' escape of a normal string literal.
    def comments():
        return _(r'\/\/.*$')

    parser = ParserPython(grammar, comments)

    with pytest.raises(NoMatch) as e:
        parser.parse('\n\n a // This is a comment \n c')

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    assert "Expected 'b' at position (4, 2)" in str(e.value)
    assert (e.value.line, e.value.col) == (4, 2)
def test_optional_inside_zeroormore():
    """
    Optional nested in ZeroOrMore.

    Optional always succeeds, which would make ZeroOrMore retry the match
    forever; the parse is expected to end with NoMatch instead.
    """
    def grammar():
        return ZeroOrMore(Optional('a'))

    repeating_parser = ParserPython(grammar)

    # This could lead to infinite loop
    with pytest.raises(NoMatch):
        repeating_parser.parse('b')
def test_not_match_at_beginning():
    """
    Test that matching of Not ParsingExpression is not reported in the
    error message.
    """
    def grammar():
        return Not('one'), _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('   one ident')

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    assert "Not expected input" in str(e.value)
def test_alternative_added():
    """
    Test that matches from alternative branches at the same position are
    reported.
    """
    def grammar():
        return ['one', 'two'], _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        # Three leading blanks put 'three' at column 4, which is the
        # position asserted below.
        parser.parse('   three ident')

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    assert "Expected 'one' or 'two'" in str(e.value)
    assert (e.value.line, e.value.col) == (1, 4)
def test_compound_not_match():
    """
    Test a more complex Not match error reporting.
    """
    def grammar():
        return [Not(['two', 'three']), 'one', 'two'], _(r'\w+')

    parser = ParserPython(grammar)

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    with pytest.raises(NoMatch) as e:
        parser.parse('   three ident')
    assert "Expected 'one' or 'two' at" in str(e.value)

    with pytest.raises(NoMatch) as e:
        parser.parse('   four ident')
    assert "Expected 'one' or 'two' at" in str(e.value)
def test_optional_inside_zeroormore():
    """
    Optional nested in ZeroOrMore, followed by EOF.

    Optional always succeeds, which would make ZeroOrMore retry the match
    forever; the parse is expected to end with NoMatch instead.
    """
    def grammar():
        return ZeroOrMore(Optional('a')), EOF

    repeating_parser = ParserPython(grammar)

    # This could lead to infinite loop
    with pytest.raises(NoMatch):
        repeating_parser.parse('b')
def parse_tree():
    """Return the parse tree of a three-word sequence over a sample text."""
    def grammar():
        return ("first", "second", "third")

    sample_text = " first \n\n second third"
    return ParserPython(grammar).parse(sample_text)
def test_direct_rule_call():
    '''
    Regression test: with a direct rule call the semantic action used to be
    attached to both the calling and the called rule.
    '''
    def grammar():
        return rule1, rule2

    def rule1():
        return "a"

    def rule2():
        return rule1

    # One-element list so the nested class can mutate the counter.
    call_count = [0]

    class DummySemAction(SemanticAction):
        def first_pass(self, parser, node, nodes):
            call_count[0] = call_count[0] + 1
            return SemanticAction.first_pass(self, parser, node, nodes)

    # Sem action is attached to rule2 only but
    # this bug will attach it to rule1 also resulting in
    # wrong call count.
    rule2.sem = DummySemAction()

    parser = ParserPython(grammar)
    parser.parse("aa")
    parser.getASG()

    assert call_count[0] == 1, "Semantic action should be called once!"
def parse(file, enc):
    """Read *file* using encoding *enc*, parse it and return the visitor result.

    The text is parsed with the ``segnetics_file`` grammar (tree reduction
    enabled) and the parse tree is processed by ``SegneticsVisitor``.
    """
    with codecs.open(file, "r", encoding=enc) as handle:
        text = handle.read()

    raw_tree = ParserPython(segnetics_file, reduce_tree=True).parse(text)
    return visit_parse_tree(raw_tree, SegneticsVisitor())
def parse_string(self, src, grammar=program, filename=None):
    """Compile the source text *src* and append the result to ``self.output``.

    The parser and *filename* are pushed onto the context stacks for the
    duration of the call and popped again afterwards; ``self.input_sources``
    is restored to its previous value as well.

    Returns True on success and False when an error was found or reported.
    """
    previous_sources = self.input_sources
    self.context.optimization_level = self.optimization_level
    self.input_sources = src

    parser = ParserPython(
        grammar,
        comment_def=comment,
        skipws=True,
        reduce_tree=False,
        memoization=True,
        debug=False,
    )
    self.context.parsers.append(parser)
    self.context.filenames.append(filename)

    try:
        raw_tree = parser.parse(self.input_sources)
        visitor = MuvVisitor(debug=False)
        visitor.muvparser = self
        syntax_tree = visit_parse_tree(raw_tree, visitor)
        generated = syntax_tree.generate_code(self.context)
        if self.error_found:
            return False

        # Only the outermost source (a single entry on the filename stack)
        # gets the banner comment in front of the output.
        if len(self.context.filenames) == 1:
            current_name = self.context.filenames[-1]
            if current_name:
                origin = " from {0}".format(current_name)
            else:
                origin = ''
            banner = (
                "( Generated{0} by the MUV compiler. )\n"
                "( https://github.com/revarbat/pymuv )\n"
                "{1}\n"
            )
            self.output = banner.format(origin, self.output)

        self.output += generated

        # Likewise only the outermost source is wrapped in editor commands.
        if (
            not self.error_found
            and len(self.context.filenames) == 1
            and self.wrapper_program
        ):
            wrapper = (
                "@program {0}\n"
                "1 99999 d\n"
                "1 i\n"
                "{1}\n"
                ".\n"
                "c\n"
                "q\n"
            )
            self.output = wrapper.format(self.wrapper_program, self.output)
        return True
    except MuvError as muv_error:
        line, col = parser.pos_to_linecol(muv_error.position)
        self.print_error(filename, line, col, str(muv_error))
        return False
    except NoMatch as no_match:
        line, col = parser.pos_to_linecol(no_match.position)
        expected = self.simplify_parse_error(no_match)
        self.print_error(filename, line, col, "Expected %s" % expected)
        return False
    finally:
        # Undo the pushes done above, whatever the outcome.
        self.input_sources = previous_sources
        self.context.parsers.pop()
        self.context.filenames.pop()
def test_memoization_nomatch(capsys: FixtureRequest) -> None:
    """
    A match that already failed at a position must be served from the cache
    when it is tried again at the same position.
    """
    def grammar() -> List[Any]:
        return [(rule1, ruleb), [rule1, rulec]]

    def rule1() -> Tuple[Any, ...]:
        return rulea, ruleb

    def rulea() -> str:
        return "a"

    def ruleb() -> str:
        return "b"

    def rulec() -> str:
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)
    parser.parse("c")

    # The debug trace on stdout names the cached rule and position.
    debug_output = capsys.readouterr()[0]
    assert "Cache hit for [rule1=Sequence, 0] = '0'" in debug_output
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
def parse(content: str) -> 'Literal':
    """Parse *content* with the ``literal`` grammar and visit the result.

    The grammar rules are imported inside the function; the parse tree is
    processed by ``FoilVisitor`` and that result is returned.
    """
    from foil.language.grammar import literal
    from foil.language.grammar import comment

    literal_parser = ParserPython(literal, comment_def=comment)
    return visit_parse_tree(literal_parser.parse(content), FoilVisitor())
def test_memoization_positive(capsys):
    '''
    A rule that already matched at a position must be served from the cache
    when it is tried again at the same position.

    Args:
        capsys - pytest fixture for output capture
    '''
    def grammar():
        return [(rule1, ruleb), (rule1, rulec)]

    def rule1():
        return rulea, ruleb

    def rulea():
        return "a"

    def ruleb():
        return "b"

    def rulec():
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)

    # In the first sequence rule1 matches but the following ruleb fails.
    # The second sequence then tries rule1 at the same location, so its
    # result should come from the cache.
    parser.parse("a b c")

    # Assert that cached result is used
    debug_output = capsys.readouterr()[0]
    assert "Cache hit" in debug_output
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
def parse(content: str) -> 'Atom':
    """Parse *content* with the ``atom`` grammar and visit the result.

    The grammar rules are imported inside the function; the parse tree is
    processed by ``FoilVisitor`` and that result is returned.
    """
    from foil.language.grammar import atom
    from foil.language.grammar import comment

    atom_parser = ParserPython(atom, comment_def=comment)
    return visit_parse_tree(atom_parser.parse(content), FoilVisitor())
def test_direct_rule_call() -> None:
    """
    Regression test: with a direct rule call the semantic action used to be
    attached to both the calling and the called rule.
    """
    def grammar():
        return rule1, rule2

    def rule1():
        return "a"

    def rule2():
        return rule1

    # One-element list so the nested class can mutate the counter.
    call_count = [0]

    class DummySemAction(SemanticAction):
        def first_pass(self, parser, node, nodes):
            call_count[0] = call_count[0] + 1
            return super().first_pass(parser, node, nodes)

    # Sem action is attached to rule2 only but
    # this bug will attach it to rule1 also resulting in
    # wrong call count.
    rule2.sem = DummySemAction()  # type: ignore

    parser = ParserPython(grammar)
    parser.parse("aa")
    parser.getASG()

    assert call_count[0] == 1, "Semantic action should be called once!"
def test_skipws():
    """
    With skipws enabled whitespace between matches is skipped; with it
    disabled the same input is rejected.
    """
    def grammar():
        return ("one", "two", "three")

    text = "one two three"

    # If skipws is on this should parse without error.
    ParserPython(grammar).parse(text)

    # If not the same input will raise exception.
    strict_parser = ParserPython(grammar, skipws=False)
    with pytest.raises(NoMatch):
        strict_parser.parse(text)
def test_with_some_words_2(self):
    """Words followed by a newline and a blank line parse into three elements."""
    doc_parser = ParserPython(document, skipws=False)

    text = f"{self.words1}\n\n"
    parsed = doc_parser.parse(text)

    def wrapped(rule, value):
        # One `element` node holding a single terminal of the given rule.
        return NonTerminal(element(), [Terminal(rule(), 0, value)])

    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            wrapped(words, self.words1),
            wrapped(newline, '\n'),
            wrapped(blank_line, '\n'),
        ]),
        Terminal(EOF(), 0, ''),
    ])

    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def main(text):
    """Parse *text* with a small arithmetic-expression grammar.

    The grammar is defined locally; the resulting parse tree is not used
    and the function returns None.
    """
    from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF
    from arpeggio import RegExMatch as _
    from arpeggio import ParserPython

    def number():
        return _(r'\d*\.\d*|\d+')

    def factor():
        return Optional(["+", "-"]), [number, ("(", expression, ")")]

    def term():
        return factor, ZeroOrMore(["*", "/"], factor)

    def expression():
        return term, ZeroOrMore(["+", "-"], term)

    def calc():
        return OneOrMore(expression), EOF

    # calc is the root rule of the grammar.  Pass debug=True for verbose
    # debugging messages and for grammar and parse tree visualization
    # using graphviz and dot.
    calc_parser = ParserPython(calc)
    calc_parser.parse(text)
def test_comment_matching_not_reported():
    """
    The comment rule must not appear in the error; only the expected 'b'
    and its position are reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    def comments():
        return _(r'//.*$')

    parser = ParserPython(grammar, comments)

    with pytest.raises(NoMatch) as excinfo:
        parser.parse('\n\n a // This is a comment \n c')

    error = excinfo.value
    assert "Expected 'b' at position (4, 2)" in str(error)
    assert (error.line, error.col) == (4, 2)
def test_compound_not_match():
    """
    Error reporting for a Not predicate that wraps a choice and is itself
    an alternative of an ordered choice.
    """
    def grammar():
        return [Not(['two', 'three']), 'one', 'two'], _(r'\w+')

    parser = ParserPython(grammar)

    # Both inputs must produce the same "expected" list in the message.
    for text in (' three ident', ' four ident'):
        with pytest.raises(NoMatch) as excinfo:
            parser.parse(text)
        assert "Expected 'one' or 'two' at" in str(excinfo.value)
def test_optional_with_better_match():
    """
    Test that optional match that has gone further in the input stream
    has precedence over non-optional.
    """
    def grammar():
        return [first, Optional(second)]

    def first():
        return 'one', 'two', 'three', '4'

    def second():
        return 'one', 'two', 'three', 'four', 'five'

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('one two three four 5')

    # `e` is pytest's ExceptionInfo wrapper; the parser's message lives on
    # the wrapped exception, so stringify `e.value` rather than `e`.
    assert "Expected 'five'" in str(e.value)
    assert (e.value.line, e.value.col) == (1, 20)
def no_memoization():
    """Parse two test input files with memoization switched off.

    The second file is double the size of the first one.  Nothing is
    returned; the parse results are discarded.
    """
    parser = ParserPython(rhapsody, memoization=False)
    inputs_dir = join(dirname(__file__), 'test_inputs')

    # Smaller file first, then the one that is double in size.
    for model_name in ('LightSwitch.rpy', 'LightSwitchDouble.rpy'):
        model_path = join(inputs_dir, model_name)
        with codecs.open(model_path, "r", encoding="utf-8") as stream:
            content = stream.read()
        parser.parse(content)
def test_optional_with_better_match():
    """
    Test that optional match that has gone further in the input stream
    has precedence over non-optional.

    The parse must raise NoMatch; a parse that unexpectedly succeeds fails
    the test instead of letting it pass silently.
    """
    def grammar():
        return [first, Optional(second)]

    def first():
        return 'one', 'two', 'three', '4'

    def second():
        return 'one', 'two', 'three', 'four', 'five'

    parser = ParserPython(grammar)

    try:
        parser.parse('one two three four 5')
    except NoMatch as e:
        assert "Expected 'five'" in str(e)
    else:
        raise AssertionError("NoMatch was not raised")
def test_and():
    """The And predicate must match ahead without consuming any input."""
    def grammar():
        return "a", And("b"), ["c", "b"], EOF

    parser = ParserPython(grammar)

    tree = parser.parse("ab")
    assert str(tree) == "a | b | "
    assert repr(tree) == "[ 'a' [0], 'b' [1], EOF [2] ]"

    # 'And' will try to match 'b' and fail so 'c' will never get matched
    with pytest.raises(NoMatch):
        parser.parse("ac")

    # 'And' will not consume 'b' from the input so second 'b' will never match
    with pytest.raises(NoMatch):
        parser.parse("abb")
def test_parse_input():
    """Parse one arithmetic expression and check the tree's str and repr."""
    expression = "4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)"
    expected_str = "4 | + | 5 | * | 7 | / | 3.45 | * | - | 45 | * | ( | 2.56 | + | 32 | ) | / | - | 56 | * | ( | 2 | - | 1.34 | ) | "
    expected_repr = "[ [ [ [ number '4' [0] ] ], '+' [1], [ [ number '5' [2] ], '*' [3], [ number '7' [4] ], '/' [5], [ number '3.45' [6] ], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ [ [ number '2.56' [16] ] ], '+' [20], [ [ number '32' [21] ] ] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ [ [ number '2' [30] ] ], '-' [31], [ [ number '1.34' [32] ] ] ], ')' [36] ] ] ], EOF [37] ]"

    tree = ParserPython(calc).parse(expression)

    assert isinstance(tree, NonTerminal)
    assert str(tree) == expected_str
    assert repr(tree) == expected_repr
def test_not():
    """The Not predicate must reject its expression without consuming input."""
    def grammar():
        return "a", Not("b"), ["b", "c"], EOF

    parser = ParserPython(grammar)

    tree = parser.parse("ac")
    assert str(tree) == "a | c | "
    assert repr(tree) == "[ 'a' [0], 'c' [1], EOF [2] ]"

    # Not fails because 'b' follows the 'a'.
    with pytest.raises(NoMatch):
        parser.parse("ab")

    # Not consumes nothing, 'c' is matched by the choice and the trailing
    # 'b' is left over, so EOF cannot match.
    with pytest.raises(NoMatch):
        parser.parse("acb")
def test_zero_or_more():
    """ZeroOrMore matches any number of items, including none at all."""
    def grammar():
        return ZeroOrMore("a"), EOF

    parser = ParserPython(grammar)

    many = parser.parse("aaaaaaa")
    assert str(many) == "a | a | a | a | a | a | a | "
    assert repr(many) == "[ 'a' [0], 'a' [1], 'a' [2], 'a' [3], 'a' [4], 'a' [5], 'a' [6], EOF [7] ]"

    # Empty input leaves only the EOF node.
    none = parser.parse("")
    assert str(none) == ""
    assert repr(none) == "[ EOF [0] ]"

    with pytest.raises(NoMatch):
        parser.parse("bbb")
def test_sequence():
    """A tuple grammar is a sequence: all items must match in order."""
    def grammar():
        return ("a", "b", "c")

    tree = ParserPython(grammar).parse("a b c")

    assert str(tree) == "a | b | c"
    assert repr(tree) == "[ 'a' [0], 'b' [2], 'c' [4] ]"