def test_reduce_tree():
    """
    Check that ``reduce_tree=True`` collapses single-child non-terminals
    into their children, while ``reduce_tree=False`` keeps the full tree.
    """
    # FIX: renamed from `input`, which shadowed the builtin of the same name.
    test_input = "34 a 3 3 b 3 b"

    parser = ParserPython(grammar, reduce_tree=False)
    result = parser.parse(test_input)
    # PTDOTExporter().exportFile(result, 'test_reduce_tree_pt.dot')

    # Unreduced tree keeps intermediate 'first' non-terminals.
    assert result[0].rule_name == 'first'
    assert isinstance(result[0], NonTerminal)
    assert result[3].rule_name == 'first'
    assert result[0][0].rule_name == 'fourth'
    # Check reduction for direct OrderedChoice
    assert result[2][0].rule_name == 'third'

    parser = ParserPython(grammar, reduce_tree=True)
    result = parser.parse(test_input)
    # PTDOTExporter().exportFile(result, 'test_reduce_tree_pt.dot')

    # Reduced tree promotes the single 'fourth' terminal in place of 'first'.
    assert result[0].rule_name == 'fourth'
    assert isinstance(result[0], Terminal)
    assert result[3].rule_name == 'fourth'
    # Check reduction for direct OrderedChoice
    assert result[2][0].rule_name == 'third_str'
def test_non_optional_precedence():
    """
    Test that all tried matches at a position are reported.
    """
    def grammar():
        return Optional('a'), 'b'

    parser = ParserPython(grammar)
    try:
        parser.parse('c')
    except NoMatch as e:
        assert "Expected 'a' or 'b'" in str(e)
    else:
        # FIX: the test previously passed silently when parsing
        # unexpectedly succeeded and NoMatch was never raised.
        raise AssertionError("NoMatch not raised for input 'c'")

    def grammar():
        return ['b', Optional('a')]

    parser = ParserPython(grammar)
    try:
        parser.parse('c')
    except NoMatch as e:
        assert "Expected 'b'" in str(e)
    else:
        raise AssertionError("NoMatch not raised for input 'c'")
def parse(content: str) -> 'Atom':
    """Parse *content* as a FOIL atom and return the visited result."""
    from foil.language.grammar import atom, comment

    tree = ParserPython(atom, comment_def=comment).parse(content)
    return visit_parse_tree(tree, FoilVisitor())
def parse(content: str) -> 'Literal':
    """Parse *content* as a FOIL literal and return the visited result."""
    from foil.language.grammar import literal, comment

    tree = ParserPython(literal, comment_def=comment).parse(content)
    return visit_parse_tree(tree, FoilVisitor())
def test_memoization_positive(capsys):
    '''
    Test that already matched rule is found in the cache on
    subsequent matches.

    Args:
        capsys: pytest fixture for output capture.
    '''
    def grammar():
        return [(rule1, ruleb), (rule1, rulec)]

    def rule1():
        return rulea, ruleb

    def rulea():
        return "a"

    def ruleb():
        return "b"

    def rulec():
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)

    # Parse input where rule1 will match but ruleb will fail.
    # The second sequence will try rule1 again at the same location
    # and the result should be found in the cache.
    # FIX: dropped the unused `parse_tree` binding.
    parser.parse("a b c")

    # Assert that the cached result is used.
    assert "Cache hit" in capsys.readouterr()[0]
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
def test_memoization_nomatch(capsys):
    '''
    Test that already failed match is found in the cache on
    subsequent matches.

    Args:
        capsys: pytest fixture for output capture.
    '''
    def grammar():
        return [(rule1, ruleb), [rule1, rulec]]

    def rule1():
        return rulea, ruleb

    def rulea():
        return "a"

    def ruleb():
        return "b"

    def rulec():
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)
    # FIX: dropped the unused `parse_tree` binding.
    parser.parse("c")

    # The second alternative retries rule1 at position 0 and must hit
    # the cached failure.
    assert "Cache hit for [rule1=Sequence, 0] = '0'" in capsys.readouterr()[0]
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
def test_02_text_line_pair(self):
    """Two consecutive text lines parse into a body of two text_line nodes."""
    def body():
        return OneOrMore([text_line], rule_name='body')

    def document():
        return Sequence((body, EOF), rule_name='document')

    # print('\n: document') ; pp(document())

    # skipws=False: newlines are significant in this grammar.
    parser = ParserPython(document, skipws=False)

    text = self.line1 + self.line2
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)
    # # print('\n: flatten') ; pp(flatten(parsed))

    # Build the expected parse tree by hand: each line is its words
    # (line text minus the trailing '\n') followed by a newline terminal.
    p_newline = Terminal(newline(), 0, '\n')
    p_l1_words = Terminal(words(), 0, self.line1[:-1])
    p_l1_text_line = NonTerminal(text_line(), [p_l1_words, p_newline])
    p_l2_words = Terminal(words(), 0, self.line2[:-1])
    p_l2_text_line = NonTerminal(text_line(), [p_l2_words, p_newline])
    p_body = NonTerminal(body(), [p_l1_text_line, p_l2_text_line])
    p_eof = Terminal(EOF(), 0, '')
    expect = NonTerminal(document(), [p_body, p_eof])
    # print('\n: expect') ; pp(expect)

    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}"
    )
def test_memoization_nomatch(capsys: FixtureRequest) -> None:
    """
    Test that already failed match is found in the cache on
    subsequent matches.

    Args:
        capsys: pytest fixture for output capture.
    """
    def grammar() -> List[Any]:
        return [(rule1, ruleb), [rule1, rulec]]

    def rule1() -> Tuple[Any, ...]:
        return rulea, ruleb

    def rulea() -> str:
        return "a"

    def ruleb() -> str:
        return "b"

    def rulec() -> str:
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)
    # FIX: dropped the unused `parse_tree` binding.
    parser.parse("c")

    # The second alternative retries rule1 at position 0 and must hit
    # the cached failure.
    assert "Cache hit for [rule1=Sequence, 0] = '0'" in capsys.readouterr()[0]
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
def test_unordered_group_with_separator():
    """UnorderedGroup with a separator matches every member once, in any order."""
    def grammar():
        return UnorderedGroup("a", "b", "c", sep=StrMatch(",")), EOF

    parser = ParserPython(grammar)

    parsed = parser.parse("b, a , c")
    assert str(parsed) == "b | , | a | , | c | "
    assert repr(parsed) == \
        "[ 'b' [0], ',' [1], 'a' [3], ',' [5], 'c' [7], EOF [8] ]"

    # Duplicates, missing members, and stray separators must all fail.
    for bad_input in ("a, b, a, c", "a, c", "b, b, a, c",
                      ",a, b, c", "a, b, c,", "a, ,b, c"):
        with pytest.raises(NoMatch):
            parser.parse(bad_input)
def test_nondeterministic_unordered_group():
    """UnorderedGroup with overlapping alternatives must fail deterministically."""
    def root():
        return 'word1', UnorderedGroup(some_rule, 'word2', some_rule), EOF

    def some_rule():
        return Optional('word2'), Optional('word3')

    content = '''word1 word2 '''

    # If the 'word2' from the unordered group in the `root` rule matched
    # first, the input would parse; otherwise it fails. Parser construction
    # and parsing are repeated many times to verify it fails every time:
    # the fix iterates in order from left to right and repeats matching
    # until all rules in the unordered group succeed.
    failures = 0
    for _ in range(100):
        parser = ParserPython(root)
        try:
            parser.parse(content)
        except NoMatch:
            failures += 1

    assert failures == 100
def test_skipws():
    """
    skipws will skip whitespaces.
    """
    def grammar():
        return ("one", "two", "three")

    # With whitespace skipping enabled (the default) this parses cleanly.
    lenient_parser = ParserPython(grammar)
    lenient_parser.parse("one two three")

    # With skipws disabled the very same input no longer matches.
    strict_parser = ParserPython(grammar, skipws=False)
    with pytest.raises(NoMatch):
        strict_parser.parse("one two three")
def parse(file, enc):
    """Read *file* using encoding *enc*, parse it, and return the visited tree."""
    with codecs.open(file, "r", encoding=enc) as handle:
        source_text = handle.read()

    parser = ParserPython(segnetics_file, reduce_tree=True)
    return visit_parse_tree(parser.parse(source_text), SegneticsVisitor())
def test_unordered_group_with_optionals_and_separator():
    """Optional members of a separated UnorderedGroup may be omitted."""
    def grammar():
        return UnorderedGroup("a", Optional("b"), "c", sep=","), EOF

    parser = ParserPython(grammar)

    # The optional "b" may appear in any position or be absent entirely.
    for good_input in ("b, a, c", "a, c, b", "a, c"):
        assert parser.parse(good_input)

    # Repeated members, missing separators, and dangling separators fail.
    for bad_input in ("a, b, c, b", "a, b ", "a, c, ", "a, b c ", ",a, c "):
        with pytest.raises(NoMatch):
            parser.parse(bad_input)
def test_with_some_words_2(self):
    """Words followed by an empty line parse into words/newline/blank_line elements."""
    # skipws=False: newlines are significant in this grammar.
    parser = ParserPython(document, skipws=False)

    text = f"{self.words1}\n\n"
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)

    # Expected tree: the first '\n' terminates the words (newline element),
    # the second '\n' is an empty line (blank_line element).
    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            NonTerminal(element(), [
                Terminal(words(), 0, self.words1),
            ]),
            NonTerminal(element(), [
                Terminal(newline(), 0, '\n'),
            ]),
            NonTerminal(element(), [
                Terminal(blank_line(), 0, '\n'),
            ]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    # print('\n: expect') ; pp(expect)

    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_zero_or_more_with_optional_separator():
    """ZeroOrMore with an optional separator accepts separated or adjacent items."""
    def grammar():
        return ZeroOrMore("a", sep=RegExMatch(",?")), EOF

    parser = ParserPython(grammar)

    parsed = parser.parse("a, a , a a , a,a, a")
    assert str(parsed) == \
        "a | , | a | , | a | a | , | a | , | a | , | a | "
    assert repr(parsed) == \
        "[ 'a' [0], ',' [1], 'a' [3], ',' [5], 'a' [7], "\
        "'a' [11], ',' [13], 'a' [16], ',' [17], 'a' [18], ',' [19],"\
        " 'a' [21], EOF [22] ]"

    # Zero repetitions are allowed.
    parsed = parser.parse("")
    assert str(parsed) == ""
    assert repr(parsed) == "[ EOF [0] ]"

    # The separator is optional, so adjacent items also match.
    parser.parse("aa a")

    # Leading/trailing separators and foreign input must fail.
    for bad_input in (",a,a ,a", "a,a ,a,", "bbb"):
        with pytest.raises(NoMatch):
            parser.parse(bad_input)
def parse_tree():
    """Parse a whitespace-laden three-word input with a trivial grammar."""
    def grammar():
        return ("first", "second", "third")

    return ParserPython(grammar).parse(" first \n\n second third")
def setUp(self):
    # First call: get defaults; all boolean flags should be False.
    super().setUp()

    # Snapshot the module-level configuration onto the instance.
    global parse_debug, record, analyzing
    self.parse_debug = parse_debug
    self.record = record
    self.analyzing = analyzing

    # quiet, no parse trees displayed
    # self.debug = False
    # show parse tree for pass >= self.debug
    # self.debug = 2
    # Show text being parsed
    # self.show = True

    # Second call: re-run setup so behavior reflects the altered settings.
    super().setUp()

    self.parser = ParserPython(
        grammar,
        reduce_tree=False,
        debug=self.parse_debug,
    )

    # Clear the scratch area before each test run.
    write_scratch(_clean=True)
def test_direct_rule_call() -> None:
    """
    Test regression where in direct rule call semantic action is
    erroneously attached to both caller and callee.
    """
    def grammar():
        return rule1, rule2

    def rule1():
        return "a"

    def rule2():
        return rule1

    invocations = [0]

    class DummySemAction(SemanticAction):
        def first_pass(self, parser, node, nodes):
            invocations[0] += 1
            return SemanticAction.first_pass(self, parser, node, nodes)

    # The action is attached to rule2 only; the regression erroneously
    # attached it to rule1 as well, doubling the observed call count.
    rule2.sem = DummySemAction()  # type: ignore

    parser = ParserPython(grammar)
    parser.parse("aa")
    parser.getASG()

    assert invocations[0] == 1, "Semantic action should be called once!"
def test_05_paragraph_multiple(self):
    """Three paragraphs separated by blank newlines parse into the expected tree."""
    def body():
        return OneOrMore(OrderedChoice([paragraph, newline]), rule_name='body')

    def document():
        return Sequence((body, EOF), rule_name='document')

    # print('\n: document') ; pp(document())

    # skipws=False: newlines are significant in this grammar.
    parser = ParserPython(document, skipws=False)

    # Two text lines plus a blank line, repeated three times.
    text = self.line1 + self.line2 + '\n'
    text = text * 3
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)

    # Hand-built expected tree: each paragraph holds the two text lines,
    # and each paragraph is followed by the separating newline.
    p_newline = Terminal(newline(), 0, '\n')
    p_l1_words = Terminal(words(), 0, self.line1[:-1])
    p_l1_text_line = NonTerminal(text_line(), [p_l1_words, p_newline])
    p_l2_words = Terminal(words(), 0, self.line2[:-1])
    p_l2_text_line = NonTerminal(text_line(), [p_l2_words, p_newline])
    p_paragraph = NonTerminal(paragraph(), [p_l1_text_line, p_l2_text_line])
    p_body = NonTerminal(body(), [
        p_paragraph, p_newline,
        p_paragraph, p_newline,
        p_paragraph, p_newline,
    ])
    p_eof = Terminal(EOF(), 0, '')
    expect = NonTerminal(document(), [p_body, p_eof])
    # print('\n: expect') ; pp(expect)

    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}"
    )
def test_combine_python():
    """Combine yields a single Terminal and keeps whitespace significant inside."""
    def root():
        # This will result in a NonTerminal node.
        return my_rule(), "."

    def my_rule():
        # Combine makes this a lexical rule -> a single Terminal node.
        return Combine(ZeroOrMore("a"), OneOrMore("b"))

    parser = ParserPython(root)

    tree = parser.parse("abbb.")

    # Whitespaces are preserved in lexical rules, so the spaced variant
    # of the same input must not be recognized.
    with pytest.raises(NoMatch):
        parser.parse("a b bb.")

    assert isinstance(tree, NonTerminal)
    assert isinstance(tree[0], Terminal)
    assert tree[0].value == "abbb"
def test_one_or_more_with_optional_separator():
    """OneOrMore with an optional separator accepts separated or adjacent items."""
    def grammar():
        return OneOrMore("a", sep=RegExMatch(",?")), "b"

    parser = ParserPython(grammar)

    parsed = parser.parse("a, a a, a b")
    assert str(parsed) == "a | , | a | a | , | a | b"
    assert repr(parsed) == \
        "[ 'a' [0], ',' [1], 'a' [3], 'a' [6], ',' [7], "\
        "'a' [9], 'b' [12] ]"

    # A single item with no separator is fine.
    parser.parse("a b")

    # At least one "a" is required and separators must sit between items.
    for bad_input in ("", "b", "a a, b", ", a, a b"):
        with pytest.raises(NoMatch):
            parser.parse(bad_input)
def create_parser(**kwargs):
    """
    Build the parser for the `program` grammar.

    Keyword Args:
        debug (bool): enable arpeggio debug output. Defaults to False.

    Returns:
        ParserPython: configured parser (case-insensitive, tab/space ws,
        comments handled, full parse tree kept).
    """
    # FIX: idiomatic lookup-with-default instead of try/except KeyError.
    debug_mode = kwargs.get('debug', False)

    return ParserPython(program, comments,
                        reduce_tree=False,
                        ignore_case=True,
                        ws='\t ',
                        skipws=True,
                        debug=debug_mode)
def convert_trog(inf: str, outf: TextIO) -> None:
    """Parse *inf* with the trogfile grammar, printing the tree or the error."""
    parser = ParserPython(trogfile, skipws=False)
    try:
        # Best-effort: report either the resulting tree or the failure.
        print(parser.parse_file(inf))
    except Exception as err:
        print(err)
def main(text):
    """
    Parse *text* with a small arithmetic-expression grammar.

    Args:
        text (str): the input to parse.

    Returns:
        The arpeggio parse tree (FIX: previously discarded; dead `pass`
        and the unused local removed).
    """
    from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF, ParserPython
    from arpeggio import RegExMatch as _

    def number():
        return _(r'\d*\.\d*|\d+')

    def factor():
        return Optional(["+", "-"]), [number, ("(", expression, ")")]

    def term():
        return factor, ZeroOrMore(["*", "/"], factor)

    def expression():
        return term, ZeroOrMore(["+", "-"], term)

    def calc():
        return OneOrMore(expression), EOF

    # calc is the root rule of the grammar.
    # Use param debug=True for verbose debugging messages and grammar
    # and parse tree visualization using graphviz and dot.
    parser = ParserPython(calc)

    return parser.parse(text)
def main(argv):
    """Parse *argv* as a formula, visit the tree, and return the result."""
    # NOTE: adjacent alternations such as a*|b require brackets: (a*)|b
    parser = ParserPython(formula)  # , debug=True) #, reduce_tree = True)
    visited = visit_parse_tree(parser.parse(argv), formVisitor())
    visited.tostr()
    return visited
def load_from_str(self, content: str) -> 'Builder':
    """Parse *content* as cypher, load the extracted rules, and return self."""
    tree = ParserPython(cypher, comment_def=comment).parse(content)
    knowledge = visit_parse_tree(tree, KnowledgeVisitor())
    self.load_from_base(RuleBase(knowledge['data']))
    return self
def language_from_str(language_def, metamodel):
    """
    Constructs parser and initializes metamodel from language description
    given in textX language.

    Args:
        language_def (str): A language description in textX.
        metamodel (TextXMetaModel): A metamodel to initialize.

    Returns:
        Parser for the new language.
    """
    if metamodel.debug:
        metamodel.dprint("*** PARSING LANGUAGE DEFINITION ***")

    # Check the cache for an already constructed textX parser.
    # NOTE: the cache is keyed by the debug flag, so at most two parser
    # instances ever exist (one debug, one non-debug).
    if metamodel.debug in textX_parsers:
        parser = textX_parsers[metamodel.debug]
    else:
        # Create parser for TextX grammars using
        # the arpeggio grammar specified in this module
        parser = ParserPython(textx_model, comment_def=comment,
                              ignore_case=False,
                              reduce_tree=False,
                              memoization=metamodel.memoization,
                              debug=metamodel.debug,
                              file=metamodel.file)

        # Cache it for subsequent calls
        textX_parsers[metamodel.debug] = parser

    # Parse language description with textX parser,
    # translating arpeggio's NoMatch into a TextXSyntaxError with
    # line/column information.
    try:
        parse_tree = parser.parse(language_def)
    except NoMatch as e:
        line, col = parser.pos_to_linecol(e.position)
        raise TextXSyntaxError(text(e), line, col)

    # Construct new parser and meta-model based on the given language
    # description.
    lang_parser = visit_parse_tree(parse_tree,
                                   TextXVisitor(parser, metamodel))

    # Meta-model is constructed. Validate its semantics.
    metamodel.validate()

    # Here we connect meta-model and language parser for convenience.
    lang_parser.metamodel = metamodel
    metamodel._parser_blueprint = lang_parser

    if metamodel.debug:
        # Create dot file for debugging purposes
        PMDOTExporter().exportFile(
            lang_parser.parser_model,
            "{}_parser_model.dot".format(metamodel.rootcls.__name__))

    return lang_parser
def test_ws():
    """
    ws can be changed per Sequence.
    """
    def grammar():
        return Sequence("one", "two", "three"), "four"

    parser = ParserPython(grammar)

    # By default, ws consists of space, tab and newline
    # So this should parse.
    # NOTE(review): newline placement inside the multi-line inputs below is
    # reconstructed from the surrounding comments — confirm against upstream.
    parser.parse("""one
two three
four""")

    def grammar():
        return Sequence("one", "two", "three", ws=' '), "four"

    parser = ParserPython(grammar)

    # If we change ws per sequence and set it to space only
    # given input will raise exception
    with pytest.raises(NoMatch):
        parser.parse("""one
two three
four""")

    # But ws will be default outside of sequence
    parser.parse("""one two three
four""")

    # Test for ws with more than one char.
    def grammar():
        return Sequence("one", "two", "three", ws=' \t'), "four"

    parser = ParserPython(grammar)

    # If we change ws per sequence and set it to spaces and tabs
    # given input will raise exception
    with pytest.raises(NoMatch):
        parser.parse("one two \nthree \t four")

    # But ws will be default outside of sequence
    parser.parse("one two three \n\t four")

    # Inside sequence a spaces and tabs will be skipped
    parser.parse("one \t two\t three \nfour")
def test_pp_construction():
    '''
    Tests parser construction from python internal DSL description.
    '''
    parser = ParserPython(calc)
    model = parser.parser_model

    # The root of the parser model mirrors the root grammar rule.
    assert model.rule_name == 'calc'
    assert isinstance(model, Sequence)
    assert model.nodes[0].desc == 'OneOrMore'
def __init__(self, graph, platform, cfg, mappingDict=None, debug=False):
    """
    Initialize the constraint/mapping helper.

    Args:
        graph: the application graph to map.
        platform: the target platform description.
        cfg: configuration object.
        mappingDict (dict, optional): initial mapping; a fresh empty dict
            is used when omitted.
        debug (bool): enable arpeggio debug output for the logic parser.
    """
    # FIX: `mappingDict={}` was a mutable default argument shared across
    # all instances; default to None and create a fresh dict per instance.
    if mappingDict is None:
        mappingDict = {}

    self.__graph = graph
    self.__platform = platform
    self.__mappingDict = mappingDict
    # Parser for the logic language; reduce_tree collapses trivial nodes.
    self.__parser = ParserPython(
        Grammar.logicLanguage, reduce_tree=True, debug=debug
    )
    self.__debug = debug
    self.__cfg = cfg