def build_regex_grammar():
    """Create the attributed grammar that describes regular expressions.

    Productions, in registration order (``eps`` is the empty sentence and
    ``"ε"`` the literal epsilon terminal)::

        E -> T X
        X -> | T X   |  eps
        T -> F Y
        Y -> F Y     |  eps
        F -> A Z
        Z -> * Z     |  eps
        A -> symbol  |  ( E )  |  "ε"

    Every production is registered together with its attribute rules, which
    build ``UnionNode``, ``ConcatNode``, ``ClosureNode``, ``SymbolNode`` and
    ``EpsilonNode`` instances.

    Returns:
        The populated ``Grammar``.
    """
    G = Grammar()

    # Non-terminals.
    expr = G.add_nonterminal("E", True)
    term, factor, atom, expr_tail, term_tail, closure_tail = G.add_nonterminals(
        "T F A X Y Z"
    )

    # Terminals.
    bar, kleene, lparen, rparen, sym, eps_token = G.add_terminals(
        "| * ( ) symbol ε"
    )

    # Union level.
    expr %= (term + expr_tail, lambda h, s: s[2], None, lambda h, s: s[1])
    expr_tail %= (
        bar + term + expr_tail,
        lambda h, s: s[3],
        None,
        None,
        lambda h, s: UnionNode(h[0], s[2]),
    )
    expr_tail %= (G.epsilon, lambda h, s: h[0])

    # Concatenation level.
    term %= (factor + term_tail, lambda h, s: s[2], None, lambda h, s: s[1])
    term_tail %= (
        factor + term_tail,
        lambda h, s: s[2],
        None,
        lambda h, s: ConcatNode(h[0], s[1]),
    )
    term_tail %= (G.epsilon, lambda h, s: h[0])

    # Closure level.
    factor %= (atom + closure_tail, lambda h, s: s[2], None, lambda h, s: s[1])
    closure_tail %= (
        kleene + closure_tail,
        lambda h, s: s[2],
        None,
        lambda h, s: ClosureNode(h[0]),
    )
    closure_tail %= (G.epsilon, lambda h, s: h[0])

    # Atoms.
    atom %= (sym, lambda h, s: SymbolNode(s[1]))
    atom %= (lparen + expr + rparen, lambda h, s: s[2], None, None, None)
    atom %= (eps_token, lambda h, s: EpsilonNode(s[1]))

    return G
def test_general_recursion_remove():
    """Check ``remove_left_recursion`` on an indirectly left-recursive grammar.

    Input grammar::

        S -> A b | C
        A -> B a
        B -> S b
        C -> b
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + b
    S %= C
    A %= B + a
    B %= S + b
    C %= b

    rewritten = remove_left_recursion(g)
    _, actual = grammar_to_graph(rewritten)

    expected = {
        "S": [["b"], ["A", "b"]],
        "A": [["B", "a"]],
        "B": [["b", "b", "B'"]],
        "B'": [["a", "b", "b", "B'"], []],
        "C": [["b"]],
    }

    # Both graphs are printed so a failing run shows them side by side.
    print(expected)
    print(actual)
    assert actual == expected
def test_remove_unreachable_prods():
    """Check ``remove_common_prefixes`` on alternatives sharing a prefix.

    Input grammar::

        S -> a
        A -> B a b | B a a | B

    NOTE(review): despite its name this test exercises
    ``remove_common_prefixes``, and another function with the same name is
    defined later in this source; if both live in one module the later
    definition replaces this one and this test is never collected.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= a
    A %= B + a + b
    A %= B + a + a
    A %= B

    factored = remove_common_prefixes(g)
    _, actual = grammar_to_graph(factored)

    expected = {
        "S": [["a"]],
        "A": [["B", "A''"]],
        "A'": [["b"], ["a"]],
        "A''": [[], ["a", "A'"]],
    }

    print(actual)
    print(expected)
    assert actual == expected
def test_epsilon_remove():
    """Check ``__remove_epsilon_productions`` on two related grammars.

    Case 1: only ``A`` derives epsilon.
    Case 2: both ``A`` and ``B`` derive epsilon, so ``S`` itself gains an
    empty alternative in the expected graph.
    """
    # --- epsilon test #1 -------------------------------------------------
    g1 = Grammar()
    S = g1.add_nonterminal("S", True)
    A, B, C = g1.add_nonterminals("A B C")
    a, b = g1.add_terminals("a b")

    S %= A + B
    S %= C
    A %= b + A + b
    A %= g1.epsilon
    B %= b
    C %= a
    C %= b

    _, actual = grammar_to_graph(__remove_epsilon_productions(g1))
    expected = {
        "S": [["A", "B"], ["C"], ["B"]],
        "A": [["b", "A", "b"], ["b", "b"]],
        "B": [["b"]],
        "C": [["a"], ["b"]],
    }
    assert actual == expected

    # --- epsilon test #2 -------------------------------------------------
    g2 = Grammar()
    S = g2.add_nonterminal("S", True)
    A, B, C = g2.add_nonterminals("A B C")
    a, b = g2.add_terminals("a b")

    S %= A + B
    S %= C
    A %= b + A + b
    A %= g2.epsilon
    B %= b
    B %= g2.epsilon
    C %= a
    C %= b

    _, actual = grammar_to_graph(__remove_epsilon_productions(g2))
    expected = {
        "S": [["A", "B"], ["C"], ["B"], ["A"], []],
        "A": [["b", "A", "b"], ["b", "b"]],
        "B": [["b"]],
        "C": [["a"], ["b"]],
    }
    assert actual == expected
def test_direct_recursion_remove():
    """Check ``__remove_inmediate_left_recursion`` on ``A -> A b | a``.

    The function under test receives the graph form produced by
    ``grammar_to_graph`` and its result is compared against a graph in the
    same form.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + B
    S %= C
    A %= A + b
    A %= a
    B %= b
    C %= a
    C %= b

    _, source_graph = grammar_to_graph(g)
    actual = __remove_inmediate_left_recursion(source_graph)

    expected = {
        "S": [["A", "B"], ["C"]],
        "A": [["a", "A'"]],
        "A'": [["b", "A'"], []],
        "B": [["b"]],
        "C": [["a"], ["b"]],
    }
    assert actual == expected
def test_grammar_to_graph():
    """Check that ``grammar_to_graph`` maps each head to its list of bodies."""
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C, X, Y = g.add_nonterminals("A B C X Y")
    a, b, d, e = g.add_terminals("a b d e")

    S %= A + B
    S %= C
    A %= C
    A %= d
    B %= Y
    C %= a
    C %= b
    C %= X
    X %= d
    X %= e
    Y %= e

    _, actual = grammar_to_graph(g)

    expected = {
        "S": [["A", "B"], ["C"]],
        "A": [["C"], ["d"]],
        "B": [["Y"]],
        "C": [["a"], ["b"], ["X"]],
        "X": [["d"], ["e"]],
        "Y": [["e"]],
    }
    assert actual == expected
def test_build_conflict_str():
    """Check that the string from ``build_conflict_str`` makes the parser fail.

    Grammar::

        S -> A B
        A -> a A | a
        B -> b B | b

    The LL table is supplied by hand and has two productions in the
    ``(A, a)`` and ``(B, b)`` cells. The parser built from it is expected to
    raise when fed the conflict string.
    """
    G = Grammar()
    S = G.add_nonterminal("S", True)
    A, B = G.add_nonterminals("A B")
    a, b = G.add_terminals("a b")

    S %= A + B
    A %= a + A | a
    B %= b + B | b

    table = {
        (S, a): [Production(S, Sentence(A, B))],
        (A, a): [Production(A, Sentence(a, A)), Production(A, Sentence(a))],
        (B, b): [Production(B, Sentence(b, B)), Production(B, Sentence(b))],
    }

    conflict_str = build_conflict_str(G)
    parser = build_ll_parser(G, table=table)

    try:
        parser(conflict_str)
    except Exception:
        # Expected outcome: the parser rejects the conflict string.
        pass
    else:
        # The failure is raised outside the ``try`` body on purpose: an
        # ``assert False`` placed inside it would itself be caught by
        # ``except Exception`` and the test could never fail.
        raise AssertionError("parser accepted the conflict string")
def test_is_slr_grammar():
    """Check that the dangling-else grammar is reported as not SLR.

    Grammar::

        S -> if X then S | if X then S else S | num
        X -> num
    """
    GG = Grammar()
    S = GG.add_nonterminal("S", True)
    X = GG.add_nonterminal("X")
    if_, then, else_, num = GG.add_terminals("if then else num")

    S %= if_ + X + then + S
    S %= if_ + X + then + S + else_ + S
    S %= num
    X %= num

    # Truthiness check instead of comparing against the ``False`` literal.
    assert not is_slr_grammar(GG)
def test_remove_left_recursion_2():
    """Run ``remove_left_recursion`` on a larger grammar and print the result.

    Input grammar::

        A -> b B | c C | d D
        B -> c C | eps
        C -> c c c | A | a | b | eps
        D -> d | b | E | eps
        E -> F | C
        F -> D

    NOTE(review): the only assertion is ``assert True``, so this test merely
    verifies that the calls do not raise. The printed reference graph uses
    head names (such as "S") that this grammar does not declare, so it looks
    like a leftover from another test.
    """
    g = Grammar()
    A = g.add_nonterminal("A", True)
    B, C, D, E, F = g.add_nonterminals("B C D E F")
    a, b, c, d = g.add_terminals("a b c d")

    A %= b + B
    A %= c + C
    A %= d + D

    B %= c + C
    B %= g.epsilon

    C %= c + c + c
    C %= A
    C %= a
    C %= b
    C %= g.epsilon

    D %= d
    D %= b
    D %= E
    D %= g.epsilon

    E %= F
    E %= C

    F %= D

    rewritten = remove_left_recursion(g)
    _, actual = grammar_to_graph(rewritten)

    reference = {
        "S": [["b"], ["A", "b"]],
        "A": [["B", "a"]],
        "B": [["b", "b", "B'"]],
        "B'": [["a", "b", "b", "B'"], []],
        "C": [["b"]],
    }

    print(reference)
    print(actual)
    assert True
def test_gramar_to_automaton():
    """Build a small right-linear grammar; no conversion is exercised yet.

    Grammar::

        S -> a A | b B
        A -> a A | a
        B -> b B | b

    NOTE(review): the body contains no assertion and never calls a
    grammar-to-automaton conversion; it only checks that the grammar can be
    constructed.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B = g.add_nonterminals("A B")
    a, b = g.add_terminals("a b")

    S %= a + A
    S %= b + B

    A %= a + A
    A %= a

    B %= b + B
    B %= b
def test_is_regular():
    """Check that a right-linear grammar is accepted by ``is_regular_grammar``.

    Grammar::

        S -> a A | b B
        A -> a A | a
        B -> b B | b
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B = g.add_nonterminals("A B")
    a, b = g.add_terminals("a b")

    S %= a + A
    S %= b + B

    A %= a + A
    A %= a

    B %= b + B
    B %= b

    assert is_regular_grammar(g)
def evaluate_ast(node):
    """Turn a parsed grammar AST into a ``Grammar`` object.

    A fresh ``Grammar`` is created and handed, together with ``node``, to
    four registration passes in a fixed order: start symbol, non-terminals,
    terminals and finally productions.

    Args:
        node: AST root passed unchanged to every registration pass.

    Returns:
        The ``Grammar`` after all four passes have run.
    """
    grammar = Grammar()
    registration_passes = (
        register_start_symbol,
        register_nonterminals,
        register_terminals,
        register_productions,
    )
    for run_pass in registration_passes:
        run_pass(node, grammar)
    return grammar
def test_remove_unitary_prods_2():
    """Run ``remove_unit_prods`` on a grammar with chained unit productions.

    Input grammar::

        A -> b B | c C | d D
        B -> c C | eps
        C -> c c c | A | a | b | eps
        D -> d | b | E | eps
        E -> F | C
        F -> D

    NOTE(review): the only assertion is ``assert True``; the reference graph
    below is never compared with the result and uses head names this grammar
    does not declare, so the test only verifies that nothing raises.
    """
    g = Grammar()
    A = g.add_nonterminal("A", True)
    B, C, D, E, F = g.add_nonterminals("B C D E F")
    a, b, c, d = g.add_terminals("a b c d")

    A %= b + B
    A %= c + C
    A %= d + D

    B %= c + C
    B %= g.epsilon

    C %= c + c + c
    C %= A
    C %= a
    C %= b
    C %= g.epsilon

    D %= d
    D %= b
    D %= E
    D %= g.epsilon

    E %= F
    E %= C

    F %= D

    simplified = remove_unit_prods(g)
    _, actual = grammar_to_graph(simplified)

    reference = {
        "S": [["a"]],
        "A": [["B", "A''"]],
        "A'": [["b"], ["a"]],
        "A''": [[], ["a", "A'"]],
    }
    assert True
def build_input_grammar():
    """Build the attributed grammar used to read textual grammar definitions.

    The grammar is::

        grammar     -> prod_list
        prod_list   -> prod eol prod_list | prod
        prod        -> symbol '->' sent_list
        sent_list   -> sent '|' sent_list | sent
        sent        -> symbol_list | 'eps'
        symbol_list -> symbol symbol_list | symbol

    The attribute rules assemble ``GNode``, ``ProdNode``, ``SentNode``,
    ``SymbolNode`` and ``EpsNode`` objects; list-valued rules are combined
    with ``+``.

    Returns:
        The populated ``Grammar``.
    """
    G = Grammar()

    # Non-terminals.
    root = G.add_nonterminal("grammar", True)
    production, production_list = G.add_nonterminals("prod prod_list")
    sentence, sentence_list, symbols = G.add_nonterminals(
        "sent sent_list symbol_list"
    )

    # Terminals.
    sym, arrow_tok, bar_tok, eps_tok, eol_tok = G.add_terminals(
        "symbol -> | eps eol"
    )

    root %= (production_list, lambda h, s: GNode(s[1]))

    production_list %= (
        production + eol_tok + production_list,
        lambda h, s: s[1] + s[3],
    )
    production_list %= (production, lambda h, s: s[1])

    # One ProdNode per alternative, all sharing the same head symbol text.
    production %= (
        sym + arrow_tok + sentence_list,
        lambda h, s: [ProdNode(SymbolNode(s[1]), body) for body in s[3]],
    )

    sentence_list %= (
        sentence + bar_tok + sentence_list,
        lambda h, s: [s[1]] + s[3],
    )
    sentence_list %= (sentence, lambda h, s: [s[1]])

    sentence %= (symbols, lambda h, s: SentNode(s[1]))
    sentence %= (eps_tok, lambda h, s: SentNode([EpsNode(s[1])]))

    symbols %= (sym + symbols, lambda h, s: [SymbolNode(s[1])] + s[2])
    symbols %= (sym, lambda h, s: [SymbolNode(s[1])])

    return G
def test_is_regular_2():
    """Check that ``is_regular_grammar`` rejects a grammar with ``C -> c c c``.

    Grammar::

        A -> b B | c C | d D
        B -> c C | eps
        C -> c c c | eps
        D -> d | b | eps
    """
    g = Grammar()
    A = g.add_nonterminal("A", True)
    B, C, D = g.add_nonterminals("B C D")
    b, c, d = g.add_terminals("b c d")

    A %= b + B
    A %= c + C
    A %= d + D

    B %= c + C
    B %= g.epsilon

    C %= c + c + c
    C %= g.epsilon

    D %= d
    D %= b
    D %= g.epsilon

    assert not is_regular_grammar(g)
def test_automaton_to_regex():
    """Round-trip a grammar through an automaton and a regex, then match.

    Grammar::

        S -> a S | a | b

    The grammar is converted with ``grammar_to_automaton``, the automaton
    with ``automaton_to_regex``, and the resulting expression is wrapped in
    ``Regex`` and tried on three sample strings.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    a, b = g.add_terminals("a b")

    S %= a + S
    S %= a
    S %= b

    automaton = grammar_to_automaton(g)
    # Debug aid: automaton.graph().write(<path>, format="svg") dumps the
    # automaton as an image.
    pattern = automaton_to_regex(automaton)
    print(pattern)

    matcher = Regex(pattern)
    assert matcher("aaaaaaaaaab")
    assert not matcher("aaaaaaaaaabbbbbb")
    assert not matcher("bababbbbabb")
def graph_to_grammar(start_symbol: str, productions: dict):
    """Build a ``Grammar`` from a ``{head: [body, ...]}`` production graph.

    Args:
        start_symbol: Name of the head that becomes the start symbol.
        productions: Maps each non-terminal name to its alternative bodies.
            Each body is a sequence of symbol names; an empty body (``[]``
            or ``""``) stands for the epsilon production.

    Returns:
        A new ``Grammar`` with every key of ``productions`` registered as a
        non-terminal, every other symbol found in a body registered as a
        terminal, and one production per body.
    """
    grammar = Grammar()

    # Every head is a non-terminal; only ``start_symbol`` is flagged as start.
    for head in productions:
        grammar.add_nonterminal(head, start_symbol=(head == start_symbol))

    # Any body symbol the grammar does not know yet is a terminal. The lookup
    # happens right before each insertion, so a symbol that occurs in several
    # bodies is registered only once.
    all_bodies = chain.from_iterable(productions.values())
    for name in chain.from_iterable(all_bodies):
        if grammar[name] is None:
            grammar.add_terminal(name)

    for head, bodies in productions.items():
        for body in bodies:
            # ``not body`` covers the empty list as well as the empty string;
            # the previous ``body == ""`` test never matched list bodies.
            if not body:
                symbols = [grammar.epsilon]
            else:
                symbols = [grammar[name] for name in body]
            grammar.add_production(
                Production(grammar[head], Sentence(*symbols))
            )

    return grammar
def test_remove_unit_prods():
    """Exercise ``remove_unnecesary_productions`` on several small grammars.

    NOTE(review): the second and fourth scenarios run the transformation but
    assert nothing about its result; that matches the original behaviour.
    """

    def check_unit_chain():
        # S -> A | b ; A -> B | a ; B -> a | C b ; C -> a
        g = Grammar()
        S = g.add_nonterminal("S", True)
        A, B, C = g.add_nonterminals("A B C")
        a, b = g.add_terminals("a b")

        S %= A
        S %= b
        A %= B
        A %= a
        B %= a
        B %= C + b
        C %= a

        _, actual = grammar_to_graph(remove_unnecesary_productions(g))
        expected = {
            "S": [["a"], ["b"], ["C", "b"]],
            "C": [["a"]],
        }
        assert actual == expected

    # Scenario 1: chain of unit productions.
    check_unit_chain()

    # Scenario 2: S -> A B | C ; A -> A b | a ; B -> b ; C -> a | b.
    # The expected graph was disabled in the original, so nothing is asserted.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + B
    S %= C
    A %= A + b
    A %= a
    B %= b
    C %= a
    C %= b

    _, actual = grammar_to_graph(remove_unnecesary_productions(g))

    # Scenario 3: the same grammar and expectation as scenario 1.
    check_unit_chain()

    # Scenario 4: S -> X a | Y ; X -> b ; Y -> a.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    X, Y = g.add_nonterminals("X Y")
    a, b = g.add_terminals("a b")

    S %= X + a
    S %= Y
    X %= b
    Y %= a

    _, actual = grammar_to_graph(remove_unnecesary_productions(g))
    # NOTE(review): this reference graph is never compared with ``actual``.
    reference = {
        "S": [["X", "a"], ["b"]],
        "X": [["b"]],
    }
def test_remove_unreachable_prods():
    """Exercise ``remove_unreachable_prods`` on four grammars.

    Scenarios 1, 3 and 4 contain non-terminals that no alternative reachable
    from ``S`` mentions, and those heads are absent from the expected graph.
    In scenario 2 every non-terminal is reachable and the expected graph
    equals the input.
    """
    # Scenario 1: C is never referenced.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A
    S %= b
    A %= B
    B %= a
    C %= a

    _, actual = grammar_to_graph(remove_unreachable_prods(g))
    expected = {
        "S": [["A"], ["b"]],
        "A": [["B"]],
        "B": [["a"]],
    }
    assert actual == expected

    # Scenario 2: everything is reachable.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + b
    S %= C
    A %= B + a
    B %= S + b
    C %= b

    _, actual = grammar_to_graph(remove_unreachable_prods(g))
    expected = {
        "S": [["A", "b"], ["C"]],
        "A": [["B", "a"]],
        "B": [["S", "b"]],
        "C": [["b"]],
    }
    assert actual == expected

    # Scenario 3: C has two alternatives but is never referenced.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + B
    S %= b
    S %= a
    A %= B + a
    B %= S + b
    C %= b
    C %= a

    _, actual = grammar_to_graph(remove_unreachable_prods(g))
    expected = {
        "S": [["A", "B"], ["b"], ["a"]],
        "A": [["B", "a"]],
        "B": [["S", "b"]],
    }
    assert actual == expected

    # Scenario 4: A and B are defined but only C is used by S.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= a
    S %= b
    S %= C + b
    A %= a
    A %= C + b
    B %= a
    B %= C + b
    C %= a

    _, actual = grammar_to_graph(remove_unreachable_prods(g))
    expected = {
        "S": [["a"], ["b"], ["C", "b"]],
        "C": [["a"]],
    }
    print(expected)
    print(actual)
    assert actual == expected
from pycmp.parsing import compute_firsts
from pycmp.token import Token
from pycmp.utils import ContainerSet
from pycmp.grammar import Grammar, Sentence, Production
from pycmp.grammar import AttributedProduction, Item

# Per-function case lists; all start empty here.
# NOTE(review): presumably filled further down and consumed by parametrized
# tests - that code is not visible from this point.
test_compute_firsts_cases = []
test_compute_follows_cases = []
test_build_ll_table_cases = []
test_build_ll_parser_cases = []
test_evaluate_parse_cases = []

# Shared fixture: attributed grammar for arithmetic expressions.
#   E -> T X
#   X -> + T X | - T X | epsilon
#   T -> F Y
#   Y -> * F Y | / F Y | epsilon
#   F -> ( E ) ...
# NOTE(review): F has no production for `num` in the visible part, so the
# definition probably continues after this point.
grammar = Grammar()
E = grammar.add_nonterminal("E", True)
T, F, X, Y = grammar.add_nonterminals("T F X Y")
plus, minus, star, div, opar, cpar, num = grammar.add_terminals(
    "+ - * / ( ) num")
# Each production is followed by its attribute rules; the X and Y rules
# combine the inherited value h[0] with the operand s[2].
E %= T + X, lambda h, s: s[2], None, lambda h, s: s[1]
X %= plus + T + X, lambda h, s: s[3], None, None, lambda h, s: h[0] + s[2]
X %= minus + T + X, lambda h, s: s[3], None, None, lambda h, s: h[0] - s[2]
X %= grammar.epsilon, lambda h, s: h[0]
T %= F + Y, lambda h, s: s[2], None, lambda h, s: s[1]
Y %= star + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0] * s[2]
Y %= div + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0] / s[2]
Y %= grammar.epsilon, lambda h, s: h[0]
F %= opar + E + cpar, lambda h, s: s[2], None, None, None