Beispiel #1
0
def build_regex_grammar():
    """Build and return the attributed grammar for regular expressions.

    Productions, in registration order ("eps" is the empty sentence, while
    'ε' is an ordinary terminal of the regex language)::

        E -> T X
        X -> '|' T X | eps
        T -> F Y
        Y -> F Y | eps
        F -> A Z
        Z -> '*' Z | eps
        A -> symbol | '(' E ')' | 'ε'

    Every production is registered together with one rule for its head
    followed by one slot per body symbol (``None`` when no rule is attached).
    The rules build ``UnionNode``, ``ConcatNode``, ``ClosureNode``,
    ``SymbolNode`` and ``EpsilonNode`` instances.
    NOTE(review): ``h`` / ``s`` are presumably the inherited / synthesized
    attribute lists of the parser driver -- confirm against the evaluator.
    """
    grammar = Grammar()

    E = grammar.add_nonterminal("E", True)
    T, F, A, X, Y, Z = grammar.add_nonterminals("T F A X Y Z")
    pipe, star, opar, cpar, symbol, epsilon = grammar.add_terminals(
        "| * ( ) symbol ε"
    )

    # Union level: E -> T X, X -> '|' T X | eps
    E %= (
        T + X,
        lambda h, s: s[2],
        None,
        lambda h, s: s[1],
    )
    X %= (
        pipe + T + X,
        lambda h, s: s[3],
        None,
        None,
        lambda h, s: UnionNode(h[0], s[2]),
    )
    X %= (grammar.epsilon, lambda h, s: h[0])

    # Concatenation level: T -> F Y, Y -> F Y | eps
    T %= (
        F + Y,
        lambda h, s: s[2],
        None,
        lambda h, s: s[1],
    )
    Y %= (
        F + Y,
        lambda h, s: s[2],
        None,
        lambda h, s: ConcatNode(h[0], s[1]),
    )
    Y %= (grammar.epsilon, lambda h, s: h[0])

    # Closure level: F -> A Z, Z -> '*' Z | eps
    F %= (
        A + Z,
        lambda h, s: s[2],
        None,
        lambda h, s: s[1],
    )
    Z %= (
        star + Z,
        lambda h, s: s[2],
        None,
        lambda h, s: ClosureNode(h[0]),
    )
    Z %= (grammar.epsilon, lambda h, s: h[0])

    # Atoms: A -> symbol | '(' E ')' | 'ε'
    A %= (symbol, lambda h, s: SymbolNode(s[1]))
    A %= (
        opar + E + cpar,
        lambda h, s: s[2],
        None,
        None,
        None,
    )
    A %= (epsilon, lambda h, s: EpsilonNode(s[1]))

    return grammar
Beispiel #2
0
def test_general_recursion_remove():
    """Indirect left recursion (S -> A b, A -> B a, B -> S b) is removed.

    The transformed grammar is converted to its graph form and compared with
    the expected productions, which introduce the helper nonterminal ``B'``.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    # S -> A b | C
    S %= A + b
    S %= C
    # A -> B a
    A %= B + a
    # B -> S b
    B %= S + b
    # C -> b
    C %= b

    _, actual = grammar_to_graph(remove_left_recursion(g))

    expected = {
        "S": [["b"], ["A", "b"]],
        "A": [["B", "a"]],
        "B": [["b", "b", "B'"]],
        "B'": [["a", "b", "b", "B'"], []],
        "C": [["b"]],
    }

    # Both graphs are printed to ease debugging when the comparison fails.
    print(expected)
    print(actual)

    assert actual == expected
def test_remove_unreachable_prods():
    """Check left factoring of A -> B a b | B a a | B.

    NOTE(review): despite its name this test exercises
    ``remove_common_prefixes``; the name is kept so existing references
    keep working, but it is misleading.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    # S -> a
    S %= a
    # A -> B a b | B a a | B   (all three alternatives share the prefix B)
    A %= B + a + b
    A %= B + a + a
    A %= B

    _, actual = grammar_to_graph(remove_common_prefixes(g))

    expected = {
        "S": [["a"]],
        "A": [["B", "A''"]],
        "A'": [["b"], ["a"]],
        "A''": [[], ["a", "A'"]],
    }

    # Both graphs are printed to ease debugging when the comparison fails.
    print(actual)
    print(expected)

    assert actual == expected
Beispiel #4
0
def test_epsilon_remove():
    """Check epsilon-production removal on two variants of one grammar.

    Both variants share S -> A B | C, A -> b A b | eps, C -> a | b.
    In the first B -> b only; in the second B is nullable as well, which
    makes the start symbol nullable.
    """

    def eliminate(nullable_b):
        # Build the shared grammar; B additionally derives eps on request.
        g = Grammar()
        S = g.add_nonterminal("S", True)
        A, B, C = g.add_nonterminals("A B C")
        a, b = g.add_terminals("a b")

        S %= A + B
        S %= C

        A %= b + A + b
        A %= g.epsilon

        B %= b
        if nullable_b:
            B %= g.epsilon

        C %= a
        C %= b

        _, graph = grammar_to_graph(__remove_epsilon_productions(g))
        return graph

    # epsilon test #1: only A is nullable
    expected = {
        "S": [["A", "B"], ["C"], ["B"]],
        "A": [["b", "A", "b"], ["b", "b"]],
        "B": [["b"]],
        "C": [["a"], ["b"]],
    }
    assert eliminate(False) == expected

    # epsilon test #2: A and B are nullable, so S keeps an empty body
    expected = {
        "S": [["A", "B"], ["C"], ["B"], ["A"], []],
        "A": [["b", "A", "b"], ["b", "b"]],
        "B": [["b"]],
        "C": [["a"], ["b"]],
    }
    assert eliminate(True) == expected
Beispiel #5
0
def test_direct_recursion_remove():
    """Immediate left recursion A -> A b | a is rewritten using ``A'``.

    The removal helper is applied to the graph form of the grammar, not to
    the ``Grammar`` object itself.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    # S -> A B | C
    S %= A + B
    S %= C
    # A -> A b | a   (immediately left recursive)
    A %= A + b
    A %= a
    # B -> b
    B %= b
    # C -> a | b
    C %= a
    C %= b

    _, graph = grammar_to_graph(g)
    actual = __remove_inmediate_left_recursion(graph)

    expected = {
        "S": [["A", "B"], ["C"]],
        "A": [["a", "A'"]],
        "A'": [["b", "A'"], []],
        "B": [["b"]],
        "C": [["a"], ["b"]],
    }

    assert actual == expected
Beispiel #6
0
def test_grammar_to_graph():
    """``grammar_to_graph`` maps each nonterminal name to its list of bodies.

    Every body is expected as a list of symbol names, in the order in which
    the productions were registered.
    """
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C, X, Y = g.add_nonterminals("A B C X Y")
    a, b, d, e = g.add_terminals("a b d e")

    # S -> A B | C
    S %= A + B
    S %= C
    # A -> C | d
    A %= C
    A %= d
    # B -> Y
    B %= Y
    # C -> a | b | X
    C %= a
    C %= b
    C %= X
    # X -> d | e
    X %= d
    X %= e
    # Y -> e
    Y %= e

    _, actual = grammar_to_graph(g)

    expected = {
        "S": [["A", "B"], ["C"]],
        "A": [["C"], ["d"]],
        "B": [["Y"]],
        "C": [["a"], ["b"], ["X"]],
        "X": [["d"], ["e"]],
        "Y": [["e"]],
    }

    assert actual == expected
def test_build_conflict_str():
    """The string built by ``build_conflict_str`` must make the parser fail.

    The grammar S -> A B, A -> a A | a, B -> b B | b is paired with a
    hand-written table whose ``(A, a)`` and ``(B, b)`` cells each hold two
    productions. A parser built from that table is expected to raise when
    fed the generated conflict string.
    """
    G = Grammar()

    S = G.add_nonterminal("S", True)
    A, B = G.add_nonterminals("A B")
    a, b = G.add_terminals("a b")

    S %= A + B
    A %= a + A | a
    B %= b + B | b

    table = {
        (S, a): [Production(S, Sentence(A, B))],
        (A, a): [Production(A, Sentence(a, A)), Production(A, Sentence(a))],
        (B, b): [Production(B, Sentence(b, B)), Production(B, Sentence(b))],
    }

    conflict_str = build_conflict_str(G)
    parser = build_ll_parser(G, table=table)

    try:
        parser(conflict_str)
    except Exception:
        # Expected outcome: the conflict string cannot be parsed.
        pass
    else:
        # The failure is raised outside the ``try`` body on purpose: an
        # ``assert False`` placed inside it raises AssertionError, which is
        # itself an Exception and would be swallowed by the handler above,
        # so the test could never fail.
        raise AssertionError(
            "parser accepted the conflict string instead of raising"
        )
Beispiel #8
0
def test_is_slr_grammar():
    """The if/then/else grammar below must not be reported as SLR.

    S -> if X then S | if X then S else S | num
    X -> num
    """
    GG = Grammar()

    S = GG.add_nonterminal("S", True)
    X = GG.add_nonterminal("X")
    if_, then, else_, num = GG.add_terminals("if then else num")

    S %= if_ + X + then + S
    S %= if_ + X + then + S + else_ + S
    S %= num
    X %= num

    # Truthiness check instead of ``== False``, matching the other
    # predicate tests in this file (e.g. ``assert not is_regular_grammar``).
    assert not is_slr_grammar(GG)
def test_remove_left_recursion_2():
    """Smoke test: run ``remove_left_recursion`` on a grammar with cycles.

    NOTE(review): nothing is verified here -- the final assertion is
    ``assert True`` and the printed reference graph uses nonterminals
    ("S", "B'") that do not belong to this grammar, so it looks copied from
    another test. The test only fails if one of the calls raises.
    """
    g = Grammar()
    A = g.add_nonterminal("A", True)
    B, C, D, E, F = g.add_nonterminals("B C D E F")
    a, b, c, d = g.add_terminals("a b c d")

    # A -> b B | c C | d D
    A %= b + B
    A %= c + C
    A %= d + D
    # B -> c C | eps
    B %= c + C
    B %= g.epsilon
    # C -> c c c | A | a | b | eps
    C %= c + c + c
    C %= A
    C %= a
    C %= b
    C %= g.epsilon
    # D -> d | b | E | eps
    D %= d
    D %= b
    D %= E
    D %= g.epsilon
    # E -> F | C
    E %= F
    E %= C
    # F -> D
    F %= D

    _, actual = grammar_to_graph(remove_left_recursion(g))

    reference = {
        "S": [["b"], ["A", "b"]],
        "A": [["B", "a"]],
        "B": [["b", "b", "B'"]],
        "B'": [["a", "b", "b", "B'"], []],
        "C": [["b"]],
    }

    print(reference)
    print(actual)

    assert True


# A -> b B | c C | d D
# B -> c C | eps
# C -> c c c | A | a | b | eps
# D -> d | b  | E | eps
# E -> F | C
# F -> D
def test_gramar_to_automaton():
    """Build the grammar S -> a A | b B, A -> a A | a, B -> b B | b.

    NOTE(review): the grammar is only constructed; no conversion is invoked
    and nothing is asserted, so this test is a placeholder.
    """
    grammar = Grammar()

    S = grammar.add_nonterminal("S", True)
    A, B = grammar.add_nonterminals("A B")
    a, b = grammar.add_terminals("a b")

    # S -> a A | b B
    S %= a + A
    S %= b + B
    # A -> a A | a
    A %= a + A
    A %= a
    # B -> b B | b
    B %= b + B
    B %= b
def test_is_regular():
    """S -> a A | b B, A -> a A | a, B -> b B | b is reported as regular."""
    grammar = Grammar()

    S = grammar.add_nonterminal("S", True)
    A, B = grammar.add_nonterminals("A B")
    a, b = grammar.add_terminals("a b")

    # S -> a A | b B
    S %= a + A
    S %= b + B
    # A -> a A | a
    A %= a + A
    A %= a
    # B -> b B | b
    B %= b + B
    B %= b

    assert is_regular_grammar(grammar)
def evaluate_ast(node):
    """Create a fresh ``Grammar`` and populate it from ``node``.

    The registration passes are applied in a fixed order: start symbol,
    nonterminals, terminals and finally productions. Each pass receives the
    node and the grammar being filled in.

    Returns the populated grammar.
    """
    result = Grammar()
    passes = (
        register_start_symbol,
        register_nonterminals,
        register_terminals,
        register_productions,
    )
    for register in passes:
        register(node, result)
    return result
Beispiel #13
0
def test_remove_unitary_prods_2():
    """Smoke test: ``remove_unit_prods`` on a grammar with unit cycles.

    The grammar contains the unit chain D -> E -> F -> D as well as
    C -> A and E -> C. The test only checks that the transformation and
    the conversion to graph form run without raising.

    TODO: assert the expected productions once they have been worked out.
    The fixture that used to live here was never compared against the
    result and described a different grammar (it was keyed on "S", "A'"
    and "A''"), so it was removed together with the vacuous ``assert True``.
    """
    grammar = Grammar()
    A = grammar.add_nonterminal("A", True)
    B, C, D, E, F = grammar.add_nonterminals("B C D E F")
    a, b, c, d = grammar.add_terminals("a b c d")

    # A -> b B | c C | d D
    A %= b + B
    A %= c + C
    A %= d + D

    # B -> c C | eps
    B %= c + C
    B %= grammar.epsilon

    # C -> c c c | A | a | b | eps
    C %= c + c + c
    C %= A
    C %= a
    C %= b
    C %= grammar.epsilon

    # D -> d | b | E | eps
    D %= d
    D %= b
    D %= E
    D %= grammar.epsilon

    # E -> F | C
    E %= F
    E %= C

    # F -> D
    F %= D

    new_grammar = remove_unit_prods(grammar)

    # Converting the result also exercises grammar_to_graph on the output.
    grammar_to_graph(new_grammar)
def build_input_grammar():
    """
    Build the attributed grammar used to read grammar descriptions.

    The returned grammar is:

    grammar -> prod_list
    prod_list -> prod eol prod_list | prod
    prod -> symbol '->' sent_list
    sent_list -> sent '|' sent_list | sent
    sent -> symbol_list | 'eps'
    symbol_list -> symbol symbol_list | symbol

    The attached rules build ``GNode``, ``ProdNode``, ``SentNode``,
    ``SymbolNode`` and ``EpsNode`` instances; list-valued rules are
    concatenated from left to right.
    """
    g = Grammar()

    grammar = g.add_nonterminal("grammar", True)
    prod, prod_list = g.add_nonterminals("prod prod_list")
    sent, sent_list, symbol_list = g.add_nonterminals(
        "sent sent_list symbol_list"
    )
    symbol, arrow, union, eps, eol = g.add_terminals("symbol -> | eps eol")

    # grammar -> prod_list
    grammar %= (prod_list, lambda h, s: GNode(s[1]))

    # prod_list -> prod eol prod_list | prod
    prod_list %= (prod + eol + prod_list, lambda h, s: s[1] + s[3])
    prod_list %= (prod, lambda h, s: s[1])

    # prod -> symbol '->' sent_list   (one ProdNode per alternative)
    prod %= (
        symbol + arrow + sent_list,
        lambda h, s: [ProdNode(SymbolNode(s[1]), i) for i in s[3]],
    )

    # sent_list -> sent '|' sent_list | sent
    sent_list %= (sent + union + sent_list, lambda h, s: [s[1]] + s[3])
    sent_list %= (sent, lambda h, s: [s[1]])

    # sent -> symbol_list | 'eps'
    sent %= (symbol_list, lambda h, s: SentNode(s[1]))
    sent %= (eps, lambda h, s: SentNode([EpsNode(s[1])]))

    # symbol_list -> symbol symbol_list | symbol
    symbol_list %= (
        symbol + symbol_list,
        lambda h, s: [SymbolNode(s[1])] + s[2],
    )
    symbol_list %= (symbol, lambda h, s: [SymbolNode(s[1])])

    return g
def test_is_regular_2():
    """A grammar containing C -> c c c must not be reported as regular."""
    grammar = Grammar()

    A = grammar.add_nonterminal("A", True)
    B, C, D = grammar.add_nonterminals("B C D")
    b, c, d = grammar.add_terminals("b c d")

    # A -> b B | c C | d D
    A %= b + B
    A %= c + C
    A %= d + D
    # B -> c C | eps
    B %= c + C
    B %= grammar.epsilon
    # C -> c c c | eps
    C %= c + c + c
    C %= grammar.epsilon
    # D -> d | b | eps
    D %= d
    D %= b
    D %= grammar.epsilon

    assert not is_regular_grammar(grammar)
def test_automaton_to_regex():
    """Round trip: grammar -> automaton -> regex -> matcher.

    For S -> a S | a | b the derived regex must accept a run of a's followed
    by a single b, and reject the two other sample strings.
    """
    grammar = Grammar()

    S = grammar.add_nonterminal("S", True)
    a, b = grammar.add_terminals("a b")

    # S -> a S | a | b
    S %= a + S
    S %= a
    S %= b

    automaton = grammar_to_automaton(grammar)
    pattern = automaton_to_regex(automaton)

    # Printed to ease debugging when one of the assertions below fails.
    print(pattern)

    matcher = Regex(pattern)

    assert matcher("aaaaaaaaaab")
    assert not matcher("aaaaaaaaaabbbbbb")
    assert not matcher("bababbbbabb")
def graph_to_grammar(start_symbol: str, productions: dict):
    """Build a ``Grammar`` from the graph form of a grammar.

    Args:
        start_symbol: Name of the nonterminal to flag as start symbol.
        productions: Mapping from nonterminal name to its bodies, each body
            being an iterable of symbol names.

    Returns:
        The grammar holding one production per (head, body) pair.

    NOTE(review): only a body equal to ``""`` is turned into an epsilon
    sentence. An empty list does not compare equal to ``""`` and therefore
    yields ``Sentence()`` with no symbols -- confirm this is intended.
    """
    grammar = Grammar()

    # Every key of the mapping is a nonterminal.
    for name in productions:
        grammar.add_nonterminal(name, start_symbol=(name == start_symbol))

    # Any body symbol the grammar cannot resolve is registered as a terminal.
    # The lookup runs right before each insertion, so a symbol is presumably
    # added only the first time it is met.
    all_bodies = chain.from_iterable(productions.values())
    for symbol in chain.from_iterable(all_bodies):
        if grammar[symbol] is None:
            grammar.add_terminal(symbol)

    for head, alternatives in productions.items():
        for alternative in alternatives:
            if alternative == "":
                symbols = [grammar.epsilon]
            else:
                symbols = [grammar[s] for s in alternative]
            grammar.add_production(
                Production(grammar[head], Sentence(*symbols))
            )
    return grammar
Beispiel #18
0
def test_remove_unit_prods():
    """Exercise ``remove_unnecesary_productions`` on four grammars.

    Only the first and third cases (the same grammar) are asserted; the
    second and fourth cases merely check that the calls do not raise.
    """

    def unit_chain_case():
        # S -> A | b, A -> B | a, B -> a | C b, C -> a
        g = Grammar()
        S = g.add_nonterminal("S", True)
        A, B, C = g.add_nonterminals("A B C")
        a, b = g.add_terminals("a b")

        S %= A
        S %= b

        A %= B
        A %= a

        B %= a
        B %= C + b

        C %= a

        _, graph = grammar_to_graph(remove_unnecesary_productions(g))
        return graph

    # Case 1: the unit chain S -> A -> B collapses into S.
    assert unit_chain_case() == {
        "S": [["a"], ["b"], ["C", "b"]],
        "C": [["a"]],
    }

    # Case 2: S -> A B | C with a left-recursive A.
    # NOTE(review): the result is not checked; no expectation is asserted.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + B
    S %= C

    A %= A + b
    A %= a

    B %= b

    C %= a
    C %= b

    _, actual = grammar_to_graph(remove_unnecesary_productions(g))

    # Case 3: same grammar and expectation as case 1.
    assert unit_chain_case() == {
        "S": [["a"], ["b"], ["C", "b"]],
        "C": [["a"]],
    }

    # Case 4: S -> X a | Y, X -> b, Y -> a.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    X, Y = g.add_nonterminals("X Y")
    a, b = g.add_terminals("a b")

    S %= X + a
    S %= Y

    X %= b
    Y %= a

    _, actual = grammar_to_graph(remove_unnecesary_productions(g))

    # NOTE(review): this expectation is never compared with ``actual``. It
    # also looks inconsistent with the grammar (S -> Y with Y -> a suggests
    # ["a"] rather than ["b"]) -- confirm before turning it into an assert.
    expected = {
        "S": [["X", "a"], ["b"]],
        "X": [["b"]],
    }
Beispiel #19
0
def test_remove_unreachable_prods():
    """``remove_unreachable_prods`` drops nonterminals unreachable from S.

    Four grammars are checked; in each one the expected graph keeps only
    the nonterminals that can be reached from the start symbol.
    """
    # Case 1: C is never referenced from S.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A
    S %= b

    A %= B

    B %= a

    C %= a

    _, actual = grammar_to_graph(remove_unreachable_prods(g))

    expected = {
        "S": [["A"], ["b"]],
        "A": [["B"]],
        "B": [["a"]],
    }
    assert actual == expected

    # Case 2: every nonterminal is reachable, nothing is removed.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + b
    S %= C

    A %= B + a

    B %= S + b

    C %= b

    _, actual = grammar_to_graph(remove_unreachable_prods(g))

    expected = {
        "S": [["A", "b"], ["C"]],
        "A": [["B", "a"]],
        "B": [["S", "b"]],
        "C": [["b"]],
    }
    assert actual == expected

    # Case 3: C has productions but is not reachable from S.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= A + B
    S %= b
    S %= a

    A %= B + a

    B %= S + b

    C %= b
    C %= a

    _, actual = grammar_to_graph(remove_unreachable_prods(g))

    expected = {
        "S": [["A", "B"], ["b"], ["a"]],
        "A": [["B", "a"]],
        "B": [["S", "b"]],
    }
    assert actual == expected

    # Case 4: A and B are unreachable; only S and C survive.
    g = Grammar()
    S = g.add_nonterminal("S", True)
    A, B, C = g.add_nonterminals("A B C")
    a, b = g.add_terminals("a b")

    S %= a
    S %= b
    S %= C + b

    A %= a
    A %= C + b

    B %= a
    B %= C + b

    C %= a

    _, actual = grammar_to_graph(remove_unreachable_prods(g))

    expected = {
        "S": [["a"], ["b"], ["C", "b"]],
        "C": [["a"]],
    }

    # Both graphs are printed to ease debugging when the comparison fails.
    print(expected)
    print(actual)
    assert actual == expected
Beispiel #20
0
from pycmp.parsing import compute_firsts
from pycmp.token import Token
from pycmp.utils import ContainerSet
from pycmp.grammar import Grammar, Sentence, Production
from pycmp.grammar import AttributedProduction, Item

# Module-level registries of test cases, one list per feature under test.
# They start empty here; nothing in the visible part of the file fills them.
test_compute_firsts_cases = []
test_compute_follows_cases = []
test_build_ll_table_cases = []
test_build_ll_parser_cases = []
test_evaluate_parse_cases = []

# Shared fixture: an attributed grammar for arithmetic expressions.
#
#   E -> T X
#   X -> '+' T X | '-' T X | eps
#   T -> F Y
#   Y -> '*' F Y | '/' F Y | eps
#   F -> '(' E ')'
#
# Each production is followed by one rule for its head and one slot per body
# symbol (None where no rule is attached).
# NOTE(review): h / s are presumably the inherited / synthesized attribute
# lists supplied by the evaluator -- confirm against pycmp.
grammar = Grammar()
E = grammar.add_nonterminal("E", True)
T, F, X, Y = grammar.add_nonterminals("T F X Y")
plus, minus, star, div, opar, cpar, num = grammar.add_terminals(
    "+ - * / ( ) num")

E %= T + X, lambda h, s: s[2], None, lambda h, s: s[1]

# Additive tail: the rule attached to the trailing X combines h[0] with s[2].
X %= plus + T + X, lambda h, s: s[3], None, None, lambda h, s: h[0] + s[2]
X %= minus + T + X, lambda h, s: s[3], None, None, lambda h, s: h[0] - s[2]
X %= grammar.epsilon, lambda h, s: h[0]

T %= F + Y, lambda h, s: s[2], None, lambda h, s: s[1]

# Multiplicative tail: same shape as X, using * and /.
Y %= star + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0] * s[2]
Y %= div + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0] / s[2]
Y %= grammar.epsilon, lambda h, s: h[0]

# Parenthesised expression: the head takes the value of the inner E.
F %= opar + E + cpar, lambda h, s: s[2], None, None, None