def parse_grammar(initial, terminals, non_terminals, productions):
    """
    Build a Grammar from its plain-text description.

    initial: name of the start symbol.
    terminals: terminal names separated by single spaces.
    non_terminals: non-terminal names separated by single spaces.
    productions: production lines separated by newlines; blank lines are skipped.

    Every created symbol is stored in the external table `f`, and each
    production line is handed to `parse_production`.

    Returns `(-1, errors)` if the external `errors` collection is non-empty
    once parsing stops, `(0, G)` otherwise.
    """
    G = Grammar()
    production_lines = productions.split('\n')

    f['epsilon'] = G.Epsilon
    f['EOF'] = G.EOF
    f[initial] = G.NonTerminal(initial, True)

    for name in terminals.split(' '):
        f[name] = G.Terminal(name)

    for name in non_terminals.split(' '):
        # The start symbol was already created above.
        if name == f[initial].Name:
            continue
        f[name] = G.NonTerminal(name)

    # Parsing productions: stop at the first line that reports an error.
    for line in production_lines:
        if line:
            parse_production(line, G)
            if errors:
                break

    return (-1, errors) if errors else (0, G)
def G5():
    """
    Sample grammar:

        S -> S A | A
        A -> a
    """
    grammar = Grammar()
    start = grammar.NonTerminal('S', True)
    a_rule = grammar.NonTerminal('A')
    a_tok = grammar.Terminal('a')

    start %= start + a_rule | a_rule
    a_rule %= a_tok

    return grammar
def G6():
    """
    Sample grammar:

        S -> A a | b A c | d c | b d a
        A -> d
    """
    grammar = Grammar()
    start = grammar.NonTerminal('S', True)
    a_rule = grammar.NonTerminal('A')
    tok_a, tok_b, tok_c, tok_d = grammar.Terminals('a b c d')

    start %= (
        a_rule + tok_a
        | tok_b + a_rule + tok_c
        | tok_d + tok_c
        | tok_b + tok_d + tok_a
    )
    a_rule %= tok_d

    return grammar
def G3():
    """
    Sample grammar:

        S -> F
        S -> ( S + F )
        F -> a
    """
    grammar = Grammar()
    start = grammar.NonTerminal('S', True)
    factor = grammar.NonTerminal('F')
    tok_a, tok_plus, tok_open, tok_close = grammar.Terminals('a + ( )')

    start %= factor
    start %= tok_open + start + tok_plus + factor + tok_close
    factor %= tok_a

    return grammar
def G4():
    """
    Sample grammar:

        E -> E * B
        E -> E + B
        E -> B
        B -> 0
        B -> 1
    """
    grammar = Grammar()
    expr = grammar.NonTerminal('E', True)
    bit = grammar.NonTerminal('B')
    tok_zero, tok_one, tok_plus, tok_star = grammar.Terminals('0 1 + *')

    expr %= expr + tok_star + bit
    expr %= expr + tok_plus + bit
    expr %= bit

    bit %= tok_zero
    bit %= tok_one

    return grammar
def unit_testing():
    """
    Self-check for the LR1 parser.

    Builds the grammar

        E -> A = A | int
        A -> int + A | int

    parses `int + int = int + int` and asserts the textual form of the
    resulting derivation. Returns the string "LR1" when the check passes.
    """
    grammar = Grammar()
    expr = grammar.NonTerminal('E', True)
    operand = grammar.NonTerminal('A')
    equal, plus, num = grammar.Terminals('= + int')

    expr %= operand + equal + operand | num
    operand %= num + plus + operand | num

    parse = LR1(grammar)
    tokens = [num, plus, num, equal, num, plus, num, grammar.EOF]
    derivation = parse(tokens)

    expected = '[A -> int, A -> int + A, A -> int, A -> int + A, E -> A = A]'
    assert str(derivation) == expected

    return "LR1"
def execute_node(node:TrieNode,left,right:list,grammar:Grammar,idx):
    """
    Walk a trie node and add the corresponding productions to `grammar`.

    node: trie node being visited; its value is appended to `right`.
    left: head (non-terminal) of the production currently being built.
    right: symbols accumulated so far for the body of that production
           (mutated in place).
    grammar: grammar that receives the generated productions.
    idx: index handed to `generate_name` when a fresh non-terminal is needed.
    """
    right.append(node.value)

    son_count = len(node.sons)
    must_branch = son_count > 1 or (son_count == 1 and node.terminal)

    if must_branch:
        # The body either forks here or may stop here: close the current
        # production with a fresh non-terminal that owns what follows.
        name, idx = generate_name(grammar, idx, left.Name)
        fresh = grammar.NonTerminal(name)
        right.append(fresh)
        grammar.Add_Production(Production(left, SentenceFromIter(right)))

        if node.terminal:
            grammar.Add_Production(Production(fresh, grammar.Epsilon))

        for key in node.sons:
            execute_node(node.sons[key], fresh, [], grammar, idx)
        return

    if son_count == 1:
        # Single continuation: keep extending the same body.
        only_key = next(iter(node.sons))
        execute_node(node.sons[only_key], left, right, grammar, idx)
        return

    # No sons: the accumulated body is complete.
    grammar.Add_Production(Production(left, SentenceFromIter(right)))
def delete_immediate_left_recursion(G: Grammar):
    """
    Remove immediate left recursion from every non-terminal of `G`.

    For a non-terminal `A` with productions `A -> A a | b` the productions
    are replaced by

        A  -> b A'
        A' -> a A' | epsilon

    where `A'` is a new non-terminal named after `A` with a trailing quote.

    The grammar is modified in place and also returned.
    """
    # Iterate over a snapshot: new non-terminals are created inside the loop,
    # and visiting them as well can loop forever on a degenerate `A -> A`
    # production (it yields `A' -> A'`, which is left-recursive again).
    # presumably G.NonTerminal registers the symbol in G.nonTerminals - confirm
    for symbol in list(G.nonTerminals):
        is_left_recursive = any(
            not body.IsEpsilon and body[0] == symbol
            for _, body in symbol.productions
        )
        if not is_left_recursive:
            continue

        last_productions = set(symbol.productions)
        A = G.NonTerminal(f"{symbol}'")

        # Bodies that do not start with `symbol`, each extended with A'.
        new_sents = [
            body + A
            for _, body in symbol.productions
            if body.IsEpsilon or body[0] != symbol
        ]

        for _, body in symbol.productions:
            if not body.IsEpsilon and body[0] == symbol:
                # A' -> b A'
                A %= Sentence(*(body[1:] + (A, )))
        A %= G.Epsilon

        for sent in new_sents:
            # A -> b A'
            symbol %= sent

        # Drop the original productions from both the symbol and the grammar.
        symbol.productions = list(set(symbol.productions) - last_productions)
        G.Productions = list(set(G.Productions) - last_productions)

    return G
def Grammar():
    """
    Build the grammar for regular expressions, with one AST-building rule
    attached to each production.

    Non-terminals: E (union), T (concatenation), F (postfix operators),
    A (atoms) and L (contents of a `[...]` set).
    Terminals: `| * ( ) symbol ε [ ] - + ?`.

    Returns the populated grammar.
    """
    # NOTE(review): this function has the same name as the class it
    # instantiates; if both live in the same module scope the call below
    # resolves to this function - confirm how it is defined/imported.
    G = Grammar()

    union = G.NonTerminal('E', True)
    concat, postfix, atom, char_set = G.NonTerminals('T F A L')
    (pipe, star, opar, cpar, symbol, epsilon,
     osquare, csquare, minus, plus, question) = G.Terminals(
        '| * ( ) symbol ε [ ] - + ?')

    # E: alternatives separated by '|'
    union %= union + pipe + concat, lambda s: UnionNode(s[1], s[3])
    union %= concat, lambda s: s[1]

    # T: juxtaposition
    concat %= concat + postfix, lambda s: ConcatNode(s[1], s[2])
    concat %= postfix, lambda s: s[1]

    # F: postfix operators
    postfix %= atom + star, lambda s: ClosureNode(s[1])
    postfix %= atom + plus, lambda s: PlusNode(s[1])
    postfix %= atom + question, lambda s: QuestionNode(s[1])
    postfix %= atom, lambda s: s[1]

    # A: atoms
    atom %= symbol, lambda s: SymbolNode(s[1])
    atom %= epsilon, lambda s: EpsilonNode(s[1])
    atom %= opar + union + cpar, lambda s: s[2]
    atom %= osquare + char_set + csquare, lambda s: s[2]

    # L: symbols and ranges inside brackets
    char_set %= symbol, lambda s: SymbolNode(s[1])
    char_set %= symbol + minus + symbol, lambda s: RangeNode(
        SymbolNode(s[1]), SymbolNode(s[3]))
    char_set %= symbol + char_set, lambda s: UnionNode(SymbolNode(s[1]), s[2])
    char_set %= symbol + minus + symbol + char_set, lambda s: UnionNode(
        RangeNode(SymbolNode(s[1]), SymbolNode(s[3])), s[4])

    return G
def grammar_for_regex(terminals='| * ( ) symbol ε'):
    """
    Build the attributed grammar for basic regular expressions.

    terminals: space-separated names for the six terminals, in the order
               union, closure, open paren, close paren, symbol, epsilon.

    Each production carries its attribute rules: the first lambda is the
    rule for the head, the following entries (lambda or None) correspond to
    the symbols of the body.

    Returns a pair `(grammar, name of the symbol terminal)`.
    """
    G = Grammar()

    E = G.NonTerminal('E', True)
    T, F, A, X, Y, Z = G.NonTerminals('T F A X Y Z')
    pipe, star, opar, cpar, symbol, epsilon = G.Terminals(terminals)

    # E -> T X ; X -> '|' T X | epsilon
    E %= T + X, lambda h, s: s[2], None, lambda h, s: s[1]
    X %= (pipe + T + X,
          lambda h, s: s[3],
          None,
          None,
          lambda h, s: UnionNode(h[0], s[2]))
    X %= G.Epsilon, lambda h, s: h[0]

    # T -> F Y ; Y -> F Y | epsilon
    T %= F + Y, lambda h, s: s[2], None, lambda h, s: s[1]
    Y %= F + Y, lambda h, s: s[2], None, lambda h, s: ConcatNode(h[0], s[1])
    Y %= G.Epsilon, lambda h, s: h[0]

    # F -> A Z ; Z -> '*' Z | epsilon
    F %= A + Z, lambda h, s: s[2], None, lambda h, s: s[1]
    Z %= star + Z, lambda h, s: s[2], None, lambda h, s: ClosureNode(h[0])
    Z %= G.Epsilon, lambda h, s: h[0]

    # A -> '(' E ')' | symbol | epsilon-terminal
    A %= opar + E + cpar, lambda h, s: s[2], None, None, None
    A %= symbol, lambda h, s: SymbolNode(s[1]), None
    A %= epsilon, lambda h, s: EpsilonNode(h[0])

    return G, symbol.Name
def __init__(self, verbose=False):
    """
    Declare the grammar symbols (no productions are added here) and build
    the action and goto tables.

    verbose: stored as `self.verbose`.
    """
    grammar = Grammar()
    grammar.NonTerminal('E', True)
    grammar.NonTerminals('T F A L')
    grammar.Terminals('| * ( ) symbol ε [ ] - + ?')

    self.G = grammar
    self.verbose = verbose

    # Both tables are built after the grammar and the flag are in place.
    self.action = self.__action_table()
    self.goto = self.__goto_table()
def G1():
    """
    Sample grammar:

        A -> a B
        B -> b

    The non-terminals C, D and the terminals c, d are declared but take
    part in no production.
    """
    grammar = Grammar()
    start = grammar.NonTerminal('A', True)
    b_rule, _c_rule, _d_rule = grammar.NonTerminals('B C D')
    tok_a, tok_b, _tok_c, _tok_d = grammar.Terminals('a b c d')

    start %= tok_a + b_rule
    b_rule %= tok_b

    return grammar
def G9():
    """
    Sample left-recursive expression grammar:

        E -> E + T | T
        T -> T * F | F
        F -> n | ( E )
    """
    grammar = Grammar()
    expr = grammar.NonTerminal('E', True)
    term, factor = grammar.NonTerminals('T F')
    num, plus, star, opar, cpar = grammar.Terminals('n + * ( )')

    expr %= expr + plus + term | term
    term %= term + star + factor | factor
    factor %= num | opar + expr + cpar

    return grammar
def G8():
    """
    Sample grammar:

        A -> C x A | epsilon
        B -> x C y | x C
        C -> x B x | z
    """
    grammar = Grammar()
    a_rule = grammar.NonTerminal('A', True)
    b_rule, c_rule = grammar.NonTerminals('B C')
    tok_x, tok_y, tok_z = grammar.Terminals('x y z')

    a_rule %= c_rule + tok_x + a_rule | grammar.Epsilon
    b_rule %= tok_x + c_rule + tok_y | tok_x + c_rule
    c_rule %= tok_x + b_rule + tok_x | tok_z

    return grammar
def G7():
    """
    Sample grammar:

        S -> A a | b A c | B c | b B a
        A -> d
        B -> d
    """
    grammar = Grammar()
    start = grammar.NonTerminal('S', True)
    a_rule, b_rule = grammar.NonTerminals('A B')
    tok_a, tok_b, tok_c, tok_d = grammar.Terminals('a b c d')

    start %= (
        a_rule + tok_a
        | tok_b + a_rule + tok_c
        | b_rule + tok_c
        | tok_b + b_rule + tok_a
    )
    a_rule %= tok_d
    b_rule %= tok_d

    return grammar
def __unit_testing_get_grammar():
    """
    Arithmetic-expression grammar without left recursion, used by the tests:

        E -> T X
        X -> + T X | - T X | epsilon
        T -> F Y
        Y -> * F Y | / F Y | epsilon
        F -> num | ( E )
    """
    grammar = Grammar()
    expr = grammar.NonTerminal('E', True)
    term, factor, expr_tail, term_tail = grammar.NonTerminals('T F X Y')
    plus, minus, star, div, opar, cpar, num = grammar.Terminals(
        '+ - * / ( ) num')

    expr %= term + expr_tail
    expr_tail %= (
        plus + term + expr_tail
        | minus + term + expr_tail
        | grammar.Epsilon
    )
    term %= factor + term_tail
    term_tail %= (
        star + factor + term_tail
        | div + factor + term_tail
        | grammar.Epsilon
    )
    factor %= num | opar + expr + cpar

    return grammar
def delete_common_prefix(G: Grammar):
    """
    Remove common prefixes among productions that share the same head.

    For every non-terminal, pairs of its productions are compared; when two
    of them start with the same symbols they are replaced by a single
    production that ends in a new non-terminal, and the two differing
    suffixes become productions of that new non-terminal. The search is
    repeated for the non-terminal until a full pass makes no change.

    The grammar is modified in place and also returned.
    """
    for nonterminal in G.nonTerminals:
        change = True
        # Quote suffix for the next helper non-terminal; grows by one quote
        # per factoring step so each helper gets a distinct name.
        primes = ''
        while change:
            change = False
            for production0 in nonterminal.productions:
                _continue = False
                for production1 in nonterminal.productions:
                    if production0 != production1:
                        # lpc: length of the common prefix of both bodies.
                        lpc = 0
                        for i in range((min(len(production0.Right), len(production1.Right)))):
                            if production0.Right[i] == production1.Right[i]:
                                lpc += 1
                            else:
                                break
                        # When there is a common prefix the following change is made:
                        # E -> aA | aB
                        # becomes:
                        # E -> aE'
                        # E' -> A | B
                        if lpc > 0:
                            primes += '\''
                            temp = G.NonTerminal(f"{nonterminal.Name}{primes}", False)
                            # Both originals are removed from the symbol and from the grammar.
                            nonterminal.productions.remove(production0)
                            nonterminal.productions.remove(production1)
                            G.Productions.remove(production0)
                            G.Productions.remove(production1)
                            nonterminal %= Sentence(*production0.Right[0:lpc] + (temp, ))
                            alpha = production0.Right[lpc:]
                            betha = production1.Right[lpc:]
                            # An empty suffix becomes an epsilon production.
                            if len(alpha) == 0:
                                temp %= G.Epsilon
                            else:
                                temp %= Sentence(*alpha)
                            if len(betha) == 0:
                                temp %= G.Epsilon
                            else:
                                temp %= Sentence(*betha)
                            change = True
                            _continue = True
                            break
                if _continue:
                    continue
    return G
def G2():
    """
    Sample expression grammar whose start symbol is named 'A':

        A -> E
        E -> T | E + T
        T -> F | T * F
        F -> n | ( E )
    """
    grammar = Grammar()
    start = grammar.NonTerminal('A', True)
    expr, term, factor = grammar.NonTerminals('E T F')
    plus, opar, cpar, mul, num = grammar.Terminals('+ ( ) * n')

    start %= expr
    expr %= term | expr + plus + term
    term %= factor | term + mul + factor
    factor %= num | opar + expr + cpar

    return grammar
def remove_ambiguity(G: Grammar):
    """
    Factor out productions of a non-terminal that start with the same symbol.

    Productions of one non-terminal are grouped by the name of their first
    symbol. Every group with more than one production is replaced by a single
    production `nt -> first new_nt`, and the remainders of the grouped bodies
    become productions of `new_nt` (an empty remainder becomes epsilon).
    The grammar's productions are rebuilt after each pass, and passes repeat
    until one makes no change.

    The grammar is modified in place; nothing is returned.
    """
    change = True
    while change:
        change = False
        # Same list object as G.Productions: it is edited below and then
        # re-added to the cleared grammar at the end of the pass.
        prods = G.Productions
        for nt in G.nonTerminals:
            p_dict = {}  # pi.Right[0] : {p1, p2, ..., pn}
            for p in nt.productions:
                if p.IsEpsilon:
                    continue
                try:
                    p_dict[p.Right[0].Name].append(p)
                except KeyError:
                    p_dict[p.Right[0].Name] = [p]
            # Quote suffix for the next helper non-terminal of `nt`.
            next_appendix = "'"
            for p_set in p_dict.values():
                if len(p_set) > 1:  # Means nt has ambiguous production (all in p_set)
                    new_left = G.NonTerminal(nt.Name + next_appendix)
                    next_appendix = next_appendix + "'"
                    for p in p_set:
                        new_right = p.Right[1:]
                        if len(new_right) == 0:
                            prods.append(Production(new_left, G.Epsilon))
                        else:
                            prods.append(
                                Production(new_left, Sentence(*new_right)))
                        prods.remove(p)
                    # `p` is the last production of the group; every member
                    # shares the same first symbol name.
                    prods.append(Production(nt, Sentence(p.Right[0], new_left)))
                    change = True
        # Replacing old productions by new productions
        G.Productions = []  # Clean grammar productions
        for nt in G.nonTerminals:  # Clean non terminals productions
            nt.productions = []
        for p in prods:  # Add new productions
            G.Add_Production(p)
def remove_left_recursion(G: Grammar):
    """
    Eliminate immediate left recursion from every non-terminal of `G`.

    For a non-terminal `A` with at least one production `A -> A x`, a new
    non-terminal `A'` is created and the productions of `A` become

        A  -> y A'        for every production A -> y not starting with A
        A  -> A'          for an epsilon production of A
        A' -> x A'        for every production A -> A x
        A' -> epsilon

    Productions of other non-terminals are kept as they are. Epsilon
    productions are never indexed, so grammars that contain them (including
    the output of a previous call) are handled.

    The grammar is modified in place; nothing is returned.
    """

    def is_left_recursive(p) -> bool:
        """ True if the body of `p` starts with its own head. """
        return not p.IsEpsilon and p.Left == p.Right[0]

    def has_lr(nt: NonTerminal) -> bool:
        """ True if `nt` has left recursion. """
        return any(is_left_recursive(p) for p in nt.productions)

    new_prods = []
    for nt in G.nonTerminals:
        if not has_lr(nt):
            new_prods.extend(nt.productions)
            continue

        new_symbol = G.NonTerminal(nt.Name + "'")
        for p in nt.productions:
            if p.IsEpsilon:
                # A -> epsilon becomes A -> A'
                new_prods.append(Production(p.Left, Sentence(new_symbol)))
            elif is_left_recursive(p):
                # A -> A xyz becomes A' -> xyz A'
                new_prods.append(
                    Production(new_symbol, Sentence(*p.Right[1:], new_symbol)))
            else:
                # A -> xyz becomes A -> xyz A'
                new_prods.append(
                    Production(p.Left, Sentence(*p.Right[0:], new_symbol)))
        new_prods.append(Production(new_symbol, G.Epsilon))

    # Replacing old productions by new productions
    G.Productions = []  # Clean grammar productions
    for nt in G.nonTerminals:  # Clean non terminals productions
        nt.productions = []
    for p in new_prods:  # Add new productions
        G.Add_Production(p)
from cmp.ast import *
from utils.macros import *
from cmp.pycompiler import Grammar
from cmp.utils_parser import LR1Parser

# Grammar object on which every symbol below is declared.
CoolGrammar = Grammar()

# non-terminals
program = CoolGrammar.NonTerminal('<program>', startSymbol=True)
class_list, def_class = CoolGrammar.NonTerminals('<class-list> <def-class>')
feature_list, feature = CoolGrammar.NonTerminals('<feature-list> <feature>')
param_list, param = CoolGrammar.NonTerminals('<param-list> <param>')
expr, member_call, expr_list, let_list, case_list = CoolGrammar.NonTerminals(
    '<expr> <member-call> <expr-list> <let-list> <case-list>')
truth_expr, comp_expr = CoolGrammar.NonTerminals('<truth-expr> <comp-expr>')
arith, term, factor, factor_2 = CoolGrammar.NonTerminals(
    '<arith> <term> <factor> <factor-2>')
atom, func_call, arg_list = CoolGrammar.NonTerminals(
    '<atom> <func-call> <arg-list>')

# terminals
# Names that clash with Python keywords or builtins carry an `x` suffix
# (classx, ifx, elsex, whilex, inx, notx).
classx, inherits = CoolGrammar.Terminals('class inherits')
ifx, then, elsex, fi = CoolGrammar.Terminals('if then else fi')
whilex, loop, pool = CoolGrammar.Terminals('while loop pool')
let, inx = CoolGrammar.Terminals('let in')
case, of, esac = CoolGrammar.Terminals('case of esac')
# Punctuation, plus the two arrows `<-` and `=>`.
semi, colon, comma, dot, at, opar, cpar, ocur, ccur, larrow, rarrow = CoolGrammar.Terminals(
    '; : , . @ ( ) { } <- =>')
# Arithmetic and unary operators.
plus, minus, star, div, isvoid, compl = CoolGrammar.Terminals(
    '+ - * / isvoid ~')
# Boolean negation and comparison operators.
notx, less, leq, equal = CoolGrammar.Terminals('not < <= =')
from cmp.pycompiler import Grammar
from cool_ast import *

# Grammar object on which every symbol and production below is declared.
G = Grammar()

# Terminals
# Keywords and assignment; the text inside each string is the terminal name.
classx, inherits, selfx, let, assigment, ifx, thenx, elsex, fi, = G.Terminals(
    'class inherits self let assign if then else fi')
whilex, loop, pool, case, of, esac, case_assigment, new, isvoid, equal, = G.Terminals(
    'while loop pool case of esac rettype new isvoid =')
# Comparison, arithmetic and punctuation.
less, less_equal, plus, minus, star, div, semi, colon, comma, complement = G.Terminals('< lesseq + - * / ; : , ~')
# Delimiters, remaining keywords and literal/identifier token kinds.
dot, opar, cpar, ocur, ccur, inx, notx, idx, intx, string, true, false, at = G.Terminals(
    '. ( ) { } in not id int string true false @')

# Non-terminals
program = G.NonTerminal('<program>', True)
class_list, def_class = G.NonTerminals('<class-list> <class>')
feature_list, def_func, def_attr = G.NonTerminals('<feature_list> <def-func> <def-attr>')
params_list, param = G.NonTerminals('<params-list> <param>')
expr_list, expr = G.NonTerminals('<expr-list> <expr>')
cmp, arith, term, factor, atom = G.NonTerminals('<cmp> <arith> <term> <factor> <atom>')
func_call, arg_list = G.NonTerminals('<func-call> <arg-list>')
let_list, let_single = G.NonTerminals('<let-list> <let-single>')
case_list, case_single = G.NonTerminals('<case-list> <case-single>')

# Productions
# Each rule receives (h, s); s[i] is the value of the i-th body symbol (1-based).
program %= class_list, lambda h, s: ProgramNode(s[1])

# A class list is one or more `<class> ;` items, collected into a Python list.
class_list %= def_class + semi + class_list, lambda h, s: [s[1]] + s[3]
class_list %= def_class + semi, lambda h, s: [s[1]]
import first_follow as ff
from cmp.pycompiler import Grammar

# Arithmetic-expression grammar without left recursion:
#   E -> T X
#   X -> + T X | - T X | epsilon
#   T -> F Y
#   Y -> * F Y | / F Y | epsilon
#   F -> num | ( E )
G = Grammar()
E = G.NonTerminal('E', True)
T, F, X, Y = G.NonTerminals('T F X Y')
plus, minus, star, div, opar, cpar, num = G.Terminals('+ - * / ( ) num')

E %= T + X
X %= plus + T + X | minus + T + X | G.Epsilon
T %= F + Y
Y %= star + F + Y | div + F + Y | G.Epsilon
F %= num | opar + E + cpar

import cmp.languages

# The FIRST sets computed by first_follow must equal the ones exposed by
# BasicXCool for the same grammar.
xcool = cmp.languages.BasicXCool(G)
firsts = ff.compute_firsts(G)
assert firsts == xcool.firsts
def unit_testing():
    """
    Self-check for the LL(1) parser library.

    Builds an attributed arithmetic grammar and then checks, in order:
      1. the parsing table built by `_build_parsing_table` against the
         table exposed by `BasicXCool`;
      2. the left parse produced by the generated parser for
         `num * num * num + num * num + num + num`;
      3. the evaluation of `5.9 + 4`, `1 - 1 - 1` and `1 - ( 1 - 1 )`
         through `_evaluate_parse`.

    Prints a progress line after each stage and returns the string "LL1".
    Raises AssertionError when a check fails.
    """
    G = Grammar()
    E = G.NonTerminal('E', True)
    T, F, X, Y = G.NonTerminals('T F X Y')
    plus, minus, star, div, opar, cpar, num = G.Terminals('+ - * / ( ) num')

    # Attribute rules: first lambda is for the head, the rest line up with
    # the body symbols (None = no rule for that symbol).
    E %= T + X, lambda h, s: s[2], None, lambda h, s: s[1]

    X %= plus + T + X, lambda h, s: s[3], None, None, lambda h, s: s[2] + h[0]
    X %= minus + T + X, lambda h, s: s[3], None, None, lambda h, s: h[0] - s[2]
    X %= G.Epsilon, lambda h, s: h[0]

    T %= F + Y, lambda h, s: s[2], None, lambda h, s: s[1]

    Y %= star + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0] * s[2]
    Y %= div + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0]/s[2]
    Y %= G.Epsilon, lambda h, s: h[0]

    F %= num, lambda h, s: float(s[1]), None
    F %= opar + E + cpar, lambda h, s: s[2], None, None, None

    xcool = BasicXCool(G)
    tokens = [num, star, num, star, num, plus, num, star, num, plus, num, plus, num, G.EOF]

    # Stage 1: parsing table.
    M = _build_parsing_table(G, xcool.firsts, xcool.follows)
    assert M == xcool.table, "Test Error in build_parsing_table"

    print(" - buider table ;) ")

    ####################################################################
    # Stage 2: left parse of the token sequence above.
    parser = _buid_parsing_func(G, M)
    left_parse, error = parser(tokens)

    assert error == []
    assert left_parse == [
        Production(E, Sentence(T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, G.Epsilon),
    ], "Test Error in parser_library.LL1.parser"

    print(" - buider func ;) ")

    ###################################################################
    # Stage 3: evaluation of tokenized text.
    fixed_tokens = {
        '+': Token('+', plus),
        '-': Token('-', minus),
        '*': Token('*', star),
        '/': Token('/', div),
        '(': Token('(', opar),
        ')': Token(')', cpar),
    }

    def tokenize_text(text):
        """
        Split `text` on whitespace and turn each item into a Token:
        anything `float()` accepts becomes a `num` token, anything else
        must be one of `fixed_tokens`. An EOF token is appended.
        """
        tokens = []
        for item in text.split():
            try:
                float(item)
                token = Token(item, num)
            except ValueError:
                try:
                    token = fixed_tokens[item]
                except KeyError:
                    # Only a missing key means "unknown token"; a bare
                    # except would also swallow KeyboardInterrupt/SystemExit.
                    raise Exception('Undefined token')
            tokens.append(token)
        eof = Token('$', G.EOF)
        tokens.append(eof)
        return tokens

    text = '5.9 + 4'
    tokens = [Token('5.9', num), Token('+', plus), Token('4', num), Token('$', G.EOF)]
    left_parse, error = parser(tokens)
    assert len(left_parse) == 9 and len(error) == 0, "Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == 9.9, "Test Error in eval parser"

    text = '1 - 1 - 1'
    tokens = tokenize_text(text)
    left_parse, error = parser(tokens)
    assert len(left_parse) == 13 and len(error) == 0, "Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == -1, "Test Error in eval parser"

    text = '1 - ( 1 - 1 )'
    tokens = tokenize_text(text)
    left_parse, error = parser(tokens)
    assert len(left_parse) == 18 and len(error) == 0, "Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == 1, "Test Error in eval parser"

    print(" - method eval ;) ")

    #############################################################
    return "LL1"
def get_grammar():
    """
    Build the attributed grammar for the COOL language.

    Each production carries a rule `lambda h, s: ...` that builds the AST
    node (or Python list) for its head; `s[i]` is the value of the i-th
    symbol of the body (1-based).

    Returns the tuple `(G, idx, num, string, ocur, ccur, semi)`: the grammar
    followed by the terminals for identifiers, integers, strings, `{`, `}`
    and `;`.
    """
    # grammar
    G = Grammar()

    # non-terminals
    program = G.NonTerminal('<program>', startSymbol=True)
    class_list, def_class = G.NonTerminals('<class-list> <def-class>')
    feature_list, def_attr, def_func = G.NonTerminals('<feature-list> <def-attr> <def-func>')
    param_list, param, expr_list = G.NonTerminals('<param-list> <param> <expr-list>')
    expr, arith, term, atom = G.NonTerminals('<expr> <arith> <term> <atom>')
    func_call, arg_list, param_list_formal = G.NonTerminals('<func-call> <arg-list> <param-list-formal>')
    if_expr, while_expr, block, let_expr, case_expr = G.NonTerminals('<if-expr> <while_expr> <block> <let-expr> <case-expr>')
    var_decl, let_att, case_body, arg_list_formal = G.NonTerminals('<var-decl> <let-att> <case-body> <arg-list-formal>')

    # terminals
    classx, let, _in = G.Terminals('class let in')
    semi, colon, comma, dot, opar, cpar, ocur, ccur, at = G.Terminals('; : , . ( ) { } @')
    equal, plus, minus, star, div = G.Terminals('= + - * /')
    idx, num, string, new, _self = G.Terminals('id int string new self')
    inheritsx, left_arrow, right_arrow, _not, isvoid, complement = G.Terminals('inherits <- => not isvoid ~')
    minor, minor_eq, true, false = G.Terminals('< <= true false')
    _if, fi, _else, then = G.Terminals('if fi else then')
    _while, loop, pool = G.Terminals('while loop pool')
    case, of, esac, double_quote = G.Terminals('case of esac "')

    # productions
    program %= class_list, lambda h,s: ProgramNode(s[1])

    # <class-list>: one or more `<def-class> ;`
    class_list %= def_class + semi, lambda h,s: [ s[1] ]
    class_list %= def_class + semi + class_list, lambda h,s: [ s[1] ] + s[3]

    # <def-class>: with or without `inherits <id>`
    def_class %= classx + idx + ocur + feature_list + ccur, lambda h,s: ClassDeclarationNode(s[2], s[4])
    def_class %= classx + idx + inheritsx + idx + ocur + feature_list + ccur, lambda h,s: ClassDeclarationNode(s[2], s[6], s[4])

    # <feature-list>: possibly empty sequence of attributes and methods
    feature_list %= G.Epsilon, lambda h,s: [ ]
    feature_list %= def_attr + semi + feature_list, lambda h,s: [ s[1] ] + s[3]
    feature_list %= def_func + semi + feature_list, lambda h,s: [ s[1] ] + s[3]

    # <def-attr>: optional initializer after `<-`
    def_attr %= idx + colon + idx, lambda h,s: AttrDeclarationNode(s[1], s[3])
    def_attr %= idx + colon + idx + left_arrow + expr, lambda h,s: AttrDeclarationNode(s[1], s[3], s[5])

    # <def-func>
    def_func %= idx + opar + param_list + cpar + colon + idx + ocur + expr + ccur, lambda h,s: FuncDeclarationNode(s[1], s[3], s[6], s[8])

    # <param-list>: empty or a non-empty formal list
    param_list %= param_list_formal, lambda h,s: s[1]
    param_list %= G.Epsilon, lambda h,s: []

    # <param-list-formal>: comma-separated, at least one
    param_list_formal %= param, lambda h,s: [ s[1] ]
    param_list_formal %= param + comma + param_list_formal, lambda h,s: [ s[1] ] + s[3]

    # <param>: yields the two-element list [name, type]
    param %= idx + colon + idx, lambda h,s: [s[1], s[3]]

    # <expr>: comparisons
    expr %= arith, lambda h,s: s[1]
    expr %= expr + minor + arith, lambda h,s: MinorNode(s[1], s[3])
    expr %= expr + minor_eq + arith, lambda h,s: MinorEqualNode(s[1], s[3])
    expr %= expr + equal + arith, lambda h,s: EqualNode(s[1], s[3])

    # <arith>: additive level
    arith %= term, lambda h,s: s[1]
    arith %= arith + plus + term, lambda h,s: PlusNode(s[1], s[3])
    arith %= arith + minus + term, lambda h,s: MinusNode(s[1], s[3])

    # <term>: multiplicative level
    term %= atom, lambda h,s: s[1]
    term %= term + star + atom, lambda h,s: StarNode(s[1], s[3])
    term %= term + div + atom, lambda h,s: DivNode(s[1], s[3])

    # <atom>: literals, identifiers, parenthesized expressions
    atom %= idx, lambda h,s: VariableNode(s[1])
    atom %= num, lambda h,s: ConstantNumNode(s[1])
    atom %= string, lambda h,s: StringNode(s[1])
    atom %= _self, lambda h,s: SelfNode(s[1])
    atom %= true, lambda h,s: BooleanNode(s[1])
    atom %= false, lambda h,s: BooleanNode(s[1])
    atom %= opar + expr + cpar, lambda h,s: s[2]
    #####
    # <atom> continued: unary operators, calls, assignment and compound expressions
    atom %= _not + atom, lambda h,s: NotNode(s[2])
    atom %= func_call, lambda h,s: s[1]
    atom %= new + idx, lambda h,s: InstantiateNode(s[2])
    atom %= isvoid + atom, lambda h,s: IsVoidNode(s[2])
    atom %= complement + atom, lambda h,s: ComplementNode(s[2])
    atom %= idx + left_arrow + atom, lambda h,s: AssignNode(s[1], s[3])
    atom %= if_expr, lambda h,s: s[1]
    atom %= while_expr, lambda h,s: s[1]
    atom %= block, lambda h,s: s[1]
    atom %= let_expr, lambda h,s: s[1]
    atom %= case_expr, lambda h,s: s[1]

    # <if-expr>
    if_expr %= _if + expr + then + expr + _else + expr + fi, lambda h,s: ConditionalNode(s[2], s[4], s[6])

    # <while-expr>
    while_expr %= _while + expr + loop + expr + pool, lambda h,s: LoopNode(s[2], s[4])

    # <block>
    block %= ocur + expr_list + ccur, lambda h,s: BlockNode(s[2])

    # <let-expr>
    let_expr %= let + let_att + _in + atom, lambda h,s: LetNode(s[2], s[4])

    # <let-att>: comma-separated variable declarations, at least one
    let_att %= var_decl, lambda h,s: [ s[1] ]
    let_att %= var_decl + comma + let_att, lambda h,s: [ s[1] ] + s[3]

    # <var-decl>: optional initializer after `<-`
    var_decl %= idx + colon + idx, lambda h,s: VarDeclarationNode(s[1], s[3])
    var_decl %= idx + colon + idx + left_arrow + expr, lambda h,s: VarDeclarationNode(s[1], s[3], s[5])

    # <case-expr>
    case_expr %= case + expr + of + case_body + esac, lambda h,s: CaseNode(s[2], s[4])

    # <case-body>: one or more branches `id : id => expr ;`
    case_body %= idx + colon + idx + right_arrow + expr + semi, lambda h,s: [ BranchNode(s[1], s[3], s[5]) ]
    case_body %= idx + colon + idx + right_arrow + expr + semi + case_body, lambda h,s: [ BranchNode(s[1], s[3], s[5]) ] + s[7]

    # <expr-list>: one or more `expr ;`
    expr_list %= expr + semi, lambda h,s: [ s[1] ]
    expr_list %= expr + semi + expr_list, lambda h,s: [ s[1] ] + s[3]

    # <func-call>: plain call, dispatch on id / self / parenthesized
    # expression / previous call, each optionally with `@ <id>`
    func_call %= idx + opar + arg_list + cpar, lambda h,s: CallNode(s[1], s[3])
    func_call %= idx + dot + idx + opar + arg_list + cpar, lambda h,s: DispatchNode(VariableNode(s[1]), s[3], s[5])
    func_call %= _self + dot + idx + opar + arg_list + cpar, lambda h,s: DispatchNode(SelfNode(s[1]), s[3], s[5])
    func_call %= opar + expr + cpar + dot + idx + opar + arg_list + cpar, lambda h,s: DispatchNode(s[2], s[5], s[7])
    func_call %= idx + at + idx + dot + idx + opar + arg_list + cpar, lambda h,s: DispatchNode(s[1], s[5], s[7], s[3])
    func_call %= opar + expr + cpar + at + idx + dot + idx + opar + arg_list + cpar, lambda h,s: DispatchNode(s[2], s[7], s[9], s[5])
    func_call %= func_call + dot + idx + opar + arg_list + cpar, lambda h,s: DispatchNode(s[1], s[3], s[5])
    func_call %= func_call + at + idx + dot + idx + opar + arg_list + cpar, lambda h,s: DispatchNode(s[1], s[5], s[7], s[3])

    # <arg-list>: empty or a non-empty formal list
    arg_list %= G.Epsilon, lambda h,s: []
    arg_list %= arg_list_formal, lambda h,s: s[1]

    # <arg-list-formal>: comma-separated expressions, at least one
    arg_list_formal %= expr, lambda h,s: [ s[1] ]
    arg_list_formal %= expr + comma + arg_list_formal, lambda h,s: [ s[1] ] + s[3]

    return G, idx, num, string, ocur, ccur, semi
def get_grammar_from_text(gramm_text, errors):
    """
    Build a Grammar from the text written by the user.

    gramm_text: the grammar as typed by the user. It is first normalized by
        `fix_gram_text`; after that step every non-empty line is expected to
        look like

            Head > sym sym ... | sym sym ... | ...

        (head and body separated by ' > ', alternatives by ' | ', symbols by
        single spaces).
        NOTE(review): the previous docstring showed `~` and a trailing `;`
        in the user syntax - presumably `fix_gram_text` rewrites those;
        confirm.
    errors: list that receives one message per problem found.

    The head of the first line is the start symbol. Every symbol that never
    appears as a head is declared as a terminal, except the reserved names
    `epsilon` and `$`, which map to the grammar's Epsilon and EOF.
    Productions are read up to the first empty line (or the end of the text).

    Returns the Grammar, or None when `fix_gram_text` yields nothing.
    """
    gramm_text = fix_gram_text(gramm_text,errors)

    if not gramm_text:
        return None

    G = Grammar()
    dic_sym = {}
    gramm = gramm_text.split('\n')

    # First pass: collect heads (non-terminals) and every symbol mentioned.
    distinguish = ''
    symbols = set()
    non_terminals = set()
    for s in gramm:
        if s:
            s = s.split(' > ')
            if not distinguish:
                distinguish = s[0]
            non_terminals.add(s[0])
            symbols.add(s[0])
            for ps in s[1].split(' | '):
                for q in ps.split(' '):
                    symbols.add(q)

    # Reserved names must not become ordinary terminals of the grammar.
    reserved = {"epsilon": G.Epsilon, "$": G.EOF}
    terminals = symbols.difference(non_terminals).difference(reserved)
    non_terminals.remove(distinguish)

    S = G.NonTerminal(distinguish,True)
    dic_sym[distinguish] = S

    for x in G.NonTerminals(' '.join(non_terminals)):
        dic_sym[x.Name] = x

    for x in G.Terminals(' '.join(terminals)):
        dic_sym[x.Name] = x

    dic_sym.update(reserved)

    # Second pass: register the productions line by line.
    for line in gramm:
        if line == "":
            # The first empty line ends the grammar. Iterating the list
            # (instead of indexing past its end) also covers text that has
            # no trailing empty line.
            break
        s = line.split(" > ")
        for prod in s[1].split(" | "):
            p = prod.split(" ")
            try:
                temp = dic_sym[p[0]]
            except KeyError:
                errors.append(f'{p[0]} is not defined in the terminals or in the non_terminals')
                break
            for ter in p[1:]:
                try:
                    temp += dic_sym[ter]
                except KeyError:
                    errors.append(f'{ter} is not defined in the terminals or in the non_terminals')
                    break
            else:
                # Only a fully resolved body is registered; a body cut short
                # by an unknown symbol is reported above and skipped.
                try:
                    dic_sym[s[0]] %= temp
                except TypeError:
                    errors.append(f'A Non Terminal cant be left part of a production: {s}')
                    break

    return G
def define_cool_grammar(print_grammar=False):
    """
    Build the attributed grammar for the COOL language.

    Each production carries a rule `lambda h, s: ...` that builds the AST
    node (or Python list/tuple) for its head; `s[i]` is the value of the
    i-th symbol of the body (1-based). Several rules also pass one of the
    body values as an extra positional or `token=` argument of the node.

    print_grammar: when true, the grammar is printed before returning.

    Returns the tuple `(G, idx, type_id, string, num)`.
    """
    # grammar
    G = Grammar()

    # non-terminals
    program = G.NonTerminal("<program>", startSymbol=True)
    class_list, def_class = G.NonTerminals("<class-list> <def-class>")
    feature_list, def_attr, def_func = G.NonTerminals(
        "<feature-list> <def-attr> <def-func>")
    param_list, param_list_rest, param = G.NonTerminals(
        "<param-list> <param-list-rest> <param>")
    expr, not_exp, comp, arith, term, factor, element, atom = G.NonTerminals(
        "<expr> <not_exp> <comp> <arith> <term> <factor> <element> <atom>")
    identifiers_list, identifier_init = G.NonTerminals(
        "<ident-list> <ident-init>")
    block, case_block, case_item = G.NonTerminals(
        "<block> <case-block> <case-item>")
    func_call, arg_list, arg_list_rest = G.NonTerminals(
        "<func-call> <arg-list> <arg-list-rest>")

    # terminals
    classx, inherits, notx, isvoid = G.Terminals("class inherits not isvoid")
    let, inx = G.Terminals("let in")
    ifx, then, elsex, fi = G.Terminals("if then else fi")
    whilex, loop, pool = G.Terminals("while loop pool")
    case, of, esac = G.Terminals("case of esac")
    semi, colon, comma, dot, opar, cpar, ocur, ccur, at, larrow, rarrow = G.Terminals(
        "; : , . ( ) { } @ <- =>")
    # '=' is declared exactly once here: the previous declaration listed it
    # twice, which left an extra '=' terminal in the grammar that no
    # production referenced.
    plus, minus, star, div, less, equal, lesseq, neg = G.Terminals(
        "+ - * / < = <= ~")
    idx, type_id, num, new, string, true, false = G.Terminals(
        "id type_id int new string true false")

    # productions
    program %= class_list, lambda h, s: ProgramNode(s[1])

    # classes: one or more class definitions, each ending in ';'
    class_list %= def_class + class_list, lambda h, s: [s[1]] + s[2]
    class_list %= def_class, lambda h, s: [s[1]]

    def_class %= (
        classx + type_id + ocur + feature_list + ccur + semi,
        lambda h, s: ClassDeclarationNode(s[2], s[4], s[1]),
    )
    def_class %= (
        classx + type_id + inherits + type_id + ocur + feature_list + ccur + semi,
        lambda h, s: ClassDeclarationNode(s[2], s[6], s[1], s[4]),
    )

    # features: possibly empty sequence of attributes and methods
    feature_list %= def_attr + semi + feature_list, lambda h, s: [s[1]] + s[3]
    feature_list %= def_func + semi + feature_list, lambda h, s: [s[1]] + s[3]
    feature_list %= G.Epsilon, lambda h, s: []

    def_attr %= (
        idx + colon + type_id + larrow + expr,
        lambda h, s: AttrDeclarationNode(s[1], s[3], s[5], s[4]),
    )
    def_attr %= idx + colon + type_id, lambda h, s: AttrDeclarationNode(
        s[1], s[3], token=s[2])

    def_func %= (
        idx + opar + param_list + cpar + colon + type_id + ocur + expr + ccur,
        lambda h, s: FuncDeclarationNode(s[1], s[3], s[6], s[8], s[2]),
    )

    # formal parameters: each one yields the tuple (name, type)
    param_list %= param + param_list_rest, lambda h, s: [s[1]] + s[2]
    param_list %= param, lambda h, s: [s[1]]
    param_list %= G.Epsilon, lambda h, s: []

    param_list_rest %= comma + param + param_list_rest, lambda h, s: [s[2]] + s[3]
    param_list_rest %= comma + param, lambda h, s: [s[2]]

    param %= idx + colon + type_id, lambda h, s: (s[1], s[3])

    # expressions: assignment, let, not, comparison
    expr %= idx + larrow + expr, lambda h, s: AssignNode(s[1], s[3], s[2])
    expr %= let + identifiers_list + inx + expr, lambda h, s: LetNode(
        s[2], s[4], s[1])
    expr %= notx + comp, lambda h, s: NotNode(s[2], s[1])
    expr %= comp, lambda h, s: s[1]

    # let bindings: comma-separated, at least one
    identifiers_list %= (
        identifier_init + comma + identifiers_list,
        lambda h, s: [s[1]] + s[3],
    )
    identifiers_list %= identifier_init, lambda h, s: [s[1]]

    identifier_init %= (
        idx + colon + type_id + larrow + expr,
        lambda h, s: VarDeclarationNode(s[1], s[3], s[5]),
    )
    identifier_init %= idx + colon + type_id, lambda h, s: VarDeclarationNode(
        s[1], s[3])

    # comparisons; the right operand may be a `not <expr>`
    comp %= arith + less + arith, lambda h, s: LessNode(s[1], s[3], s[2])
    comp %= arith + less + notx + expr, lambda h, s: LessNode(
        s[1], NotNode(s[4], s[3]), s[2])
    comp %= arith + equal + arith, lambda h, s: EqualNode(s[1], s[3], s[2])
    comp %= arith + equal + notx + expr, lambda h, s: EqualNode(
        s[1], NotNode(s[4], s[3]), s[2])
    comp %= arith + lesseq + arith, lambda h, s: LessEqualNode(
        s[1], s[3], s[2])
    comp %= arith + lesseq + notx + expr, lambda h, s: LessEqualNode(
        s[1], NotNode(s[4], s[3]), s[2])
    comp %= arith, lambda h, s: s[1]

    # additive level
    arith %= arith + plus + term, lambda h, s: PlusNode(s[1], s[3], s[2])
    arith %= arith + minus + term, lambda h, s: MinusNode(s[1], s[3], s[2])
    arith %= term, lambda h, s: s[1]

    # multiplicative level
    term %= term + star + factor, lambda h, s: StarNode(s[1], s[3], s[2])
    term %= term + div + factor, lambda h, s: DivNode(s[1], s[3], s[2])
    term %= factor, lambda h, s: s[1]

    # unary operators
    factor %= isvoid + element, lambda h, s: IsvoidNode(s[2], s[1])
    factor %= neg + element, lambda h, s: NegNode(s[2], s[1])
    factor %= element, lambda h, s: s[1]

    # compound expressions, dispatch and atoms
    element %= (
        ifx + expr + then + expr + elsex + expr + fi,
        lambda h, s: IfNode(s[2], s[4], s[6], s[1]),
    )
    element %= whilex + expr + loop + expr + pool, lambda h, s: WhileNode(
        s[2], s[4], s[1])
    element %= case + expr + of + case_block + esac, lambda h, s: CaseNode(
        s[2], s[4], s[1])
    element %= new + type_id, lambda h, s: InstantiateNode(s[2], s[1])
    element %= opar + expr + cpar, lambda h, s: s[2]
    element %= ocur + block + ccur, lambda h, s: BlockNode(s[2], s[1])
    # <func-call> yields the tuple (name, args), unpacked into CallNode.
    element %= (element + dot + func_call,
                lambda h, s: CallNode(*s[3], obj=s[1], token=s[2]))
    element %= (
        element + at + type_id + dot + func_call,
        lambda h, s: CallNode(*s[5], obj=s[1], at_type=s[3], token=s[2]),
    )
    element %= func_call, lambda h, s: CallNode(*s[1], )
    element %= atom, lambda h, s: s[1]

    # case branches: one or more `id : type => expr ;`
    case_block %= case_item + case_block, lambda h, s: [s[1]] + s[2]
    case_block %= case_item, lambda h, s: [s[1]]
    case_item %= (
        idx + colon + type_id + rarrow + expr + semi,
        lambda h, s: CaseItemNode(s[1], s[3], s[5], s[4]),
    )

    # atoms
    atom %= num, lambda h, s: ConstantNumNode(s[1])
    atom %= idx, lambda h, s: VariableNode(s[1])
    atom %= (
        true,
        lambda h, s: BooleanNode(s[1]),
    )
    atom %= false, lambda h, s: BooleanNode(s[1])
    atom %= string, lambda h, s: StringNode(s[1])

    # block body: one or more `expr ;`
    block %= expr + semi, lambda h, s: [s[1]]
    block %= expr + semi + block, lambda h, s: [s[1]] + s[3]

    # call: yields the tuple (name, args)
    func_call %= idx + opar + arg_list + cpar, lambda h, s: (s[1], s[3])

    # actual arguments
    arg_list %= expr + arg_list_rest, lambda h, s: [s[1]] + s[2]
    arg_list %= expr, lambda h, s: [s[1]]
    arg_list %= G.Epsilon, lambda h, s: []

    arg_list_rest %= comma + expr + arg_list_rest, lambda h, s: [s[2]] + s[3]
    arg_list_rest %= comma + expr, lambda h, s: [s[2]]

    if print_grammar:
        print(G)

    return (G, idx, type_id, string, num)
from cmp.pycompiler import Grammar, NonTerminal, Terminal
from cool.ast.cool_ast import *

# Module-level grammar object; every symbol declared below is registered on it.
G = Grammar()

# non-terminals
program = G.NonTerminal('<program>', startSymbol=True)
class_list, def_class = G.NonTerminals('<class-list> <def-class>')
feature_list, def_attr, def_func, feature = G.NonTerminals(
    '<feature-list> <def-attr> <def-func> <feature>')
param_list, param, expr_list = G.NonTerminals(
    '<param-list> <param> <expr-list>')
# The name string must list symbols in the same order as the variables on the
# left: previously '<negate>' and '<factor>' were swapped, so `factor` was
# displayed as <negate> and `negate` as <factor>.
expr, boolean, compare, arith, term, factor, negate, atom = G.NonTerminals(
    '<expr> <boolean> <compare> <arith> <term> <factor> <negate> <atom>')
func_call, arg_list, dispatch = G.NonTerminals(
    '<func-call> <arg-list> <dispatch>')
def_var, def_var_list = G.NonTerminals('<def-var> <def-var-list>')
case_check, case_check_list = G.NonTerminals('<case-check> <case-check-list>')
param_list_not_empty, arg_list_not_empty = G.NonTerminals(
    '<param-list-not-empty> <arg-list-not-empty>')

# Terminals
# `if_r` / `loop_r` / `case_r` are the closing keywords 'fi' / 'pool' / 'esac'.
ifx, then, elsex, if_r, whilex, loop, loop_r = G.Terminals(
    'if then else fi while loop pool')
ocur, ccur, colon, semi, comma, dot = G.Terminals('{ } : ; , .')
# `roof` is the '~' terminal.
opar, cpar, plus, minus, div, star, notx, roof = G.Terminals(
    '( ) + - / * not ~')
less, less_eq, greater, greater_eq, equal = G.Terminals('< <= > >= =')
# `arrow` is '=>' and `assign` is '<-'.
let, inx, case, of, case_r, arrow, assign = G.Terminals(
    'let in case of esac => <-')
items_to_expand = items_to_conflict_way + items_conflict_way #print(items_to_expand) conflict_string = FindConflictString(items_to_expand) if not conflict_string is None: text += "Cadena de conflicto:\n" for char in conflict_string: text += str(char) text += "\n" return text, automaton, parser G = Grammar() A = G.NonTerminal('A', True) B, C, D = G.NonTerminals('B C D') a, b, c, d = G.Terminals('a b c d') """ S %= E E %= T | E + plus + T T %= F | T + mul + F F %= n | opar + E + cpar """ A %= B + d | C + d B %= b + a C %= b + a + d text, automaton, parser = Execute_SLR1(G)
def COOL_grammar():
    """Build and return the attributed grammar used to parse COOL programs.

    A fresh ``Grammar`` is created, its non-terminals and terminals are
    declared, and every production is attached together with a synthesis
    rule ``lambda h, s: ...``.  In those rules ``s[i]`` is used as the value
    of the i-th symbol of the production body (1-based); list-valued
    non-terminals synthesize Python lists and ``<func-call>`` synthesizes the
    pair ``(id, args)``.

    Returns:
        The populated ``Grammar`` instance.
    """
    # grammar
    G = Grammar()

    # non-terminals
    program = G.NonTerminal('<program>', startSymbol=True)
    class_list, def_class = G.NonTerminals('<class-list> <def-class>')
    feature_list, def_attr, def_func = G.NonTerminals(
        '<feature-list> <def-attr> <def-func>')
    param_list, param, expr_list = G.NonTerminals(
        '<param-list> <param> <expr-list>')
    expr, arith, term, factor, atom = G.NonTerminals(
        '<expr> <arith> <term> <factor> <atom>')
    func_call, arg_list = G.NonTerminals('<func-call> <arg-list>')
    let_body, case_body = G.NonTerminals('<let-body> <case-body>')
    assig, negat, cmpx, is_void, compl, insta_func = G.NonTerminals(
        '<assig> <negat> <cmp> <isvoid> <compl> <insta_func>')

    # terminals
    classx, let, inx, inherits = G.Terminals('class let in inherits')
    # `_arrow` is the assignment arrow '<-'; `arrow` is the case arrow '=>'.
    semi, colon, comma, dot, opar, cpar, ocur, ccur, _arrow, arrow = G.Terminals(
        '; : , . ( ) { } <- =>')
    equal, plus, minus, star, div = G.Terminals('= + - * /')
    idx, num, new = G.Terminals('id int new')
    ifx, then, elsex, fi = G.Terminals('if then else fi')
    whilex, loop, pool = G.Terminals('while loop pool')
    casex, of, esac = G.Terminals('case of esac')
    # `great` / `egreat` are the '<' / '<=' terminals; `a` is '@'.
    notx, tilde, great, egreat, isvoid, a = G.Terminals('not ~ < <= isvoid @')

    # productions

    # --- program / classes ---
    program %= class_list, lambda h, s: ProgramNode(s[1])

    class_list %= def_class + class_list, lambda h, s: [s[1]] + s[2]
    class_list %= G.Epsilon, lambda h, s: []

    def_class %= (
        classx + idx + ocur + feature_list + ccur,
        lambda h, s: ClassDeclarationNode(s[2], s[4]),
    )
    def_class %= (
        classx + idx + inherits + idx + ocur + feature_list + ccur,
        lambda h, s: ClassDeclarationNode(s[2], s[6], s[4]),
    )

    # --- features ---
    # The recursive tail is the third body symbol; s[2] is the ';' token, so
    # concatenating it to the list (as the code previously did) was wrong.
    feature_list %= def_attr + semi + feature_list, lambda h, s: [s[1]] + s[3]
    feature_list %= def_func + semi + feature_list, lambda h, s: [s[1]] + s[3]
    feature_list %= G.Epsilon, lambda h, s: []

    def_attr %= idx + colon + idx, lambda h, s: AttrDeclarationNode(s[1], s[3])
    def_attr %= (
        idx + colon + idx + _arrow + expr,
        lambda h, s: AttrDeclarationNode(s[1], s[3], s[5]),
    )

    def_func %= (
        idx + opar + param_list + cpar + colon + idx + ocur + expr + ccur,
        lambda h, s: FuncDeclarationNode(s[1], s[3], s[6], s[8]),
    )

    param_list %= param, lambda h, s: [s[1]]
    param_list %= param + comma + param_list, lambda h, s: [s[1]] + s[3]
    param_list %= G.Epsilon, lambda h, s: []

    param %= idx + colon + idx, lambda h, s: (s[1], s[3])

    # --- expressions ---
    expr %= let + let_body + inx + expr, lambda h, s: LetNode(s[2], s[4])
    expr %= (
        ifx + expr + then + expr + elsex + expr + fi,
        lambda h, s: IfNode(s[2], s[4], s[6]),
    )
    expr %= (
        whilex + expr + loop + expr + pool,
        lambda h, s: WhileNode(s[2], s[4]),
    )
    expr %= ocur + expr_list + ccur, lambda h, s: BlocksNode(s[2])
    expr %= (
        casex + expr + of + case_body + esac,
        lambda h, s: CaseNode(s[2], s[4]),
    )
    expr %= assig, lambda h, s: s[1]

    expr_list %= expr + semi + expr_list, lambda h, s: [s[1]] + s[3]
    expr_list %= G.Epsilon, lambda h, s: []

    let_body %= (
        idx + colon + idx + comma + let_body,
        lambda h, s: [VarDeclarationNode(s[1], s[3])] + s[5],
    )
    let_body %= (
        idx + colon + idx + _arrow + expr + comma + let_body,
        lambda h, s: [VarDeclarationNode(s[1], s[3], s[5])] + s[7],
    )
    let_body %= G.Epsilon, lambda h, s: []

    case_body %= (
        idx + colon + idx + semi + case_body,
        lambda h, s: [SingleCaseNode(s[1], s[3])] + s[5],
    )
    case_body %= (
        idx + colon + idx + arrow + expr + semi + case_body,
        lambda h, s: [SingleCaseNode(s[1], s[3], s[5])] + s[7],
    )
    case_body %= G.Epsilon, lambda h, s: []

    # --- operators, from the outermost rule (assignment) down to atoms ---
    assig %= idx + _arrow + assig, lambda h, s: AssignNode(s[1], s[3])
    assig %= negat, lambda h, s: s[1]

    negat %= notx + negat, lambda h, s: NotNode(s[2])
    negat %= cmpx, lambda h, s: s[1]

    cmpx %= cmpx + great + cmpx, lambda h, s: GreatNode(s[1], s[3])
    cmpx %= cmpx + egreat + cmpx, lambda h, s: EqualsGreatNode(s[1], s[3])
    cmpx %= cmpx + equal + cmpx, lambda h, s: EqualsNode(s[1], s[3])
    cmpx %= arith, lambda h, s: s[1]

    arith %= arith + plus + term, lambda h, s: PlusNode(s[1], s[3])
    arith %= arith + minus + term, lambda h, s: MinusNode(s[1], s[3])
    arith %= term, lambda h, s: s[1]

    term %= term + star + factor, lambda h, s: StarNode(s[1], s[3])
    term %= term + div + factor, lambda h, s: DivNode(s[1], s[3])
    term %= factor, lambda h, s: s[1]

    factor %= isvoid + factor, lambda h, s: IsVoidNode(s[2])
    factor %= is_void, lambda h, s: s[1]

    is_void %= tilde + is_void, lambda h, s: ComplementNode(s[2])
    is_void %= compl, lambda h, s: s[1]
    is_void %= num, lambda h, s: ConstantNumNode(s[1])

    # --- dispatch ---
    # <func-call> synthesizes the 2-tuple (id, args), so its components live
    # at indices 0 and 1.  The previous [1]/[2] indexing skipped the id and
    # raised IndexError on the second access.
    compl %= (
        compl + a + idx + dot + func_call,
        lambda h, s: ParentCallNode(s[1], s[3], s[5][0], s[5][1]),
    )
    compl %= insta_func, lambda h, s: s[1]

    insta_func %= (
        insta_func + dot + func_call,
        lambda h, s: InstanceCallNode(s[1], s[3][0], s[3][1]),
    )
    insta_func %= atom, lambda h, s: s[1]

    atom %= func_call, lambda h, s: CallNode(s[1][0], s[1][1])
    atom %= opar + expr + cpar, lambda h, s: s[2]
    atom %= idx, lambda h, s: VariableNode(s[1])
    atom %= new + idx + opar + cpar, lambda h, s: InstantiateNode(s[2])

    func_call %= idx + opar + arg_list + cpar, lambda h, s: (s[1], s[3])

    arg_list %= expr, lambda h, s: [s[1]]
    arg_list %= expr + comma + arg_list, lambda h, s: [s[1]] + s[3]

    return G