def execute_node(node:TrieNode,left,right:list,grammar:Grammar,idx):
    """
    Walk the trie rooted at ``node`` and add the matching productions to ``grammar``.

    node: trie node being visited; its ``value`` is appended to ``right``
    left: non terminal used as head of the production under construction
    right: symbols collected so far for the body (mutated in place)
    grammar: grammar receiving the new non terminals and productions
    idx: counter forwarded to ``generate_name`` when a fresh name is needed
    """
    right.append(node.value)
    children = node.sons
    child_count = len(children)

    if child_count == 0:
        # Leaf: the collected symbols form a complete body.
        grammar.Add_Production(Production(left,SentenceFromIter(right)))
        return

    if child_count == 1 and not node.terminal:
        # Single continuation and no sentence ends here: keep extending the same body.
        only_key = next(iter(children))
        execute_node(children[only_key],left,right,grammar,idx)
        return

    # Either several continuations exist or a sentence ends at this node:
    # close the current body with a fresh non terminal that derives the rest.
    fresh_name,idx = generate_name(grammar,idx,left.Name)
    continuation = grammar.NonTerminal(fresh_name)
    right.append(continuation)
    grammar.Add_Production(Production(left,SentenceFromIter(right)))
    if node.terminal:
        grammar.Add_Production(Production(continuation,grammar.Epsilon))
    for key in children:
        # Every child starts a brand new body headed by the fresh non terminal.
        execute_node(children[key],continuation,[],grammar,idx)
def fix_non_terminal_left_recursion(non_terminal:NonTerminal, grammar, errors):
    '''
    Rewrite the productions of ``non_terminal`` to remove immediate left recursion.

    A fresh non terminal (named through ``generate_name``) is created in ``grammar``.
    Productions starting with ``non_terminal`` are turned into productions of the
    fresh symbol; every other production gets the fresh symbol appended to its body.
    A message is appended to ``errors`` when no production survives for ``non_terminal``.
    '''
    fresh_name,_ = generate_name(grammar,0,non_terminal.Name)
    tail = grammar.NonTerminal(fresh_name)

    original = non_terminal.productions
    non_terminal.productions = []

    tail_prods = set()
    head_prods = set()
    for prod in original:
        body = prod.Right
        if not body.IsEpsilon and body[0] == non_terminal:
            # A -> A alpha   becomes   A' -> alpha A'   (A -> A alone is dropped)
            if len(body) > 1:
                rest = list(body[1:])
                rest.append(tail)
                tail_prods.add(Production(tail,SentenceFromIter(rest)))
            continue

        # A -> beta   becomes   A -> beta A'   and A' may vanish
        symbols = list(body)
        symbols.append(tail)
        tail_prods.add(Production(tail,grammar.Epsilon))
        head_prods.add(Production(non_terminal,SentenceFromIter(symbols)))

    for prod in tail_prods:
        grammar.Add_Production(prod)

    if not head_prods:
        errors.append(f'All productions of {non_terminal} begins with {non_terminal}, no string can be parsed by a left parse')

    for prod in head_prods:
        grammar.Add_Production(prod)
def eliminate_left_recursion(G: Grammar):
    """
    Remove immediate left recursion from ``G`` by editing ``G.Productions`` in place.

    For every non terminal A owning a production whose body starts with A, a new
    non terminal named ``A'`` is created; A -> A alpha becomes A' -> alpha A',
    A -> beta becomes A -> beta A', and A' -> epsilon is appended.
    """
    # Collect, per left-recursive non terminal, all of its productions.
    recursive_prod = {}
    for production in G.Productions:
        if production.Left == production.Right[0]:
            head = production.Left
            for prod in head.productions:
                recursive_prod.setdefault(head, set()).add(prod)

    for non_terminal, group in recursive_prod.items():
        fresh = G.NonTerminals(non_terminal.Name + "'")[0]
        for prod in group:
            body = prod.Right
            rewritten = Sentence()
            is_recursive = body[0] == non_terminal
            # Recursive bodies lose their leading symbol and move to the fresh head.
            start = 1 if is_recursive else 0
            for position in range(start, len(body)):
                rewritten += body[position]
            rewritten += fresh
            new_head = fresh if is_recursive else non_terminal
            G.Productions.append(Production(new_head, rewritten))
            G.Productions.remove(prod)
        G.Productions.append(Production(fresh, G.Epsilon))
def fix_non_derive_terminal(gramm:Grammar,return_derivations = False,left_derivation = True):
    """
    Work on a copy of ``gramm`` and drop the non terminals that never reach a
    string of terminals.

    A production is accepted once none of its body symbols blocks it; its head
    is then recorded as productive and ``update_derivation`` is called for it.
    The scan repeats until no new head is found.

    Returns the copy, or ``(copy, derivation)`` when ``return_derivations`` is true.
    """
    gramm = gramm.copy()

    derivation = {}
    for terminal in gramm.terminals:
        derivation[terminal] = [Production(terminal,Sentence(terminal)),]
    derivation[gramm.Epsilon] = [Production(gramm.Epsilon,Sentence(gramm.Epsilon)),]

    derive_something = set(gramm.terminals)
    pending = set(gramm.Productions)

    previous_size = -1
    while previous_size != len(derive_something):
        previous_size = len(derive_something)
        solved = []
        for prod in pending:
            # any() tests the truthiness of the blocking symbols themselves, so a
            # symbol that evaluates as falsy does not block. Kept exactly as is.
            # NOTE(review): confirm this is how epsilon bodies are meant to pass.
            blocked = any(sym for sym in prod.Right if sym not in derive_something)
            if blocked:
                continue
            derive_something.add(prod.Left)
            update_derivation(prod,derivation,left_derivation)
            solved.append(prod)
        for prod in solved:
            pending.remove(prod)

    unproductive = [nt for nt in gramm.nonTerminals if nt not in derive_something]
    remove_unnecessary_symbols(gramm,unproductive)

    return (gramm,derivation) if return_derivations else gramm
def add_new_productions(prod,new_productions,n_t_epsilon):
    """
    Add to ``new_productions`` the variants of ``prod`` obtained by keeping or
    dropping each body symbol that belongs to ``n_t_epsilon``.

    Ex: A->BCB, n_t_eps={B}
    adds {A->BCB, A->CB, A->BC, A->C}

    Variants with an empty body are skipped. A production without any symbol
    from ``n_t_epsilon`` is added unchanged.
    """
    body = prod.Right
    nullable_at = [False]*len(body)
    nullable_count = 0
    for pos,symbol in enumerate(body):
        if symbol in n_t_epsilon:
            nullable_at[pos] = True
            nullable_count += 1

    # presumably ``permutation(k, 2)`` yields every keep/drop choice of length k
    # -- TODO confirm against its definition
    for perm in permutation(nullable_count,2):
        kept = []
        cursor = 0  # index of the next choice inside ``perm``
        for pos,is_nullable in enumerate(nullable_at):
            if not is_nullable:
                kept.append(body[pos])
                continue
            if perm[cursor]:
                kept.append(body[pos])
            cursor += 1
        if kept:
            new_productions.add(Production(prod.Left,SentenceFromIter(kept)))

    if nullable_count == 0:
        new_productions.add(Production(prod.Left,prod.Right))
def fix_e_productions(gramm:Grammar):
    """
    Build a grammar without epsilon productions from ``gramm``.

    ``remove_e_productions`` supplies the working grammar and the symbols that
    can derive epsilon. When the start symbol is among them, the result is an
    augmented grammar whose start symbol has an epsilon production; otherwise
    it is a copy of the working grammar.
    """
    base,n_t_epsilon = remove_e_productions(gramm)

    if base.startSymbol in n_t_epsilon:
        result = base.AugmentedGrammar(force=True)
        result.Add_Production(Production(result.startSymbol,result.Epsilon))
    else:
        result = base.copy()

    # Expand every production of the input grammar over its nullable symbols.
    expanded = set()
    for prod in gramm.Productions:
        add_new_productions(prod,expanded,n_t_epsilon)

    # The non terminals of the input grammar are emptied before re-adding.
    for non_terminal in gramm.nonTerminals:
        non_terminal.productions = []

    for prod in expanded:
        result.Add_Production(prod)
    return result
def create_productions(G, left, right, new=False, new_symbol=None):
    """
    Build a ``Production`` whose head is a non terminal named ``left``.

    G: grammar used to look up body symbols through ``search``
    left: name of the head; the head is created inside a scratch ``Grammar()``
    right: ``G.Epsilon`` or an iterable of symbol names for the body
    new: when true, a non terminal named ``new_symbol`` is appended to the body
    new_symbol: name used when ``new`` is true

    Names that ``search`` does not resolve become non terminals of the scratch grammar.
    """
    scratch = Grammar()
    head = NonTerminal(left, scratch)

    if right == G.Epsilon:
        return Production(head, scratch.Epsilon)

    symbols = []
    for name in right:
        found = search(G, name)
        symbols.append(found if found else NonTerminal(name, scratch))
    if new:
        symbols.append(NonTerminal(new_symbol, scratch))

    # Fold the symbols into a sentence, starting from the first one.
    body = Sentence(symbols[0])
    for symbol in symbols[1:]:
        body = body + symbol
    return Production(head, body)
def remove_ambiguity(G: Grammar):
    """
    Left-factor ``G`` in place: productions of a non terminal that share their
    first symbol are replaced by one production through a new primed non terminal.
    Passes repeat until one of them changes nothing.
    """
    progress = True
    while progress:
        progress = False
        prods = G.Productions
        for nt in G.nonTerminals:
            # first symbol name -> productions of ``nt`` starting with it
            by_first = {}
            for p in nt.productions:
                if not p.IsEpsilon:
                    by_first.setdefault(p.Right[0].Name, []).append(p)

            suffix = "'"
            for group in by_first.values():
                if len(group) <= 1:
                    continue
                # ``nt`` has several productions with the same first symbol.
                new_left = G.NonTerminal(nt.Name + suffix)
                suffix += "'"
                for p in group:
                    tail = p.Right[1:]
                    if len(tail) == 0:
                        replacement = Production(new_left, G.Epsilon)
                    else:
                        replacement = Production(new_left, Sentence(*tail))
                    prods.append(replacement)
                    prods.remove(p)
                shared_first = group[-1].Right[0]
                prods.append(Production(nt, Sentence(shared_first, new_left)))
                progress = True

        # Rebuild the grammar from the edited production list.
        G.Productions = []
        for nt in G.nonTerminals:
            nt.productions = []
        for p in prods:
            G.Add_Production(p)
def eliminate_left_recursion(G, left, dict):
    """
    Compute the rewritten productions of ``left`` that remove its left recursion.

    G: grammar passed to the ``NonTerminal`` constructor of the new symbol
    left: non terminal being processed
    dict: mapping from a non terminal to its conflicting productions

    Returns ``(new_prods, new_non_terminal)``: ``new_prods`` is a list of
    ``(Production, text)`` pairs and ``new_non_terminal`` is the new symbol's
    name (``str(left) + '1'``).
    """
    new_non_terminal = str(left) + '1'
    conflict_prods = dict[left]

    new_prods = []
    for prod in left.productions:
        _, body = prod
        nt = NonTerminal(new_non_terminal, G)
        if prod in conflict_prods:
            # Conflicting production: drop the first occurrence of ``left``.
            symbols = list(body._symbols)
            symbols.remove(left)
            label = new_non_terminal
        else:
            # A -> b1 A' | b2 A' | ... | bn A'
            symbols = list(body)
            label = left
        sentence = Sentence(*symbols, nt)
        rendered = '+'.join(str(symbol) for symbol in sentence._symbols)
        # NOTE(review): for conflicting productions the text is labelled with the
        # new non terminal while the Production object keeps ``left`` as head --
        # confirm which of the two the caller relies on.
        new_prods.append((Production(left, sentence), f'{label}%={rendered}'))
    return new_prods, new_non_terminal
def remove_unit(G: Grammar):
    """
    Remove unit productions (A -> B with B a non terminal) from ``G`` in place.

    For every non terminal A, the set of non terminals reachable through unit
    productions is computed; each non-unit production of a reachable symbol is
    copied as a production of A. The grammar is then rebuilt without unit
    productions, which also removes unit cycles.
    """

    def is_unit(p: Production) -> bool:
        """ True if production have the form A -> B """
        return len(p.Right) == 1 and p.Right[0].IsNonTerminal

    prods = list(G.Productions)

    # head name -> names reachable through unit productions.
    # Accumulate per head: a dict comprehension keyed by head would keep only
    # the last unit production of a non terminal that has several of them.
    variables = {}
    for p in prods:
        if is_unit(p):
            variables.setdefault(p.Left.Name, set()).add(p.Right[0].Name)

    # Transitive closure of the unit relation.
    changed = True
    while changed:
        changed = False
        for v, reachable in variables.items():
            before = len(reachable)
            for s in tuple(reachable):
                if s == v:  # Do not expand a variable through itself
                    continue
                # ``s`` may not head any unit production; then there is nothing to add.
                reachable.update(x for x in variables.get(s, ()) if x != v)
            if len(reachable) != before:
                changed = True

    # Copy the non-unit productions of every reachable symbol onto the head.
    for v, reachable in variables.items():
        for s in reachable:
            for p in G[s].productions:
                if not is_unit(p):
                    prods.append(Production(G[v], p.Right))

    # Replace old productions by new productions, leaving unit productions out.
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []
    for p in prods:
        if not is_unit(p):
            G.Add_Production(p)
def remove_left_recursion(G: Grammar):
    """
    Remove immediate left recursion from ``G`` in place.

    Expects a grammar without e-productions: the first symbol of every body is
    read. Only productions whose body starts with their own head are rewritten.
    """

    def has_lr(nt: NonTerminal) -> bool:
        """ True if some production of `nt` starts with `nt` itself. """
        return any(p.Left == p.Right[0] for p in nt.productions)

    rebuilt = []
    for nt in G.nonTerminals:
        if not has_lr(nt):
            rebuilt.extend(nt.productions)
            continue

        primed = G.NonTerminal(nt.Name + "'")
        for p in nt.productions:
            if p.Right[0] == p.Left:
                # A -> A xyz   becomes   A' -> xyz A'
                rebuilt.append(Production(primed, Sentence(*p.Right[1:], primed)))
            else:
                # A -> xyz   becomes   A -> xyz A'
                rebuilt.append(Production(p.Left, Sentence(*p.Right[0:], primed)))
        rebuilt.append(Production(primed, G.Epsilon))

    # Rebuild the grammar from the new production list.
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []
    for p in rebuilt:
        G.Add_Production(p)
def shortest_production_path(G, x):
    """
    Breadth-first search from ``x`` back towards the start symbol of ``G``.

    Returns a pair: a sentence form recorded for the start symbol that was
    built around ``x``, and the list of productions used to reach it.
    An assertion fails when the start symbol is never reached.
    """
    sentence_form = {x: Sentence(x)}
    production_path = {x: [Production(x, Sentence(x))]}  # Remove this testing line
    remaining = set(G.Productions)

    queue = deque([x])
    while queue:
        current = queue.popleft()
        consumed = set()
        for production in remaining:
            head, body = production
            if head in sentence_form:
                continue

            # Copy the body, replacing ``current`` by its known sentence form.
            expanded = Sentence()
            uses_current = False
            for symbol in body:
                if symbol == current:
                    uses_current = True
                    expanded += sentence_form[current]
                else:
                    expanded += symbol

            if not uses_current:
                continue
            queue.append(head)
            sentence_form[head] = expanded
            production_path[head] = [production] + production_path[current]
            consumed.add(production)
        remaining -= consumed

    assert G.startSymbol in sentence_form, f'{x} is not reacheable from start symbol {G.startSymbol}'
    start = G.startSymbol
    # The last path entry is the artificial x -> x production; leave it out.
    return sentence_form[start], production_path[start][:-1]
def evaluate(self, context):
    """
    Register the production(s) described by this node.

    ``self.izq`` names the head, looked up in ``context.NTerminals``.
    ``self.der`` is a sentence node, a sentences node (several alternatives),
    or a single symbol name. Each production is stored in
    ``context.Productions`` under the head and added to the head's grammar.
    """
    if isinstance(self.der, SentenceNodeGrammar):
        body = self.der.evaluate(context)
        head = context.NTerminals[str(self.izq)]
        context.Productions[head] = Production(head, body)
        head.Grammar.Add_Production(Production(head, body))
        return

    if isinstance(self.der, SentencesNodeGrammar):
        for alternative in self.der.evaluate(context):
            sentence = alternative
            if isinstance(sentence, SentenceNodeGrammar):
                sentence = alternative.evaluate(context)
            head = context.NTerminals[str(self.izq)]
            try:
                context.Productions[head].append(Production(head, sentence))
            except (KeyError, AttributeError):
                # No entry yet for this head, or the entry is a single
                # Production without ``append``: start a new list, as before.
                context.Productions[head] = [Production(head, sentence)]
            head.Grammar.Add_Production(Production(head, sentence))
        return

    name = str(self.der)
    if name == 'epsilon':
        # Resolve epsilon before any lookup: the name need not be a declared symbol.
        symbol = context.Grammar.Epsilon
    elif name in context.Terminals:
        symbol = context.Terminals[name]
    else:
        symbol = context.NTerminals[name]

    head = context.NTerminals[str(self.izq)]
    context.Productions[head] = Production(head, Sentence(symbol))
    head.Grammar.Add_Production(Production(head, Sentence(symbol)))
def fix_common_prefix(grammar:Grammar):
    """
    Return a copy of ``grammar`` whose productions have no common prefixes.

    For each non terminal, the non-epsilon bodies are inserted in a trie and
    ``execute_node`` turns every top-level branch back into productions.
    The productions list of each non terminal of ``grammar`` is reset on the way.
    """
    G = grammar.copy()
    G.Productions = []
    for non_terminal in grammar.nonTerminals:
        trie = Trie()
        has_epsilon = False
        for prod in non_terminal.productions:
            if prod.Right.IsEpsilon:
                has_epsilon = True
            else:
                trie.add(prod.Right)

        non_terminal.productions = []

        if has_epsilon:
            # ``prod`` is the last production visited by the loop above.
            G.Add_Production(Production(prod.Left,G.Epsilon))

        roots = trie.top.sons
        for key in roots:
            execute_node(roots[key],non_terminal,[],G,0)
    return G
def fix_unit_productions(gramm:Grammar):
    """
    Return a grammar built from a copy of ``gramm`` without productions of
    the form A -> B (a single non terminal as body).

    ``get_unit_tuples`` supplies (head, target) pairs; every non-unit
    production of the target is re-created with the pair's head.
    """
    gramm = gramm.copy()

    unit_productions = {
        prod for prod in gramm.Productions
        if len(prod.Right) == 1 and prod.Right[0].IsNonTerminal
    }
    new_productions = {
        prod for prod in gramm.Productions if prod not in unit_productions
    }

    pending = get_unit_tuples(unit_productions)
    while pending:
        head,target = pending.pop()
        for prod in target.productions:
            if prod in unit_productions:
                continue
            new_productions.add(Production(head,prod.Right))

    return change_grammar_from_productions(gramm,new_productions)
T %= F + Y Y %= star + F + Y | div + F + Y | G.Epsilon F %= num | opar + E + cpar print(G) firsts = compute_firsts(G) follows = compute_follows(G, firsts) M = build_parsing_table(G, firsts, follows) parser = metodo_predictivo_no_recursivo(G, M) left_parse = parser([ num, star, num, star, num, plus, num, star, num, plus, num, plus, num, G.EOF ]) assert left_parse == [ Production(E, Sentence(T, X)), Production(T, Sentence(F, Y)), Production(F, Sentence(num)), Production(Y, Sentence(star, F, Y)), Production(F, Sentence(num)), Production(Y, Sentence(star, F, Y)), Production(F, Sentence(num)), Production(Y, G.Epsilon), Production(X, Sentence(plus, T, X)), Production(T, Sentence(F, Y)), Production(F, Sentence(num)), Production(Y, Sentence(star, F, Y)), Production(F, Sentence(num)), Production(Y, G.Epsilon), Production(X, Sentence(plus, T, X)), Production(T, Sentence(F, Y)),
def table(self):
    """
    Expected parsing table for the arithmetic grammar stored in ``self.G``.

    Keys are ``(non terminal, terminal)`` pairs; each value is a one-element
    list holding the production for that cell.
    """
    G = self.G

    def rule(head, *body):
        # Cell holding head -> body, every symbol looked up by name in G.
        return [Production(G[head], Sentence(*(G[name] for name in body)))]

    def empty(head):
        # Cell holding head -> epsilon.
        return [Production(G[head], G.Epsilon)]

    return {
        (G['E'], G['num']): rule('E', 'T', 'X'),
        (G['E'], G['(']): rule('E', 'T', 'X'),
        (G['X'], G['+']): rule('X', '+', 'T', 'X'),
        (G['X'], G['-']): rule('X', '-', 'T', 'X'),
        (G['X'], G[')']): empty('X'),
        (G['X'], G.EOF): empty('X'),
        (G['T'], G['num']): rule('T', 'F', 'Y'),
        (G['T'], G['(']): rule('T', 'F', 'Y'),
        (G['Y'], G['*']): rule('Y', '*', 'F', 'Y'),
        (G['Y'], G['/']): rule('Y', '/', 'F', 'Y'),
        (G['Y'], G[')']): empty('Y'),
        (G['Y'], G['-']): empty('Y'),
        (G['Y'], G.EOF): empty('Y'),
        (G['Y'], G['+']): empty('Y'),
        (G['F'], G['num']): rule('F', 'num'),
        (G['F'], G['(']): rule('F', '(', 'E', ')'),
    }
def table(self):
    """
    Expected parsing table for the regular-expression grammar stored in ``self.G``.

    Keys are ``(non terminal, terminal)`` pairs; each value is a one-element
    list holding the production for that cell.
    """
    G = self.G

    def rule(head, *body):
        # Cell holding head -> body, every symbol looked up by name in G.
        return [Production(G[head], Sentence(*(G[name] for name in body)))]

    def empty(head):
        # Cell holding head -> epsilon.
        return [Production(G[head], G.Epsilon)]

    return {
        (G['E'], G['symbol']): rule('E', 'T', 'X'),
        (G['E'], G['ε']): rule('E', 'T', 'X'),
        (G['E'], G['(']): rule('E', 'T', 'X'),
        (G['X'], G['|']): rule('X', '|', 'E'),
        (G['X'], G[')']): empty('X'),
        (G['X'], G.EOF): empty('X'),
        (G['T'], G['symbol']): rule('T', 'F', 'Y'),
        (G['T'], G['ε']): rule('T', 'F', 'Y'),
        (G['T'], G['(']): rule('T', 'F', 'Y'),
        (G['Y'], G['symbol']): rule('Y', 'T'),
        (G['Y'], G['ε']): rule('Y', 'T'),
        (G['Y'], G['(']): rule('Y', 'T'),
        (G['Y'], G[')']): empty('Y'),
        (G['Y'], G.EOF): empty('Y'),
        (G['Y'], G['|']): empty('Y'),
        (G['F'], G['symbol']): rule('F', 'A', 'Z'),
        (G['F'], G['ε']): rule('F', 'A', 'Z'),
        (G['F'], G['(']): rule('F', 'A', 'Z'),
        (G['Z'], G['*']): rule('Z', '*'),
        (G['Z'], G.EOF): empty('Z'),
        (G['Z'], G['|']): empty('Z'),
        (G['Z'], G['(']): empty('Z'),
        (G['Z'], G[')']): empty('Z'),
        (G['Z'], G['symbol']): empty('Z'),
        (G['Z'], G['ε']): empty('Z'),
        (G['A'], G['symbol']): rule('A', 'symbol'),
        (G['A'], G['ε']): rule('A', 'ε'),
        (G['A'], G['(']): rule('A', '(', 'E', ')'),
    }
def remove_epsilon(G: Grammar):
    """
    Remove e-productions from ``G`` in place.

    Steps:
    1. find the nullable non terminals (those able to derive epsilon);
    2. for every production, add the variants obtained by dropping each
       non-empty subset of its nullable symbols;
    3. drop epsilon productions and every production that mentions a non
       terminal whose only productions are epsilon, then remove those non
       terminals from ``G.nonTerminals`` and ``G.symbDict``.
    """
    prods = G.Productions

    # Find non terminals that derive in epsilon
    nullables = []
    changed = True
    while changed:
        changed = False
        for prod in prods:
            for symbol in prod.Right:
                if symbol in nullables:
                    continue
                elif not symbol.IsEpsilon:
                    break
            else:
                # Every body symbol is nullable or epsilon.
                if prod.Left not in nullables:
                    nullables.append(prod.Left)
                    changed = True

    # Removing old productions
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []

    # Adding the variants that omit one or more nullable symbols
    for prod in prods:
        body = prod.Right
        nullable_positions = [
            index for index in range(len(body)) if body[index] in nullables
        ]
        for size in range(1, len(nullable_positions) + 1):
            for dropped in it.combinations(nullable_positions, size):
                right_part = [
                    body[j] for j in range(len(body)) if j not in dropped
                ]
                if right_part:
                    new_prod = Production(prod.Left, Sentence(*right_part))
                else:
                    new_prod = Production(prod.Left, G.Epsilon)
                if new_prod not in G.Productions:
                    G.Add_Production(new_prod)

    # Adding old productions
    for prod in prods:
        G.Add_Production(prod)

    prods = G.Productions
    # Must be computed before the per-symbol lists are cleared below.
    useless_symbols = [
        symbol for symbol in nullables
        if all(prod.IsEpsilon for prod in symbol.productions)
    ]

    # Second rebuild: clear the per-non-terminal lists together with the
    # grammar list, as the first rebuild does. Otherwise each non terminal
    # keeps its removed productions and gets the surviving ones twice.
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []

    # Keeping only productions that are not epsilon and mention no useless symbol
    for prod in prods:
        if prod.IsEpsilon or any(symbol in useless_symbols for symbol in prod.Right):
            continue
        G.Add_Production(prod)

    # Removing non terminals symbols with no productions
    for s in useless_symbols:
        G.nonTerminals.remove(s)
        G.symbDict.pop(s.Name)
def unit_testing():
    """
    Self-check of the LL(1) helpers on an attributed arithmetic grammar.

    Builds the grammar, compares the generated parsing table with the one in
    ``BasicXCool``, checks the left parse of a fixed token stream and evaluates
    three expressions. Returns the string "LL1" when every assertion holds.
    """
    G = Grammar()
    E = G.NonTerminal('E', True)
    T,F,X,Y = G.NonTerminals('T F X Y')
    plus, minus, star, div, opar, cpar, num = G.Terminals('+ - * / ( ) num')

    E %= T + X, lambda h,s: s[2], None, lambda h,s: s[1]

    X %= plus + T + X, lambda h,s: s[3], None, None, lambda h,s: s[2] + h[0]
    X %= minus + T + X, lambda h,s: s[3], None, None, lambda h,s: h[0] - s[2]
    X %= G.Epsilon, lambda h,s: h[0]

    T %= F + Y, lambda h,s: s[2], None, lambda h,s: s[1]

    Y %= star + F + Y, lambda h,s: s[3], None, None, lambda h,s: h[0] * s[2]
    Y %= div + F + Y, lambda h,s: s[3], None, None, lambda h,s: h[0]/s[2]
    Y %= G.Epsilon, lambda h,s: h[0]

    F %= num, lambda h,s: float(s[1]), None
    F %= opar + E + cpar, lambda h,s: s[2], None, None, None

    xcool = BasicXCool(G)
    tokens = [num, star, num, star, num, plus, num, star, num, plus, num, plus, num, G.EOF]

    # ---- parsing table -------------------------------------------------
    M = _build_parsing_table(G,xcool.firsts,xcool.follows)
    assert M == xcool.table ,"Test Error in build_parsing_table"
    print(" - buider table ;) ")

    # ---- parsing function ----------------------------------------------
    parser = _buid_parsing_func(G,M)
    left_parse,error = parser(tokens)
    assert error == []
    assert left_parse == [
        Production(E, Sentence(T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, G.Epsilon),
    ] ,"Test Error in parser_library.LL1.parser"
    print(" - buider func ;) ")

    # ---- evaluation ----------------------------------------------------
    fixed_tokens = {
        '+' : Token( '+', plus ),
        '-' : Token( '-', minus ),
        '*' : Token( '*', star ),
        '/' : Token( '/', div ),
        '(' : Token( '(', opar ),
        ')' : Token( ')', cpar ),
    }

    def tokenize_text(text):
        """Split ``text`` on whitespace and turn each piece into a Token."""
        tokens = []
        for item in text.split():
            try:
                float(item)
                token = Token(item, num)
            except ValueError:
                # Not a number: it must be one of the fixed operator tokens.
                try:
                    token = fixed_tokens[item]
                except KeyError:
                    raise Exception('Undefined token')
            tokens.append(token)
        eof = Token('$', G.EOF)
        tokens.append(eof)
        return tokens

    text = '5.9 + 4'
    tokens = [ Token('5.9', num), Token('+', plus), Token('4', num), Token('$', G.EOF) ]
    left_parse,error = parser(tokens)
    assert len(left_parse) == 9 and len(error) == 0,"Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == 9.9,"Test Error in eval parser"

    text = '1 - 1 - 1'
    tokens = tokenize_text(text)
    left_parse,error = parser(tokens)
    assert len(left_parse) == 13 and len(error) == 0,"Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == -1,"Test Error in eval parser"

    text = '1 - ( 1 - 1 )'
    tokens = tokenize_text(text)
    left_parse,error = parser(tokens)
    assert len(left_parse) == 18 and len(error) == 0,"Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == 1,"Test Error in eval parser"
    print(" - method eval ;) ")

    return "LL1"