def main(self, *args):
    """
    Binarize a grammar loaded from one file prefix and write the result to another

    :param args: either ``(input_prefix, output_prefix)`` or
        ``(input_prefix, output_prefix, mode)`` where mode is ``"string"``
        or ``"tree"``; with two arguments string mode is used
    :raises InvocationException: if the number of arguments is not 2 or 3
    :raises InputFormatException: if mode is neither "string" nor "tree"
    """
    argc = len(args)
    if argc != 2 and argc != 3:
        raise InvocationException()

    if argc == 2:
        input_prefix, output_prefix = args
        mode = MODE_STRING
    else:
        input_prefix, output_prefix, mode_s = args
        if mode_s == "string":
            mode = MODE_STRING
        elif mode_s == "tree":
            mode = MODE_TREE
        else:
            raise InputFormatException("mode must be string or tree")

    grammar = Rule.load_from_file(input_prefix)
    binarized_grammar = {}
    # Each binarize call returns the next free rule id, which is threaded
    # through to the following rule.
    next_id = 0
    for rule_id in grammar:
        source_rule = grammar[rule_id]
        # mode other than MODE_STRING means MODE_TREE
        binarize = (source_rule.binarize if mode == MODE_STRING
                    else source_rule.binarize_tree)
        produced, next_id = binarize(next_id)
        if produced:
            binarized_grammar.update(
                {new_rule.rule_id: new_rule for new_rule in produced})

    Rule.write_to_file(binarized_grammar, output_prefix)
def __remove_left_factoring(grammar):
    """
    Rewrite the productions of each nonterminal, grouped by get_prefixes

    :param grammar: input grammar
    :return: result of __normalize_productions on the rebuilt grammar

    A nonterminal with a single production keeps it unchanged. Otherwise its
    productions are grouped by ``get_prefixes`` (presumably keyed by a shared
    leading symbol, with the remainders as values; verify against that
    helper). A group with one member is emitted as a rule for the nonterminal
    built from that member. A larger group becomes
    ``nonterminal -> prefix X`` plus one ``X -> member`` rule per member,
    where X is a freshly generated nonterminal and an empty member is
    replaced by epsilon.
    """
    factored = Grammar(start=grammar.start, epsilon=grammar.epsilon,
                       eof=grammar.eof)
    rules = []

    for head in grammar.nonterminals:
        alternatives = grammar.productions_for(head)

        if len(alternatives) <= 1:
            rules.append(Rule(head, tuple(alternatives[0])))
            continue

        for prefix, group in get_prefixes(alternatives).items():
            if len(group) == 1:
                rules.append(Rule(head, tuple(group[0])))
                continue

            fresh = __generate_key(grammar, head)
            rules.append(Rule(head, (prefix, fresh)))
            for tail in group:
                # An empty remainder is represented by the epsilon symbol
                tail_body = tuple(tail) if tail else (grammar.epsilon, )
                rules.append(Rule(fresh, tail_body))

    for rule in rules:
        factored.add_rule(rule)

    return __normalize_productions(factored)
def main(self, *args):
    """
    Load a grammar, binarize every rule and write the binarized grammar out

    :param args: ``(input_prefix, output_prefix)`` or
        ``(input_prefix, output_prefix, mode)``; mode is ``'string'`` or
        ``'tree'`` and string mode is used when it is omitted
    :raises InputFormatException: if mode is neither 'string' nor 'tree'
    :raises InvocationException: if the number of arguments is not 2 or 3
    """
    if len(args) == 3:
        input_prefix, output_prefix, mode_s = args
        if mode_s == 'tree':
            mode = MODE_TREE
        elif mode_s == 'string':
            mode = MODE_STRING
        else:
            raise InputFormatException("mode must be string or tree")
    elif len(args) == 2:
        input_prefix, output_prefix = args
        mode = MODE_STRING
    else:
        raise InvocationException()

    grammar = Rule.load_from_file(input_prefix)

    result = {}
    rule_counter = 0  # next free rule id, updated by every binarize call
    for key in grammar:
        current = grammar[key]
        if mode == MODE_STRING:
            pieces, rule_counter = current.binarize(rule_counter)
        else:
            # mode = MODE_TREE
            pieces, rule_counter = current.binarize_tree(rule_counter)

        if not pieces:
            continue
        for piece in pieces:
            result[piece.rule_id] = piece

    Rule.write_to_file(result, output_prefix)
def test_parsing(self):
    """The fixture grammar ``self.g`` equals the same grammar built rule by rule."""
    expected = Grammar(start='P')
    rules = (
        ('P', ('D', )),
        ('D', ('T', ':', 'id', ';', 'D')),
        ('D', ('ε', )),
        ('T', ('real', )),
        ('T', ('int', )),
    )
    for head, body in rules:
        expected.add_rule(Rule(head, body))

    self.assertEqual(self.g, expected)
def parse_bnf(text, epsilon='ε', eof='$'):
    """
    Parse BNF from text

    :param text: grammar specification; lines starting with '#' are ignored
    :param epsilon: empty symbol
    :param eof: EOF symbol
    :return: a grammar whose start symbol is the head of the first production
    :raises InvalidGrammar: if the text contains no production lines, or a
        line does not have exactly one '->' separating head and body

    Productions use the following format:

    Start -> A
    A -> ( A ) | Two
    Two -> a
    Two -> b
    """
    lines = [
        line for line in text.strip().split('\n')
        if not line.startswith('#')
    ]
    if not lines:
        # Text made only of comment lines: report it as an invalid grammar
        # rather than letting lines[0] escape as an IndexError.
        raise InvalidGrammar("Invalid grammar", text)

    try:
        # First rule as starting symbol
        start = lines[0].split('->')[0].strip()
        g = Grammar(start=start, epsilon=epsilon, eof=eof)

        for line in lines:
            # Unpacking raises ValueError unless there is exactly one '->'
            head, body = [part.strip() for part in line.split('->')]
            for alternative in body.split('|'):
                # Symbols of one alternative are whitespace separated
                g.add_rule(Rule(head, tuple(alternative.split())))

        return g
    except ValueError as err:
        raise InvalidGrammar("Invalid grammar", text) from err
def setUp(self):
    """Create three grammars: ``a`` and ``b`` hold the same rules in a different order, ``c`` differs."""
    self.a = Grammar(start='X')
    self.b = Grammar(start='X')
    self.c = Grammar(start='X')

    # NOTE(review): 'world Z' and 'cruel world' are single symbols that
    # contain a space - confirm this is intended rather than two symbols.
    rules_a = (
        ('X', ('hello', 'Y')),
        ('Y', ('world Z', )),
        ('Z', ('?', )),
        ('Z', ('!', )),
    )
    # B will have productions in different order
    rules_b = (
        ('Y', ('world Z', )),
        ('Z', ('!', )),
        ('Z', ('?', )),
        ('X', ('hello', 'Y')),
    )
    rules_c = (
        ('X', ('bye', 'Y')),
        ('Y', ('cruel world', )),
    )

    for target, rules in ((self.a, rules_a),
                          (self.b, rules_b),
                          (self.c, rules_c)):
        for head, body in rules:
            target.add_rule(Rule(head, body))
def remove_immediate_left_recursion(grammar, A):
    """
    Eliminate immediate left-recursion from the productions of one nonterminal

    :param grammar: input grammar
    :param A: the nonterminal
    :return: the productions of A unchanged when none of them is
             left-recursive, otherwise a list of equivalent new rules

    Productions of the form:

        A -> A a1 | A a2 | ... | A am | b1 | b2 | ... | bn

    are replaced with:

        A  -> b1 A' | b2 A' | ... | bn A'
        A' -> a1 A' | a2 A' | ... | am A' | ε
    """
    originals = grammar.productions[A]

    alphas = []  # bodies of the left-recursive productions
    betas = []   # bodies of the remaining productions
    for rule in originals:
        bucket = alphas if rule.is_left_recursive() else betas
        bucket.append(rule.body)

    if not alphas:
        return originals

    fresh = __generate_key(grammar, A)
    suffix = (fresh, )

    # A -> b1 A' | ... | bn A'
    rewritten = [Rule(A, beta + suffix) for beta in betas]
    # A' -> a1 A' | a2 A' | ... | am A'  (the leading symbol of each body is dropped)
    rewritten.extend(Rule(fresh, alpha[1:] + suffix) for alpha in alphas)
    # A' -> ε
    rewritten.append(Rule(fresh, (grammar.epsilon, )))
    return rewritten
def test_parsing_table(self):
    """``self.g.parsing_table()`` returns the expected table and a falsy second value."""
    # (head, lookahead symbols, rule body) - one table cell per lookahead
    rows = (
        ('P', ('real', 'int', '$'), ('D', )),
        ('D', ('int', 'real'), ('T', ':', 'id', ';', 'D')),
        ('D', ('$', ), ('ε', )),
        ('T', ('int', ), ('int', )),
        ('T', ('real', ), ('real', )),
    )
    expected = {}
    for head, lookaheads, body in rows:
        for symbol in lookaheads:
            expected[(head, symbol)] = Rule(head, body)

    table, ambiguous = self.g.parsing_table()

    self.assertFalse(ambiguous)
    self.assertEqual(table, expected)
def test_simple(self):
    """A grammar rendered with str() and parsed with parse_bnf equals the original."""
    original = Grammar(start='E')
    rules = (
        ('E', ('E', '+', 'T')),
        ('E', ('T', )),
        ('T', ('T', '*', 'F')),
        ('T', ('F', )),
        ('F', ('(', 'E', ')')),
        ('F', ('id', )),
    )
    for head, body in rules:
        original.add_rule(Rule(head, body))

    parsed = f.parse_bnf(str(original))

    self.assertEqual(original, parsed)
def remove_left_recursion(g):
    """
    Build an equivalent grammar with no left-recursive productions

    :param g: input grammar
    :return: result of __normalize_productions on the rebuilt grammar

    Nonterminals are processed in the order given by nonterminal_ordering.
    For each Ai, productions of the form ``Ai -> Aj y`` with Aj earlier in
    the order are expanded with the productions of Aj, but only when the
    expansion yields at least one left-recursive rule. The immediate
    left-recursion of Ai is then removed and the resulting rules are added
    to the new grammar.
    """
    working = copy(g)
    result = Grammar(start=working.start,
                     epsilon=working.epsilon,
                     eof=working.eof)
    ordering = nonterminal_ordering(working)

    for position, ai in enumerate(ordering):
        for aj in ordering[:position]:
            # NOTE(review): rules are removed from and added to `working`
            # while its productions for ai are being iterated - confirm the
            # underlying container tolerates this.
            for p_ai in working.productions[ai]:
                body = p_ai.body
                # Only productions of the form Ai -> Aj y are of interest
                if not (body and aj == body[0]):
                    continue

                expanded = [
                    Rule(ai, p_aj.body + body[1:])
                    for p_aj in working.productions[aj]
                ]
                # Replace productions only if there were left-recursive ones
                if any(rule.is_left_recursive() for rule in expanded):
                    working.remove_rule(p_ai)
                    for rule in expanded:
                        working.add_rule(rule)

        for rule in remove_immediate_left_recursion(working, ai):
            result.add_rule(rule)

    return __normalize_productions(result)
def test_immediate_recursion(self):
    """A rule whose body starts with its own head is reported as left-recursive."""
    # The body is three separate symbols. Without the comma between '+' and
    # 'T', Python concatenates the adjacent literals into the single
    # symbol '+T'.
    p = Rule('E', ('E', '+', 'T'))
    self.assertTrue(p.is_left_recursive())
def test_book_example(self):
    """The parsing table of ``test_data.unsolved_left_recursion`` matches the expected table."""
    g = f.parse_bnf(test_data.unsolved_left_recursion)

    # Expected table, written as (head, rule body, lookahead symbols);
    # every lookahead yields one (head, lookahead) -> rule entry.
    rows = (
        ('E', ('T', "E'"), ('(', 'id')),
        ("E'", ('+', 'T', "E'"), ('+', )),
        ("E'", ('ε', ), ('$', ')')),
        ('T', ('F', "T'"), ('(', 'id')),
        ("T'", ('*', 'F', "T'"), ('*', )),
        ("T'", ('ε', ), ('+', ')', '$')),
        ('F', ('id', ), ('id', )),
        ('F', ('(', 'E', ')'), ('(', )),
    )
    expected = {
        (head, symbol): Rule(head, body)
        for head, body, lookaheads in rows
        for symbol in lookaheads
    }

    table, ambiguous = g.parsing_table(is_clean=False)

    self.assertFalse(ambiguous)
    self.assertEqual(table, expected)