Beispiel #1
0
    def main(self, *args):
        """Binarize the rules loaded from one file and write them to another.

        Accepts ``input_prefix, output_prefix`` and an optional third
        argument naming the mode: ``"string"`` (used when omitted) or
        ``"tree"``.

        :raises InvocationException: if the number of arguments is not 2 or 3
        :raises InputFormatException: if the mode is neither ``string`` nor
            ``tree``
        """
        if len(args) not in (2, 3):
            raise InvocationException()

        source, target = args[:2]
        mode = MODE_STRING
        if len(args) == 3:
            requested = args[2]
            if requested == "tree":
                mode = MODE_TREE
            elif requested != "string":
                raise InputFormatException("mode must be string or tree")

        rules = Rule.load_from_file(source)
        binarized = {}

        # The id counter is threaded through every call so that each
        # binarization continues numbering where the previous one stopped.
        next_id = 0
        for key in rules:
            rule = rules[key]
            binarize = (rule.binarize if mode == MODE_STRING
                        else rule.binarize_tree)
            produced, next_id = binarize(next_id)
            if produced:
                binarized.update((item.rule_id, item) for item in produced)

        Rule.write_to_file(binarized, target)
Beispiel #2
0
def __remove_left_factoring(grammar):
    """
    Build a new grammar in which alternatives sharing a prefix are factored
    :param grammar: input grammar
    :return: the result of normalizing the newly built grammar

    Nonterminals with a single production are copied unchanged. Otherwise
    the productions are grouped by ``get_prefixes``; a group with several
    entries is replaced by ``nonterminal -> prefix X`` plus one production
    of the freshly generated ``X`` per entry (epsilon for an empty entry).
    """
    factored_grammar = Grammar(start=grammar.start,
                               epsilon=grammar.epsilon,
                               eof=grammar.eof)
    collected = []

    for head in grammar.nonterminals:
        alternatives = grammar.productions_for(head)

        if len(alternatives) <= 1:
            # Nothing to factor: keep the only production as it is.
            collected.append(Rule(head, tuple(alternatives[0])))
            continue

        for prefix, group in get_prefixes(alternatives).items():
            if len(group) == 1:
                # A prefix used by a single alternative is not factored.
                collected.append(Rule(head, tuple(group[0])))
                continue

            # NOTE(review): the input grammar is not modified inside this
            # loop, so confirm __generate_key cannot hand out the same key
            # twice for one nonterminal.
            fresh = __generate_key(grammar, head)
            collected.append(Rule(head, (prefix, fresh)))
            for remainder in group:
                tail = tuple(remainder) if remainder else (grammar.epsilon, )
                collected.append(Rule(fresh, tail))

    for rule in collected:
        factored_grammar.add_rule(rule)
    return __normalize_productions(factored_grammar)
Beispiel #3
0
    def main(self, *args):
        """Load a grammar, binarize every rule and write the result.

        Arguments are ``input_prefix, output_prefix[, mode]`` where ``mode``
        is ``'string'`` (also used when omitted) or ``'tree'``.

        :raises InvocationException: unless 2 or 3 arguments are given
        :raises InputFormatException: if ``mode`` is not ``string`` or
            ``tree``
        """
        if not 2 <= len(args) <= 3:
            raise InvocationException()

        input_prefix, output_prefix, *optional = args
        mode = MODE_STRING
        if optional:
            mode_s = optional[0]
            if mode_s == 'tree':
                mode = MODE_TREE
            elif mode_s != 'string':
                raise InputFormatException("mode must be string or tree")

        grammar = Rule.load_from_file(input_prefix)
        result = {}
        counter = 0

        for rule_id in grammar:
            current = grammar[rule_id]
            # Each binarization returns its rules and the next unused id.
            if mode == MODE_STRING:
                pieces, counter = current.binarize(counter)
            else:
                pieces, counter = current.binarize_tree(counter)
            for piece in pieces or ():
                result[piece.rule_id] = piece

        Rule.write_to_file(result, output_prefix)
 def test_parsing(self):
     # self.g is prepared outside this method; it must equal the grammar
     # assembled here rule by rule.
     expected = Grammar(start='P')
     for head, body in (
         ('P', ('D', )),
         ('D', ('T', ':', 'id', ';', 'D')),
         ('D', ('ε', )),
         ('T', ('real', )),
         ('T', ('int', )),
     ):
         expected.add_rule(Rule(head, body))
     self.assertEqual(self.g, expected)
Beispiel #5
0
def parse_bnf(text, epsilon='ε', eof='$'):
    """
    Parse BNF from text
    :param text: grammar specification
    :param epsilon: empty symbol
    :param eof: EOF symbol
    :return: a grammar; the head of the first production is its start symbol
    :raises InvalidGrammar: if the text holds no productions or a line is
        not of the form ``head -> body``

    Productions use the following format:

    Start -> A
    A -> ( A ) | Two
    Two -> a
    Two -> b

    Blank lines and lines whose first non-blank character is ``#`` are
    ignored.
    """
    stripped = (line.strip() for line in text.strip().split('\n'))
    lines = [line for line in stripped if line and not line.startswith('#')]

    # Without this check a comment-only text would fail with an IndexError
    # on lines[0] instead of the documented InvalidGrammar.
    if not lines:
        raise InvalidGrammar("Invalid grammar", text)

    try:
        # First rule as starting symbol
        start = lines[0].split('->')[0].strip()
        g = Grammar(start=start, epsilon=epsilon, eof=eof)

        for line in lines:
            # Unpacking fails with ValueError unless the line has exactly
            # one '->', which is how malformed lines are detected.
            head, body = [part.strip() for part in line.split('->')]
            for alternative in body.split('|'):
                g.add_rule(Rule(head, tuple(alternative.split())))

        return g
    except ValueError as err:
        raise InvalidGrammar("Invalid grammar", text) from err
    def setUp(self):
        # Three fixtures starting at 'X': a and b receive the same rules in
        # a different order, c receives different rules.
        def build(rules):
            grammar = Grammar(start='X')
            for head, body in rules:
                grammar.add_rule(Rule(head, body))
            return grammar

        self.a = build([
            ('X', ('hello', 'Y')),
            ('Y', ('world Z', )),
            ('Z', ('?', )),
            ('Z', ('!', )),
        ])

        # B will have productions in different order
        self.b = build([
            ('Y', ('world Z', )),
            ('Z', ('!', )),
            ('Z', ('?', )),
            ('X', ('hello', 'Y')),
        ])

        self.c = build([
            ('X', ('bye', 'Y')),
            ('Y', ('cruel world', )),
        ])
Beispiel #7
0
def remove_immediate_left_recursion(grammar, A):
    """
    Eliminate immediate left-recursion from the productions of one nonterminal
    :param grammar: input grammar
    :param A: the nonterminal
    :return: list of equivalent productions. Without left-recursive
        productions the original collection is returned untouched.

    Productions of the shape:
    A -> A a1 | A a2 | ... | A am | b1 | b2 | ... | bn

    Become:
    A -> b1 A' | b2 A' | ... | bn A'
    A' -> a1 A' | a2 A' | ... | am A' | ε
    """
    current = grammar.productions[A]

    # Sort every body into one of two buckets in a single pass.
    with_recursion, without_recursion = [], []
    for production in current:
        bucket = (with_recursion if production.is_left_recursive()
                  else without_recursion)
        bucket.append(production.body)

    if not with_recursion:
        return current

    fresh = __generate_key(grammar, A)
    suffix = (fresh, )

    # A -> b1 A' | ... | bn A'
    rewritten = [Rule(A, beta + suffix) for beta in without_recursion]
    # A' -> a1 A' | ... | am A'  (the leading A of each body is dropped)
    rewritten.extend(Rule(fresh, alpha[1:] + suffix)
                     for alpha in with_recursion)
    # A' -> ε
    rewritten.append(Rule(fresh, (grammar.epsilon, )))

    return rewritten
    def test_parsing_table(self):
        # Expected table, written as (head, body, symbols): one entry
        # keyed by (head, symbol) is created for every listed symbol.
        expected = {}
        for head, body, symbols in (
            ('P', ('D', ), ('real', 'int', '$')),
            ('D', ('T', ':', 'id', ';', 'D'), ('int', 'real')),
            ('D', ('ε', ), ('$', )),
            ('T', ('int', ), ('int', )),
            ('T', ('real', ), ('real', )),
        ):
            for symbol in symbols:
                expected[(head, symbol)] = Rule(head, body)

        table, amb = self.g.parsing_table()
        self.assertFalse(amb)
        self.assertEqual(table, expected)
 def test_simple(self):
     # Round trip: render a hand-built grammar with str(), parse that text
     # with parse_bnf and expect a grammar equal to the first one.
     original = Grammar(start='E')
     for head, body in (
         ('E', ('E', '+', 'T')),
         ('E', ('T', )),
         ('T', ('T', '*', 'F')),
         ('T', ('F', )),
         ('F', ('(', 'E', ')')),
         ('F', ('id', )),
     ):
         original.add_rule(Rule(head, body))
     reparsed = f.parse_bnf(str(original))
     self.assertEqual(original, reparsed)
Beispiel #10
0
def remove_left_recursion(g):
    """
    Remove all left recursions from grammar
    :param g: input grammar
    :return: equivalent grammar with no left-recursions

    Nonterminals are processed in the order given by
    ``nonterminal_ordering``. For each Ai, productions Ai -> Aj y with an
    earlier Aj are expanded with the productions of Aj when that exposes a
    left-recursive production; afterwards the immediate left-recursion of
    Ai is removed and the result is added to the new grammar.
    """
    # NOTE(review): if copy() is the shallow copy.copy, the rule edits
    # below may also reach the containers of g; confirm how Grammar copies.
    temp_grammar = copy(g)
    new_grammar = Grammar(start=temp_grammar.start,
                          epsilon=temp_grammar.epsilon,
                          eof=temp_grammar.eof)
    nonterminals = nonterminal_ordering(temp_grammar)

    for i, ai in enumerate(nonterminals):
        for aj in nonterminals[:i]:
            # Iterate over a snapshot: remove_rule/add_rule below are
            # called with rules of ai, and changing a collection while
            # looping over it can skip entries or raise.
            for p_ai in list(temp_grammar.productions[ai]):
                # Only productions of the form Ai -> Aj y are of interest
                if not (p_ai.body and aj == p_ai.body[0]):
                    continue

                replaced_productions = [
                    Rule(ai, p_aj.body + p_ai.body[1:])
                    for p_aj in temp_grammar.productions[aj]
                ]
                # Replace productions only if there were left-recursive ones
                if any(p.is_left_recursive() for p in replaced_productions):
                    temp_grammar.remove_rule(p_ai)
                    for p in replaced_productions:
                        temp_grammar.add_rule(p)

        for p in remove_immediate_left_recursion(temp_grammar, ai):
            new_grammar.add_rule(p)

    return __normalize_productions(new_grammar)
 def test_immediate_recursion(self):
     # The body holds three separate symbols. Without the comma after '+',
     # Python joins the adjacent literals into the single symbol '+T'.
     p = Rule('E', ('E', '+', 'T'))
     self.assertTrue(p.is_left_recursive())
    def test_book_example(self):
        grammar = f.parse_bnf(test_data.unsolved_left_recursion)

        # Expected table, written as (head, body, symbols): one entry
        # keyed by (head, symbol) is created for every listed symbol.
        expected = {}
        for head, body, symbols in (
            ('E', ('T', "E'"), ('(', 'id')),
            ("E'", ('+', 'T', "E'"), ('+', )),
            ("E'", ('ε', ), ('$', ')')),
            ('T', ('F', "T'"), ('(', 'id')),
            ("T'", ('*', 'F', "T'"), ('*', )),
            ("T'", ('ε', ), ('+', ')', '$')),
            ('F', ('id', ), ('id', )),
            ('F', ('(', 'E', ')'), ('(', )),
        ):
            for symbol in symbols:
                expected[(head, symbol)] = Rule(head, body)

        table, amb = grammar.parsing_table(is_clean=False)

        self.assertFalse(amb)
        self.assertEqual(table, expected)