Exemple #1
0
 def test_profiling_conversions():
     """Run the CFG -> PDA -> CFG conversion chain three times in a row.

     Nothing is asserted; the function only exercises the conversions.
     """
     start = Variable("S")
     sym_a, sym_b, sym_c = Terminal("a"), Terminal("b"), Terminal("c")
     # S -> a S b | c
     rules = {
         Production(start, [sym_a, start, sym_b]),
         Production(start, [sym_c]),
     }
     grammar = CFG(productions=rules, start_symbol=start)
     for _ in range(3):
         automaton = grammar.to_pda().to_final_state().to_empty_stack()
         grammar = automaton.to_cfg()
 def test_reverse(self):
     """Check the sizes and one member word of a reversed CFG."""
     start = Variable("S")
     sym_a, sym_b = Terminal("a"), Terminal("b")
     # S -> a S b | (empty)
     rules = {
         Production(start, [sym_a, start, sym_b]),
         Production(start, []),
     }
     reversed_cfg = CFG({start}, {sym_a, sym_b}, start, rules).reverse()
     self.assertEqual(len(reversed_cfg.variables), 1)
     self.assertEqual(len(reversed_cfg.terminals), 2)
     self.assertEqual(len(reversed_cfg.productions), 2)
     self.assertFalse(reversed_cfg.is_empty())
     word = [sym_b, sym_b, sym_a, sym_a]
     self.assertTrue(reversed_cfg.contains(word))
 def test_union(self):
     """Check the sizes and one member word of a CFG united with itself."""
     start = Variable("S")
     sym_a, sym_b = Terminal("a"), Terminal("b")
     # S -> a S b | (empty)
     rules = {
         Production(start, [sym_a, start, sym_b]),
         Production(start, []),
     }
     grammar = CFG({start}, {sym_a, sym_b}, start, rules)
     merged = grammar.union(grammar)
     self.assertEqual(len(merged.variables), 3)
     self.assertEqual(len(merged.terminals), 2)
     self.assertEqual(len(merged.productions), 6)
     self.assertFalse(merged.is_empty())
     word = [sym_a, sym_a, sym_b, sym_b]
     self.assertTrue(merged.contains(word))
    def read_production_regex(cls, head, regex, id, case_sens=True):
        """Translate a regular expression into right-linear productions.

        The regex is compiled to a minimized automaton and every automaton
        state gets a fresh variable named ``Id<counter>,<state>``.

        :param head: head of the rule whose body is ``regex``
        :param regex: object exposing ``to_epsilon_nfa()``
        :param id: next free integer used to name the state variables
        :param case_sens: when true, upper-case symbols become variables and
            digit / lower-case symbols become terminals; when false every
            symbol other than ``'eps'`` becomes a terminal
        :return: tuple ``(productions, variables, terminals, next_id)``
        :raises ValueError: if ``case_sens`` is true and a symbol is neither
            upper case, lower case nor a digit
        """
        terminals, variables, productions = set(), set(), set()

        enfa = regex.to_epsilon_nfa().minimize()

        if not enfa.states:
            variables.add(head)
            productions.add(Production(head, [Epsilon()]))
            return productions, variables, terminals, id

        var_by_state = {}
        for state in enfa.states:
            var_by_state[state] = Variable(f'Id{id},{state}')
            id += 1

        for start_st in enfa.start_states:
            productions.add(Production(head, [var_by_state[start_st]]))

        # Every accepting state may end the derivation.  Doing this per final
        # state (instead of per transition target) also covers an accepting
        # start state that no transition leads into.
        for final_st in enfa.final_states:
            productions.add(Production(var_by_state[final_st], []))

        # Iterating the transition function yields (source, symbol, target).
        for st_from, symb, st_to in enfa._transition_function:
            value = symb.value

            if value == 'eps':
                first = Epsilon()
            elif value.isupper() and case_sens:
                first = Variable(value)
                variables.add(first)
            elif value.isdigit() or value.islower() or not case_sens:
                first = Terminal(value)
                # Terminals belong in the terminal set, not among variables.
                terminals.add(first)
            else:
                raise ValueError(
                    f'Symbol "{value}" should be either lower or upper case')

            productions.add(
                Production(var_by_state[st_from],
                           [first, var_by_state[st_to]]))

        return productions, variables, terminals, id
 def test_substitution(self):
     """Check substituting a terminal of a CFG by the CFG itself."""
     start = Variable("S")
     sym_a, sym_b = Terminal("a"), Terminal("b")
     # S -> a S b | (empty)
     rules = {
         Production(start, [sym_a, start, sym_b]),
         Production(start, []),
     }
     base = CFG({start}, {sym_a, sym_b}, start, rules)
     substituted = base.substitute({sym_a: base})
     self.assertEqual(len(substituted.variables), 2)
     self.assertEqual(len(substituted.terminals), 2)
     self.assertEqual(len(substituted.productions), 4)
     self.assertFalse(substituted.is_empty())
     word = [sym_a, sym_b, sym_a, sym_b, sym_b, sym_b]
     self.assertTrue(substituted.contains(word))
 def test_nullable_object(self):
     """Check which symbols of a grammar are reported as nullable."""
     start, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     rules = {
         Production(start, [var_a, var_b]),
         Production(var_a, [ter_a, var_a, var_a]),
         Production(var_a, [Epsilon()]),
         Production(var_b, [ter_b, var_b, var_b]),
         Production(var_b, [Epsilon()]),
     }
     grammar = CFG({var_a, var_b, start}, {ter_a, ter_b}, start, rules)
     nullable = grammar.get_nullable_symbols()
     self.assertEqual(nullable, {var_a, var_b, start})
    def from_grammar_file(path, python_regex=False, nonterms_upper=True):
        """Build a CFG from a text file holding one rule per line.

        A line has the form ``HEAD BODY`` (split on the first space).  The
        head of the first rule becomes the start symbol.  A body longer than
        one character that contains any of ``? + * |`` is treated as a regex:
        each ``?`` is rewritten to ``| eps`` and the result is expanded with
        ``GrammarAlgos.prod_from_regex``.  Any other body is split on single
        spaces; ``eps`` becomes ``Epsilon``, upper-case tokens become
        variables and everything else becomes a terminal.

        Blank lines are skipped and a line consisting of a head only yields
        a production with an empty body.

        :param path: path of the grammar file
        :param python_regex: forwarded to ``GrammarAlgos.prod_from_regex``
        :param nonterms_upper: forwarded to ``GrammarAlgos.prod_from_regex``
        :return: the resulting ``CFG``
        """

        def rule_productions(line):
            """Return ``(head variable, set of productions)`` for one rule."""
            head_txt, _, body_txt = line.partition(' ')
            head = Variable(head_txt)
            if not body_txt:
                return head, {Production(head, [])}

            if len(body_txt) > 1 and any(symb in body_txt for symb in '?+*|'):
                regex_txt = body_txt.replace('?', '| eps')
                return head, GrammarAlgos.prod_from_regex(
                    head, regex_txt, python_regex, nonterms_upper)

            body = []
            for token in body_txt.split(' '):
                if token == 'eps':
                    body.append(Epsilon())
                elif token.isupper():
                    body.append(Variable(token))
                else:
                    body.append(Terminal(token))
            return head, {Production(head, body)}

        start_symbol = None
        productions = set()
        with open(path, 'r') as g:
            for raw_line in g:
                line = raw_line.strip()
                if not line:
                    continue
                head, new_productions = rule_productions(line)
                if start_symbol is None:
                    start_symbol = head
                productions |= new_productions

        return CFG(start_symbol=start_symbol, productions=productions)
Exemple #8
0
 def test_derivation_empty(self):
     """Check the rightmost derivation obtained for the empty word."""
     start = Variable("S")
     rules = [Production(start, [Epsilon()])]
     grammar = CFG(productions=rules, start_symbol=start)
     tree = grammar.get_cnf_parse_tree([])
     expected = [[start], []]
     self.assertEqual(expected, tree.get_rightmost_derivation())
Exemple #9
0
 def production_to_pretty(production: Production):
     """Return a new production with every symbol mapped by cfg_obj_to_pretty.

     The head and each body symbol are converted individually; ``False`` is
     passed as the third positional argument of ``Production``.
     """
     pretty_head = cfg_obj_to_pretty(production.head)
     pretty_body = [cfg_obj_to_pretty(symbol) for symbol in production.body]
     return Production(pretty_head, pretty_body, False)
    def read_grammar(cls, name):
        """Load a CFG from a file with one whitespace-separated rule per line.

        The first token of a line is the head, the remaining tokens form the
        body.  Upper-case body tokens become variables, all other tokens
        become terminals.  The head of the first rule is the start symbol.
        Blank lines are ignored.

        :param name: path of the grammar file
        :return: the resulting ``CFG``
        """
        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Blank line (e.g. a trailing newline): nothing to parse.
                    continue

                head, *body = tokens
                head_var = Variable(head)
                if start_symb is None:
                    start_symb = head_var

                body_cfg = []
                for letter in body:
                    if letter.isupper():
                        variable = Variable(letter)
                        variables.add(variable)
                        body_cfg.append(variable)
                    else:
                        terminal = Terminal(letter)
                        terminals.add(terminal)
                        body_cfg.append(terminal)

                productions.add(Production(head_var, body_cfg))

        return CFG(variables, terminals, start_symb, productions)
    def to_cnf(cfg):
        """Return a normal-form grammar that still accepts the empty word.

        When ``cfg`` does not generate the empty word, the plain result of
        ``cfg.to_normal_form()`` is returned.  Otherwise a new grammar is
        built around a fresh start symbol (the old start symbol's value with
        ``'`` appended) that has an empty production plus a copy of every
        production of the old start symbol.

        :param cfg: grammar to convert
        :return: the converted grammar
        """
        if not cfg.generate_epsilon():
            return cfg.to_normal_form()

        normal_form = cfg.to_normal_form()
        old_start = normal_form.start_symbol
        new_start_symbol = Variable(old_start.value + "'")

        res = CFG(variables=normal_form.variables,
                  terminals=normal_form.terminals,
                  start_symbol=new_start_symbol)
        res.variables.add(new_start_symbol)
        # Add the empty production to the result only: the grammar returned
        # by to_normal_form() may be shared with ``cfg`` (NOTE(review):
        # pyformlang appears to cache it), so it must not be modified here.
        res.productions.add(Production(new_start_symbol, []))
        for production in normal_form.productions:
            if production.head == old_start:
                res.productions.add(
                    Production(new_start_symbol, production.body))
            res.productions.add(production)
        return res
 def test_useless_removal(self):
     """Check the sizes of a grammar after useless symbols are removed."""
     start, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     # B has no production, so S -> A B cannot derive a word.
     rules = {
         Production(start, [var_a, var_b]),
         Production(start, [ter_a]),
         Production(var_a, [ter_b]),
     }
     cfg = CFG({var_a, var_b, start}, {ter_a, ter_b}, start, rules)
     cleaned = cfg.remove_useless_symbols()
     self.assertEqual(len(cleaned.variables), 1)
     self.assertEqual(len(cleaned.terminals), 1)
     self.assertEqual(len(cleaned.productions), 1)
     self.assertFalse(cfg.is_empty())
 def test_finite(self):
     """Check finiteness before and after adding a recursive production."""
     ter_a, ter_b = Terminal("a"), Terminal("b")
     var_s, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     rules = {
         Production(var_s, [var_a, var_b]),
         Production(var_a, [ter_a]),
         Production(var_b, [ter_b]),
     }
     finite_cfg = CFG(productions=rules, start_symbol=var_s)
     self.assertTrue(finite_cfg.is_finite())
     # A -> S introduces a cycle through the start symbol.
     rules.add(Production(var_a, [var_s]))
     recursive_cfg = CFG(productions=rules, start_symbol=var_s)
     self.assertFalse(recursive_cfg.is_finite())
 def test_conversions(self):
     """Check membership after a CFG -> PDA -> CFG round trip."""
     start = Variable("S")
     sym_a, sym_b, sym_c = Terminal("a"), Terminal("b"), Terminal("c")
     # S -> a S b | c
     rules = {
         Production(start, [sym_a, start, sym_b]),
         Production(start, [sym_c]),
     }
     grammar = CFG(productions=rules, start_symbol=start)
     grammar = grammar.to_pda().to_final_state().to_empty_stack().to_cfg()
     accepted = (
         [sym_c],
         [sym_a, sym_c, sym_b],
         [sym_a, sym_a, sym_c, sym_b, sym_b],
     )
     rejected = (
         [sym_b, sym_c, sym_a],
         [sym_b, sym_b, sym_c, sym_a, sym_a],
     )
     for word in accepted:
         self.assertTrue(grammar.contains(word))
     for word in rejected:
         self.assertFalse(grammar.contains(word))
 def test_pos_closure(self):
     """Check sizes and membership of the positive closure of a CFG."""
     start = Variable("S")
     sym_a, sym_b, sym_c = Terminal("a"), Terminal("b"), Terminal("c")
     # S -> a S b | c
     rules = {
         Production(start, [sym_a, start, sym_b]),
         Production(start, [sym_c]),
     }
     grammar = CFG({start}, {sym_a, sym_b}, start, rules)
     closure = grammar.get_positive_closure()
     self.assertEqual(len(closure.variables), 3)
     self.assertEqual(len(closure.terminals), 3)
     self.assertEqual(len(closure.productions), 6)
     self.assertFalse(closure.is_empty())
     self.assertFalse(closure.contains([]))
     two_words = [sym_a, sym_a, sym_c, sym_b, sym_b, sym_a, sym_c, sym_b]
     self.assertTrue(closure.contains(two_words))
Exemple #16
0
def regex_to_grammar_productions(regex, head, var_dict, terminal_dict):
    """Turn a regex body into right-linear productions for ``head``.

    The regex is compiled to a minimized automaton.  Each state gets a fresh
    variable named ``<head>#REGEX#<n>`` (``n`` from ``get_new_var_num()``),
    ``head`` derives every start-state variable, each transition
    ``p --sym--> q`` becomes ``P -> sym Q`` and every final-state variable
    gets an empty production.

    :param regex: object exposing ``to_epsilon_nfa()``
    :param head: variable the produced rules hang off
    :param var_dict: maps automaton symbols to grammar variables
    :param terminal_dict: maps automaton symbols to grammar terminals
    :return: set of ``Production`` objects
    :raises ValueError: if a transition symbol is in neither dictionary and
        is not ``EPS_SYM``
    """
    enfa = regex.to_epsilon_nfa().minimize()
    transitions = enfa._transition_function._transitions

    # One fresh, uniquely numbered variable per automaton state.
    state_vars = {
        state: Variable('%s#REGEX#%s' % (head.value, get_new_var_num()))
        for state in enfa.states
    }

    production_set = set()

    # Emitted independently of the transitions so that an automaton without
    # any transition still links ``head`` to its start state.
    for start_state in enfa.start_states:
        production_set.add(Production(head, [state_vars[start_state]]))

    # Every accepting state may end the derivation, including one that is
    # never the target of a transition.
    for final_state in enfa.final_states:
        production_set.add(Production(state_vars[final_state], []))

    for head_state, outgoing in transitions.items():
        for sym, body_state in outgoing.items():
            if sym in var_dict:
                first = var_dict[sym]
            elif sym in terminal_dict:
                first = terminal_dict[sym]
            elif sym == EPS_SYM:
                first = Epsilon()
            else:
                raise ValueError(
                    f'Symbol "{sym}" is not defined as '
                    'a terminal or a variable')

            production_set.add(
                Production(state_vars[head_state],
                           [first, state_vars[body_state]]))
    return production_set
Exemple #17
0
 def __init__(self,
              cfg: CFG,
              is_reduced: bool = False):
     """Initialise this grammar with the normal form of ``cfg``.

     Unless ``is_reduced`` is true, a grammar whose start symbol occurs in
     some production body first receives a fresh start symbol (``S'``,
     extended with further ``'`` until the name is unused) that derives the
     old one.  If the grammar generates the empty word, an empty production
     for the start symbol is added after the normal-form conversion.

     The grammar passed in is left unmodified.

     :param cfg: grammar to convert
     :param is_reduced: skip the start-symbol isolation step when true
     """
     if not is_reduced and any(
             cfg.start_symbol in p.body for p in cfg.productions):
         new_start_variable_name = 'S\''
         # Compare as Variable so the lookup does not depend on a raw
         # string hashing like a Variable.
         while Variable(new_start_variable_name) in cfg.variables:
             new_start_variable_name += '\''
         new_start_variable = Variable(new_start_variable_name)
         # Build a new grammar instead of editing the caller's private
         # state in place.
         cfg = CFG(
             set(cfg.variables) | {new_start_variable},
             set(cfg.terminals),
             new_start_variable,
             set(cfg.productions)
             | {Production(new_start_variable, [cfg.start_symbol])})
     generate_epsilon = cfg.generate_epsilon()
     normal_form = cfg.to_normal_form()
     # Work on a copy: the normal-form grammar may be shared with ``cfg``.
     productions = set(normal_form.productions)
     if generate_epsilon:
         productions.add(Production(normal_form.start_symbol, []))
     super().__init__(normal_form.variables, normal_form.terminals,
                      normal_form.start_symbol, productions)
Exemple #18
0
 def test_creation(self):
     """Check equality, string form and hash of productions."""

     def make(head, first, second):
         """Build ``head -> Terminal(first) Variable(second)``."""
         return Production(Variable(head),
                           [Terminal(first), Variable(second)])

     reference = make("S0", "S1", "a")
     duplicate = make("S0", "S1", "a")
     different = [
         make("S0'", "S1", "a"),
         make("S0", "S2", "a"),
         make("S0", "S2", "b"),
     ]

     self.assertEqual(reference, duplicate)
     for other in different:
         self.assertNotEqual(reference, other)

     self.assertEqual(str(reference), str(duplicate))
     for other in different:
         self.assertNotEqual(str(reference), str(other))

     self.assertEqual(hash(reference), hash(duplicate))
     for other in different:
         self.assertNotEqual(hash(reference), hash(other))

     self.assertIn(" -> ", str(reference))
 def test_generation_words2(self):
     """Check the words generated by a grammar with empty productions."""
     sym_a = Terminal("a")
     start, first, second = Variable("S"), Variable("S1"), Variable("S2")
     rules = {
         Production(start, [first, sym_a]),
         Production(first, [second, sym_a]),
         Production(first, []),
         Production(second, []),
         Production(start, []),
     }
     grammar = CFG(productions=rules, start_symbol=start)
     generated = list(grammar.get_words())
     for word in ([], [sym_a], [sym_a, sym_a]):
         self.assertIn(word, generated)
     self.assertEqual(len(generated), 3)
    def read_grammar(cls, name):
        """Load a CFG from a file whose rules may contain regex bodies.

        Every non-blank line has the form ``HEAD <sep> BODY``; the second
        token is ignored.  The body is split into alternatives on ``|``.
        An alternative whose tokens all contain ``*`` is treated as a Python
        regex and expanded with ``CFGrammar.read_production_regex``;
        otherwise ``epsilon`` becomes ``Epsilon``, upper-case tokens become
        variables and the remaining tokens become terminals.  The head of
        the first rule is the start symbol.

        :param name: path of the grammar file
        :return: the resulting ``CFG``
        """
        next_id = 0

        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Blank line (e.g. a trailing newline): nothing to parse.
                    continue

                head, _, *body_full = tokens

                if start_symb is None:
                    start_symb = Variable(head)

                # Alternatives are the runs of tokens between "|" tokens.
                bodies = [
                    list(group)
                    for is_bar, group in groupby(body_full,
                                                 lambda x: x == "|")
                    if not is_bar
                ]

                for body in bodies:
                    if all('*' in value for value in body):
                        # NOTE(review): only the first token is compiled;
                        # further tokens of a multi-token regex alternative
                        # are ignored -- confirm this is intended.
                        regex = Regex.from_python_regex(body[0])
                        (new_productions, new_variables, new_terminals,
                         next_id) = CFGrammar.read_production_regex(
                             head, regex, next_id, False)

                        productions |= new_productions
                        variables |= new_variables
                        terminals |= new_terminals
                    else:
                        body_cfg = []
                        for letter in body:
                            if letter == "epsilon":
                                body_cfg.append(Epsilon())
                            elif letter.isupper():
                                non_terminal = Variable(letter)
                                variables.add(non_terminal)
                                body_cfg.append(non_terminal)
                            else:
                                terminal = Terminal(letter)
                                terminals.add(terminal)
                                body_cfg.append(terminal)

                        productions.add(Production(Variable(head), body_cfg))

        return CFG(variables, terminals, start_symb, productions)
Exemple #21
0
 def test_get_rightmost_derivation(self):
     """Check the rightmost derivation of ``a a b`` step by step."""
     ter_a, ter_b = Terminal("a"), Terminal("b")
     var_s, var_a = Variable("S"), Variable("A")
     var_b, var_c = Variable("B"), Variable("C")
     rules = [
         Production(var_s, [var_c, var_b]),
         Production(var_c, [var_a, var_a]),
         Production(var_a, [ter_a]),
         Production(var_b, [ter_b]),
     ]
     grammar = CFG(productions=rules, start_symbol=var_s)
     tree = grammar.get_cnf_parse_tree([ter_a, ter_a, ter_b])
     expected_steps = [
         [var_s],
         [var_c, var_b],
         [var_c, ter_b],
         [var_a, var_a, ter_b],
         [var_a, ter_a, ter_b],
         [ter_a, ter_a, ter_b],
     ]
     self.assertEqual(tree.get_rightmost_derivation(), expected_steps)
 def test_remove_epsilon(self):
     """Check the sizes of a grammar after empty productions are removed."""
     start, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     # The two empty productions are written in both accepted spellings.
     rules = {
         Production(start, [var_a, var_b]),
         Production(var_a, [ter_a, var_a, var_a]),
         Production(var_a, [Epsilon()]),
         Production(var_b, [ter_b, var_b, var_b]),
         Production(var_b, []),
     }
     cfg = CFG({var_a, var_b, start}, {ter_a, ter_b}, start, rules)
     without_eps = cfg.remove_epsilon()
     self.assertEqual(len(without_eps.variables), 3)
     self.assertEqual(len(without_eps.terminals), 2)
     self.assertEqual(len(set(without_eps.productions)), 9)
     self.assertEqual(len(without_eps.get_nullable_symbols()), 0)
     self.assertFalse(cfg.is_empty())
Exemple #23
0
def cnf(cfgrammar):
    """Return a normal-form grammar that still accepts the empty word.

    When ``cfgrammar`` does not generate the empty word, the plain result of
    ``cfgrammar.to_normal_form()`` is returned.  Otherwise a new grammar is
    built around a fresh start symbol (the old start symbol's value with
    ``'`` appended) that has an empty production plus a copy of every
    production of the old start symbol.

    :param cfgrammar: grammar to convert
    :return: the converted grammar
    """
    if not cfgrammar.generate_epsilon():
        return cfgrammar.to_normal_form()

    normal_form = cfgrammar.to_normal_form()
    old_start = normal_form.start_symbol
    new_symbol = Variable(old_start.value + "'")

    output = CFG(variables=normal_form.variables,
                 start_symbol=new_symbol,
                 terminals=normal_form.terminals)
    output.variables.add(new_symbol)
    # Add the empty production to the result only: the grammar returned by
    # to_normal_form() may be shared with ``cfgrammar`` (NOTE(review):
    # pyformlang appears to cache it), so it must not be modified here.
    output.productions.add(Production(new_symbol, []))

    for rule in normal_form.productions:
        if old_start == rule.head:
            output.productions.add(Production(new_symbol, rule.body))
        output.productions.add(rule)

    return output
    def test_membership(self):
        """Check word membership on several grammars."""
        # pylint: disable=too-many-locals
        var_useless = Variable("USELESS")
        var_s, var_b = Variable("S"), Variable("B")
        ter_a, ter_b, ter_c = Terminal("a"), Terminal("b"), Terminal("c")

        recursive = Production(var_s, [ter_a, var_s, var_b])
        first_rules = {
            recursive,
            Production(var_useless, [ter_a, var_s, var_b]),
            Production(var_s, [var_useless]),
            Production(var_b, [ter_b]),
            Production(var_useless, []),
        }
        cfg0 = CFG({var_useless, var_s}, {ter_a, ter_b}, var_s, first_rules)
        accepted = (
            [Epsilon()],
            [ter_a, ter_b],
            [ter_a, ter_a, ter_b, ter_b],
            [ter_a, ter_a, ter_a, ter_b, ter_b, ter_b],
        )
        rejected = (
            [ter_a, ter_b, ter_b],
            [ter_a, ter_b, ter_c, ter_b],
            [ter_a, ter_a, ter_a, ter_b, ter_b],
        )
        for word in accepted:
            self.assertTrue(cfg0.contains(word))
        for word in rejected:
            self.assertFalse(cfg0.contains(word))

        # Without an empty production the empty word is no longer accepted.
        second_rules = {recursive, Production(var_s, [ter_c])}
        cfg0 = CFG({var_s}, {ter_a, ter_b, ter_c}, var_s, second_rules)
        self.assertFalse(cfg0.contains([Epsilon()]))

        var_a = Variable("A")
        third_rules = {
            Production(var_s, [var_a, var_b]),
            Production(var_a, [var_a, var_b]),
            Production(var_a, [ter_a]),
            Production(var_b, [ter_b]),
        }
        cfg1 = CFG({var_a, var_b, var_s}, {ter_a, ter_b}, var_s, third_rules)
        self.assertTrue(cfg1.contains([ter_a, ter_b, ter_b]))
        # Same grammar, declared and queried with plain strings.
        cfg1 = CFG({"A", "B", "S"}, {"a", "b"}, "S", third_rules)
        self.assertTrue(cfg1.contains(["a", "b", "b"]))
Exemple #25
0
    def regex_to_grammar_productions(regex, head):
        """Turn a regex body into right-linear productions for ``head``.

        The regex is compiled to a minimized automaton.  Each state gets a
        fresh variable named ``<head>#REGEX#<n>`` (``n`` from
        ``get_new_var_num()``), ``head`` derives every start-state variable,
        each transition ``p --sym--> q`` becomes ``P -> sym Q`` and every
        final-state variable gets an empty production.  A symbol equal to
        ``EPS_SYM`` maps to ``Epsilon``, an upper-case symbol to a variable
        and any other symbol to a terminal.

        :param regex: object exposing ``to_epsilon_nfa()``
        :param head: variable the produced rules hang off
        :return: set of ``Production`` objects
        """
        enfa = regex.to_epsilon_nfa().minimize()
        transitions = enfa._transition_function._transitions

        # One fresh, uniquely numbered variable per automaton state.
        state_vars = {
            state: Variable('%s#REGEX#%s' % (head.value, get_new_var_num()))
            for state in enfa.states
        }

        production_set = set()

        # Emitted independently of the transitions so that an automaton
        # without any transition still links ``head`` to its start state.
        for start_state in enfa.start_states:
            production_set.add(Production(head, [state_vars[start_state]]))

        # Every accepting state may end the derivation, including one that
        # is never the target of a transition.
        for final_state in enfa.final_states:
            production_set.add(Production(state_vars[final_state], []))

        for head_state, outgoing in transitions.items():
            for sym, body_state in outgoing.items():
                label = sym.value
                # Grammar objects are built from the symbol's value rather
                # than from the automaton symbol object itself.
                if label == EPS_SYM:
                    first = Epsilon()
                elif label.isupper():
                    first = Variable(label)
                else:
                    first = Terminal(label)

                production_set.add(
                    Production(state_vars[head_state],
                               [first, state_vars[body_state]]))
        return production_set
Exemple #26
0
    def from_text(cls, text: List[str]):
        """Build an instance from grammar rules given as text lines.

        Each line is split on its first space into a head and a regex body.
        Every body is compiled to a minimized DFA; the states of all DFAs are
        numbered consecutively and their transitions are stored in one
        boolean matrix per edge label on a graph wrapper object.

        :param text: rule lines of the form ``HEAD BODY``
        :return: ``cls(rfa_graph, head_by_start_final_pair, eps_productions,
            start_symbol)``
        """
        start_symbol = None
        eps_productions = []
        productions_with_dfa = []
        for line in text:
            raw_head, *raw_body = line.strip().split(' ', 1)
            # NOTE(review): str.replace rewrites every occurrence of 'eps',
            # including inside longer tokens (and inside 'epsilon' itself).
            regex = Regex(' '.join(raw_body).replace('eps', 'epsilon'))
            head = Variable(raw_head)
            # The head of the first line is the start symbol.
            if start_symbol is None:
                start_symbol = head
            # A line without a body is recorded as an empty production; a
            # DFA is still built for it from the empty regex text below.
            if not raw_body:
                eps_productions.append(Production(head, []))
            dfa: DeterministicFiniteAutomaton = regex.to_epsilon_nfa(
            ).to_deterministic().minimize()
            productions_with_dfa.append((head, dfa))

        import wrappers.GraphWrapper
        rfa_graph = wrappers.GraphWrapper.empty()
        # Matrix dimension = total number of states over all DFAs.
        rfa_graph.matrix_size = sum(
            [len(dfa.states) for _, dfa in productions_with_dfa])
        rfa_graph.vertices = set()
        # Template matrix; a duplicate is taken for each edge label.
        # NOTE(review): Matrix / types.BOOL are presumably pygraphblas --
        # confirm against the file's imports.
        empty_matrix = Matrix.sparse(types.BOOL, rfa_graph.matrix_size,
                                     rfa_graph.matrix_size)
        # Maps (start state number, final state number) to the head's value.
        head_by_start_final_pair = {}
        total_states_counter = 0

        for head, dfa in productions_with_dfa:
            transitions = dfa._transition_function._transitions
            # Give this DFA's states the next free consecutive numbers.
            num_by_state = {}
            for state in dfa.states:
                num_by_state[state] = total_states_counter
                total_states_counter += 1
            rfa_graph.vertices.update(num_by_state.values())

            for start_state in dfa.start_states:
                rfa_graph.start_states.add(num_by_state[start_state])
            for final_state in dfa.final_states:
                rfa_graph.final_states.add(num_by_state[final_state])
                head_by_start_final_pair[
                    num_by_state[dfa.start_state],
                    num_by_state[final_state]] = head.value

            # Mark every DFA transition in the matrix of its edge label.
            for state_from in transitions:
                for edge_symb in transitions[state_from]:
                    state_to = transitions[state_from][edge_symb]
                    # dup() is evaluated on every call, even when the label
                    # already has a matrix.
                    matrix = rfa_graph.label_to_bool_matrix.setdefault(
                        edge_symb, empty_matrix.dup())
                    matrix[num_by_state[state_from],
                           num_by_state[state_to]] = True

        return cls(rfa_graph, head_by_start_final_pair, eps_productions,
                   start_symbol)
    def prod_from_regex(head, regex, python_regex=False, nonterms_upper=True):
        """Turn a regex string into right-linear productions for ``head``.

        The regex is compiled to a minimized automaton.  Each state gets a
        fresh variable ``State<n>`` numbered by the module-level
        ``state_counter``, ``head`` derives every start-state variable, each
        transition ``p --sym--> q`` becomes ``P -> sym Q`` and every
        final-state variable gets an empty production.

        :param head: variable the produced rules hang off
        :param regex: regex text
        :param python_regex: parse with ``Regex.from_python_regex`` when
            true, with ``Regex(...)`` otherwise
        :param nonterms_upper: when true, upper-case symbols become
            variables; otherwise every non-``eps`` symbol is a terminal
        :return: set of ``Production`` objects
        """
        global state_counter

        if python_regex:
            regex = Regex.from_python_regex(regex)
        else:
            regex = Regex(regex)

        enfa = regex.to_epsilon_nfa().minimize()
        transitions = enfa.to_dict()
        state_to_var = {}
        production_set = set()

        for state in enfa.states:
            state_counter += 1
            state_to_var[state] = Variable(f'State{state_counter}')

        for start_state in enfa.start_states:
            production_set.add(Production(head, [state_to_var[start_state]]))

        # Every accepting state may end the derivation.  Doing this per final
        # state (instead of per transition target) also covers an accepting
        # start state that no transition leads into.
        for final_state in enfa.final_states:
            production_set.add(Production(state_to_var[final_state], []))

        for head_state, transition in transitions.items():
            for symbol, body_state in transition.items():
                label = symbol.value
                if label == 'eps':
                    first = Epsilon()
                elif nonterms_upper and label.isupper():
                    first = Variable(label)
                else:
                    first = Terminal(label)

                production_set.add(
                    Production(state_to_var[head_state],
                               [first, state_to_var[body_state]]))

        return production_set
 def test_intersection(self):
     """Check the intersection of a CFG with the regex ``a*b*``."""
     regex = Regex("a*b*")
     automaton = regex.to_epsilon_nfa().to_deterministic()
     symb_a, symb_b = Symbol("a"), Symbol("b")
     self.assertTrue(automaton.accepts([symb_a, symb_a, symb_b, symb_b]))
     self.assertFalse(automaton.accepts([symb_b, symb_b, symb_a]))

     ter_a, ter_b = Terminal("a"), Terminal("b")
     start = Variable("S")
     # S -> a S b | b S a | (empty)
     rules = {
         Production(start, [ter_a, start, ter_b]),
         Production(start, [ter_b, start, ter_a]),
         Production(start, []),
     }
     grammar = CFG(productions=rules, start_symbol=start)
     balanced = [ter_a, ter_a, ter_b, ter_b]
     unbalanced = [ter_a, ter_a, ter_b]
     self.assertTrue(grammar.contains(balanced))
     self.assertFalse(grammar.contains(unbalanced))

     intersected = grammar.intersection(regex)
     self.assertTrue(intersected.contains(balanced))
     self.assertFalse(intersected.contains(unbalanced))
     self.assertTrue(intersected.contains([]))
Exemple #29
0
    def __init__(self, start_symbol=None, productions=None):
        """Initialise the grammar with the normal form of the given rules.

        A plain ``CFG`` is built from the arguments and converted with
        ``to_normal_form()``.  Productions whose body has exactly two
        symbols are additionally collected in ``self.pair_productions``.

        :param start_symbol: start symbol of the source grammar
        :param productions: productions of the source grammar
        """
        source = CFG(start_symbol=start_symbol, productions=productions)
        normal_form = source.to_normal_form()

        if source.generate_epsilon():
            # Needed for language preservation: give the start symbol an
            # empty production when the source grammar generates the
            # empty word.
            empty_rule = Production(normal_form._start_symbol, [])
            normal_form._productions.add(empty_rule)

        self.pair_productions = {
            rule for rule in normal_form._productions if len(rule.body) == 2
        }

        super(GrammarCNF, self).__init__(
            start_symbol=normal_form._start_symbol,
            productions=normal_form._productions)
Exemple #30
0
    def regex_to_production(regex, head):
        """Turn a regex body into right-linear productions for ``head``.

        The regex is compiled to a minimized automaton.  Each state gets a
        fresh variable named ``<head>#REGEX#<n>`` (``n`` from
        ``get_new_var_num()``), ``head`` derives every start-state variable,
        each transition ``p --sym--> q`` becomes ``P -> sym Q`` and every
        final-state variable gets an empty production.  A symbol equal to
        ``EPS_SYM`` maps to ``Epsilon``, an upper-case symbol to a variable
        and any other symbol to a terminal.

        :param regex: object exposing ``to_epsilon_nfa()``
        :param head: variable the produced rules hang off
        :return: set of ``Production`` objects
        """
        enfa = regex.to_epsilon_nfa().minimize()
        transitions = enfa._transition_function._transitions

        # One fresh, uniquely numbered variable per automaton state.
        state_vars = {
            state: Variable('%s#REGEX#%s' % (head.value, get_new_var_num()))
            for state in enfa.states
        }

        production_set = set()

        # Emitted independently of the transitions so that an automaton
        # without any transition still links ``head`` to its start state.
        for start_state in enfa.start_states:
            production_set.add(Production(head, [state_vars[start_state]]))

        # Every accepting state may end the derivation, including one that
        # is never the target of a transition.
        for final_state in enfa.final_states:
            production_set.add(Production(state_vars[final_state], []))

        for head_state, outgoing in transitions.items():
            for symbol, body_state in outgoing.items():
                label = symbol.value
                # Grammar objects are built from the symbol's value rather
                # than from the automaton symbol object itself.
                if label == EPS_SYM:
                    first = Epsilon()
                elif label.isupper():
                    first = Variable(label)
                else:
                    first = Terminal(label)

                production_set.add(
                    Production(state_vars[head_state],
                               [first, state_vars[body_state]]))
        return production_set