예제 #1
0
 def test_get_repr(self):
     """ Checks that a regex rebuilt from str(regex) gives an equal minimal DFA """
     original = Regex("a*.(b|c)epsilon")
     rebuilt = Regex(str(original))
     # Both regexes go through the same pipeline before being compared
     minimal_dfas = [
         regex.to_epsilon_nfa().to_deterministic().minimize()
         for regex in (original, rebuilt)
     ]
     self.assertEqual(minimal_dfas[0], minimal_dfas[1])
예제 #2
0
def test_dfa_nfa_intersection():
    """Intersect a regex-built minimal DFA with a hand-built NFA.

    The test then checks three words the intersection must accept and
    three words it must reject.
    """
    nfa1 = NondeterministicFiniteAutomaton()
    states = [finite_automaton.State(number) for number in range(4)]
    # (source index, symbol, target index), in insertion order
    edges = [
        (0, symb_a, 1),
        (0, symb_c, 2),
        (1, symb_a, 1),
        (1, symb_b, 2),
        (2, symb_a, 0),
        (0, symb_c, 3),
        (3, symb_a, 1),
    ]
    for source, symbol, target in edges:
        nfa1.add_transition(states[source], symbol, states[target])
    nfa1.add_start_state(states[0])
    nfa1.add_final_state(states[1])

    pattern = "".join([
        "((((b.a)|(((a|(b.c))|(c.a)).(((b.c))*.",
        "(b.a)))))*.(((a|(b.c))|(c.a)).((b.c))*))",
    ])
    minimal_dfa = Regex(pattern).to_epsilon_nfa().to_deterministic().minimize()
    dnfa = minimal_dfa.get_intersection(nfa1)

    accepted_words = (
        [symb_a],
        [symb_c, symb_a],
        [symb_a, symb_b, symb_a, symb_a],
    )
    rejected_words = (
        [symb_c, symb_b],
        [symb_a, symb_a],
        [symb_b],
    )
    for word in accepted_words:
        assert dnfa.accepts(word)
    for word in rejected_words:
        assert not dnfa.accepts(word)
예제 #3
0
 def test_intersection(self):
     """ Tests the intersection of a CFG with a regex and with its epsilon NFA """
     regex = Regex("a*b*")
     enfa = regex.to_epsilon_nfa()
     symb_a, symb_b = Symbol("a"), Symbol("b")
     self.assertTrue(enfa.accepts([symb_a, symb_a, symb_b, symb_b]))
     self.assertFalse(enfa.accepts([symb_b, symb_b, symb_a]))

     ter_a, ter_b = Terminal("a"), Terminal("b")
     var_s = Variable("S")
     cfg = CFG(
         productions={
             Production(var_s, [ter_a, var_s, ter_b]),
             Production(var_s, [ter_b, var_s, ter_a]),
             Production(var_s, []),
         },
         start_symbol=var_s)
     contained = (ter_a, ter_a, ter_b, ter_b)
     not_contained = (ter_a, ter_a, ter_b)
     self.assertTrue(cfg.contains(list(contained)))
     self.assertFalse(cfg.contains(list(not_contained)))

     # The same three checks run against the regex first, then the automaton
     for other in (regex, enfa):
         cfg_i = cfg.intersection(other)
         self.assertTrue(cfg_i.contains(list(contained)))
         self.assertFalse(cfg_i.contains(list(not_contained)))
         self.assertTrue(cfg_i.contains([]))
예제 #4
0
 def _create_cfg_from_regex(cls, head: Variable, regex: Regex, variables=None) -> CFG:
     """Translate ``regex`` into a CFG whose start symbol is ``head``.

     The regex is turned into a minimized DFA.  Each DFA state gets its own
     fresh variable, each transition ``p --x--> q`` becomes a production
     ``P -> x Q``, each start state gets ``head -> P`` and each accepting
     state gets ``Q -> (empty body)``.

     Parameters
     ----------
     head : Variable
         Head of the productions pointing at the DFA start states; also
         passed to ``CFG`` as the start symbol.
     regex : Regex
         The regular expression to translate.
     variables : optional
         Container of symbol values to treat as grammar variables.  When it
         is empty or ``None``, a symbol is treated as a variable if its
         value ``isupper()``; every other symbol becomes a terminal.

     Returns
     -------
     CFG
         The grammar built from the DFA.  If no production was generated,
         the grammar contains the single production ``head -> (empty body)``.
     """
     dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
     transitions = dfa._transition_function._transitions
     state_to_var: Dict[State, Variable] = {}
     productions, terminals, cfg_variables = set(), set(), set()
     for state in dfa.states:
         # The class-wide counter keeps variable names unique across calls
         state_to_var[state] = Variable(f'{state}:{cls.__var_state_counter}')
         cls.__var_state_counter += 1
     cfg_variables.update(state_to_var.values())
     for start_state in dfa.start_states:
         productions.add(Production(head, [state_to_var[start_state]]))
     # Every accepting state may end a derivation.  This is done for all
     # final states up front, not only for those reached by a transition:
     # an accepting start state without incoming edges would otherwise
     # never get its empty production.
     for final_state in dfa.final_states:
         productions.add(Production(state_to_var[final_state], []))
     for state_from, outgoing in transitions.items():
         for edge_symb, state_to in outgoing.items():
             label = edge_symb.value
             if variables:
                 is_variable = label in variables
             else:
                 is_variable = label.isupper()
             if is_variable:
                 symbol = Variable(label)
                 cfg_variables.add(symbol)
             else:
                 symbol = Terminal(label)
                 terminals.add(symbol)
             productions.add(
                 Production(state_to_var[state_from],
                            [symbol, state_to_var[state_to]]))
     if not productions:
         return CFG(cfg_variables, terminals, head, {Production(head, [])})
     return CFG(cfg_variables, terminals, head, productions)
예제 #5
0
def from_regex_file(path: str):
    """Read a regex from the first line of ``path`` and convert it.

    The line is parsed as a ``Regex``, turned into a minimized DFA and
    handed to ``from_dfa``, whose result is returned.

    The file is opened in a ``with`` block so the handle is released even
    if reading fails; the regex is parsed only after the file is closed.
    """
    with open(path) as regex_file:
        # NOTE(review): the line is passed on as read, including any
        # trailing newline -- confirm Regex tolerates that.
        first_line = regex_file.readline()
    regex = Regex(first_line)
    dfa: DeterministicFiniteAutomaton = regex.to_epsilon_nfa(
    ).to_deterministic().minimize()
    return from_dfa(dfa)
예제 #6
0
def test_regex_to_epsilon_nfa():
    """Check the state count and accepted words of the epsilon NFA built
    from ``(a|a a b)*``."""
    enfa = Regex("(a|a a b)*").to_epsilon_nfa()
    assert len(enfa.states) == 12
    # (word, whether the automaton must accept it), in checking order
    expectations = [
        ([symb_a], True),
        ([symb_a, symb_a, symb_b, epsilon], True),
        ([symb_c], False),
        ([epsilon], True),
        ([symb_b, symb_a], False),
    ]
    for word, must_accept in expectations:
        assert bool(enfa.accepts(word)) is must_accept
예제 #7
0
    def from_text(cls, text: List[str]):
        """Build an instance from textual productions, one per line.

        Each line is ``HEAD`` optionally followed by a space and a regex
        body, in which ``eps`` is replaced by ``epsilon``.  Every body is
        compiled to a minimized DFA, and all DFAs are laid out side by side
        in one graph of boolean adjacency matrices (one matrix per edge
        symbol).  The head of the first line becomes the start symbol.
        Lines without a body are also recorded as empty productions.
        """
        start_symbol = None
        eps_productions = []
        productions_with_dfa = []
        for line in text:
            pieces = line.strip().split(' ', 1)
            raw_head, raw_body = pieces[0], pieces[1:]
            regex = Regex(' '.join(raw_body).replace('eps', 'epsilon'))
            head = Variable(raw_head)
            if start_symbol is None:
                start_symbol = head
            if not raw_body:
                eps_productions.append(Production(head, []))
            dfa: DeterministicFiniteAutomaton = regex.to_epsilon_nfa(
            ).to_deterministic().minimize()
            productions_with_dfa.append((head, dfa))

        import wrappers.GraphWrapper
        rfa_graph = wrappers.GraphWrapper.empty()
        rfa_graph.matrix_size = sum(
            len(automaton.states) for _, automaton in productions_with_dfa)
        rfa_graph.vertices = set()
        # Template that is duplicated the first time each edge symbol is seen
        empty_matrix = Matrix.sparse(types.BOOL, rfa_graph.matrix_size,
                                     rfa_graph.matrix_size)
        head_by_start_final_pair = {}
        next_free_number = 0

        for head, dfa in productions_with_dfa:
            # Number this DFA's states right after the previous DFA's states
            num_by_state = {
                state: number
                for number, state in enumerate(dfa.states, next_free_number)
            }
            next_free_number += len(num_by_state)
            rfa_graph.vertices.update(num_by_state.values())

            for start_state in dfa.start_states:
                rfa_graph.start_states.add(num_by_state[start_state])
            for final_state in dfa.final_states:
                final_number = num_by_state[final_state]
                rfa_graph.final_states.add(final_number)
                head_by_start_final_pair[
                    num_by_state[dfa.start_state], final_number] = head.value

            edges_by_source = dfa._transition_function._transitions
            for state_from, outgoing in edges_by_source.items():
                for edge_symb, state_to in outgoing.items():
                    matrix = rfa_graph.label_to_bool_matrix.setdefault(
                        edge_symb, empty_matrix.dup())
                    matrix[num_by_state[state_from],
                           num_by_state[state_to]] = True

        return cls(rfa_graph, head_by_start_final_pair, eps_productions,
                   start_symbol)
예제 #8
0
    def from_regex(self, filename):
        """Fill this object from the regex stored in ``filename``.

        The file content (right-stripped) is parsed as a ``Regex`` and
        turned into a minimized DFA.  The DFA states are renumbered with
        consecutive integers starting at 0; the method then sets
        ``n_vertices``, marks each transition as ``True`` in the matrix
        returned by ``get_by_label(str(label))``, and records the start
        and final vertices.

        The file is read inside a ``with`` block so the handle is always
        closed (the previous version never closed it).
        """
        with open(filename) as input_file:
            regex = Regex(input_file.read().rstrip())
        dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
        self.n_vertices = len(dfa.states)
        state_renumeration = {
            state: index for index, state in enumerate(dfa.states)}

        for fro, label, to in dfa._transition_function.get_edges():
            self.get_by_label(str(label))[state_renumeration[fro], state_renumeration[to]] = True

        self.start_vertices.add(state_renumeration[dfa.start_state])

        for state in dfa.final_states:
            self.final_vertices.add(state_renumeration[state])
예제 #9
0
def str_to_graph(s):
    """Convert the regex string ``s`` into an integer-labelled edge list.

    The regex is compiled to an epsilon NFA, minimized and exported to
    networkx; the nodes are then relabelled with integers.

    Returns a tuple ``(labels, start_states, final_states)``:
    ``labels`` is the ``'label'`` edge-attribute mapping of the relabelled
    graph, and the two state lists hold the sorted integer labels of the
    automaton's start and final states.
    """
    automaton = Regex(s).to_epsilon_nfa().minimize()
    g = automaton.to_networkx()
    g2 = nx.convert_node_labels_to_integers(g, ordering="sorted")
    # Pair the i-th smallest original node with the i-th smallest integer
    # label.  Both sequences are sorted once here instead of re-sorting
    # the relabelled nodes on every loop iteration.
    new_label = dict(zip(sorted(g.nodes), sorted(g2.nodes)))
    start_states = sorted(new_label[state] for state in automaton.start_states)
    final_states = sorted(new_label[state] for state in automaton.final_states)
    labels = nx.get_edge_attributes(g2, 'label')
    return labels, start_states, final_states
예제 #10
0
def regex_to_pda_graph(regex, first_node_number):
    """Turn a regex string into a ``nx.DiGraph`` with renumbered nodes.

    The regex is compiled to an epsilon NFA, minimized and exported to
    networkx.  Nodes whose ``'label'`` attribute is falsy are dropped, the
    remaining nodes are renumbered in sorted order starting at
    ``first_node_number``, and every edge label is wrapped in a
    one-element list.
    """
    automaton: EpsilonNFA = Regex(regex).to_epsilon_nfa().minimize()
    multigraph: nx.MultiDiGraph = automaton.to_networkx()

    # Collect first, delete afterwards, so the node view is not modified
    # while it is being iterated.
    unlabeled = [
        node for node in multigraph.nodes
        if not multigraph.nodes[node]['label']
    ]
    for node in unlabeled:
        multigraph.remove_node(node)

    renumbering = {
        node: first_node_number + offset
        for offset, node in enumerate(sorted(multigraph.nodes))
    }
    relabeled: nx.Graph = nx.relabel_nodes(multigraph, renumbering)
    for edge in relabeled.edges:
        attributes = relabeled.edges[edge]
        attributes['label'] = [attributes['label']]
    return nx.DiGraph(relabeled)
예제 #11
0
    def from_regex(cls, regex: Regex, initial_label: Symbol):
        """ Build a single-box recursive automaton from a regular expression

        The regex is converted to an epsilon NFA and minimized; the result
        is wrapped in one box labelled with the initial label.

        Parameters
        -----------
        regex : :class:`~pyformlang.regular_expression.Regex`
            The regular expression to convert
        initial_label : :class:`~pyformlang.finite_automaton.Symbol`
            The label of the box, also used as the initial label of the
            recursive automaton (passed through ``to_symbol`` first)

        Returns
        -----------
        rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
            A recursive automaton whose only label is the initial label
            and whose only box holds the minimized automaton
        """

        label = to_symbol(initial_label)
        minimal_automaton = regex.to_epsilon_nfa().minimize()
        only_box = Box(minimal_automaton, label)
        return RecursiveAutomaton({label}, label, {only_box})
예제 #12
0
    def parse_regex(self, file_path):
        """Re-initialise this object from the regex stored in ``file_path``.

        The file content (right-stripped) is parsed as a ``Regex`` and
        converted to a minimized DFA.  The DFA states are numbered with
        consecutive integers starting at 0; for every label one sparse
        boolean matrix in ``label_matrices`` records its transitions.
        Start and final states go to ``start_vertices`` and
        ``terminal_vertices``.

        Returns ``self`` so calls can be chained.
        """
        self.__init__()
        # read regex from file; ``with`` closes it even if parsing fails
        with open(file_path, 'r') as regex_file:
            regex = Regex(regex_file.read().rstrip())

        # regex to dfa conversion and vertices count init
        dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
        self.vertices_count = len(dfa.states)

        # states enumeration
        states = {state: index for index, state in enumerate(dfa._states)}

        # init label_matrices
        for state_from in dfa._states:
            for label in dfa._input_symbols:
                for state_to in dfa._transition_function(state_from, label):
                    if label not in self.label_matrices:
                        # the matrix for a label is created on first use
                        self.label_matrices[label] = Matrix.sparse(
                            BOOL, self.vertices_count, self.vertices_count)
                    self.label_matrices[label][states[state_from],
                                               states[state_to]] = True

        # init start and terminal states
        self.start_vertices.add(states[dfa.start_state])
        for state in dfa._final_states:
            self.terminal_vertices.add(states[state])
        return self
예제 #13
0
 def test_to_enfa0(self):
     """ Tests the transformation of regexes to epsilon NFAs """
     symb_a = finite_automaton.Symbol("a")
     symb_b = finite_automaton.Symbol("b")
     symb_c = finite_automaton.Symbol("c")
     epsilon = finite_automaton.Epsilon()
     # Each entry: regex text, then (word, must_accept) pairs in check order
     cases = [
         ("a|b", [
             ([symb_a], True),
             ([symb_b], True),
             ([symb_c], False),
             ([epsilon], False),
             ([symb_a, symb_b], False),
         ]),
         ("a b", [
             ([symb_a], False),
             ([symb_b], False),
             ([symb_a, symb_b], True),
         ]),
         ("a b c", [
             ([symb_a, symb_b], False),
             ([symb_a, symb_b, symb_c], True),
             ([symb_a, symb_b, symb_a], False),
         ]),
         ("(a b)|c", [
             ([symb_a, symb_b], True),
             ([symb_a, symb_c], False),
             ([symb_b, symb_c], False),
             ([symb_c], True),
         ]),
         ("", [
             ([symb_a], False),
             ([symb_b], False),
             ([symb_c], False),
             ([], False),
         ]),
         ("a*", [
             ([symb_a], True),
             ([], True),
             ([symb_a, symb_a], True),
             ([symb_a, symb_a, symb_a], True),
         ]),
     ]
     for pattern, expectations in cases:
         enfa = Regex(pattern).to_epsilon_nfa()
         for word, must_accept in expectations:
             check = self.assertTrue if must_accept else self.assertFalse
             check(enfa.accepts(word))
예제 #14
0
 def test_to_enfa1(self):
     """ Tests the transformation of more regexes to epsilon NFAs """
     symb_a = finite_automaton.Symbol("a")
     symb_b = finite_automaton.Symbol("b")
     symb_c = finite_automaton.Symbol("c")
     # Each entry: regex text, then (word, must_accept) pairs in check order
     cases = [
         ("a**", [
             ([symb_a], True),
             ([], True),
             ([symb_a, symb_a], True),
             ([symb_a, symb_a, symb_a], True),
         ]),
         ("a*b|c", [
             ([symb_a, symb_a, symb_b], True),
             ([symb_b], True),
             ([symb_c], True),
             ([symb_a, symb_a, symb_c], False),
         ]),
         ("a*(b|c)", [
             ([symb_a, symb_a, symb_b], True),
             ([symb_b], True),
             ([symb_c], True),
             ([symb_a, symb_a, symb_c], True),
         ]),
         ("a*.(b|c)", [
             ([symb_a, symb_a, symb_b], True),
             ([symb_b], True),
             ([symb_c], True),
             ([symb_a, symb_a, symb_c], True),
         ]),
         ("a*.(b|c)epsilon", [
             ([symb_a, symb_a, symb_b], True),
             ([symb_b], True),
             ([symb_c], True),
             ([symb_a, symb_a, symb_c], True),
         ]),
         ("$", [
             ([symb_a], False),
             ([symb_b], False),
             ([symb_c], False),
             ([], True),
         ]),
     ]
     for pattern, expectations in cases:
         enfa = Regex(pattern).to_epsilon_nfa()
         for word, must_accept in expectations:
             check = self.assertTrue if must_accept else self.assertFalse
             check(enfa.accepts(word))
예제 #15
0
def str_to_dfa(s):
    """Parse ``s`` as a regex and return its determinized, minimized automaton."""
    return Regex(s).to_epsilon_nfa().to_deterministic().minimize()
예제 #16
0
def regex_to_min_dfa(regex: Regex):
    """Return ``regex`` as a determinized, then minimized automaton."""
    return regex.to_epsilon_nfa().to_deterministic().minimize()
 def from_regex(cls, regex: str, is_python_regex=True):
     """Wrap the minimized automaton of ``regex`` in a ``RegexGraphWrapper``.

     ``regex`` is parsed with ``Regex.from_python_regex`` when
     ``is_python_regex`` is true, and with the plain ``Regex`` constructor
     otherwise.
     """
     parse = Regex.from_python_regex if is_python_regex else Regex
     minimal_automaton = parse(regex).to_epsilon_nfa().minimize()
     return RegexGraphWrapper(minimal_automaton)
예제 #18
0
def regex_to_minimal_dfa(regex_str):
    """Parse ``regex_str`` and return the result of minimizing its epsilon NFA."""
    epsilon_nfa = Regex(regex_str).to_epsilon_nfa()
    return epsilon_nfa.minimize()
예제 #19
0
def regex_to_dfa(regex):
    """Parse ``regex`` and return its determinized automaton (no minimization)."""
    epsilon_nfa = Regex(regex).to_epsilon_nfa()
    return epsilon_nfa.to_deterministic()
예제 #20
0
def min_dfa_from_regex(r: Regex):
    """Convert ``r`` to an epsilon NFA, determinize it and return the minimized result."""
    epsilon_nfa = r.to_epsilon_nfa()
    deterministic = epsilon_nfa.to_deterministic()
    return deterministic.minimize()