Example #1
def test_dfa_nfa_intersection():
    nfa1 = NondeterministicFiniteAutomaton()
    state0 = finite_automaton.State(0)
    state1 = finite_automaton.State(1)
    state2 = finite_automaton.State(2)
    state3 = finite_automaton.State(3)
    nfa1.add_transition(state0, symb_a, state1)
    nfa1.add_transition(state0, symb_c, state2)
    nfa1.add_transition(state1, symb_a, state1)
    nfa1.add_transition(state1, symb_b, state2)
    nfa1.add_transition(state2, symb_a, state0)
    nfa1.add_transition(state0, symb_c, state3)
    nfa1.add_transition(state3, symb_a, state1)
    nfa1.add_start_state(state0)
    nfa1.add_final_state(state1)

    s = ("((((b.a)|(((a|(b.c))|(c.a)).(((b.c))*." +
         "(b.a)))))*.(((a|(b.c))|(c.a)).((b.c))*))")
    r = Regex(s)
    dfa = r.to_epsilon_nfa().to_deterministic().minimize()
    dnfa = dfa.get_intersection(nfa1)
    assert (dnfa.accepts([symb_a]))
    assert (dnfa.accepts([symb_c, symb_a]))
    assert (dnfa.accepts([symb_a, symb_b, symb_a, symb_a]))
    assert (not dnfa.accepts([symb_c, symb_b]))
    assert (not dnfa.accepts([symb_a, symb_a]))
    assert (not dnfa.accepts([symb_b]))
Example #2
 def test_intersection(self):
     """ Tests the intersection with a regex """
     regex = Regex("a*b*")
     dfa = regex.to_epsilon_nfa()
     symb_a = Symbol("a")
     symb_b = Symbol("b")
     self.assertTrue(dfa.accepts([symb_a, symb_a, symb_b, symb_b]))
     self.assertFalse(dfa.accepts([symb_b, symb_b, symb_a]))
     ter_a = Terminal("a")
     ter_b = Terminal("b")
     var_s = Variable("S")
     productions = {
         Production(var_s, [ter_a, var_s, ter_b]),
         Production(var_s, [ter_b, var_s, ter_a]),
         Production(var_s, [])
     }
     cfg = CFG(productions=productions, start_symbol=var_s)
     self.assertTrue(cfg.contains([ter_a, ter_a, ter_b, ter_b]))
     self.assertFalse(cfg.contains([ter_a, ter_a, ter_b]))
     cfg_i = cfg.intersection(regex)
     self.assertTrue(cfg_i.contains([ter_a, ter_a, ter_b, ter_b]))
     self.assertFalse(cfg_i.contains([ter_a, ter_a, ter_b]))
     self.assertTrue(cfg_i.contains([]))
     cfg_i = cfg.intersection(dfa)
     self.assertTrue(cfg_i.contains([ter_a, ter_a, ter_b, ter_b]))
     self.assertFalse(cfg_i.contains([ter_a, ter_a, ter_b]))
     self.assertTrue(cfg_i.contains([]))
Example #3
def from_regex_file(path: str):
    file = open(path)
    regex = Regex(file.readline())
    file.close()
    dfa: DeterministicFiniteAutomaton = regex.to_epsilon_nfa(
    ).to_deterministic().minimize()
    return from_dfa(dfa)
Example #4
    def test_creation(self):
        # S -> a S b | a b
        enfa = Regex("a S b | a b").to_epsilon_nfa()
        dfa = enfa.minimize()
        box = Box(dfa, Symbol("S"))
        rsa_1 = RecursiveAutomaton({Symbol("S")}, Symbol("S"), {box})

        self.assertEqual(rsa_1.get_number_of_boxes(), 1)
        self.assertEqual(box, rsa_1.get_box(Symbol("S")))
        self.assertEqual(rsa_1.labels, {Symbol("S")})
        self.assertEqual(rsa_1.initial_label, Symbol("S"))

        rsa_2 = RecursiveAutomaton()
        rsa_2.add_box(box)
        rsa_2.change_initial_label(Symbol("S"))

        self.assertEqual(rsa_2, rsa_1)

        # The start label should be added to the label set automatically
        rsa_3 = RecursiveAutomaton(set(), Symbol("S"), {box})
        self.assertEqual(rsa_3.labels, {Symbol("S")})

        # A label without a corresponding box should raise a ValueError
        with self.assertRaises(ValueError):
            RecursiveAutomaton({Symbol("S"), Symbol("v")}, Symbol("S"),
                               {box})
Example #5
    def test_is_equivalent_to(self):
        # S -> a* b*
        rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), Symbol("S"))

        # S -> a+ b+
        rsa_2 = RecursiveAutomaton.from_regex(Regex("a a* b b*"), Symbol("S"))

        self.assertNotEqual(rsa_1, rsa_2)
Example #6
def test_regex_to_epsilon_nfa():
    regex = Regex("(a|a a b)*")
    enfa = regex.to_epsilon_nfa()
    assert (len(enfa.states) == 12)
    assert (enfa.accepts([symb_a]))
    assert (enfa.accepts([symb_a, symb_a, symb_b, epsilon]))
    assert (not enfa.accepts([symb_c]))
    assert (enfa.accepts([epsilon]))
    assert (not enfa.accepts([symb_b, symb_a]))
Example #7
def test_dfa_intersection():
    regex = Regex("(a |a b c)*")
    dfa1 = DFA.min_dfa_from_regex(regex)
    regex = Regex("(a* | (a | b)*) c")
    dfa2 = DFA.min_dfa_from_regex(regex)
    new_dfa = dfa1.get_intersection(dfa2)
    assert (new_dfa.accepts([symb_a, symb_b, symb_c]))
    assert (new_dfa.accepts([symb_a, symb_a, symb_b, symb_c]))
    assert (not new_dfa.accepts([symb_a, symb_b, symb_c,
                                 symb_a, symb_b, symb_c]))
Example #8
    def test_from_regex(self):
        # S -> a*
        rsa_2 = RecursiveAutomaton.from_regex(Regex("a*"), Symbol("S"))

        enfa = Regex("a*").to_epsilon_nfa()
        dfa = enfa.minimize()
        box = Box(dfa, Symbol("S"))
        rsa_1 = RecursiveAutomaton({Symbol("S")}, Symbol("S"), {box})

        self.assertEqual(rsa_2, rsa_1)
Example #9
    def from_text(cls, text: List[str]):
        start_symbol = None
        eps_productions = []
        productions_with_dfa = []
        for line in text:
            raw_head, *raw_body = line.strip().split(' ', 1)
            regex = Regex(' '.join(raw_body).replace('eps', 'epsilon'))
            head = Variable(raw_head)
            if start_symbol is None:
                start_symbol = head
            if not raw_body:
                eps_productions.append(Production(head, []))
            dfa: DeterministicFiniteAutomaton = regex.to_epsilon_nfa(
            ).to_deterministic().minimize()
            productions_with_dfa.append((head, dfa))

        import wrappers.GraphWrapper
        rfa_graph = wrappers.GraphWrapper.empty()
        rfa_graph.matrix_size = sum(
            [len(dfa.states) for _, dfa in productions_with_dfa])
        rfa_graph.vertices = set()
        empty_matrix = Matrix.sparse(types.BOOL, rfa_graph.matrix_size,
                                     rfa_graph.matrix_size)
        head_by_start_final_pair = {}
        total_states_counter = 0

        for head, dfa in productions_with_dfa:
            transitions = dfa._transition_function._transitions
            num_by_state = {}
            for state in dfa.states:
                num_by_state[state] = total_states_counter
                total_states_counter += 1
            rfa_graph.vertices.update(num_by_state.values())

            for start_state in dfa.start_states:
                rfa_graph.start_states.add(num_by_state[start_state])
            for final_state in dfa.final_states:
                rfa_graph.final_states.add(num_by_state[final_state])
                head_by_start_final_pair[
                    num_by_state[dfa.start_state],
                    num_by_state[final_state]] = head.value

            for state_from in transitions:
                for edge_symb in transitions[state_from]:
                    state_to = transitions[state_from][edge_symb]
                    matrix = rfa_graph.label_to_bool_matrix.setdefault(
                        edge_symb, empty_matrix.dup())
                    matrix[num_by_state[state_from],
                           num_by_state[state_to]] = True

        return cls(rfa_graph, head_by_start_final_pair, eps_productions,
                   start_symbol)
Example #10
 def __init__(self, regex):
     enfa = Regex(regex).to_epsilon_nfa()
     self.dfa = enfa.to_deterministic().minimize()
     states = self.dfa.states
     vertices = range(len(states))
     self.vert_dict = dict(zip(states, vertices))
     self.num_vert = len(self.vert_dict)  
     self.start_states = [
         self.vert_dict[st] for st in self.dfa.start_states
     ]
     self.final_states = [
         self.vert_dict[st] for st in self.dfa.final_states
     ]
Example #11
def test_simple_regex():
    regexes = ["a", "a b", "a | b", "a*"]
    answers = ["a", "ab", "b", "aaa"]
    for i in range(4):
        regex = DifferentiableRegex(Regex(regexes[i]))
        word = answers[i]
        assert (regex.accepts(word))
Example #12
    def read_grammar_with_regex(cls, name):
        id = 0

        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            productions_txt = file.readlines()

            for production_txt in productions_txt:
                line = production_txt.strip().split()
                head, body = line[0], ' '.join(line[1:])
                head = Variable(head)

                if start_symb is None:
                    start_symb = head

                new_productions, new_variables, new_terminals, id = CFGrammar.read_production_regex(
                    head, Regex(body), id)

                productions |= new_productions
                variables |= new_variables
                terminals |= new_terminals

        cfg = CFG(variables, terminals, start_symb, productions)

        return cfg
Example #13
    def from_cfg(cls, cfg: CFG):
        """ Create a recursive automaton from context-free grammar

        Parameters
        -----------
        cfg : :class:`~pyformlang.cfg.CFG`
            The context-free grammar

        Returns
        -----------
        rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
            The new recursive automaton built from the context-free grammar
        """

        initial_label = to_symbol(cfg.start_symbol)
        grammar_in_true_format = remove_repetition_of_nonterminals_from_productions(
            cfg.to_text())

        boxes = set()
        labels = set()
        notation_for_epsilon = Epsilon().to_text()
        for production in grammar_in_true_format.splitlines():
            head, body = production.split(" -> ")
            labels.add(to_symbol(head))

            if body == "":
                body = notation_for_epsilon

            boxes.add(
                Box(Regex(body).to_epsilon_nfa().minimize(), to_symbol(head)))

        return RecursiveAutomaton(labels, initial_label, boxes)
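For illustration, a minimal usage sketch of from_cfg, assuming the pyformlang classes used throughout these examples (CFG, RecursiveAutomaton); the grammar S -> a S b | a b yields a single box labelled S, as Example #24 below also checks:

from pyformlang.cfg import CFG
from pyformlang.rsa import RecursiveAutomaton

# Build a recursive automaton from a one-nonterminal grammar
cfg = CFG.from_text("S -> a S b | a b")
rsa = RecursiveAutomaton.from_cfg(cfg)
assert rsa.get_number_of_boxes() == 1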
Example #14
    def read_cfg(cls, text, start_symbol=cfg.Variable("S"), contains_regexes=False, track_variables=False):
        variables = set()
        productions = set()
        terminals = set()

        if track_variables:
            for line in text.splitlines():
                head = line.strip().split(' ', 1)[0]
                variables.add(cfg.Variable(head))
        
        for line in text.splitlines():
            if contains_regexes and \
               len(line.split()) > 1 and \
               len(line.strip().split(' ', 1)[1]) > 1 and \
               any(symb in line for symb in ['*', '|', '+', '?', ]):
                raw_head, *raw_body = line.strip().split(' ', 1)
                regex = Regex.from_python_regex(' '.join(raw_body))
                head = cfg.Variable(raw_head)
                cur_cfg = cls._create_cfg_from_regex(head, regex, track_variables)
                terminals.update(cur_cfg.terminals)
                productions.update(cur_cfg.productions)
                variables.update(cur_cfg.variables)
            else:
                line = line.strip()
                if not line:
                    continue
                if track_variables:
                    tmp_vars = set()
                    cls._read_line(line, productions, terminals, tmp_vars)
                else:
                    cls._read_line(line, productions, terminals, variables)
        return cls(variables=variables, terminals=terminals,
                   productions=productions, start_symbol=start_symbol)
Example #15
def test_comp_regex_redundant():
    regexes = ['a *', 'a | a * | a | a *', 'a * * * * *', '(a | ) *']
    answers = ['', 'a', 'aaa', 'aaaaaaaa']
    for raw_regex in regexes:
        regex = DifferentiableRegex(Regex(raw_regex))
        for word in answers:
            assert (regex.accepts(word))
Example #16
 def _create_cfg_from_regex(cls, head: Variable, regex: Regex, variables=None) -> CFG:
     dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
     transitions = dfa._transition_function._transitions
     state_to_var: Dict[State, Variable] = {}
     productions, terms, vars = set(), set(), set()
     for state in dfa.states:
         state_to_var[state] = Variable(f'{state}:{cls.__var_state_counter}')
         cls.__var_state_counter += 1
     vars.update(state_to_var.values())
     for start_state in dfa.start_states:
         productions.add(Production(head, [state_to_var[start_state]]))
     for state_from in transitions:
         for edge_symb in transitions[state_from]:
             state_to = transitions[state_from][edge_symb]
             current_prod_head = state_to_var[state_from]
             current_prod_body = []
             if (not variables and edge_symb.value.isupper()
                     or variables and edge_symb.value in variables):
                 var = Variable(edge_symb.value)
                 vars.add(var)
                 current_prod_body.append(var)
             else:
                 term = Terminal(edge_symb.value)
                 terms.add(term)
                 current_prod_body.append(term)
             current_prod_body.append(state_to_var[state_to])
             productions.add(Production(current_prod_head, current_prod_body))
             if state_to in dfa.final_states:
                 productions.add(Production(state_to_var[state_to], []))
     if not productions:
         return CFG(vars, terms, head, {Production(head, [])})
     return CFG(vars, terms, head, productions)
Example #17
    def from_str(st, py=True):
        if py:
            e_dfa = Regex.from_python_regex(st).to_epsilon_nfa()
        else:
            e_dfa = Regex(st).to_epsilon_nfa()
        dfa = e_dfa.to_deterministic().minimize()

        dfa, states_map = SimpleGraph.dfa_normalize_states(dfa)
        edges = []
        size = 0
        for vs, labels in dfa.to_dict().items():
            for label, ve in labels.items():
                vs, ve = int(str(vs)), int(str(ve))
                label = str(label)
                size = max(size, vs, ve)
                edges.append((vs, label, ve))
        return Regexp(size + 1, edges, dfa, states_map)
Example #18
    def from_regex(self, filename):
        input_file = open(filename)
        regex = Regex(input_file.read().rstrip())
        dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
        self.n_vertices = len(dfa.states)
        state_renumeration = dict()
        i = 0
        for state in dfa.states:
            state_renumeration[state] = i
            i += 1
            
        for fro, label, to in dfa._transition_function.get_edges():
            self.get_by_label(str(label))[state_renumeration[fro], state_renumeration[to]] = True

        self.start_vertices.add(state_renumeration[dfa.start_state])

        for state in dfa.final_states:
            self.final_vertices.add(state_renumeration[state])
Example #19
def str_to_graph(s):
    r = Regex(s)
    a = r.to_epsilon_nfa().minimize()
    start_states = list(a.start_states)
    final_states = list(a.final_states)
    g = a.to_networkx()
    g2 = nx.convert_node_labels_to_integers(g, ordering="sorted")
    d = {}
    i = 0
    for node in sorted(g.nodes):
        d[node] = sorted(g2.nodes)[i]
        i += 1
    for i in range(len(start_states)):
        start_states[i] = d[start_states[i]]
    for i in range(len(final_states)):
        final_states[i] = d[final_states[i]]
    labels = nx.get_edge_attributes(g2, 'label')
    return labels, sorted(start_states), sorted(final_states)
Example #20
def test_comp_regex():
    regex = DifferentiableRegex(Regex('(a * b | c) * d'))
    correct = ['cd', 'aaabd', 'aabcd', 'abbd']
    incorrect = ['dd', 'aaaad', 'ababc', 'q']
    for word in correct:
        assert (regex.accepts(word))

    for word in incorrect:
        assert (not regex.accepts(word))
Example #21
def regex_to_pda_graph(regex, first_node_number):
    regex = Regex(regex)
    nfa: EpsilonNFA = regex.to_epsilon_nfa().minimize()
    graph: nx.MultiDiGraph = nfa.to_networkx()
    killing_list = []
    for node in graph.nodes:
        if not graph.nodes[node]['label']:
            killing_list.append(node)
    for node in killing_list:
        graph.remove_node(node)
    my_map = {}
    i = first_node_number
    for node in sorted(graph.nodes):
        my_map[node] = i
        i += 1
    graph: nx.Graph = nx.relabel_nodes(graph, my_map)
    for edge in graph.edges:
        graph.edges[edge]['label'] = [graph.edges[edge]['label']]
    return nx.DiGraph(graph)
Example #22
def test_regex_to_min_dfa():
    regex = Regex("((b|a) a b)*")
    dfa = DFA.min_dfa_from_regex(regex)
    assert (len(dfa.states) == 3)
    assert(dfa.accepts([symb_b, symb_a, symb_b]))
    assert(dfa.accepts([symb_a, symb_a, symb_b, symb_a, symb_a, symb_b]))
    assert(not dfa.accepts([symb_a, symb_a, symb_b, symb_b]))
    assert(not dfa.accepts([symb_a, symb_a, symb_b, epsilon]))
    assert(not dfa.accepts([symb_c]))
    assert(not dfa.accepts([epsilon]))
    assert(not dfa.accepts([symb_b, symb_a]))
Example #23
 def from_text(cls, text: List[str], use_python_regexes_if_necessary=False, variables=None):
     vars, terms, prods = set(), set(), set()
     start_var = None
     for line in text:
         if not line.strip():
             continue
         raw_head, *raw_body = line.strip().split(' ', 1)
         if raw_body and any([spec in raw_body[0] for spec in ['|', '.', '?', '+', '-']]):
             if '-' in raw_body[0] and use_python_regexes_if_necessary:
                 regex = Regex.from_python_regex(raw_body[0])
             else:
                 regex = Regex(raw_body[0])
             head = Variable(raw_head)
             if start_var is None:
                 start_var = head
             cur_cfg = cls._create_cfg_from_regex(head, regex, variables)
             vars.update(cur_cfg.variables)
             terms.update(cur_cfg.terminals)
             prods.update(cur_cfg.productions)
         else:
             raw_body = raw_body[0].split(' ') if raw_body else ''
             if start_var is None:
                 start_var = Variable(raw_head)
             head = Variable(raw_head)
             vars.add(head)
             body = []
             for element in raw_body:
                 if element == 'eps':
                     continue
                 elif (not variables and any(letter.isupper() for letter in element)
                       or variables and element in variables):
                     var = Variable(element)
                     vars.add(var)
                     body.append(var)
                 else:
                     term = Terminal(element)
                     terms.add(term)
                     body.append(term)
             prods.add(Production(head, body))
     cfg = CFG(vars, terms, start_var, prods)
     return cls(cfg)
Example #24
    def test_from_cfg(self):
        # g1: S -> a S b | a b
        rsa1_g1 = RecursiveAutomaton.from_cfg(
            CFG.from_text("S -> a S b | a b"))
        rsa2_g1 = RecursiveAutomaton.from_regex(Regex("a S b | a b"),
                                                Symbol("S"))

        self.assertEqual(rsa1_g1, rsa2_g1)

        # g2: S -> a V b
        #     V -> c S d | c d
        rsa1_g2 = RecursiveAutomaton.from_cfg(
            CFG.from_text("S -> a V b\nV -> c S d | c d"))
        self.assertEqual(rsa1_g2.get_number_of_boxes(), 2)
        self.assertEqual(rsa1_g2.labels, {Symbol("S"), Symbol("V")})

        dfa_S = Regex("a V b").to_epsilon_nfa().minimize()
        self.assertEqual(rsa1_g2.get_box(Symbol("S")), Box(dfa_S, Symbol("S")))

        dfa_V = Regex("c S d | c d").to_epsilon_nfa().minimize()
        self.assertEqual(rsa1_g2.get_box(Symbol("V")), Box(dfa_V, Symbol("V")))
Example #25
 def test_get_repr(self):
     regex0 = Regex("a*.(b|c)epsilon")
     regex_str = str(regex0)
     regex1 = Regex(regex_str)
     dfa0 = regex0.to_epsilon_nfa().to_deterministic().minimize()
     dfa1 = regex1.to_epsilon_nfa().to_deterministic().minimize()
     self.assertEqual(dfa0, dfa1)
Example #26
 def test_intersection_empty(self):
     regex = Regex("")
     ter_a = Terminal("a")
     ter_b = Terminal("b")
     var_s = Variable("S")
     productions = {
         Production(var_s, [ter_a, var_s, ter_b]),
         Production(var_s, [ter_b, var_s, ter_a]),
         Production(var_s, [])
     }
     cfg = CFG(productions=productions, start_symbol=var_s)
     cfg_i = cfg & regex
     self.assertFalse(cfg_i)
Example #27
    def parse_regex(self, file_path):
        self.__init__()
        # read regex from file
        regex_file = open(file_path, 'r')
        regex = Regex(regex_file.read().rstrip())
        regex_file.close()

        # regex to dfa conversion and vertices count init
        dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
        self.vertices_count = len(dfa.states)

        # states enumeration
        states = {}
        start = 0
        for state in dfa._states:
            if state not in states:
                states[state] = start
                start = start + 1

        # init label_matrices
        for start in dfa._states:
            for label in dfa._input_symbols:
                in_states = dfa._transition_function(start, label)
                for end in in_states:
                    if label in self.label_matrices:
                        self.label_matrices[label][states[start],
                                                   states[end]] = True
                    else:
                        bool_matrix = Matrix.sparse(BOOL, self.vertices_count,
                                                    self.vertices_count)
                        bool_matrix[states[start], states[end]] = True
                        self.label_matrices[label] = bool_matrix

        # init start and terminal states
        self.start_vertices.add(states[dfa.start_state])
        for state in dfa._final_states:
            self.terminal_vertices.add(states[state])
        return self
Example #28
def read_cfgrammar(name):
    file = open(name, 'r')

    s = ''
    cfg_from_regex = []
    for line in file:
        if 'regexp' in line:
            line = line.replace('regexp', "")
            head = line.split(" -> ")[0]
            regex = Regex(line.split(" -> ")[1][:-1])
            cfg_from_regex.append(regex.to_cfg(starting_symbol=head))
        else:
            s += line

    file.close()
    cfg = CFG.from_text(s)

    for c in cfg_from_regex:
        cfg = CFG(cfg.variables.union(c.variables),
                  cfg.terminals.union(c.terminals), cfg.start_symbol,
                  cfg.productions.union(c.productions))

    return cfg
Example #29
def rsm_from_text(source: str, start_symbol: Variable = Variable("S")) -> RSM:
    """Create a Recursive State Machine [1]_ from text.

    Parameters
    ----------
    source : str
        The text from which the Recursive State Machine will be created.

    start_symbol : Variable
        Start symbol of a Recursive State Machine.

    Examples
    --------
    >>> import cfpq_data
    >>> rsm = cfpq_data.rsm_from_text("S -> (a S* b S*)*")
    >>> [rsm.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    Returns
    -------
    rsm : RSM
        Recursive State Machine.

    References
    ----------
    .. [1] Alur R., Etessami K., Yannakakis M. (2001) Analysis of Recursive State Machines. In: Berry G.,
       Comon H., Finkel A. (eds) Computer Aided Verification. CAV 2001.
       Lecture Notes in Computer Science, vol 2102.
       Springer, Berlin, Heidelberg. https://doi.org/10.1007/3-540-44585-4_18
    """
    boxes = list()

    for production in source.splitlines():
        if " -> " not in production:
            continue

        head, body = production.split(" -> ")

        body = body.replace("epsilon", "$").replace("eps", "$")
        if body == "":
            body = "$"

        boxes.append(
            (Variable(head), Regex(body).to_epsilon_nfa().to_deterministic().minimize())
        )

    return RSM(start_symbol, boxes)
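As a complementary sketch of what the function does per production, assuming the pyformlang imports used throughout these examples: each rule body is parsed as a regex and compiled into a minimized DFA, which becomes the box for its head variable.

from pyformlang.cfg import Variable
from pyformlang.regular_expression import Regex

# One line of the grammar text becomes one (head, minimal DFA) box
head, body = "S -> (a S* b S*)*".split(" -> ")
box_dfa = Regex(body).to_epsilon_nfa().to_deterministic().minimize()
print(Variable(head), len(box_dfa.states))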
Example #30
    def from_text(text, start_symbol=Variable("S")):
        lines = text.splitlines()
        production_set = set()

        for line in lines:
            production = line.split(' -> ')
            head = Variable(production[0])
            body_str = production[1].strip()

            body_str = body_str.replace('?', f'|{EPS_SYM}')

            production_set |= Grammar_Wrapper.regex_to_production(
                Regex(body_str), head)

        return CFG(start_symbol=start_symbol, productions=production_set)