예제 #1
0
    def read_cfg(cls, text, start_symbol=cfg.Variable("S"), contains_regexes=False, track_variables=False):
        """Build a grammar from a textual description, one production per line.

        Every non-blank line is stripped and split on its first space into a
        head and an optional body. Blank and whitespace-only lines are ignored.

        Args:
            text: The grammar description; it is processed line by line.
            start_symbol: Start symbol passed through to the constructor.
            contains_regexes: When true, a line whose body is longer than one
                character and which contains any of ``*``, ``|``, ``+``, ``?``
                (searched in the whole line, head included) has its body parsed
                with ``Regex.from_python_regex`` and expanded through
                ``cls._create_cfg_from_regex``.
            track_variables: When true, the head of every non-blank line is
                registered as a variable and the variables reported by
                ``cls._read_line`` are discarded; the flag is also forwarded
                to ``cls._create_cfg_from_regex``.

        Returns:
            ``cls(variables=..., terminals=..., productions=...,
            start_symbol=start_symbol)``.
        """
        variables = set()
        productions = set()
        terminals = set()
        regex_markers = ('*', '|', '+', '?')

        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                # A blank line declares nothing; in particular it must not
                # register an empty-named variable when heads are tracked.
                continue

            raw_head, *raw_body = line.split(' ', 1)
            if track_variables:
                variables.add(cfg.Variable(raw_head))

            # ``raw_body`` is empty when the line holds no space at all, so it
            # is checked before being indexed.
            is_regex_line = (
                contains_regexes
                and bool(raw_body)
                and len(raw_body[0]) > 1
                and any(marker in line for marker in regex_markers)
            )

            if is_regex_line:
                regex = Regex.from_python_regex(raw_body[0])
                head = cfg.Variable(raw_head)
                cur_cfg = cls._create_cfg_from_regex(head, regex, track_variables)
                terminals.update(cur_cfg.terminals)
                productions.update(cur_cfg.productions)
                variables.update(cur_cfg.variables)
            elif track_variables:
                # Variables found by the line reader are deliberately thrown
                # away: only line heads count in this mode.
                cls._read_line(line, productions, terminals, set())
            else:
                cls._read_line(line, productions, terminals, variables)

        return cls(variables=variables, terminals=terminals,
                   productions=productions, start_symbol=start_symbol)
예제 #2
0
    def from_str(st, py=True):
        """Compile the regular expression ``st`` into a ``Regexp``.

        ``st`` is parsed with ``Regex.from_python_regex`` when ``py`` is true
        and with the plain ``Regex`` constructor otherwise. The automaton is
        determinized, minimized and passed through
        ``SimpleGraph.dfa_normalize_states``; its transitions are then listed
        as ``(source, label, target)`` triples with integer endpoints.

        Returns:
            ``Regexp(size, edges, dfa, states_map)`` where ``size`` is one more
            than the largest state index appearing on an edge (1 if no edges).
        """
        regex = Regex.from_python_regex(st) if py else Regex(st)
        minimal_dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
        dfa, states_map = SimpleGraph.dfa_normalize_states(minimal_dfa)

        edges = [
            (int(str(source)), str(label), int(str(target)))
            for source, transitions in dfa.to_dict().items()
            for label, target in transitions.items()
        ]
        # Seed with 0 so an automaton without transitions still yields size 1.
        endpoints = [0]
        for source, _, target in edges:
            endpoints.extend((source, target))
        return Regexp(max(endpoints) + 1, edges, dfa, states_map)
예제 #3
0
 def from_text(cls, text: List[str], use_python_regexes_if_necessary=False, variables=None):
     """Build a grammar wrapper from a list of production lines.

     Each non-blank line is stripped and split on its first space into a head
     and an optional body. The head of the first non-blank line becomes the
     start variable.

     A body containing any of ``|``, ``.``, ``?``, ``+``, ``-`` is treated as a
     regex: it is parsed with ``Regex.from_python_regex`` when it contains
     ``-`` and ``use_python_regexes_if_necessary`` is true, with ``Regex``
     otherwise, and expanded through ``cls._create_cfg_from_regex``.

     Any other body is split on single spaces. ``eps`` elements are dropped.
     An element is a variable if it is listed in ``variables`` (when that
     argument is non-empty) or, failing that, if it contains an upper-case
     letter; every other element is a terminal.

     Returns:
         ``cls(CFG(...))`` built from the collected symbols and productions.
     """
     regex_marks = ('|', '.', '?', '+', '-')
     var_set, term_set, prod_set = set(), set(), set()
     start_var = None

     for raw_line in text:
         stripped = raw_line.strip()
         if not stripped:
             continue

         raw_head, *raw_body = stripped.split(' ', 1)
         head = Variable(raw_head)
         if start_var is None:
             start_var = head

         if raw_body and any(mark in raw_body[0] for mark in regex_marks):
             body_text = raw_body[0]
             if '-' in body_text and use_python_regexes_if_necessary:
                 regex = Regex.from_python_regex(body_text)
             else:
                 regex = Regex(body_text)
             sub_cfg = cls._create_cfg_from_regex(head, regex, variables)
             var_set.update(sub_cfg.variables)
             term_set.update(sub_cfg.terminals)
             prod_set.update(sub_cfg.productions)
             continue

         # Plain production: classify each body element as variable/terminal.
         var_set.add(head)
         elements = raw_body[0].split(' ') if raw_body else ''
         body = []
         for element in elements:
             if element == 'eps':
                 continue
             if variables:
                 is_variable = element in variables
             else:
                 is_variable = any(letter.isupper() for letter in element)
             if is_variable:
                 symbol = Variable(element)
                 var_set.add(symbol)
             else:
                 symbol = Terminal(element)
                 term_set.add(symbol)
             body.append(symbol)
         prod_set.add(Production(head, body))

     grammar = CFG(var_set, term_set, start_var, prod_set)
     return cls(grammar)
예제 #4
0
    def from_regexp(path):
        """Build a ``Graph`` from the regular expression stored in a file.

        The whole file content is parsed as a Python regex and converted to a
        minimized DFA. DFA states are numbered consecutively from 0 in
        iteration order, and each transition is recorded as a set cell in the
        per-symbol matrix of ``graph.label_dictionary``.

        Args:
            path: Path of the text file holding the regular expression.

        Returns:
            The populated graph, with ``size``, ``vertices``,
            ``label_dictionary``, ``start_states`` and ``final_states`` set.
        """
        graph = Graph()
        # The ``with`` block closes the file on exit; no explicit close needed.
        with open(path, 'r') as file:
            pattern = file.read()
        dfa = Regex.from_python_regex(
            pattern).to_epsilon_nfa().to_deterministic().minimize()

        # Map every DFA state to a dense integer index.
        dfa_states = {}
        for state in dfa._states:
            dfa_states.setdefault(state, len(dfa_states))
        graph.size = len(dfa_states)

        for state in dfa._states:
            source = dfa_states[state]
            for symbol in dfa._input_symbols:
                for out_state in dfa._transition_function(state, symbol):
                    target = dfa_states[out_state]
                    # Create the matrix for a label the first time it is seen.
                    if symbol not in graph.label_dictionary:
                        graph.label_dictionary[symbol] = Matrix.sparse(
                            BOOL, graph.size, graph.size)
                    graph.label_dictionary[symbol][source, target] = 1
                    graph.vertices.add(source)
                    graph.vertices.add(target)

        # Sync start and final states with the new numbering.
        graph.start_states = [dfa_states[dfa.start_state]]
        for final_state in dfa._final_states:
            graph.final_states.append(dfa_states[final_state])

        return graph
예제 #5
0
 def test_from_python_brackets(self):
     """Check which symbol sequences the Python regex ``a[bc]`` accepts."""
     regex = Regex.from_python_regex("a[bc]")
     for accepted in (["a", "b"], ["a", "c"]):
         self.assertTrue(regex.accepts(accepted))
     for rejected in (["a", "b", "c"], ["a", "a"]):
         self.assertFalse(regex.accepts(rejected))
예제 #6
0
 def test_from_python_simple(self):
     """Check which symbol sequences the Python regex ``abc`` accepts."""
     regex = Regex.from_python_regex("abc")
     self.assertTrue(regex.accepts(["a", "b", "c"]))
     for rejected in (["a", "b", "b"], ["a", "b"]):
         self.assertFalse(regex.accepts(rejected))
    def read_grammar(cls, name):
        """Read a context-free grammar from the text file ``name``.

        Each non-blank line is a sequence of whitespace-separated tokens:
        the head, one separator token that is ignored, then the body.
        Alternatives in the body are separated by a standalone ``|`` token.
        The head of the first production becomes the start symbol. Blank
        lines are skipped.

        An alternative in which every token contains ``*`` is treated as a
        regex and expanded through ``CFGrammar.read_production_regex``.
        Otherwise each token is ``epsilon`` (an ``Epsilon``), an all-upper-case
        token (a ``Variable``) or anything else (a ``Terminal``).

        Args:
            name: Path of the grammar file.

        Returns:
            ``CFG(variables, terminals, start_symbol, productions)``.

        Raises:
            ValueError: If a non-blank line has fewer than two tokens.
        """
        # Running counter threaded through read_production_regex calls.
        next_id = 0

        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Blank lines carry no production; skip them instead of
                    # failing on the unpacking below.
                    continue
                head, _, *body_full = tokens

                if start_symb is None:
                    start_symb = Variable(head)

                # Split the body on standalone "|" tokens into alternatives.
                bodies = [
                    list(group)
                    for is_separator, group in groupby(body_full, lambda token: token == "|")
                    if not is_separator
                ]

                for body in bodies:
                    if all('*' in value for value in body):
                        # NOTE(review): only the first token is parsed as the
                        # regex; any further tokens of the alternative are
                        # ignored - confirm this is intended.
                        new_productions, new_variables, new_terminals, next_id = \
                            CFGrammar.read_production_regex(
                                head, Regex.from_python_regex(body[0]), next_id, False)

                        productions |= new_productions
                        variables |= new_variables
                        terminals |= new_terminals
                        continue

                    body_cfg = []
                    for letter in body:
                        if letter == "epsilon":
                            body_cfg.append(Epsilon())
                        elif letter.isupper():
                            non_terminal = Variable(letter)
                            variables.add(non_terminal)
                            body_cfg.append(non_terminal)
                        else:
                            terminal = Terminal(letter)
                            terminals.add(terminal)
                            body_cfg.append(terminal)

                    productions.add(Production(Variable(head), body_cfg))

        return CFG(variables, terminals, start_symb, productions)
 def from_regex(cls, regex: str, is_python_regex=True):
     """Wrap the minimized automaton of ``regex`` in a ``RegexGraphWrapper``.

     ``regex`` is parsed with ``Regex.from_python_regex`` when
     ``is_python_regex`` is true and with the plain ``Regex`` constructor
     otherwise; the result is converted with ``to_epsilon_nfa().minimize()``.
     """
     parse = Regex.from_python_regex if is_python_regex else Regex
     automaton = parse(regex).to_epsilon_nfa().minimize()
     return RegexGraphWrapper(automaton)