예제 #1
0
    def _test_profiling_intersection(self):
        # Profiling scenario: intersect a nested a/b grammar with a chain DFA
        # that reads `half` times "a" followed by `half` times "b".
        half = 50
        chain = [State(index) for index in range(2 * half + 1)]
        sym_a, sym_b = Symbol("a"), Symbol("b")
        automaton = DeterministicFiniteAutomaton(
            chain, {sym_a, sym_b},
            start_state=chain[0],
            final_states={chain[-1]})
        # Consecutive states are linked: the first `half` edges carry "a",
        # the remaining ones carry "b".
        for position, (source, target) in enumerate(zip(chain, chain[1:])):
            label = sym_a if position < half else sym_b
            automaton.add_transition(source, label, target)

        a, b = Terminal("a"), Terminal("b")
        start, nested, empty = Variable("S"), Variable("S1"), Variable("L")
        grammar = CFG(
            productions=[
                Production(start, [empty, nested]),
                Production(empty, [Epsilon()]),
                Production(nested, [a, nested, b]),
                Production(nested, [b, nested, a]),
                Production(nested, []),
            ],
            start_symbol=start)

        intersection = grammar.intersection(automaton)
        self.assertFalse(intersection.is_empty())
        self.assertTrue(intersection.contains([a] * half + [b] * half))
        self.assertFalse(intersection.contains([]))
예제 #2
0
    def test_intersection_dfa2(self):
        # Intersect a nested a/b grammar with a one-state DFA that loops on
        # both symbols and whose only state is both initial and final.
        only_state = State(0)
        sym_a, sym_b = Symbol("a"), Symbol("b")
        automaton = DeterministicFiniteAutomaton(
            {only_state}, {sym_a, sym_b},
            start_state=only_state,
            final_states={only_state})
        for label in (sym_a, sym_b):
            automaton.add_transition(only_state, label, only_state)
        self.assertTrue(automaton.accepts([sym_a, sym_a, sym_b, sym_b]))

        a, b = Terminal("a"), Terminal("b")
        start, nested, empty = Variable("S"), Variable("S1"), Variable("L")
        grammar = CFG(
            productions={
                Production(start, [empty, nested]),
                Production(empty, [Epsilon()]),
                Production(nested, [a, nested, b]),
                Production(nested, [b, nested, a]),
                Production(nested, []),
            },
            start_symbol=start)
        self.assertTrue(grammar.contains([a, a, b, b]))
        self.assertFalse(grammar.contains([a, a, b]))

        intersection = grammar.intersection(automaton)
        self.assertFalse(intersection.is_empty())
        self.assertTrue(intersection.contains([a, a, b, b]))
        self.assertTrue(intersection.contains([]))
예제 #3
0
def cfg_from_cnf(cnf: CFG) -> CFG:
    """Build a context-free grammar [2]_
    out of a grammar given
    in Chomsky normal form [1]_.

    The grammar is serialised with ``to_text`` and parsed back with
    ``CFG.from_text``, reusing the start symbol of ``cnf``.

    Parameters
    ----------
    cnf : CFG
        Context free grammar
        in Chomsky normal form.

    Examples
    --------
    >>> import cfpq_data
    >>> cnf = cfpq_data.cnf_from_text("S -> a S b S | epsilon")
    >>> cfg = cfpq_data.cfg_from_cnf(cnf)
    >>> [cfg.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    Returns
    -------
    cfg : CFG
        Context-free grammar.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form
    .. [2] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    grammar_text = cnf.to_text()
    start_symbol = cnf.start_symbol
    return CFG.from_text(grammar_text, start_symbol)
예제 #4
0
def part2(file='input_test.txt'):
    """Print how many messages the patched rule set accepts.

    Rules and messages come from ``get_input(file)``. Rules 8 and 11 are
    overwritten with their recursive variants, the rules are turned into a
    CFG with start variable "0", and every message is checked with
    ``cfg.contains``.
    """
    rules, messages = get_input(file)

    # Recursive replacements:
    #   8: 42 | 42 8
    #   11: 42 31 | 42 11 31
    rules['8'] = '42 | 42 8'
    rules['11'] = '42 31 | 42 11 31'

    variables = set()
    productions = set()

    for name in rules:
        variables.add(Variable(name))
        for alternative in rules[name].split(' | '):
            if alternative in ('"a"', '"b"'):
                # Quoted letter: a single-terminal production.
                body = [Terminal(alternative.replace('"', ''))]
            else:
                # Otherwise a space-separated list of rule references.
                body = [Variable(ref) for ref in alternative.split(' ')]
            productions.add(Production(Variable(name), body))

    cfg = CFG(variables, {Terminal('a'), Terminal('b')}, Variable('0'),
              productions)
    count = sum(1 for message in messages if cfg.contains(message))

    print('Part 2: Solution {}'.format(count))
예제 #5
0
    def __init__(self, rules: Iterable[str], patch: bool = False):
        """Build ``self.CFG`` from rules written as ``"<id>: <body>"``.

        A body that starts with a double quote is a single-character
        terminal; any other body is a list of alternatives separated by
        ``" | "``, each a space-separated list of rule ids.

        Parameters
        ----------
        rules : Iterable[str]
            Rule lines; the rule with id ``"0"`` is the start symbol.
        patch : bool
            When true, the bodies of rules ``"8"`` and ``"11"`` are replaced
            by their recursive variants.

        Raises
        ------
        ValueError
            If no rule with id ``"0"`` is present.
        """
        start_var = None
        variables: Set[Variable] = set()
        terminals: Set[Terminal] = set()
        productions: Set[Production] = set()

        for rule in rules:
            rule_id, body = rule.split(": ")
            var = Variable(rule_id)
            variables.add(var)
            if rule_id == "0":
                start_var = var
            if body[0] == '"':
                # Terminal rule: the character right after the opening quote.
                ter = Terminal(body[1])
                terminals.add(ter)
                productions.add(Production(var, [ter]))
                continue
            if patch:
                if rule_id == "8":
                    body = "42 | 42 8"
                if rule_id == "11":
                    body = "42 31 | 42 11 31"
            for alternative in body.split(" | "):
                symbols = [Variable(x) for x in alternative.split(" ")]
                variables.update(symbols)
                productions.add(Production(var, symbols))

        if start_var is None:
            # Fail with a clear message instead of an unbound local.
            raise ValueError('no rule with id "0" to use as the start symbol')

        self.CFG = CFG(variables, terminals, start_var, productions)
예제 #6
0
 def _create_cfg_from_regex(cls, head: Variable, regex: Regex, variables=None) -> CFG:
     """Turn ``head -> regex`` into right-linear CFG productions.

     The regex is compiled to a minimized DFA. Every DFA state gets a fresh
     variable (numbered with the class-level counter), ``head`` derives the
     variable of each start state, each transition ``p --x--> q`` becomes
     ``P -> x Q`` and every final state derives the empty word.

     Parameters
     ----------
     head : Variable
         Head of the production being expanded.
     regex : Regex
         Body of the production.
     variables : optional
         When truthy, an edge label is a variable iff it is contained in
         this collection; otherwise an edge label is a variable iff
         ``str.isupper`` holds for it.

     Returns
     -------
     CFG
         Grammar with start symbol ``head``. If no production at all could
         be built, the grammar consists of ``head -> epsilon`` only.
     """
     dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
     # NOTE(review): reaches into private attributes of the automaton;
     # this depends on the internal layout of the automaton library.
     transitions = dfa._transition_function._transitions
     state_to_var: Dict[State, Variable] = {}
     productions, terms, grammar_vars = set(), set(), set()
     for state in dfa.states:
         state_to_var[state] = Variable(f'{state}:{cls.__var_state_counter}')
         cls.__var_state_counter += 1
     grammar_vars.update(state_to_var.values())
     for start_state in dfa.start_states:
         productions.add(Production(head, [state_to_var[start_state]]))
     for state_from in transitions:
         for edge_symb in transitions[state_from]:
             state_to = transitions[state_from][edge_symb]
             if variables:
                 is_variable = edge_symb.value in variables
             else:
                 is_variable = edge_symb.value.isupper()
             if is_variable:
                 symbol = Variable(edge_symb.value)
                 grammar_vars.add(symbol)
             else:
                 symbol = Terminal(edge_symb.value)
                 terms.add(symbol)
             productions.add(
                 Production(state_to_var[state_from],
                            [symbol, state_to_var[state_to]]))
     if not productions:
         return CFG(grammar_vars, terms, head, {Production(head, [])})
     # Every final state must be able to stop, including a final start state
     # that no transition leads to (e.g. a regex that also matches the empty
     # word); previously only final states reached by an edge got this rule.
     for final_state in dfa.final_states:
         productions.add(Production(state_to_var[final_state], []))
     return CFG(grammar_vars, terms, head, productions)
예제 #7
0
 def test_derivation_empty(self):
     # With S -> epsilon, the rightmost derivation of the empty word is
     # expected to be [S] followed by the empty sentential form.
     start = Variable("S")
     grammar = CFG(productions=[Production(start, [Epsilon()])],
                   start_symbol=start)
     steps = grammar.get_cnf_parse_tree([]).get_rightmost_derivation()
     self.assertEqual([[start], []], steps)
예제 #8
0
 def test_to_pda(self):
     """ Checks the sizes of the PDA built from an expression grammar """
     expr, ident = Variable("E"), Variable("I")
     letter_a, letter_b = Terminal("a"), Terminal("b")
     digit_0, digit_1 = Terminal("0"), Terminal("1")
     left_par, right_par = Terminal("("), Terminal(")")
     times, plus = Terminal("*"), Terminal("+")

     expr_bodies = [
         [ident],
         [expr, plus, expr],
         [expr, times, expr],
         [left_par, expr, right_par],
     ]
     ident_bodies = [
         [letter_a],
         [letter_b],
         [ident, letter_a],
         [ident, letter_b],
         [ident, digit_0],
         [ident, digit_1],
         [ident, Epsilon()],
     ]
     productions = {Production(expr, body) for body in expr_bodies}
     productions |= {Production(ident, body) for body in ident_bodies}

     terminals = {
         letter_a, letter_b, digit_0, digit_1, left_par, right_par, times,
         plus
     }
     pda = CFG({expr, ident}, terminals, expr, productions).to_pda()

     self.assertEqual(len(pda.states), 1)
     self.assertEqual(len(pda.final_states), 0)
     self.assertEqual(len(pda.input_symbols), 8)
     self.assertEqual(len(pda.stack_symbols), 10)
     self.assertEqual(pda.get_number_transitions(), 19)
예제 #9
0
def part2():
    """Print how many words from ``in.txt`` the patched rule set accepts.

    The file holds the rules and the words, separated by one blank line.
    Rules 8 and 11 are replaced by their recursive variants, the rules are
    turned into a CFG with start variable "0", and each word is checked with
    ``cfg.contains``.
    """
    # Close the file deterministically instead of leaking the handle.
    with open("in.txt") as handle:
        rules, words = handle.read().split("\n\n")

    # Patch by rule id rather than by substring replacement on the whole
    # text, which could also rewrite e.g. a rule "18: 42 ..." by accident.
    overrides = {
        "8": "42 | 42 8",
        "11": "42 31 | 42 11 31",
    }

    variables = set()
    productions = set()
    terminals = set()
    for line in rules.split("\n"):
        name, body = line.split(":")
        body = overrides.get(name, body)
        head = Variable(name)
        variables.add(head)
        for expression in body.split("|"):
            if '"' in expression:  # Terminal expression
                symbol = Terminal(expression.strip('" '))
                terminals.add(symbol)
                productions.add(Production(head, [symbol]))
            else:
                references = [
                    Variable(token) for token in expression.strip().split()
                ]
                productions.add(Production(head, references))

    cfg = CFG(variables, terminals, Variable("0"), productions)
    count = sum(1 for word in words.split("\n") if cfg.contains(word))
    print(count)
예제 #10
0
 def test_to_text_cnf(self):
     # The normal form is serialised with to_text and parsed back; both the
     # normal form and the re-parsed grammar must accept "a b".
     normal_form = CFG.from_text("S -> a S b | a b").to_normal_form()
     self.assertTrue(normal_form.contains(["a", "b"]))
     serialized = normal_form.to_text()
     print(serialized)
     reloaded = CFG.from_text(serialized)
     self.assertTrue(reloaded.contains(["a", "b"]))
예제 #11
0
 def test_derivation_does_not_exist(self):
     # A grammar without productions cannot derive "a b": building the parse
     # tree and asking for its derivation must raise DerivationDoesNotExist.
     word = [Terminal("a"), Terminal("b")]
     empty_grammar = CFG(productions=[], start_symbol=Variable("S"))
     with self.assertRaises(DerivationDoesNotExist):
         tree = empty_grammar.get_cnf_parse_tree(word)
         tree.get_rightmost_derivation()
예제 #12
0
    def cfpq_matrix_product(graph: Graph, grammar: CFG):
        """Evaluate a context-free path query with matrix products.

        One boolean matrix is kept per grammar variable. Matrices are seeded
        from the graph's label matrices through the single-symbol productions
        of the normal form, then two-symbol productions are applied
        repeatedly until no matrix gains new stored values.

        Returns ``False`` for a graph without vertices; otherwise the matrix
        stored for the start symbol of the normal form, or an empty sparse
        BOOL matrix if none was produced.
        """
        size = graph.vertices_count
        if size == 0:
            return False

        reachability = dict()

        if grammar.generate_epsilon():
            # The empty word relates every vertex to itself.
            diagonal = Matrix.sparse(BOOL, size, size)
            diagonal += Matrix.identity(BOOL, size)
            reachability[grammar.start_symbol] = diagonal

        cfg = grammar.to_normal_form()

        # Split productions by body length: one symbol vs. anything else.
        unit_productions = set()
        pair_productions = set()
        for production in cfg.productions:
            bucket = (unit_productions
                      if len(production.body) == 1 else pair_productions)
            bucket.add(production)

        for label, label_matrix in graph.label_matrices.items():
            terminal_body = [Terminal(label)]
            for production in unit_productions:
                if production.body == terminal_body:
                    head = production.head
                    if head in reachability:
                        reachability[head] += label_matrix.dup()
                    else:
                        reachability[head] = label_matrix.dup()

        changed = cfg.variables
        while len(changed) > 0:
            previous, changed = changed, set()

            for production in pair_productions:
                left = production.body[0]
                right = production.body[1]
                if left not in reachability or right not in reachability:
                    continue
                # Only revisit productions whose operands changed last round.
                if left not in previous and right not in previous:
                    continue

                head = production.head
                current = reachability.get(
                    head, Matrix.sparse(BOOL, size, size))
                before = current.nvals
                reachability[head] = current + (
                    reachability[left] @ reachability[right])

                if reachability[head].nvals != before:
                    changed.add(head)

        return reachability.get(cfg.start_symbol,
                                Matrix.sparse(BOOL, size, size))
예제 #13
0
    def cfpq_matrix_multiplication(grammar: CFG, graph: BMGraph):
        """Evaluate a context-free path query with matrix products.

        One boolean matrix is kept per grammar variable. Matrices are seeded
        from ``graph.matrices`` through the single-symbol productions of the
        normal form, then two-symbol productions are applied repeatedly,
        inside the ``LOR_LAND_BOOL`` semiring context, until no matrix gains
        new stored values.

        Returns the matrix stored for the start symbol of the normal form,
        or an empty sparse BOOL matrix if none was produced.
        """
        size = graph.states_amount
        closure = dict()

        if grammar.generate_epsilon():
            # The empty word relates every state to itself.
            diagonal = Matrix.sparse(BOOL, size, size)
            for index in range(size):
                diagonal[index, index] = True
            closure[grammar.start_symbol] = diagonal

        cfg = grammar.to_normal_form()

        # Split productions by body length: one symbol vs. anything else.
        unit_prods = set()
        pair_prods = set()
        for prod in cfg.productions:
            bucket = unit_prods if len(prod.body) == 1 else pair_prods
            bucket.add(prod)

        with semiring.LOR_LAND_BOOL:
            for label, label_matrix in graph.matrices.items():
                terminal_body = [Terminal(label)]
                for prod in unit_prods:
                    if prod.body == terminal_body:
                        head = prod.head
                        if head in closure:
                            closure[head] += label_matrix.dup()
                        else:
                            closure[head] = label_matrix.dup()

        with semiring.LOR_LAND_BOOL:
            changed = cfg.variables

            while len(changed) > 0:
                previous, changed = changed, set()

                for prod in pair_prods:
                    left = prod.body[0]
                    right = prod.body[1]
                    if left not in closure or right not in closure:
                        continue
                    # Only revisit productions whose operands just changed.
                    if left not in previous and right not in previous:
                        continue

                    head = prod.head
                    current = closure.get(
                        head, Matrix.sparse(BOOL, size, size))
                    before = current.nvals
                    closure[head] = current + (closure[left] @ closure[right])

                    if closure[head].nvals != before:
                        changed.add(head)

        return closure.get(cfg.start_symbol,
                           Matrix.sparse(BOOL, size, size))
예제 #14
0
 def test_emptiness(self):
     """ A grammar deriving the empty word must not be reported as empty """
     # pylint: disable=too-many-locals
     start = Variable("S")
     a, b = Terminal("a"), Terminal("b")
     productions = {
         Production(start, [a, start, b]),
         Production(start, []),
     }
     grammar = CFG({start}, {a, b}, start, productions)
     self.assertFalse(grammar.is_empty())
예제 #15
0
 def test_union(self):
     """ Checks the union of a grammar with itself """
     start = Variable("S")
     a, b = Terminal("a"), Terminal("b")
     productions = {
         Production(start, [a, start, b]),
         Production(start, []),
     }
     grammar = CFG({start}, {a, b}, start, productions)
     united = grammar.union(grammar)
     expected_sizes = (
         (united.variables, 3),
         (united.terminals, 2),
         (united.productions, 6),
     )
     for collection, size in expected_sizes:
         self.assertEqual(len(collection), size)
     self.assertFalse(united.is_empty())
     self.assertTrue(united.contains([a, a, b, b]))
예제 #16
0
 def test_reverse(self):
     """ Checks the reversal of a grammar """
     start = Variable("S")
     a, b = Terminal("a"), Terminal("b")
     productions = {
         Production(start, [a, start, b]),
         Production(start, []),
     }
     grammar = CFG({start}, {a, b}, start, productions)
     mirrored = grammar.reverse()
     expected_sizes = (
         (mirrored.variables, 1),
         (mirrored.terminals, 2),
         (mirrored.productions, 2),
     )
     for collection, size in expected_sizes:
         self.assertEqual(len(collection), size)
     self.assertFalse(mirrored.is_empty())
     self.assertTrue(mirrored.contains([b, b, a, a]))
예제 #17
0
 def _test_profiling_conversions():
     """ Runs the CFG -> PDA -> CFG round trip three times in a row """
     a, b, c = Terminal("a"), Terminal("b"), Terminal("c")
     start = Variable("S")
     grammar = CFG(
         productions={
             Production(start, [a, start, b]),
             Production(start, [c]),
         },
         start_symbol=start)
     # Each round feeds the grammar produced by the previous one.
     for _ in range(3):
         pda = grammar.to_pda()
         grammar = pda.to_final_state().to_empty_stack().to_cfg()
예제 #18
0
 def test_nullable_object(self):
     """ Every variable of this grammar must be reported as nullable """
     start, var_a, var_b = Variable("S"), Variable("A"), Variable("B")
     ter_a, ter_b = Terminal("a"), Terminal("b")
     productions = {
         Production(start, [var_a, var_b]),
         Production(var_a, [ter_a, var_a, var_a]),
         Production(var_a, [Epsilon()]),
         Production(var_b, [ter_b, var_b, var_b]),
         Production(var_b, [Epsilon()]),
     }
     grammar = CFG({var_a, var_b, start}, {ter_a, ter_b}, start,
                   productions)
     nullable = grammar.get_nullable_symbols()
     self.assertEqual(nullable, {var_a, var_b, start})
예제 #19
0
 def test_substitution(self):
     """ Checks substituting a terminal of a grammar by the grammar itself """
     start = Variable("S")
     a, b = Terminal("a"), Terminal("b")
     productions = {
         Production(start, [a, start, b]),
         Production(start, []),
     }
     grammar = CFG({start}, {a, b}, start, productions)
     substituted = grammar.substitute({a: grammar})
     expected_sizes = (
         (substituted.variables, 2),
         (substituted.terminals, 2),
         (substituted.productions, 4),
     )
     for collection, size in expected_sizes:
         self.assertEqual(len(collection), size)
     self.assertFalse(substituted.is_empty())
     self.assertTrue(substituted.contains([a, b, a, b, b, b]))
예제 #20
0
    def __init__(self, start_symbol=None, productions=None):
        """Initialise the grammar from the normal form of the given CFG.

        A CFG is built from ``start_symbol`` and ``productions`` and
        converted with ``to_normal_form``. If the source grammar generates
        the empty word, an empty production for the start symbol is added
        back. ``self.pair_productions`` collects the productions whose body
        has exactly two symbols.
        """
        source = CFG(start_symbol=start_symbol, productions=productions)
        normal_form = source.to_normal_form()

        if source.generate_epsilon():
            # needed for language preservation
            epsilon_rule = Production(normal_form._start_symbol, [])
            normal_form._productions.add(epsilon_rule)

        self.pair_productions = {
            production
            for production in normal_form._productions
            if len(production.body) == 2
        }

        super(GrammarCNF, self).__init__(
            start_symbol=normal_form._start_symbol,
            productions=normal_form._productions)
예제 #21
0
 def test_get_llone_table(self):
     # Example from:
     # https://www.geeksforgeeks.org/construction-of-ll1-parsing-table/
     text = """
         E  -> T E’
         E’ -> + T E’ | Є
         T  -> F T’
         T’ -> * F T’ | Є
         F  -> ( E ) | id
     """
     cfg = CFG.from_text(text, start_symbol="E")
     parsing_table = LLOneParser(cfg).get_llone_parsing_table()

     def cell(variable, terminal):
         # Missing rows and cells count as empty.
         row = parsing_table.get(Variable(variable), dict())
         return row.get(Terminal(terminal), [])

     # (variable, terminal, expected number of entries in the cell)
     expectations = [
         ("E", "id", 1),
         ("E", "+", 0),
         ("T’", ")", 1),
         ("F", "(", 1),
         ("F", "id", 1),
     ]
     for variable, terminal, size in expectations:
         self.assertEqual(len(cell(variable, terminal)), size)
예제 #22
0
파일: cfg.py 프로젝트: vdshk/CFPQ_Data
def cfg_to_text(cfg: CFG) -> str:
    """Return the text representation
    of a context-free grammar [1]_.

    This delegates to the grammar's own ``to_text`` method.

    Parameters
    ----------
    cfg : CFG
        Context-free grammar.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S")
    >>> cfpq_data.cfg_to_text(cfg)
    'S -> a S b S\\n'

    Returns
    -------
    text : str
        Context-free grammar
        text representation.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    text = cfg.to_text()
    return text
예제 #23
0
파일: cfg.py 프로젝트: vdshk/CFPQ_Data
def cfg_from_text(source: str, start_symbol: Variable = Variable("S")) -> CFG:
    """Parse text into a context-free grammar [1]_.

    This delegates to ``CFG.from_text``.

    Parameters
    ----------
    source : str
        The text with which
        the context-free grammar
        will be created.

    start_symbol : Variable
        Start symbol of a context-free grammar.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S")
    >>> cfpq_data.cfg_to_text(cfg)
    'S -> a S b S\\n'

    Returns
    -------
    cfg : CFG
        Context-free grammar.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    grammar = CFG.from_text(source, start_symbol)
    return grammar
예제 #24
0
    def from_cfg(cls, cfg: CFG):
        """
        Build RSA from a given cfpq_data context-free grammar

        The grammar text is regrouped so that every nonterminal gets a single
        line listing all its alternatives separated by " | " (an empty body
        is written as "epsilon"); the result is parsed with rsm_from_text.

        @param cfg: CFG on which RSA is built
        @return: initialized class
        """
        alternatives = dict()
        # The last element of the split is dropped, as in the text format
        # every production line ends with a newline.
        for line in cfg.to_text().split("\n")[:-1]:
            pieces = line.split(" -> ")
            body = pieces[1] if pieces[1] != "" else "epsilon"
            alternatives.setdefault(pieces[0], []).append(body)

        merged = "\n".join(
            head + " -> " + " | ".join(bodies)
            for head, bodies in alternatives.items())
        return RecursiveAutomaton.from_rsm(rsm_from_text(merged))
예제 #25
0
def change_terminals_in_cfg(cfg: CFG, spec: Dict[str, str]) -> CFG:
    """Change terminals of
    a context-free grammar [1]_.

    The replacement is textual: every occurrence of a key of ``spec`` in
    ``cfg.to_text()`` is replaced by its value and the result is parsed
    again. An empty ``spec`` leaves the text unchanged. The start symbol of
    ``cfg`` is kept when it is set.

    Parameters
    ----------
    cfg : CFG
        Context-free grammar.

    spec: Dict
        Terminals mapping.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S")
    >>> new_cfg = cfpq_data.change_terminals_in_cfg(cfg, {"a": "b", "b": "c"})
    >>> new_cfg.to_text()
    'S -> b S c S\\n'

    Returns
    -------
    cfg : CFG
        Context-free grammar with changed terminals.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    text = cfg.to_text()
    if spec:
        # An empty mapping would compile to an empty pattern, which matches
        # everywhere and makes the lookup below fail with KeyError.
        regex = re.compile("|".join(map(re.escape, spec.keys())))
        text = regex.sub(lambda match: spec[match.group(0)], text)

    start_symbol = cfg.start_symbol
    if start_symbol is None:
        return cfg_from_text(text)
    # Keep the start symbol of the source grammar instead of silently
    # falling back to the default one.
    return cfg_from_text(text, start_symbol)
예제 #26
0
 def test_get_follow_set(self):
     # Example from:
     # https://www.geeksforgeeks.org/follow-set-in-syntax-analysis/
     text = """
         E  -> T E’
         E’ -> + T E’ | Є
         T  -> F T’
         T’ -> * F T’ | Є
         F  -> ( E ) | id
     """
     cfg = CFG.from_text(text, start_symbol="E")
     follow_set = LLOneParser(cfg).get_follow_set()

     end = "$"
     plus, times, close = Terminal("+"), Terminal("*"), Terminal(")")
     # Expected follow set per variable name.
     expected = {
         "E": {end, close},
         "E’": {end, close},
         "T": {end, plus, close},
         "T’": {end, plus, close},
         "F": {end, plus, times, close},
     }
     for name, follow in expected.items():
         self.assertEqual(follow_set[Variable(name)], follow)
예제 #27
0
 def test_get_first_set2(self):
     # Example from:
     # https://www.geeksforgeeks.org/first-set-in-syntax-analysis/
     text = """
         S -> A C B | C b b | B a
         A -> d a | B C
         B -> g | Є
         C -> h | Є
     """
     cfg = CFG.from_text(text)
     first_set = LLOneParser(cfg).get_first_set()

     # Expected terminals per variable; epsilon is expected in every set.
     expected = {
         "S": {"d", "g", "h", "b", "a"},
         "A": {"d", "g", "h"},
         "B": {"g"},
         "C": {"h"},
     }
     for name, letters in expected.items():
         terminals = {Terminal(letter) for letter in letters}
         self.assertEqual(first_set[Variable(name)],
                          terminals | {Epsilon()})
예제 #28
0
def rsm_from_cnf(cnf: CFG) -> RSM:
    """Build a Recursive State Machine [2]_
    out of a context-free grammar
    given in Chomsky normal form [1]_.

    The grammar is serialised with ``to_text`` and handed to
    ``rsm_from_text`` together with the start symbol of ``cnf``.

    Parameters
    ----------
    cnf : CFG
        Context-free grammar
        in Chomsky normal form.

    Examples
    --------
    >>> import cfpq_data
    >>> cnf = cfpq_data.cnf_from_text("S -> a S b S | epsilon")
    >>> rsm = cfpq_data.rsm_from_cnf(cnf)
    >>> [rsm.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    Returns
    -------
    rsm : RSM
        Recursive State Machine.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form
    .. [2] Alur R., Etessami K., Yannakakis M. (2001) Analysis of Recursive State Machines. In: Berry G.,
       Comon H., Finkel A. (eds) Computer Aided Verification. CAV 2001.
       Lecture Notes in Computer Science, vol 2102.
       Springer, Berlin, Heidelberg. https://doi.org/10.1007/3-540-44585-4_18
    """
    grammar_text = cnf.to_text()
    start_symbol = cnf.start_symbol
    return rsm_from_text(grammar_text, start_symbol)
    def read_grammar(cls, name):
        """Read a grammar from a text file with one production per line.

        Every non-blank line is split on whitespace: the first token is the
        head and the remaining tokens form the body. A body token for which
        ``str.isupper`` holds becomes a variable, any other token a terminal.
        The head of the first production is the start symbol. Blank lines
        are skipped.

        Parameters
        ----------
        name : path of the file to read.

        Returns
        -------
        CFG built from the productions in the file.
        """
        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Tolerate blank lines (e.g. a trailing one) instead of
                    # failing on the unpacking below.
                    continue
                head, *body = tokens

                head_var = Variable(head)
                # Heads are variables too, even if they occur in no body.
                variables.add(head_var)
                if start_symb is None:
                    start_symb = head_var

                body_cfg = []
                for letter in body:
                    if letter.isupper():
                        variable = Variable(letter)
                        variables.add(variable)
                        body_cfg.append(variable)
                    else:
                        terminal = Terminal(letter)
                        terminals.add(terminal)
                        body_cfg.append(terminal)

                productions.add(Production(head_var, body_cfg))

        return CFG(variables, terminals, start_symb, productions)
    def read_grammar_with_regex(cls, name):
        """Read a grammar whose production bodies are regular expressions.

        Every non-blank line is split on whitespace: the first token is the
        head, the remaining tokens joined by single spaces form the regex
        body. Each line is expanded by ``CFGrammar.read_production_regex``,
        which also receives and returns a running integer id. The head of
        the first production is the start symbol. Blank lines are skipped.

        Parameters
        ----------
        name : path of the file to read.

        Returns
        -------
        CFG built from the union of all expanded productions.
        """
        next_id = 0

        terminals, variables, productions = set(), set(), set()
        start_symb = None

        with open(name, 'r') as file:
            for production_txt in file:
                tokens = production_txt.split()
                if not tokens:
                    # Tolerate blank lines instead of failing on tokens[0].
                    continue
                head = Variable(tokens[0])
                body = ' '.join(tokens[1:])

                if start_symb is None:
                    start_symb = head

                (new_productions, new_variables, new_terminals,
                 next_id) = CFGrammar.read_production_regex(
                     head, Regex(body), next_id)

                productions |= new_productions
                variables |= new_variables
                terminals |= new_terminals

        return CFG(variables, terminals, start_symb, productions)