Ejemplo n.º 1
0
    def cfpq_matrix_product(graph: Graph, grammar: CFG):
        """Evaluate a context-free path query with the matrix-product algorithm.

        One boolean matrix per grammar variable is kept and repeatedly
        extended with ``left @ right`` products until no matrix gains entries.

        :param graph: labelled graph; ``vertices_count`` and ``label_matrices``
            (label -> boolean adjacency matrix) are read.
        :param grammar: context-free grammar; it is converted with
            ``to_normal_form()`` before use.
        :return: ``False`` when the graph has no vertices; otherwise the
            matrix stored for the start symbol, or an empty matrix if the
            start symbol never received one.
        """
        n = graph.vertices_count
        if n == 0:
            return False

        reach = {}

        # The empty word matches every vertex with itself, so the start
        # symbol is seeded with the identity matrix.
        if grammar.generate_epsilon():
            eye = Matrix.sparse(BOOL, n, n)
            eye += Matrix.identity(BOOL, n)
            reach[grammar.start_symbol] = eye

        cnf = grammar.to_normal_form()
        # NOTE(review): the epsilon seed above is keyed by
        # ``grammar.start_symbol`` while the final lookup uses
        # ``cnf.start_symbol`` -- this assumes the normal form keeps the
        # start symbol; confirm.

        # Rules with a one-symbol body versus all the others.
        unit_rules = {rule for rule in cnf.productions if len(rule.body) == 1}
        pair_rules = {rule for rule in cnf.productions if len(rule.body) != 1}

        # Seed every head of a rule ``A -> t`` with the edges labelled ``t``.
        for label, label_matrix in graph.label_matrices.items():
            wanted_body = [Terminal(label)]
            for rule in unit_rules:
                if rule.body == wanted_body:
                    if rule.head in reach:
                        reach[rule.head] += label_matrix.dup()
                    else:
                        reach[rule.head] = label_matrix.dup()

        # Fixpoint: only rules whose body mentions a variable that changed in
        # the previous round are recomputed.
        dirty = cnf.variables
        while dirty:
            previous_dirty, dirty = dirty, set()

            for rule in pair_rules:
                left, right = rule.body[0], rule.body[1]
                if left not in reach or right not in reach:
                    continue
                if left not in previous_dirty and right not in previous_dirty:
                    continue

                if rule.head in reach:
                    current = reach[rule.head]
                else:
                    current = Matrix.sparse(BOOL, n, n)
                before = current.nvals
                updated = current + (reach[left] @ reach[right])
                reach[rule.head] = updated

                # A change in the stored-entry count marks the head as dirty.
                if updated.nvals != before:
                    dirty.add(rule.head)

        if cnf.start_symbol in reach:
            return reach[cnf.start_symbol]
        return Matrix.sparse(BOOL, n, n)
Ejemplo n.º 2
0
    def cfpq_matrix_multiplication(grammar: CFG, graph: BMGraph):
        """Answer a context-free path query via boolean matrix multiplication.

        :param grammar: context-free grammar; converted with
            ``to_normal_form()`` before the closure is computed.
        :param graph: graph exposing ``states_amount`` and ``matrices``
            (label -> boolean adjacency matrix).
        :return: the matrix accumulated for the start symbol, or an empty
            ``states_amount`` x ``states_amount`` matrix if there is none.
        """
        size = graph.states_amount
        closure = {}

        # If the grammar derives the empty word, every state is related to
        # itself under the start symbol.
        if grammar.generate_epsilon():
            diagonal = Matrix.sparse(BOOL, size, size)
            for state in range(size):
                diagonal[state, state] = True
            closure[grammar.start_symbol] = diagonal

        cnf = grammar.to_normal_form()

        # Rules with a one-symbol body versus all the others.
        unit_rules = {rule for rule in cnf.productions if len(rule.body) == 1}
        pair_rules = {rule for rule in cnf.productions if len(rule.body) != 1}

        # Seed each head of ``A -> t`` with a copy of the matrix for label t.
        with semiring.LOR_LAND_BOOL:
            for label, label_matrix in graph.matrices.items():
                wanted_body = [Terminal(label)]
                for rule in unit_rules:
                    if rule.body == wanted_body:
                        if rule.head in closure:
                            closure[rule.head] += label_matrix.dup()
                        else:
                            closure[rule.head] = label_matrix.dup()

        # Iterate to a fixpoint, revisiting only rules whose body contains a
        # variable that changed during the previous round.
        with semiring.LOR_LAND_BOOL:
            pending = cnf.variables
            while pending:
                last_round, pending = pending, set()

                for rule in pair_rules:
                    left, right = rule.body[0], rule.body[1]
                    if left not in closure or right not in closure:
                        continue
                    if left not in last_round and right not in last_round:
                        continue

                    if rule.head in closure:
                        current = closure[rule.head]
                    else:
                        current = Matrix.sparse(BOOL, size, size)
                    before = current.nvals
                    grown = current + (closure[left] @ closure[right])
                    closure[rule.head] = grown

                    if grown.nvals != before:
                        pending.add(rule.head)

        if cnf.start_symbol in closure:
            return closure[cnf.start_symbol]
        return Matrix.sparse(BOOL, size, size)
Ejemplo n.º 3
0
    def cfpq_matrix(graph: Graph, grammar: CFG):
        """Compute the context-free reachability matrix for the start symbol.

        The grammar's productions are used as given (no normal-form
        conversion happens here): one-symbol bodies are matched against edge
        labels, and only bodies of exactly two symbols take part in the
        closure loop.

        :param graph: graph exposing ``size``, ``label_dictionary``,
            ``get_edges(label)`` and ``vertices``.
        :param grammar: context-free grammar whose ``variables``,
            ``productions`` and ``start_symbol`` are read.
        :return: boolean matrix stored for the start symbol (an empty 0 x 0
            matrix when the graph has size 0).
        """
        n = graph.size
        if n == 0:
            return Matrix.sparse(BOOL, n, n)

        start = grammar.start_symbol
        closure = Graph()
        closure.size = n
        matrices = closure.label_dictionary

        # Every variable starts with an empty matrix.
        for variable in grammar.variables:
            matrices[variable] = Matrix.sparse(BOOL, n, n)

        for label, label_matrix in graph.label_dictionary.items():
            terminal = Terminal(label)
            matrices[terminal] = label_matrix.dup()

            # Heads of the rules ``A -> terminal``; each one receives every
            # edge carrying this label.
            heads = [rule.head for rule in grammar.productions
                     if len(rule.body) == 1 and rule.body[0] == terminal]
            for source, target in graph.get_edges(label):
                for head in heads:
                    matrices[head][source, target] = 1

        # The empty word relates each vertex to itself.
        if grammar.generate_epsilon():
            for vertex in graph.vertices:
                matrices[start][vertex, vertex] = 1

        # Only two-symbol bodies drive the closure; longer bodies are ignored.
        binary_rules = {rule for rule in grammar.productions
                        if len(rule.body) == 2}

        with semiring.LOR_LAND_BOOL:
            grew = True
            while grew:
                grew = False
                for rule in binary_rules:
                    head = rule.head
                    left, right = rule.body[0], rule.body[1]
                    before = matrices[head].nvals
                    product = matrices[left] @ matrices[right]
                    matrices[head] = matrices[head] + product
                    # Keep looping while any matrix still gains entries.
                    if matrices[head].nvals != before:
                        grew = True

        return matrices[start]
Ejemplo n.º 4
0
    def __init__(self, start_symbol=None, productions=None):
        """Initialise the grammar from the normal form of the given rules.

        A ``CFG`` is built from the arguments and converted with
        ``to_normal_form()``. If that grammar generates the empty word, an
        empty-body production for the start symbol is added to the converted
        grammar so the language is preserved.

        :param start_symbol: start symbol forwarded to ``CFG``.
        :param productions: productions forwarded to ``CFG``.
        """
        source = CFG(start_symbol=start_symbol, productions=productions)
        normal = source.to_normal_form()

        # Re-add ``S -> epsilon`` when the source grammar accepts the empty
        # word; needed for language preservation.
        if source.generate_epsilon():
            normal._productions.add(Production(normal._start_symbol, []))

        # Productions whose body has exactly two symbols.
        self.pair_productions = {
            rule for rule in normal._productions if len(rule.body) == 2
        }

        super(GrammarCNF, self).__init__(start_symbol=normal._start_symbol,
                                         productions=normal._productions)
Ejemplo n.º 5
0
 def __init__(self,
              cfg: CFG,
              is_reduced: bool = False):
     """Initialise from ``cfg`` converted to normal form, keeping epsilon.

     Unless ``is_reduced`` is true, a grammar whose start symbol occurs in
     some production body first gets a fresh start variable ``S'`` (with as
     many extra primes as needed to be unused) and the rule
     ``S' -> <old start>``. The grammar is then converted with
     ``to_normal_form()``. If it generates the empty word, the production
     ``<start> -> epsilon`` is added to the resulting production set.

     The ``cfg`` argument is never modified: the augmented grammar and the
     final production set are built as new objects instead of being patched
     in through private attributes of ``cfg`` or of its normal form.

     :param cfg: source context-free grammar.
     :param is_reduced: when true, skip the fresh-start-symbol step.
     """
     if not is_reduced and any(cfg.start_symbol in production.body
                               for production in cfg.productions):
         # Find an unused name: S', S'', S''', ...
         # NOTE(review): like the original, this tests a plain string against
         # the set of variables, which relies on a variable comparing and
         # hashing equal to its name -- confirm.
         fresh_name = "S'"
         while fresh_name in cfg.variables:
             fresh_name += "'"
         fresh_start = Variable(fresh_name)

         # Build a new grammar rather than mutating the caller's one; a fresh
         # object also cannot carry results memoised for the old grammar.
         cfg = CFG(variables=set(cfg.variables) | {fresh_start},
                   terminals=set(cfg.terminals),
                   start_symbol=fresh_start,
                   productions=set(cfg.productions)
                   | {Production(fresh_start, [cfg.start_symbol])})

     generate_epsilon = cfg.generate_epsilon()
     normal_form = cfg.to_normal_form()

     # Copy before adding the epsilon rule: the normal-form object may be
     # shared with (or cached by) ``cfg``, so it must stay untouched.
     productions = set(normal_form.productions)
     if generate_epsilon:
         productions.add(Production(normal_form.start_symbol, []))

     super().__init__(normal_form.variables, normal_form.terminals,
                      normal_form.start_symbol, productions)
Ejemplo n.º 6
0
    def CYK(grammar: CFG, word):
        """Decide with the CYK algorithm whether ``grammar`` derives ``word``.

        :param grammar: context-free grammar; converted with
            ``to_normal_form()`` for non-empty words.
        :param word: sequence of symbols; each element is wrapped in
            ``Terminal`` before being compared with production bodies.
        :return: for an empty word, ``grammar.generate_epsilon()``; otherwise
            whether the start symbol derives the whole word.
        """
        length = len(word)
        if length == 0:
            return grammar.generate_epsilon()

        cnf = grammar.to_normal_form()
        # table[a][b] holds the variables deriving word[a..b] (inclusive).
        table = [[set() for _ in range(length)] for _ in range(length)]

        # Substrings of length one: rules ``A -> symbol``.
        for pos in range(length):
            single = [Terminal(word[pos])]
            for rule in cnf.productions:
                if rule.body == single:
                    table[pos][pos].add(rule.head)

        binary_rules = [rule for rule in cnf.productions
                        if len(rule.body) == 2]

        # ``span`` is the substring length minus one; a span of zero has no
        # split point, so the loop starts at one.
        for span in range(1, length):
            for begin in range(length - span):
                end = begin + span
                cell = table[begin][end]
                for split in range(begin, end):
                    left = table[begin][split]
                    right = table[split + 1][end]
                    for rule in binary_rules:
                        if rule.body[0] in left and rule.body[1] in right:
                            cell.add(rule.head)

        return cnf.start_symbol in table[0][length - 1]
Ejemplo n.º 7
0
def cnf_from_cfg(cfg: CFG) -> CFG:
    """Convert a context-free grammar [2]_ to Chomsky normal form [1]_.

    The grammar is converted with ``to_normal_form()``. If the input
    grammar generates the empty word, an empty-body production for the
    start symbol is added to the converted grammar.

    Parameters
    ----------
    cfg : CFG
        Context-free grammar to convert.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S | epsilon")
    >>> cnf = cfpq_data.cnf_from_cfg(cfg)
    >>> [cnf.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    Returns
    -------
    cnf : CFG
        The converted grammar, with the extra epsilon production when the
        input generates the empty word.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form
    .. [2] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    derives_empty_word = cfg.generate_epsilon()
    normal_form = cfg.to_normal_form()

    if derives_empty_word is not True:
        return normal_form

    # Put the empty word back via ``S -> epsilon`` on the start symbol.
    start = normal_form.start_symbol
    normal_form._productions.add(Production(start, []))
    return normal_form
Ejemplo n.º 8
0
    def Hellings(grammar: CFG, graph: BMGraph):
        """Answer a context-free path query with a worklist (Hellings-style) loop.

        ``res`` maps a grammar symbol to a boolean matrix of derived
        (from, to) pairs, and ``m`` is a queue of ``(symbol, from, to)``
        triples still to be combined with the known pairs.

        :param grammar: context-free grammar; converted with
            ``to_normal_form()`` after the epsilon seeding.
        :param graph: graph exposing ``states_amount`` and ``matrices``
            (label -> boolean matrix).
        :return: the matrix stored for the start symbol, or an empty
            ``states_amount`` x ``states_amount`` matrix if there is none.
        """
        res = dict()
        m = deque()
        terminal_prods = set()
        nonterminal_prods = set()

        # Empty word: relate every state to itself under the start symbol and
        # queue those pairs.
        if grammar.generate_epsilon():
            matrix = Matrix.sparse(BOOL, graph.states_amount,
                                   graph.states_amount)
            for i in range(graph.states_amount):
                matrix[i, i] = True
                m.append((grammar.start_symbol, i, i))
            res[grammar.start_symbol] = matrix

        cfg = grammar.to_normal_form()

        # Split productions: one-symbol bodies versus everything else.
        for prod in cfg.productions:
            if len(prod.body) == 1:
                terminal_prods.add(prod)
            else:
                nonterminal_prods.add(prod)

        # Seed each head of ``A -> t`` with a copy of the matrix for label t
        # (accumulating when the head already has a matrix).
        with semiring.LOR_LAND_BOOL:
            for t, matrix in graph.matrices.items():
                for prod in terminal_prods:
                    if prod.body == [Terminal(t)]:
                        if prod.head not in res:
                            res[prod.head] = matrix.dup()
                        else:
                            res[prod.head] += matrix.dup()

        # Queue every pair stored so far.
        # NOTE(review): presumably ``to_lists()`` returns parallel
        # row/column/value lists -- confirm against pygraphblas.
        # Pairs queued by the epsilon branch above are queued a second time.
        for var, matrix in res.items():
            for i, j, _ in zip(*matrix.to_lists()):
                m.append((var, i, j))

        while m:
            # New pairs are collected here and written to ``res`` only after
            # both scans, so ``res`` is not modified while it is iterated.
            add_to_res = list()
            var, v_from, v_to = m.popleft()

            # Extend to the left: known pairs (new_from, v_from) of new_var
            # combined with rules ``head -> new_var var``.
            # NOTE(review): presumably iterating ``matrix[:, v_from]`` yields
            # (index, value) pairs of that column -- confirm.
            for new_var, matrix in res.items():
                for new_from, _ in matrix[:, v_from]:
                    for prod in nonterminal_prods:
                        if (len(prod.body) == 2 and prod.body[0] == new_var
                                and prod.body[1] == var and
                            (prod.head not in res
                             or res[prod.head].get(new_from, v_to) is None)):
                            m.append((prod.head, new_from, v_to))
                            add_to_res.append((prod.head, new_from, v_to))

            # Extend to the right: known pairs (v_to, new_to) of new_var
            # combined with rules ``head -> var new_var``.
            for new_var, matrix in res.items():
                for new_to, _ in matrix[v_to, :]:
                    for prod in nonterminal_prods:
                        if (len(prod.body) == 2 and prod.body[0] == var
                                and prod.body[1] == new_var and
                            (prod.head not in res
                             or res[prod.head].get(v_from, new_to) is None)):
                            m.append((prod.head, v_from, new_to))
                            add_to_res.append((prod.head, v_from, new_to))

            # Record the pairs found in this step. The loop variables reuse
            # the names of the popped triple, which is no longer needed.
            for var, v_from, v_to in add_to_res:
                matrix = res.get(
                    var,
                    Matrix.sparse(BOOL, graph.states_amount,
                                  graph.states_amount))
                matrix[v_from, v_to] = True
                res[var] = matrix

        return res.get(
            cfg.start_symbol,
            Matrix.sparse(BOOL, graph.states_amount, graph.states_amount))
Ejemplo n.º 9
0
 def __init__(self, cfg: CFG):
     """Store ``cfg`` together with its epsilon flag and normal forms.

     :param cfg: source context-free grammar.
     """
     derives_empty_word = cfg.generate_epsilon()
     normal_form = cfg.to_normal_form()

     self.cfg = cfg
     self.generate_epsilon = derives_empty_word
     self.cnf = normal_form
     # Called last, after the attributes above are set.
     # NOTE(review): presumably get_weak_cnf() reads them -- confirm.
     self.wcnf = self.get_weak_cnf()
Ejemplo n.º 10
0
 def test_generate_epsilon(self):
     """A grammar with the single rule ``S -> a`` must not generate epsilon."""
     start = Variable("S")
     only_rule = Production(start, [Terminal("a")])
     grammar = CFG(productions=[only_rule], start_symbol=start)
     self.assertFalse(grammar.generate_epsilon())