def cfpq_matrix_product(graph: Graph, grammar: CFG):
    """Answer a context-free path query by iterating boolean matrix products.

    One boolean matrix is kept per grammar symbol. The matrices are seeded
    from the graph's label matrices through the single-symbol productions of
    the normal form, then every two-symbol production ``A -> B C`` adds
    ``M[B] @ M[C]`` into ``M[A]`` until no matrix gains a new stored value.

    Parameters
    ----------
    graph : Graph
        Provides ``vertices_count`` and ``label_matrices``
        (label -> boolean adjacency matrix).
    grammar : CFG
        Grammar describing the path constraint.

    Returns
    -------
    ``False`` when the graph has no vertices; otherwise the matrix
    accumulated for the start symbol of the normal form, or an empty sparse
    matrix if nothing was derived for it.
    """
    size = graph.vertices_count
    if size == 0:
        return False

    reachability = {}

    # If the grammar derives the empty word, every vertex reaches itself.
    if grammar.generate_epsilon():
        diagonal = Matrix.sparse(BOOL, size, size)
        diagonal += Matrix.identity(BOOL, size)
        reachability[grammar.start_symbol] = diagonal

    cfg = grammar.to_normal_form()

    # Split the productions by body length: exactly one symbol vs. the rest.
    unit_rules = set()
    pair_rules = set()
    for rule in cfg.productions:
        bucket = unit_rules if len(rule.body) == 1 else pair_rules
        bucket.add(rule)

    # Seed each head with the edges labelled by the terminal it produces.
    for label, label_matrix in graph.label_matrices.items():
        for rule in unit_rules:
            if rule.body != [Terminal(label)]:
                continue
            if rule.head in reachability:
                reachability[rule.head] += label_matrix.dup()
            else:
                reachability[rule.head] = label_matrix.dup()

    # Only revisit productions whose body mentions a symbol whose matrix
    # grew during the previous pass; initially every variable counts.
    frontier = cfg.variables
    while frontier:
        previous_frontier, frontier = frontier, set()
        for rule in pair_rules:
            left = rule.body[0]
            right = rule.body[1]
            if left not in reachability or right not in reachability:
                continue
            if left not in previous_frontier and right not in previous_frontier:
                continue
            accumulated = reachability.get(
                rule.head, Matrix.sparse(BOOL, size, size))
            stored_before = accumulated.nvals
            reachability[rule.head] = accumulated + (
                reachability[left] @ reachability[right])
            if reachability[rule.head].nvals != stored_before:
                frontier.add(rule.head)

    return reachability.get(cfg.start_symbol, Matrix.sparse(BOOL, size, size))
def cfpq_matrix_multiplication(grammar: CFG, graph: BMGraph):
    """Evaluate a context-free path query with boolean matrix multiplication.

    A boolean matrix is maintained per grammar symbol. Matrices are seeded
    from ``graph.matrices`` via the single-symbol productions of the normal
    form, and then each two-symbol production ``A -> B C`` repeatedly adds
    ``M[B] @ M[C]`` into ``M[A]`` (under ``semiring.LOR_LAND_BOOL``) until no
    matrix gains a stored value.

    Parameters
    ----------
    grammar : CFG
        Grammar describing the path constraint.
    graph : BMGraph
        Provides ``states_amount`` and ``matrices``
        (label -> boolean adjacency matrix).

    Returns
    -------
    The matrix accumulated for the start symbol of the normal form, or an
    empty sparse matrix when nothing was derived for it.
    """
    size = graph.states_amount
    closure = {}

    # If the grammar derives the empty word, every state reaches itself.
    if grammar.generate_epsilon():
        diagonal = Matrix.sparse(BOOL, size, size)
        for state in range(size):
            diagonal[state, state] = True
        closure[grammar.start_symbol] = diagonal

    cfg = grammar.to_normal_form()

    # Split the productions by body length: exactly one symbol vs. the rest.
    unit_rules = set()
    pair_rules = set()
    for rule in cfg.productions:
        bucket = unit_rules if len(rule.body) == 1 else pair_rules
        bucket.add(rule)

    # Seed each head with the edges labelled by the terminal it produces.
    with semiring.LOR_LAND_BOOL:
        for label, label_matrix in graph.matrices.items():
            for rule in unit_rules:
                if rule.body != [Terminal(label)]:
                    continue
                if rule.head in closure:
                    closure[rule.head] += label_matrix.dup()
                else:
                    closure[rule.head] = label_matrix.dup()

    with semiring.LOR_LAND_BOOL:
        # Only revisit productions whose body mentions a symbol whose matrix
        # grew during the previous pass; initially every variable counts.
        frontier = cfg.variables
        while frontier:
            previous_frontier, frontier = frontier, set()
            for rule in pair_rules:
                left = rule.body[0]
                right = rule.body[1]
                if left not in closure or right not in closure:
                    continue
                if (left not in previous_frontier
                        and right not in previous_frontier):
                    continue
                accumulated = closure.get(
                    rule.head, Matrix.sparse(BOOL, size, size))
                stored_before = accumulated.nvals
                closure[rule.head] = accumulated + (
                    closure[left] @ closure[right])
                if closure[rule.head].nvals != stored_before:
                    frontier.add(rule.head)

    return closure.get(cfg.start_symbol, Matrix.sparse(BOOL, size, size))
def cfpq_matrix(graph: Graph, grammar: CFG):
    """Answer a context-free path query with the matrix closure algorithm.

    The grammar's productions are used as given (no normal-form conversion is
    performed here): productions with a one-symbol body seed the matrices from
    the graph's edges, and productions with a two-symbol body ``A -> B C``
    repeatedly add ``M[B] @ M[C]`` into ``M[A]`` until no matrix gains a
    stored value. Productions of any other length are ignored.

    Parameters
    ----------
    graph : Graph
        Provides ``size``, ``vertices``, ``label_dictionary``
        (label -> boolean matrix) and ``get_edges(label)``.
    grammar : CFG
        Grammar describing the path constraint.

    Returns
    -------
    The boolean matrix accumulated for ``grammar.start_symbol``; an empty
    ``0 x 0`` sparse matrix when the graph has size 0.
    """
    size = graph.size
    if size == 0:
        return Matrix.sparse(BOOL, size, size)

    result = Graph()
    start_symbol = grammar.start_symbol
    result.size = size
    for variable in grammar.variables:
        result.label_dictionary[variable] = Matrix.sparse(BOOL, size, size)

    # Classify the productions once; only these two shapes are ever used.
    unit_productions = [p for p in grammar.productions if len(p.body) == 1]
    binary_productions = [p for p in grammar.productions if len(p.body) == 2]

    for label in graph.label_dictionary:
        terminal = Terminal(label)
        result.label_dictionary[terminal] = graph.label_dictionary[label].dup()
        # The heads producing this terminal depend only on the label, so
        # resolve them once instead of rescanning the grammar for every edge.
        heads = [p.head for p in unit_productions if p.body[0] == terminal]
        for from_, to in graph.get_edges(label):
            for head in heads:
                result.label_dictionary[head][from_, to] = 1

    # If the grammar derives the empty word, every vertex reaches itself.
    if grammar.generate_epsilon():
        for vertex in graph.vertices:
            result.label_dictionary[start_symbol][vertex, vertex] = 1

    with semiring.LOR_LAND_BOOL:
        matrix_changing = True
        while matrix_changing:
            matrix_changing = False
            for production in binary_productions:
                head = production.head
                left, right = production.body
                previous = result.label_dictionary[head].nvals
                product = (result.label_dictionary[left]
                           @ result.label_dictionary[right])
                result.label_dictionary[head] = (
                    result.label_dictionary[head] + product)
                if previous != result.label_dictionary[head].nvals:
                    matrix_changing = True

    return result.label_dictionary[start_symbol]
def __init__(self, start_symbol=None, productions=None):
    """Build the grammar in normal form from a start symbol and productions.

    The inputs are wrapped in a ``CFG`` and converted with
    ``to_normal_form()``. If the source grammar generates the empty word, an
    empty-body production for the start symbol is added back so the language
    is preserved. Productions with exactly two body symbols are also stored
    in ``self.pair_productions``.
    """
    source = CFG(start_symbol=start_symbol, productions=productions)
    normal_form = source.to_normal_form()

    # Needed for language preservation: re-add the empty word explicitly.
    if source.generate_epsilon():
        normal_form._productions.add(
            Production(normal_form._start_symbol, []))

    self.pair_productions = {
        rule for rule in normal_form._productions if len(rule.body) == 2
    }

    super(GrammarCNF, self).__init__(
        start_symbol=normal_form._start_symbol,
        productions=normal_form._productions,
    )
def __init__(self, cfg: CFG, is_reduced: bool = False):
    """Initialise from ``cfg``, normalising it first unless ``is_reduced``.

    When ``is_reduced`` is false:

    * if the start symbol occurs in any production body, a fresh start
      variable (``S'``, with further ``'`` appended until the name is not
      found in ``cfg.variables``) is introduced with the single production
      ``new_start -> old_start``. This step modifies the passed-in ``cfg``
      in place;
    * the grammar is converted with ``to_normal_form()``, and an empty-body
      production for the start symbol is added if the grammar generated the
      empty word before the conversion.

    The parent initialiser then receives the variables, terminals, start
    symbol and productions of the resulting grammar.
    """
    if not is_reduced:
        start_on_right_side = any(
            cfg.start_symbol in rule.body for rule in cfg.productions
        )
        if start_on_right_side:
            # Find a name that does not clash with an existing variable.
            fresh_name = 'S\''
            while fresh_name in cfg.variables:
                fresh_name += '\''
            fresh_start = Variable(fresh_name)
            cfg._productions.add(Production(fresh_start, [cfg._start_symbol]))
            cfg._variables.add(fresh_start)
            cfg._start_symbol = fresh_start

        # Must be queried before the conversion so it can be restored after.
        derives_empty_word = cfg.generate_epsilon()
        cfg = cfg.to_normal_form()
        if derives_empty_word:
            cfg._productions.add(Production(cfg.start_symbol, []))

    super().__init__(
        cfg.variables,
        cfg.terminals,
        cfg.start_symbol,
        cfg.productions,
    )
def CYK(grammar: CFG, word):
    """Decide whether ``grammar`` derives ``word`` using a CYK table.

    ``table[a][b]`` collects the heads of productions that derive the slice
    ``word[a..b]`` (both ends inclusive). Single positions are filled from
    one-symbol productions of the normal form, longer spans from two-symbol
    productions by trying every split point.

    Parameters
    ----------
    grammar : CFG
        Grammar to test against; converted with ``to_normal_form()``.
    word
        Sized, indexable sequence of terminal values.

    Returns
    -------
    For an empty word, the result of ``grammar.generate_epsilon()``;
    otherwise whether the normal form's start symbol derives the whole word.
    """
    length = len(word)
    if length == 0:
        return grammar.generate_epsilon()

    cfg = grammar.to_normal_form()
    table = [[set() for _ in range(length)] for _ in range(length)]

    # Diagonal: heads that produce each individual symbol.
    for position, symbol in enumerate(word):
        expected_body = [Terminal(symbol)]
        for rule in cfg.productions:
            if rule.body == expected_body:
                table[position][position].add(rule.head)

    binary_rules = [rule for rule in cfg.productions if len(rule.body) == 2]

    # `span` is the distance between the first and last index of a slice.
    for span in range(length):
        for start in range(length - span):
            end = start + span
            for offset in range(span):
                left_cell = table[start][start + offset]
                right_cell = table[start + offset + 1][end]
                for rule in binary_rules:
                    if rule.body[0] in left_cell and rule.body[1] in right_cell:
                        table[start][end].add(rule.head)

    return cfg.start_symbol in table[0][length - 1]
def cnf_from_cfg(cfg: CFG) -> CFG:
    """Convert a context-free grammar [2]_ to Chomsky normal form [1]_.

    If the input grammar generates the empty word, an empty-body production
    for the start symbol is added to the converted grammar.

    Parameters
    ----------
    cfg : CFG
        Context-free grammar.

    Returns
    -------
    cnf : CFG
        Context-free grammar in Chomsky normal form.

    Examples
    --------
    >>> import cfpq_data
    >>> cfg = cfpq_data.cfg_from_text("S -> a S b S | epsilon")
    >>> cnf = cfpq_data.cnf_from_cfg(cfg)
    >>> [cnf.contains(word) for word in ["", "ab", "aabb"]]
    [True, True, True]

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Chomsky_normal_form
    .. [2] https://en.wikipedia.org/wiki/Context-free_grammar#Formal_definitions
    """
    # Query the original grammar before converting it.
    derives_empty_word = cfg.generate_epsilon()
    normal_form = cfg.to_normal_form()

    if derives_empty_word is not True:
        return normal_form

    normal_form._productions.add(Production(normal_form.start_symbol, []))
    return normal_form
def Hellings(grammar: CFG, graph: BMGraph):
    """Answer a context-free path query with a worklist over derived triples.

    ``res`` maps a grammar symbol to a boolean matrix of vertex pairs derived
    for it, and ``m`` is a queue of ``(symbol, from, to)`` triples still to be
    combined with their neighbours. The matrices are seeded from
    ``graph.matrices`` through the one-symbol productions of the normal form;
    each dequeued triple is then joined on both sides with the known pairs
    using the two-symbol productions.

    Parameters
    ----------
    grammar : CFG
        Grammar describing the path constraint.
    graph : BMGraph
        Provides ``states_amount`` and ``matrices``
        (label -> boolean adjacency matrix).

    Returns
    -------
    The matrix stored for the start symbol of the normal form, or an empty
    sparse matrix when nothing was derived for it.
    """
    res = dict()
    m = deque()
    terminal_prods = set()
    nonterminal_prods = set()
    # If the grammar derives the empty word, every state reaches itself.
    if grammar.generate_epsilon():
        matrix = Matrix.sparse(BOOL, graph.states_amount, graph.states_amount)
        for i in range(graph.states_amount):
            matrix[i, i] = True
            # NOTE(review): these triples look like they get queued a second
            # time by the seeding loop over `res` further down -- confirm
            # whether the duplicate entries are intended.
            m.append((grammar.start_symbol, i, i))
        res[grammar.start_symbol] = matrix
    cfg = grammar.to_normal_form()
    # Split productions by body length: exactly one symbol vs. everything else.
    for prod in cfg.productions:
        if len(prod.body) == 1:
            terminal_prods.add(prod)
        else:
            nonterminal_prods.add(prod)
    # Seed each head with the edges labelled by the terminal it produces.
    with semiring.LOR_LAND_BOOL:
        for t, matrix in graph.matrices.items():
            for prod in terminal_prods:
                if prod.body == [Terminal(t)]:
                    if prod.head not in res:
                        res[prod.head] = matrix.dup()
                    else:
                        res[prod.head] += matrix.dup()
    # Queue every pair known so far.
    for var, matrix in res.items():
        for i, j, _ in zip(*matrix.to_lists()):
            m.append((var, i, j))
    while m:
        # New pairs are collected here and written after the scans, because
        # `res` is being iterated while they are discovered.
        add_to_res = list()
        var, v_from, v_to = m.popleft()
        # Extend to the left: pairs (new_from, v_from) of new_var followed by
        # the current triple, for productions head -> new_var var.
        for new_var, matrix in res.items():
            # presumably iterating a column yields (row index, value) pairs
            for new_from, _ in matrix[:, v_from]:
                for prod in nonterminal_prods:
                    if (len(prod.body) == 2 and prod.body[0] == new_var
                            and prod.body[1] == var and
                            (prod.head not in res or
                             res[prod.head].get(new_from, v_to) is None)):
                        m.append((prod.head, new_from, v_to))
                        add_to_res.append((prod.head, new_from, v_to))
        # Extend to the right: the current triple followed by pairs
        # (v_to, new_to) of new_var, for productions head -> var new_var.
        for new_var, matrix in res.items():
            # presumably iterating a row yields (column index, value) pairs
            for new_to, _ in matrix[v_to, :]:
                for prod in nonterminal_prods:
                    if (len(prod.body) == 2 and prod.body[0] == var
                            and prod.body[1] == new_var and
                            (prod.head not in res or
                             res[prod.head].get(v_from, new_to) is None)):
                        m.append((prod.head, v_from, new_to))
                        add_to_res.append((prod.head, v_from, new_to))
        # Commit the discoveries; note this loop rebinds var, v_from and v_to.
        for var, v_from, v_to in add_to_res:
            matrix = res.get(
                var,
                Matrix.sparse(BOOL, graph.states_amount, graph.states_amount))
            matrix[v_from, v_to] = True
            res[var] = matrix
    return res.get(
        cfg.start_symbol,
        Matrix.sparse(BOOL, graph.states_amount, graph.states_amount))
def __init__(self, cfg: CFG):
    """Store ``cfg`` together with grammars derived from it.

    Sets ``self.cfg`` (the grammar as given), ``self.generate_epsilon``
    (result of ``cfg.generate_epsilon()``), ``self.cnf`` (result of
    ``cfg.to_normal_form()``) and ``self.wcnf`` (result of
    ``self.get_weak_cnf()``, called last).
    """
    self.cfg = cfg
    self.generate_epsilon, self.cnf = (
        cfg.generate_epsilon(),
        cfg.to_normal_form(),
    )
    # Computed last, after the attributes above have been assigned.
    self.wcnf = self.get_weak_cnf()
def test_generate_epsilon(self):
    """A grammar whose only production is S -> a does not generate epsilon."""
    start = Variable("S")
    only_rule = Production(start, [Terminal("a")])
    grammar = CFG(productions=[only_rule], start_symbol=start)
    self.assertFalse(grammar.generate_epsilon())