def cfpq_matrix_multiplication(grammar: CFG, graph: BMGraph):
    """Evaluate a context-free path query by boolean matrix multiplication.

    Every grammar head gets a boolean ``states_amount x states_amount``
    matrix. Matrices are seeded from the graph's label matrices through the
    single-symbol productions of the normal form, then the two-symbol
    productions are applied repeatedly until no matrix gains entries.

    @param grammar: context-free grammar of the query
    @param graph: graph exposing ``states_amount`` and per-label ``matrices``
    @return: matrix stored for the normal-form start symbol, or an empty
        matrix when nothing was derived for it
    """
    size = graph.states_amount
    res = dict()
    terminal_prods = set()
    nonterminal_prods = set()

    # An epsilon-generating grammar relates every vertex to itself.
    if grammar.generate_epsilon():
        identity = Matrix.sparse(BOOL, size, size)
        for vertex in range(size):
            identity[vertex, vertex] = True
        res[grammar.start_symbol] = identity

    cfg = grammar.to_normal_form()
    for prod in cfg.productions:
        bucket = terminal_prods if len(prod.body) == 1 else nonterminal_prods
        bucket.add(prod)

    # Seed: head -> union of the label matrices it derives directly.
    with semiring.LOR_LAND_BOOL:
        for label, edges in graph.matrices.items():
            terminal_body = [Terminal(label)]
            for prod in terminal_prods:
                if prod.body == terminal_body:
                    if prod.head in res:
                        res[prod.head] += edges.dup()
                    else:
                        res[prod.head] = edges.dup()

    # Fixpoint: only re-evaluate productions whose body symbols changed in
    # the previous round.
    with semiring.LOR_LAND_BOOL:
        updated = cfg.variables
        while updated:
            previous, updated = updated, set()
            for prod in nonterminal_prods:
                left, right = prod.body[0], prod.body[1]
                if left not in res or right not in res:
                    continue
                if left not in previous and right not in previous:
                    continue
                current = res.get(prod.head)
                if current is None:
                    current = Matrix.sparse(BOOL, size, size)
                before = current.nvals
                res[prod.head] = current + (res[left] @ res[right])
                if res[prod.head].nvals != before:
                    updated.add(prod.head)

    answer = res.get(cfg.start_symbol)
    if answer is None:
        answer = Matrix.sparse(BOOL, size, size)
    return answer
def transitive_closure_2(self):
    """Return the transitive closure of the union of all label matrices.

    The closure is grown one step at a time: the current result is
    multiplied by the adjacency matrix and merged back in until the number
    of stored entries stops changing.

    The previous loop only executed ``result += adj_matrix`` and therefore
    returned the plain adjacency matrix; the multiplication step was
    missing.

    @return: a new boolean matrix of size ``n_vertices x n_vertices``
    """
    adj_matrix = Matrix.sparse(BOOL, self.n_vertices, self.n_vertices)
    for label_matrix in self.label_matrices.values():
        adj_matrix += label_matrix

    # Start from paths of length one; keep the adjacency matrix untouched.
    result = adj_matrix.dup()
    if adj_matrix.nvals != 0:
        while True:
            old = result.nvals
            # Extend every known path by one more edge.
            result += result @ adj_matrix
            if old == result.nvals:
                break
    return result
def read_graph_from_file(self, file_path):
    """Re-initialise this graph and load it from an edge-list file.

    Each non-blank line must have the form ``<start> <label> <end>`` with
    single spaces, where ``start`` and ``end`` are integer vertex ids.
    Every vertex in ``range(vertices_count)`` is marked as both a start and
    a terminal vertex.

    Blank lines are skipped. Previously a blank line in the middle of the
    file aborted the load silently and left the graph empty. A file with no
    edges still returns right after the re-initialisation, as before.

    @param file_path: path to the edge-list file
    """
    self.__init__()

    # The context manager closes the file even if reading fails.
    with open(file_path, 'r') as graph_file:
        lines = graph_file.read().rstrip().split('\n')

    # Parse every edge once; remember the largest vertex id seen.
    edges = []
    max_vertex = 0
    for line in lines:
        if not line.strip():
            continue
        start, label, end = line.split(' ')
        source, target = int(start), int(end)
        max_vertex = max(max_vertex, source, target)
        edges.append((source, label, target))

    if not edges:
        return

    self.vertices_count = max_vertex + 1

    # One boolean adjacency matrix per label.
    for source, label, target in edges:
        if label not in self.label_matrices:
            self.label_matrices[label] = Matrix.sparse(
                BOOL, self.vertices_count, self.vertices_count)
        self.label_matrices[label][source, target] = True

    # Every vertex is both a start and a terminal vertex.
    for vertex in range(self.vertices_count):
        self.start_vertices.add(vertex)
        self.terminal_vertices.add(vertex)
def from_rsm(cls, rsm: RSM):
    """
    Build RSA from a given cfpq_data Recursive State Machine

    States of all boxes are renumbered into one shared, consecutive id
    space; transitions are grouped by label and stored as boolean matrices.

    @param rsm: RSM on which RSA is built
    @return: initialized class
    """
    rsa = RecursiveAutomaton()
    rsa.start_nonterm = rsm.start_symbol.to_text()

    next_id = 0
    edges_by_label = dict()

    for nonterm, dfa in rsm.boxes:
        name = nonterm.to_text()
        state_ids = dict()  # box-local DFA state -> global id

        rsa.nonterminals.add(name)
        rsa.labels = rsa.labels.union(dfa.symbols)
        rsa.boxes[name] = []

        for label in dfa.symbols:
            edges_by_label.setdefault(label, [])

        transitions = dfa.to_dict()
        for source, outgoing in transitions.items():
            if source not in state_ids:
                state_ids[source] = next_id
                rsa.boxes[name].append(next_id)
                next_id += 1
            for symbol, target in outgoing.items():
                if target not in state_ids:
                    state_ids[target] = next_id
                    rsa.boxes[name].append(next_id)
                    next_id += 1
                edges_by_label[symbol].append(
                    (state_ids[source], state_ids[target]))

        rsa.states[name] = []
        rsa.start_state[name] = state_ids[dfa.start_state]
        rsa.finish_states[name] = []
        for final_state in dfa.final_states:
            start_id = state_ids[dfa.start_state]
            final_id = state_ids[final_state]
            rsa.states[name].append((start_id, final_id))
            rsa.finish_states[name].append(final_id)
            # A box whose start state is also final is recorded separately.
            if start_id == final_id:
                rsa.start_and_finish.add(name)

    rsa.matrices_size = next_id
    for label, edges in edges_by_label.items():
        rsa.matrices[label] = Matrix.sparse(
            BOOL, rsa.matrices_size, rsa.matrices_size)
        for source_id, target_id in edges:
            rsa.matrices[label][source_id, target_id] = True
            if source_id in rsa.out_states:
                rsa.out_states[source_id].append((target_id, label))
            else:
                rsa.out_states[source_id] = [(target_id, label)]

    rsa.terminals = rsa.labels.difference(rsa.nonterminals)
    return rsa
def transitive_closure(self):
    """Return the transitive closure of the union of all label matrices.

    Non-empty label matrices are merged into one adjacency matrix, which is
    then repeatedly combined with its own square. The loop runs at most
    ``n_vertices`` times and stops early once no new entries appear.

    @return: boolean matrix of size ``n_vertices x n_vertices``
    """
    size = self.n_vertices
    closure = Matrix.sparse(BOOL, size, size)

    for layer in self.label_matrices.values():
        if layer.nvals == 0:
            continue
        closure = closure | layer

    for _ in range(size):
        before = closure.nvals
        closure = closure | (closure @ closure)
        if closure.nvals == before:
            break

    return closure
def transitive_closure_1(self):
    """Return the transitive closure of the union of all label matrices.

    The adjacency matrix is repeatedly merged with its own square until the
    number of stored entries stops growing.

    The termination test now compares entry counts. It previously compared
    the integer ``old`` with the matrix object itself, which can never be
    the intended "nothing changed" check.

    @return: boolean matrix of size ``n_vertices x n_vertices``
    """
    adj_matrix = Matrix.sparse(BOOL, self.n_vertices, self.n_vertices)
    for label_matrix in self.label_matrices.values():
        adj_matrix += label_matrix

    if adj_matrix.nvals != 0:
        while True:
            old = adj_matrix.nvals
            adj_matrix += adj_matrix @ adj_matrix
            # Fixpoint reached once squaring adds no new entries.
            if old == adj_matrix.nvals:
                break
    return adj_matrix
def transitive_closure_mp(graph):
    """Return the transitive closure of ``graph`` over all of its labels.

    The label matrices are merged into one adjacency matrix; the closure is
    then extended by multiplying with that adjacency matrix until its entry
    count no longer changes. At least one multiplication is always done.

    @param graph: object exposing ``states_amount`` and ``matrices``
    @return: boolean matrix of size ``states_amount x states_amount``
    """
    size = graph.states_amount
    closure = Matrix.sparse(BOOL, size, size)
    with semiring.LOR_LAND_BOOL:
        for label_matrix in graph.matrices.values():
            closure += label_matrix

    adjacency = closure.dup()
    previous_nvals = closure.nvals
    while True:
        with semiring.LOR_LAND_BOOL:
            closure += adjacency @ closure
        current_nvals = closure.nvals
        if current_nvals == previous_nvals:
            break
        previous_nvals = current_nvals
    return closure
def from_file(cls, path: Path):
    """
    Load RSA from file

    The file starts with three integer lines: number of label matrices,
    number of nonterminals, and matrix size. Then, for every label and
    afterwards for every nonterminal, it holds a name line, an edge-count
    line, and that many ``<first> <second>`` lines.

    @param path: path to file with RSA
    @return: initialized class
    """
    rsa = RecursiveAutomaton()
    with open(path, "r") as file:
        count_matrix = int(file.readline())
        count_nonterminals = int(file.readline())
        rsa.matrices_size = int(file.readline())

        # Transition matrices, one section per label.
        for _ in range(count_matrix):
            label = file.readline().replace("\n", "")
            rsa.labels.add(label)
            for _ in range(int(file.readline())):
                first, second = file.readline().split()
                source, target = int(first), int(second)
                rsa[label][source, target] = True
                if source in rsa.out_states:
                    rsa.out_states[source].append((target, label))
                else:
                    rsa.out_states[source] = [(target, label)]

        # Start/finish pairs, one section per nonterminal.
        for _ in range(count_nonterminals):
            label = file.readline().replace("\n", "")
            rsa.nonterminals.add(label)
            rsa.states[label] = Matrix.sparse(
                BOOL, rsa.matrices_size, rsa.matrices_size)
            for _ in range(int(file.readline())):
                first, second = file.readline().split()
                source, target = int(first), int(second)
                rsa.states[label][source, target] = True
                rsa.start_state[label] = source
                if label in rsa.finish_states:
                    rsa.finish_states[label].append(target)
                else:
                    rsa.finish_states[label] = [target]
                # Compared as raw text, exactly as read from the file.
                if first == second:
                    rsa.start_and_finish.add(label)

    rsa.terminals = rsa.labels.difference(rsa.nonterminals)
    return rsa
def parse_regex(self, file_path):
    """Re-initialise this graph from a regular expression stored in a file.

    The file content (trailing whitespace stripped) is parsed as a regex,
    converted to a minimised deterministic automaton, and stored as one
    boolean matrix per input symbol plus start and terminal vertex sets.

    The file is read inside a ``with`` block so the handle is closed even
    when reading or regex parsing raises.

    @param file_path: path to a file holding the regular expression
    @return: ``self``
    """
    self.__init__()

    with open(file_path, 'r') as regex_file:
        regex_text = regex_file.read().rstrip()
    regex = Regex(regex_text)

    # Regex -> minimal DFA; its state count is the matrix dimension.
    dfa = regex.to_epsilon_nfa().to_deterministic().minimize()
    self.vertices_count = len(dfa.states)

    # Give every automaton state a consecutive integer index.
    states = {}
    for state in dfa._states:
        if state not in states:
            states[state] = len(states)

    # One boolean matrix per input symbol.
    for source in dfa._states:
        for label in dfa._input_symbols:
            for target in dfa._transition_function(source, label):
                if label not in self.label_matrices:
                    self.label_matrices[label] = Matrix.sparse(
                        BOOL, self.vertices_count, self.vertices_count)
                self.label_matrices[label][states[source], states[target]] = True

    self.start_vertices.add(states[dfa.start_state])
    for state in dfa._final_states:
        self.terminal_vertices.add(states[state])

    return self
def cfpq_tensor_product(grammar: CFG, graph: BMGraph):
    """Evaluate a context-free path query with the tensor-product algorithm.

    The grammar is laid out as an automaton with one chain of states per
    production. That automaton is intersected with a working copy of the
    graph, the intersection's transitive closure is taken, and every closure
    entry joining the start and final state of a production adds an edge
    labelled with that production's head. This repeats until no edge is new.

    @param grammar: context-free grammar of the query
    @param graph: graph to query; a duplicate is modified, not the original
    @return: matrix stored under ``grammar.start_symbol`` in the working
        copy, or an empty matrix when there is none
    """
    res = graph.dup()

    # Lay each production out as a chain of len(body) + 1 states.
    rfa = BMGraph()
    rfa_heads = dict()
    rfa.states_amount = sum(
        len(prod.body) + 1 for prod in grammar.productions)
    rfa.states = set(range(rfa.states_amount))

    next_state = 0
    for prod in grammar.productions:
        first = next_state
        last = first + len(prod.body)
        rfa.start_states.add(first)
        rfa.final_states.add(last)
        rfa_heads[(first, last)] = prod.head.value
        for offset, symbol in enumerate(prod.body):
            label_matrix = rfa.matrices.get(symbol.value)
            if label_matrix is None:
                label_matrix = Matrix.sparse(
                    BOOL, rfa.states_amount, rfa.states_amount)
            label_matrix[first + offset, first + offset + 1] = True
            rfa.matrices[symbol.value] = label_matrix
        next_state = last + 1

    # Empty-bodied productions relate every vertex to itself.
    for prod in grammar.productions:
        if len(prod.body) != 0:
            continue
        loops = Matrix.sparse(BOOL, graph.states_amount, graph.states_amount)
        for vertex in range(graph.states_amount):
            loops[vertex, vertex] = True
        res.matrices[prod.head] = loops

    while True:
        changed = False
        closure = rfa.intersect(res).transitive_closure()
        rows, cols, _ = closure.to_lists()
        for i, j in zip(rows, cols):
            # Split a product-state index into (automaton, graph) parts.
            rfa_from, graph_from = divmod(i, res.states_amount)
            rfa_to, graph_to = divmod(j, res.states_amount)
            if (rfa_from, rfa_to) not in rfa_heads:
                continue
            head = rfa_heads[(rfa_from, rfa_to)]
            head_matrix = res.matrices.get(head)
            if head_matrix is None:
                head_matrix = Matrix.sparse(
                    BOOL, graph.states_amount, graph.states_amount)
            if head_matrix.get(graph_from, graph_to) is None:
                changed = True
                head_matrix[graph_from, graph_to] = True
                res.matrices[head] = head_matrix
        if not changed:
            break

    answer = res.matrices.get(grammar.start_symbol)
    if answer is None:
        answer = Matrix.sparse(BOOL, graph.states_amount, graph.states_amount)
    return answer
def Hellings(grammar: CFG, graph: BMGraph):
    """Evaluate a context-free path query with a worklist algorithm.

    ``res`` maps a grammar head to a boolean matrix of vertex pairs, and
    ``m`` is a queue of ``(head, from, to)`` triples still to be processed.
    Each triple taken from the queue is combined with the entries already in
    ``res`` through the two-symbol productions of the normal form. Pairs
    found this way are queued and written into ``res``.

    @param grammar: context-free grammar of the query
    @param graph: graph exposing ``states_amount`` and per-label ``matrices``
    @return: matrix stored for the normal-form start symbol, or an empty
        matrix when nothing was derived for it
    """
    res = dict()
    m = deque()
    terminal_prods = set()
    nonterminal_prods = set()
    # An epsilon-generating grammar relates every vertex to itself.
    if grammar.generate_epsilon():
        matrix = Matrix.sparse(BOOL, graph.states_amount, graph.states_amount)
        for i in range(graph.states_amount):
            matrix[i, i] = True
            m.append((grammar.start_symbol, i, i))
        res[grammar.start_symbol] = matrix
    cfg = grammar.to_normal_form()
    # Split productions by body length: one symbol vs. everything else.
    for prod in cfg.productions:
        if len(prod.body) == 1:
            terminal_prods.add(prod)
        else:
            nonterminal_prods.add(prod)
    # Seed res from graph edges whose label is derived directly by a head.
    with semiring.LOR_LAND_BOOL:
        for t, matrix in graph.matrices.items():
            for prod in terminal_prods:
                if prod.body == [Terminal(t)]:
                    if prod.head not in res:
                        res[prod.head] = matrix.dup()
                    else:
                        res[prod.head] += matrix.dup()
    # Queue every pair known so far (epsilon pairs get queued a second time).
    for var, matrix in res.items():
        for i, j, _ in zip(*matrix.to_lists()):
            m.append((var, i, j))
    while m:
        # New pairs are collected here and written to res only after both
        # scans, so res is not modified while it is being iterated.
        add_to_res = list()
        var, v_from, v_to = m.popleft()
        # Existing pairs ending at v_from: (new_var, new_from, v_from)
        # followed by the current triple.
        # NOTE(review): presumably iterating matrix[:, v_from] yields
        # (row index, value) pairs -- confirm against the Matrix API.
        for new_var, matrix in res.items():
            for new_from, _ in matrix[:, v_from]:
                for prod in nonterminal_prods:
                    if (len(prod.body) == 2 and prod.body[0] == new_var
                            and prod.body[1] == var
                            and (prod.head not in res
                                 or res[prod.head].get(new_from, v_to) is None)):
                        m.append((prod.head, new_from, v_to))
                        add_to_res.append((prod.head, new_from, v_to))
        # Existing pairs starting at v_to: the current triple followed by
        # (new_var, v_to, new_to).
        for new_var, matrix in res.items():
            for new_to, _ in matrix[v_to, :]:
                for prod in nonterminal_prods:
                    if (len(prod.body) == 2 and prod.body[0] == var
                            and prod.body[1] == new_var
                            and (prod.head not in res
                                 or res[prod.head].get(v_from, new_to) is None)):
                        m.append((prod.head, v_from, new_to))
                        add_to_res.append((prod.head, v_from, new_to))
        # Commit the pairs found for this triple.
        for var, v_from, v_to in add_to_res:
            matrix = res.get(
                var,
                Matrix.sparse(BOOL, graph.states_amount, graph.states_amount))
            matrix[v_from, v_to] = True
            res[var] = matrix
    return res.get(
        cfg.start_symbol,
        Matrix.sparse(BOOL, graph.states_amount, graph.states_amount))
def __getitem__(self, item: str) -> Matrix:
    """Return the matrix stored for ``item``, creating it on first access.

    A missing entry is filled with an empty boolean matrix of size
    ``matrices_size x matrices_size`` and kept in ``self.matrices``.

    @param item: key of the requested matrix
    @return: the matrix stored under ``item``
    """
    if item in self.matrices:
        return self.matrices[item]
    created = Matrix.sparse(BOOL, self.matrices_size, self.matrices_size)
    self.matrices[item] = created
    return created
def add_states(self, label: str):
    """Register ``label`` in ``_S`` and give it a fresh empty state matrix.

    Any matrix already stored for ``label`` in ``_states`` is replaced.

    @param label: name to register
    """
    self._S.add(label)
    size = self._matrices_size
    self._states[label] = Matrix.sparse(BOOL, size, size)
def add_automaton(self, label: str):
    """Register ``label`` in ``_labels`` and give it a fresh empty matrix.

    Any matrix already stored for ``label`` in ``_automaton`` is replaced.

    @param label: name to register
    """
    self._labels.add(label)
    size = self._matrices_size
    self._automaton[label] = Matrix.sparse(BOOL, size, size)
def get_by_label(self, label):
    """Return the matrix stored for ``label``, creating it on first access.

    A missing entry is filled with an empty boolean matrix of size
    ``n_vertices x n_vertices`` and kept in ``self.label_matrices``.

    @param label: key of the requested matrix
    @return: the matrix stored under ``label``
    """
    try:
        return self.label_matrices[label]
    except KeyError:
        fresh = Matrix.sparse(BOOL, self.n_vertices, self.n_vertices)
        self.label_matrices[label] = fresh
        return fresh