def prepare(self, graph: Graph, grammar: CFG):
    """
    Bind a graph and a grammar to this solver and build its initial state.

    The graph's boolean matrices are loaded, the grammar is converted with
    ``CnfGrammar.from_cfg``, an empty ``LabelGraph`` is created for the
    sources, and the nonterminal matrices are seeded from the simple rules.

    @param graph: graph to run queries on; ``load_bool_graph()`` is called on it
    @param grammar: grammar passed to ``CnfGrammar.from_cfg``
    """
    self.graph = graph
    graph.load_bool_graph()

    cnf = CnfGrammar.from_cfg(grammar)
    self.grammar = cnf

    # No sources are known yet; nonterminals start from the simple rules only.
    self.sources = LabelGraph(graph.matrices_size)
    self.nonterminals = init_simple_rules(cnf.simple_rules, graph)
def bfs(self, i, j):
    """
    Recursively collect labelled edges on paths from index ``i`` to index ``j``
    of ``self.kron``.

    Both indices pack an RSA state and a graph vertex: the loop decodes them as
    ``index // self.graph_size`` (state) and ``index % self.graph_size``
    (vertex).

    Two sets guard the recursion:
    ``self.negative_visited_in_kron`` remembers pairs whose result was empty,
    and ``self.visited_pairs_in_kron`` holds the pairs whose call is still in
    progress (added on entry, removed before returning).

    @param i: start index in ``self.kron``
    @param j: target index in ``self.kron``
    @return: a ``LabelGraph``; its ``is_empty`` flag is what callers (including
             this function itself) use to decide whether a path was found
    """
    # Known dead end: return a fresh graph.
    # NOTE(review): assumes a fresh LabelGraph reports is_empty == True - confirm.
    if (i, j) in self.negative_visited_in_kron:
        return LabelGraph(self.graph_size)

    # The pair is already being explored by an enclosing call: return a
    # placeholder flagged as non-empty so the caller does not discard its
    # own partial result, without recursing again.
    if (i, j) in self.visited_pairs_in_kron:
        bogus_result = LabelGraph(self.graph_size)
        bogus_result.is_empty = False
        return bogus_result

    self.visited_pairs_in_kron.add((i, j))

    result_graph = LabelGraph(self.graph_size)

    for k in self.kron[i]:  # k = (vertex, True)
        # Split both endpoints of the step i -> k[0] into graph vertex and RSA state.
        graph_i = i % self.graph_size
        graph_k = k[0] % self.graph_size
        rsa_i = i // self.graph_size
        rsa_k = k[0] // self.graph_size

        # "left" gathers what the single step i -> k[0] contributes.
        left = LabelGraph(self.graph_size)
        for label in self.rsa.labels:
            if self.rsa[label].get(rsa_i, rsa_k, default=False):
                if label in self.rsa.nonterminals:
                    # Nonterminal transition: expand it into the paths deriving it.
                    left += self.get_paths(graph_i, graph_k, label)
                else:
                    # Any other label: record the single graph edge itself.
                    if label not in left.matrices:
                        left[label] = Matrix.sparse(
                            BOOL, self.graph_size, self.graph_size)
                    left[label][graph_i, graph_k] = True

        if left.is_empty:
            continue

        # "right" gathers the remainder of the path from k[0] to j.
        right = LabelGraph(self.graph_size)
        if k[0] != j:
            right = self.bfs(k[0], j)
        else:
            # Target reached: nothing more to add, but mark it as a success.
            right.is_empty = False

        if right.is_empty:
            continue

        result_graph += left + right

    # Remember failures so the same pair is not explored again.
    if result_graph.is_empty:
        self.negative_visited_in_kron.add((i, j))

    self.visited_pairs_in_kron.remove((i, j))
    return result_graph
def get_paths(self, start, finish, nonterm):
    """
    Collect the paths from graph vertex ``start`` to graph vertex ``finish``
    that are derived from ``nonterm``.

    For every finish state of the nonterminal's automaton, ``self.bfs`` is run
    between the packed indices ``state * self.graph_size + vertex`` and the
    results are accumulated.

    @param start: source vertex in the graph
    @param finish: destination vertex in the graph
    @param nonterm: nonterminal whose start/finish states are looked up in ``self.rsa``
    @return: ``LabelGraph`` accumulated from the ``bfs`` results
    """
    triplet = (start, finish, nonterm)

    if triplet in self.solved_triplets:
        # This triplet is still being processed by an enclosing call (it is
        # removed again before that call returns): answer with a placeholder
        # flagged as non-empty instead of recursing.
        placeholder = LabelGraph(self.graph_size)
        placeholder.is_empty = False
        return placeholder

    self.solved_triplets.add(triplet)

    paths = LabelGraph(self.graph_size)
    initial_state = self.rsa.start_state[nonterm]
    for final_state in self.rsa.finish_states[nonterm]:
        source_index = initial_state * self.graph_size + start
        target_index = final_state * self.graph_size + finish
        paths += self.bfs(source_index, target_index)

    self.solved_triplets.remove(triplet)
    return paths
def solve(self, sources_vertices: Iterable) -> Tuple[Matrix, StatResponse]:
    """
    Extend the index for the given source vertices and return the start
    nonterminal's matrix.

    For every complex rule ``l -> r1 r2`` the loop repeats
    ``index[l] += (new[l_src] * index[r1]) * index[r2]`` until no rule sees a
    change in the non-zero counts of its three input matrices.

    @param sources_vertices: iterable of source vertex indices
    @return: tuple of ``self.index.nonterms[start_nonterm]`` and a
             ``StatResponse`` carrying the number of sweeps performed
    """
    new_sources = LabelGraph(self.graph.matrices_size)

    # nnz: (l, r1, r2) in complex rules -> (nnz(new[l]), nnz(index[r1]), nnz(index[r2]))
    # Keyed from the same rule collection the main loop iterates, so every
    # lookup below is guaranteed to find its entry.
    nnz = {}
    for l, r1, r2 in self.index.grammar.complex_rules:
        nnz[(l, r1, r2)] = (0, 0, 0)

    # Seed the start nonterminal with the requested sources that the index
    # does not already hold.
    for i in sources_vertices:
        if (i, i) not in self.index.sources[self.grammar.start_nonterm]:
            new_sources[self.grammar.start_nonterm][i, i] = True

    # NOTE(review): new_sources is never merged back into self.index.sources
    # in this method - confirm that this is intended.

    changed = True
    iter_count = 0
    while changed:
        iter_count += 1
        changed = False

        for l, r1, r2 in self.index.grammar.complex_rules:
            # l -> r1 r2 ==> index[l] += (new[l_src] * index[r1]) * index[r2]
            new_nnz = (new_sources[l].nvals,
                       self.index.nonterms[r1].nvals,
                       self.index.nonterms[r2].nvals)
            if nnz[(l, r1, r2)] == new_nnz:
                # Inputs of this rule did not change since it was last applied.
                continue

            # 1) new[r1_src] += {(j, j) : (j, j) in new[l_src] and not in index[r1_src]}
            for i, _, _ in new_sources[l]:
                if (i, i) not in self.index.sources[r1]:
                    new_sources[r1][i, i] = True

            # 2) tmp = new[l_src] * index[r1]
            tmp = new_sources[l] @ self.index.nonterms[r1]

            # 3) new[r2_src] += {(j, j) : (i, j) in tmp and not in index[r2_src]}
            update_sources_opt(tmp, self.index.sources[r2], new_sources[r2])

            # 4) index[l] += tmp * index[r2]
            self.index.nonterms[l] += tmp @ self.index.nonterms[r2]

            # Re-read the counts: steps 1-4 may have changed any of them.
            nnz[(l, r1, r2)] = (new_sources[l].nvals,
                                self.index.nonterms[r1].nvals,
                                self.index.nonterms[r2].nvals)
            changed = True

    return self.index.nonterms[
        self.index.grammar.start_nonterm], StatResponse(iter_count)
def matrix_base_algo(g: LabelGraph, grammar: CnfGrammar):
    """
    Compute the closure of ``g`` under ``grammar`` by repeated matrix products.

    Simple rules ``l -> r`` seed ``m[l]`` with ``g[r]``. Complex rules
    ``l -> r1 r2`` are then applied as ``m[l] += m[r1] @ m[r2]`` until a full
    sweep over all rules adds no entry to any matrix.

    @param g: labelled graph indexed by terminal label
    @param grammar: grammar exposing ``simple_rules`` and ``complex_rules``
    @return: ``LabelGraph`` indexed by nonterminal
    """
    # NOTE(review): constructed without a size argument - confirm LabelGraph
    # has a usable default.
    m = LabelGraph()
    for l, r in grammar.simple_rules:
        m[l] += g[r]

    changed = True
    while changed:
        # Reset once per sweep and OR the per-rule results together, so that
        # a change made by ANY rule triggers another sweep. A single rule
        # (the last one) must not decide alone, and an empty rule list must
        # terminate.
        changed = False
        for l, r1, r2 in grammar.complex_rules:
            old_nnz = m[l].nvals
            m[l] += m[r1] @ m[r2]
            changed |= m[l].nvals != old_nnz

    return m
def benchmark_ms(algo_name, data, result_dir):
    """
    Measurement function for finding paths from set of vertices

    For every graph one CSV file named ``<graph>-<algo>-msindex`` is appended
    to in ``result_dir``; the header row is written only when the file did not
    exist beforehand. Each row records one ``solve`` call on one chunk.

    @param algo_name: concrete implementation of the algorithm
    @param data: dictionary in format {path to graph: list of paths to grammars}
    @param result_dir: directory for uploading results of measurement
    """
    header_index = ['graph', 'grammar', 'size_chunk', 'time', 'count_S']
    # None stands for "one chunk containing every vertex".
    chunk_sizes = [1, 2, 4, 8, 16, 32, 50, 100, 500, 1000, 5000, 10000, None]

    for graph in data:
        result_index_file_path = result_dir.joinpath(
            f'{graph.stem}-{algo_name.__name__}-msindex')

        # Must be decided BEFORE opening: mode 'a' creates the file.
        append_header = not exists(result_index_file_path)

        # The context manager guarantees the rows are flushed and the handle
        # is released even if a measurement raises.
        with open(result_index_file_path, mode='a', newline='\n') as result_csv:
            csv_writer_index = csv.writer(result_csv,
                                          delimiter=',',
                                          quoting=csv.QUOTE_NONNUMERIC,
                                          escapechar=' ')

            if append_header:
                csv_writer_index.writerow(header_index)

            g = LabelGraph.from_txt(graph)
            for grammar in data[graph]:
                algo = algo_name()
                algo.prepare(Graph.from_txt(graph), cfg_from_txt(grammar))

                for chunk_size in chunk_sizes:
                    if chunk_size is None:
                        chunks = g.chunkify(g.matrices_size)
                    else:
                        chunks = g.chunkify(chunk_size)

                    for chunk in tqdm(chunks, desc=f'{graph.stem}-{grammar.stem}'):
                        algo.clear_src(
                        )  # Attention (TODO): remove this line if you want to cache the result !

                        start = time()
                        res = algo.solve(chunk)
                        finish = time()

                        csv_writer_index.writerow([
                            graph.stem, grammar.stem, chunk_size,
                            finish - start, res.matrix_S.nvals
                        ])
def prepare(self, graph: Graph, grammar: Union[RSM, CFG, Path]):
    """
    Bind a graph and a grammar to this solver and allocate its work matrices.

    The graph's boolean matrices are loaded, the grammar is turned into a
    ``RecursiveAutomaton``, an empty ``LabelGraph`` is created as
    ``part_graph``, and one empty boolean matrix of graph size is allocated
    per index in ``range(self.grammar.matrices_size)``.

    @param graph: graph to run queries on; ``load_bool_graph()`` is called on it
    @param grammar: anything accepted by ``RecursiveAutomaton.from_grammar_or_path``
    """
    self.graph = graph
    self.graph.load_bool_graph()

    self.grammar = RecursiveAutomaton.from_grammar_or_path(grammar)

    self.part_graph = LabelGraph(self.graph.matrices_size)

    # One source matrix per automaton state index.
    self.src_for_states = {
        state: Matrix.sparse(BOOL, self.graph.matrices_size,
                             self.graph.matrices_size)
        for state in range(self.grammar.matrices_size)
    }
def solve(self):
    """
    Run the matrix-based closure over ``self.graph`` and ``self.grammar``.

    Simple rules seed the nonterminal matrices from the graph; complex rules
    ``l -> r1 r2`` are then applied as ``m[l] += m[r1] @ m[r2]`` until a whole
    sweep leaves every non-zero count unchanged.

    @return: the matrix of ``self.grammar.start_nonterm``
    """
    closure = LabelGraph(self.graph.matrices_size)

    for head, terminal in self.grammar.simple_rules:
        closure[head] += self.graph[terminal]

    while True:
        grew = False
        for head, left, right in self.grammar.complex_rules:
            before = closure[head].nvals
            closure[head] += closure[left] @ closure[right]
            if closure[head].nvals != before:
                grew = True
        # A sweep that added nothing means the fixed point is reached.
        if not grew:
            break

    return closure[self.grammar.start_nonterm]
def test_case_1_graph_1(algo):
    """Check single-source answers on the chain graph of test case 1."""
    # (0)-[a]->(1)-[a]->(2)-[b]->(3)-[b]->(4)
    # S -> a S b | a b
    case_dir = LOCAL_CFPQ_DATA.joinpath('test_case_1')
    graph = LabelGraph.from_txt(case_dir.joinpath('Matrices/graph_1.txt'))
    grammar = CnfGrammar.from_cnf(case_dir.joinpath('Grammars/grammar.cnf'))

    single_source: SingleSourceSolver = algo(graph, grammar)

    # The same solver instance is queried repeatedly, in this order.
    expectations = (
        (1, [3]),
        (0, [4]),
    )
    for source, reachable in expectations:
        m, _ = single_source.solve([source])
        assert vecbool_to_list(m[source]) == reachable
def test_single_source_benchmark_total(graph, grammar, algo, chunk_size,
                                       benchmark):
    """
    Benchmark the total time needed to solve every chunk of a graph.

    The run is skipped (plain return) when an explicit ``chunk_size`` exceeds
    the graph's ``matrices_size``.

    @param graph: path of the graph file
    @param grammar: path of the grammar file
    @param algo: solver class, instantiated as ``algo(graph, grammar)``
    @param chunk_size: vertices per chunk; ``None`` means one chunk of ``matrices_size``
    @param benchmark: benchmark fixture providing ``pedantic``
    """
    g = LabelGraph.from_txt(graph)
    gr = CnfGrammar.from_cnf(grammar)

    if chunk_size is None:
        chunks = g.chunkify(g.matrices_size)
    else:
        chunks = g.chunkify(chunk_size)
        if chunk_size > g.matrices_size:
            return

    def run_suite():
        # A fresh solver per round, so every round measures the same work.
        solver = algo(g, gr)
        progress_label = f'{get_file_name(graph)}-{get_file_name(grammar)}-{algo.__name__}-{chunk_size}'
        for chunk in tqdm(chunks, desc=progress_label):
            solver.solve(chunk)

    benchmark.pedantic(run_suite, rounds=5, iterations=1, warmup_rounds=0)
def check_single_source_per_chunk(graph,
                                  grammar,
                                  algo,
                                  chunk_count=None,
                                  verbose=True):
    """
    Compare a single-source solver against ``MatrixBaseAlgo`` chunk by chunk.

    For each chunk of vertices, the rows extracted from the solver's result
    must equal the rows extracted from the reference result.

    @param graph: path of the graph file (string; its last four characters are cut off for the reference solver)
    @param grammar: path of the grammar file (string; same trimming applies)
    @param algo: solver class, instantiated as ``algo(graph, grammar)``
    @param chunk_count: number of chunks to aim for; ``None`` means ``matrices_size``
    @param verbose: wrap the chunk loop in a ``tqdm`` progress bar when true
    """
    g = LabelGraph.from_txt(graph)
    gr = CnfGrammar.from_cnf(grammar)
    a = algo(g, gr)

    # The reference solver gets the paths minus their last four characters.
    # NOTE(review): presumably the ".txt" / ".cnf" extension - confirm.
    expected = MatrixBaseAlgo(graph[:-4], grammar[:-4]).solve()

    parts = g.matrices_size if chunk_count is None else chunk_count
    chunks = g.chunkify(max(g.matrices_size // parts, 1))

    if verbose:
        chunk_iter = tqdm(chunks, desc=get_file_name(graph))
    else:
        chunk_iter = chunks

    for chunk in chunk_iter:
        actual, _ = a.solve(chunk)
        assert actual.extract_matrix(chunk).iseq(expected.extract_matrix(chunk))
def solve(self):
    """
    Run the matrix-based closure, including epsilon rules.

    Epsilon rules put ``True`` on the whole diagonal of their nonterminal,
    simple rules add the matching graph matrix, and complex rules
    ``l -> r1 r2`` are applied with the ``BOOL.ANY_PAIR`` semiring until a
    whole sweep leaves every non-zero count unchanged.

    @return: ``ResultAlgo`` built from the start nonterminal's matrix and the
             number of sweeps performed
    """
    closure = LabelGraph(self.graph.matrices_size)

    for head in self.grammar.eps_rules:
        for vertex in range(closure.matrices_size):
            closure[head][vertex, vertex] = True

    for head, terminal in self.grammar.simple_rules:
        closure[head] += self.graph[terminal]

    sweeps = 0
    while True:
        sweeps += 1
        grew = False
        for head, left, right in self.grammar.complex_rules:
            before = closure[head].nvals
            closure[head] += closure[left].mxm(closure[right],
                                               semiring=BOOL.ANY_PAIR)
            if closure[head].nvals != before:
                grew = True
        # A sweep that added nothing means the fixed point is reached.
        if not grew:
            break

    return ResultAlgo(closure[self.grammar.start_nonterm], sweeps)
def test_case_1_graph_2(algo):
    """Check single-source answers on the tree graph of test case 1."""
    #       (6)
    #      /   \
    #   (2)     (5)
    #   / \     / \
    # (0) (1) (3) (4)
    # Upstream edges labeled by "a", downstream by "b"
    # S -> a S b | a b
    case_dir = LOCAL_CFPQ_DATA.joinpath('test_case_1')
    graph = LabelGraph.from_txt(case_dir.joinpath('Matrices/graph_2.txt'))
    grammar = CnfGrammar.from_cnf(case_dir.joinpath('Grammars/grammar.cnf'))

    single_source: SingleSourceSolver = algo(graph, grammar)

    # The same solver instance is queried repeatedly, in this order.
    expectations = (
        (0, [0, 1, 3, 4]),
        (2, [2, 5]),
    )
    for source, reachable in expectations:
        m, _ = single_source.solve([source])
        assert vecbool_to_list(m[source]) == reachable
def test_single_source_benchmark_granularity(graph, grammar, algo, chunk_size,
                                             result_folder):
    """
    Time every chunk separately and append the timings to a per-graph CSV.

    One row ``[grammar, algo, chunk_size, times]`` is appended to
    ``<result_folder>/<graph name>.csv``; the header row is written first when
    the file did not exist before this call.

    @param graph: path of the graph file
    @param grammar: path of the grammar file
    @param algo: solver class, instantiated as ``algo(graph, grammar)``
    @param chunk_size: vertices per chunk; ``None`` means ``matrices_size``
    @param result_folder: directory receiving the CSV file
    """
    g = LabelGraph.from_txt(graph)
    gr = CnfGrammar.from_cnf(grammar)
    a = algo(g, gr)

    gr_name = get_file_name(grammar)
    a_name = type(a).__name__

    effective_chunk_size = g.matrices_size if chunk_size is None else chunk_size
    chunks = g.chunkify(effective_chunk_size)

    result_file_path = os.path.join(result_folder,
                                    f'{get_file_name(graph)}.csv')

    # Decided before opening, because mode 'a' creates the file.
    append_headers = not os.path.exists(result_file_path)

    with open(result_file_path, mode='a', newline='\n') as csv_file:
        csv_writer = csv.writer(csv_file,
                                delimiter=',',
                                quoting=csv.QUOTE_NONNUMERIC,
                                escapechar=' ')

        timer = SimpleTimer()
        times_of_chunks = []
        for chunk in chunks:
            timer.tic()
            a.solve(chunk)
            times_of_chunks.append(timer.toc())

        if append_headers:
            csv_writer.writerow(['grammar', 'algo', 'chunk_size', 'times'])
        csv_writer.writerow(
            [gr_name, a_name, effective_chunk_size, times_of_chunks])
def solve(self, sources: Iterable): new_sources = LabelGraph(self.graph.matrices_size) # Initialize sources and nonterms nnz # nnz: (l, r1, r2) in complex rules -> (nnz(new[l]), nnz(index[r1]), nnz(index[r2])) nnz = {} for l, r1, r2 in self.grammar.complex_rules: nnz[(l, r1, r2)] = (0, self.nonterminals[r1].nvals, self.nonterminals[r2].nvals) # Initialize source matrices masks m_src = Matrix.sparse(BOOL, self.graph.matrices_size, self.graph.matrices_size) for i in sources: m_src[i, i] = True if (i, i) not in self.sources[self.grammar.start_nonterm]: new_sources[self.grammar.start_nonterm][i, i] = True # Create temporary matrix tmp = Matrix.sparse(BOOL, self.graph.matrices_size, self.graph.matrices_size) # Algo's body changed = True iter = 0 while changed: iter += 1 changed = False # Iterate through all complex rules for l, r1, r2 in self.grammar.complex_rules: # l -> r1 r2 ==> index[l] += (new[l_src] * index[r1]) * index[r2] new_nnz = new_sources[l].nvals, self.nonterminals[ r1].nvals, self.nonterminals[r2].nvals if nnz[(l, r1, r2)] != new_nnz: # 1) new[r1_src] += {(j, j) : (j, j) in new[l_src] and not in index[r1_src]} for i, _, _ in new_sources[l]: if (i, i) not in self.sources[r1]: new_sources[r1][i, i] = True # 2) tmp = new[l_src] * index[r1] new_sources[l].mxm(self.nonterminals[r1], out=tmp, semiring=BOOL.ANY_PAIR) # 3) new[r2_src] += {(j, j) : (i, j) in tmp and not in index[r2_src]} update_sources_opt(tmp, self.sources[r2], new_sources[r2]) # 4) index[l] += tmp * index[r2] self.nonterminals[l] += tmp.mxm(self.nonterminals[r2], semiring=BOOL.ANY_PAIR) # update nnz nnz[(l, r1, r2)] = new_sources[l].nvals, self.nonterminals[ r1].nvals, self.nonterminals[r2].nvals changed = True for n in self.grammar.nonterms: self.sources[n] += new_sources[n] return ResultAlgo(m_src.mxm(self.nonterminals[self.grammar.start_nonterm], semiring=BOOL.ANY_PAIR), iter), \ self.nonterminals[self.grammar.start_nonterm]
def init_simple_rules(rules, graph: Graph):
    """
    Build the initial nonterminal matrices from the simple rules.

    @param rules: iterable of ``(nonterminal, label)`` pairs
    @param graph: graph indexed by label; also supplies ``matrices_size``
    @return: ``LabelGraph`` in which every rule's nonterminal has accumulated
             the graph matrix of the rule's label
    """
    seeded = LabelGraph(graph.matrices_size)
    for head, terminal in rules:
        seeded[head] += graph[terminal]
    return seeded
# Timing script: runs matrix_base_algo once on a fixed graph/grammar pair
# inside a SimpleTimer context.
from src.grammar.cnf_grammar import CnfGrammar
from src.graph.label_graph import LabelGraph
from src.utils.time_profiler import SimpleTimer
from src.algo.matrix_base import matrix_base_algo

# Input files are addressed relative to the current working directory.
g = LabelGraph.from_txt('deps/CFPQ_Data/data/WorstCase/Matrices/worstcase_128.txt')
gr = CnfGrammar.from_cnf('deps/CFPQ_Data/data/WorstCase/Grammars/Brackets.cnf')

# Only the algorithm call is inside the timer; loading happens above.
# NOTE(review): presumably SimpleTimer reports the elapsed time on exit - confirm.
with SimpleTimer():
    m = matrix_base_algo(g, gr)
def __init__(self, graph: LabelGraph, grammar: CnfGrammar):
    """
    Store the graph and grammar and create empty source/nonterminal indexes.

    @param graph: labelled graph; its ``matrices_size`` sizes both indexes
    @param grammar: grammar stored as ``self.grammar``
    """
    self.graph, self.grammar = graph, grammar

    # Both indexes start empty and share the graph's dimension.
    size = graph.matrices_size
    self.sources = LabelGraph(size)
    self.nonterms = LabelGraph(size)
def solve(self):
    """
    Run the Kronecker-product based closure and return the start nonterminal's matrix.

    Each iteration builds a Kronecker product of grammar and graph matrices,
    closes it transitively, cuts out the sub-blocks listed in
    ``self.grammar.states`` for every nonterminal and adds them to
    ``self.graph``. Iteration stops when no graph matrix gained entries, or
    right after the first pass when no nonterminal occurs among the grammar's
    labels.

    @return: ``ResultAlgo`` built from ``self.graph[start_nonterm]`` and the
             number of iterations performed
    """
    # NOTE(review): presumably adds edges for epsilon-derivable nonterminals
    # to the graph - confirm against restore_eps_paths.
    restore_eps_paths(self.grammar.start_and_finish, self.graph)

    # Dimension of the Kronecker product: grammar size times graph size.
    sizeKron = self.graph.matrices_size * self.grammar.matrices_size

    prev_kron = Matrix.sparse(BOOL, sizeKron, sizeKron)
    iter = 0
    # block[nonterminal] holds the edges extracted for that nonterminal;
    # from the second iteration on it feeds the next Kronecker product.
    block = LabelGraph(self.graph.matrices_size)
    changed = True
    first_iter = True
    while changed:
        changed = False
        iter += 1

        kron = Matrix.sparse(BOOL, sizeKron, sizeKron)

        if first_iter:
            # First pass: use the whole graph for every grammar label.
            for label in self.grammar.labels:
                kron += self.grammar[label].kronecker(self.graph[label])
        else:
            # Later passes: use only the blocks from the previous pass, and
            # reset each block to an empty matrix once it has been consumed.
            for nonterminal in block.matrices:
                kron += self.grammar[nonterminal].kronecker(block[nonterminal])
                block[nonterminal] = Matrix.sparse(BOOL, self.graph.matrices_size, self.graph.matrices_size)

        # The return value is not used.
        # NOTE(review): presumably closes kron in place - confirm.
        transitive_closure(kron)

        if not first_iter:
            # Combine this pass's closure with the accumulated one from
            # earlier passes by mixing products of the two.
            part = prev_kron.mxm(kron, semiring=BOOL.ANY_PAIR)
            with BOOL.ANY_PAIR:
                kron += prev_kron + part @ prev_kron + part + kron @ prev_kron

        prev_kron = kron

        for nonterminal in self.grammar.nonterminals:
            for element in self.grammar.states[nonterminal]:
                # element is an (i, j) pair of grammar-side indices selecting
                # one graph-sized sub-block of kron.
                i = element[0]
                j = element[1]

                start_i = i * self.graph.matrices_size
                start_j = j * self.graph.matrices_size

                # Entry count before the update, used to detect growth.
                control_sum = self.graph[nonterminal].nvals

                # NOTE(review): the "- 1" on both slice ends assumes the matrix
                # library treats slice bounds as inclusive - confirm.
                if first_iter:
                    block[nonterminal] += kron[start_i:start_i + self.graph.matrices_size - 1,
                                          start_j:start_j + self.graph.matrices_size - 1]
                else:
                    new_edges = kron[start_i:start_i + self.graph.matrices_size - 1,
                                start_j:start_j + self.graph.matrices_size - 1]
                    # NOTE(review): presumably keeps only the entries absent
                    # from the current block - confirm the semantics of "-"
                    # and select('==', True) on these matrices.
                    part = new_edges - block[nonterminal]
                    block[nonterminal] += part.select('==', True)

                self.graph[nonterminal] += block[nonterminal]
                new_control_sum = self.graph[nonterminal].nvals

                if new_control_sum != control_sum:
                    changed = True

        first_iter = False

        # No nonterminal is used as a label, so one pass is taken as final.
        # NOTE(review): presumably because new nonterminal edges then cannot
        # enable further paths - confirm.
        if self.grammar.nonterminals.isdisjoint(self.grammar.labels):
            break

    return ResultAlgo(self.graph[self.grammar.start_nonterm], iter)
def __init__(self, path_to_graph: Path, path_to_grammar: Path):
    """
    Load the graph and grammar from extension-less paths.

    ``".txt"`` is appended to the graph path and ``".cnf"`` to the grammar
    path before loading.

    @param path_to_graph: graph file path without the ``.txt`` suffix
    @param path_to_grammar: grammar file path without the ``.cnf`` suffix
    """
    super().__init__(path_to_graph, path_to_grammar)

    graph_file = f"{path_to_graph}.txt"
    self.graph = LabelGraph.from_txt(graph_file)

    grammar_file = f"{path_to_grammar}.cnf"
    self.grammar = CnfGrammar.from_cnf(grammar_file)