def random_data(request):
    """Build a random labelled graph and a regex automaton for one test case.

    ``request.param`` is unpacked as ``(vertices_num, regex_str)``
    (presumably supplied by a parametrised pytest fixture -- the decorator
    is not visible here).

    Returns:
        A ``(graph, regex)`` tuple produced by ``BMGraph.from_edges_list``
        and ``BMGraph.from_regex_string`` respectively.
    """
    vertices_num, regex_str = request.param
    edges_num = vertices_num * (vertices_num - 1) // 5
    alphabet = ['a', 'b', 'c', 'd']

    def draw_vertices():
        # random.randint is inclusive on both ends, so ids span
        # 0..vertices_num (vertices_num + 1 distinct values).
        # NOTE(review): confirm that the extra id is intended.
        return [random.randint(0, vertices_num) for _ in range(edges_num)]

    # Draw order (sources, targets, labels) is kept so that a seeded RNG
    # yields the same data as before.
    sources = draw_vertices()
    targets = draw_vertices()
    labels = [random.choice(alphabet) for _ in range(edges_num)]

    graph = BMGraph.from_edges_list(zip(sources, labels, targets))
    regex = BMGraph.from_regex_string(regex_str)
    return graph, regex
def test_hellings_2():
    """Hellings on the ``cfpq/test2`` data must yield exactly the expected pairs."""
    data_dir = os.path.join(os.getcwd(), 'tests/data/cfpq/test2')
    graph_file = os.path.join(data_dir, 'graph.txt')
    grammar_file = os.path.join(data_dir, 'grammar.txt')

    graph = BMGraph.from_edges_file(graph_file)
    grammar = GrammarAlgos.from_grammar_file(grammar_file)

    result_matrix = GrammarAlgos.Hellings(grammar, graph)
    found_pairs = set(BMGraph.get_reachable_vertices(result_matrix))

    assert {(0, 1), (3, 3)} == found_pairs
def test_cfpq_matrix_1():
    """Matrix-multiplication CFPQ on ``cfpq/test1`` must match the known answer."""
    data_dir = os.path.join(os.getcwd(), 'tests/data/cfpq/test1')

    def in_data_dir(name):
        return os.path.join(data_dir, name)

    graph = BMGraph.from_edges_file(in_data_dir('graph.txt'))
    grammar = GrammarAlgos.from_grammar_file(in_data_dir('grammar.txt'))

    result_matrix = GrammarAlgos.cfpq_matrix_multiplication(grammar, graph)
    found_pairs = set(BMGraph.get_reachable_vertices(result_matrix))

    wanted_pairs = {(0, 2), (0, 3), (1, 2), (1, 3), (2, 2), (2, 3)}
    assert wanted_pairs == found_pairs
def test_cfpq_tensor_3():
    """Tensor-product CFPQ on ``cfpq/test3`` must match the known answer."""
    cwd = os.getcwd()
    graph_file = os.path.join(cwd, 'tests/data/cfpq/test3', 'graph.txt')
    grammar_file = os.path.join(cwd, 'tests/data/cfpq/test3', 'grammar.txt')

    graph = BMGraph.from_edges_file(graph_file)
    grammar = GrammarAlgos.from_grammar_file(grammar_file)

    found_pairs = set(
        BMGraph.get_reachable_vertices(
            GrammarAlgos.cfpq_tensor_product(grammar, graph)))

    assert {(0, 0), (1, 1), (0, 2), (3, 3), (2, 2)} == found_pairs
def test_intersection_1():
    """The graph/regex intersection for ``data/test1`` accepts only runs of ``a``.

    Checks the empty word, ``a`` and ``aaa`` are accepted and ``b`` is not.
    """
    data_dir = os.path.join(os.getcwd(), 'tests/data/test1')
    graph = BMGraph.from_edges_file(os.path.join(data_dir, 'graph.txt'))
    regex = BMGraph.from_regex_file(os.path.join(data_dir, 'regex.txt'))

    automaton = graph.intersect(regex).to_automaton()
    a, b = Symbol('a'), Symbol('b')

    for accepted_word in ([], [a], [a, a, a]):
        assert automaton.accepts(accepted_word)
    assert not automaton.accepts([b])
def main():
    """Command-line entry point: intersect a graph with a regex and report on it.

    Reads the graph and regex from the two positional paths, optionally
    overrides the graph's start/final states from ``--from`` / ``--to``,
    then prints the edge count per label of the intersection followed by
    every reachable (start, final) pair.
    """
    parser = ArgumentParser(
        description="""Intersect graph and regex, and show reachability of graph vertices""")
    parser.add_argument(
        'path_to_graph',
        help="Path to graph represented in 'from value to' tuples")
    parser.add_argument('path_to_regex', help='Path to regex')
    parser.add_argument(
        '--from',
        dest='vertices_from',
        help="Optional: source vertices from graph in 'v1 v2 v3 ...' form")
    parser.add_argument(
        '--to',
        dest='vertices_to',
        help="Optional: destination vertices from graph in 'v1 v2 v3 ...' form")
    args = parser.parse_args()

    graph = BMGraph.from_edges_file(args.path_to_graph)
    regex = BMGraph.from_regex_file(args.path_to_regex)

    # Explicit --from / --to lists replace the graph's own start/final states.
    if args.vertices_from is not None:
        graph.start_states = read_vertices(args.vertices_from)
    if args.vertices_to is not None:
        graph.final_states = read_vertices(args.vertices_to)

    intersection = graph.intersect(regex)

    print('Edges for each label:')
    for label, label_matrix in intersection.matrices.items():
        print('{} has {} edges'.format(label, label_matrix.nvals))

    closure = intersection.transitive_closure()

    print('Reachable vertices:')
    for v_from, v_to in BMGraph.get_reachable_vertices(closure):
        if (v_from not in intersection.start_states
                or v_to not in intersection.final_states):
            continue
        # Integer division by the regex state count maps an intersection
        # state back to a graph vertex (assumes intersection ids are
        # graph_vertex * regex.states_amount + regex_state -- TODO confirm).
        print('{} -> {}'.format(v_from // regex.states_amount,
                                v_to // regex.states_amount))
def cfpq_tensor_product(grammar: CFG, graph: BMGraph):
    """Evaluate a context-free path query via repeated automaton intersection.

    The grammar is turned into a recursive automaton (one linear chain of
    states per production), which is intersected with a working copy of the
    graph. Every closure entry that connects the first and last state of a
    production chain adds an edge labelled with that production's head to
    the working copy. This repeats until an iteration adds no edge.

    Args:
        grammar: Context-free grammar; ``productions`` and ``start_symbol``
            are read, and each production's ``head.value`` and ``body``.
        graph: Labelled graph; it is duplicated with ``dup()`` and the
            duplicate is the one extended.

    Returns:
        The boolean matrix stored for the grammar's start symbol, or an
        empty ``graph.states_amount`` x ``graph.states_amount`` matrix when
        no such matrix exists.
    """
    size = graph.states_amount

    def empty_matrix():
        return Matrix.sparse(BOOL, size, size)

    res = graph.dup()
    productions = grammar.productions

    # --- Build the recursive automaton: one chain of states per production.
    rfa = BMGraph()
    rfa.states_amount = sum(len(prod.body) + 1 for prod in productions)
    rfa.states = set(range(rfa.states_amount))
    # (chain start, chain end) -> label of the production's head.
    rfa_heads = {}

    index = 0
    for prod in productions:
        start_state = index
        final_state = index + len(prod.body)
        rfa.start_states.add(start_state)
        rfa.final_states.add(final_state)
        rfa_heads[(start_state, final_state)] = prod.head.value
        for symbol in prod.body:
            label = symbol.value
            # Compare against None rather than using `or`: the truthiness
            # of a matrix object is not something to rely on here.
            matrix = rfa.matrices.get(label)
            if matrix is None:
                matrix = Matrix.sparse(BOOL, rfa.states_amount,
                                       rfa.states_amount)
            matrix[index, index + 1] = True
            rfa.matrices[label] = matrix
            index += 1
        # Skip past the chain's last state so chains do not share states.
        index += 1

    # --- Epsilon productions: the head labels a self-loop on every vertex.
    for prod in productions:
        if prod.body:
            continue
        identity = empty_matrix()
        for vertex in range(size):
            identity[vertex, vertex] = True
        # Keyed by the head's string value, like every other label in this
        # function, instead of by the Variable object itself.
        res.matrices[prod.head.value] = identity

    # --- Fixpoint: add head-labelled edges until nothing new appears.
    is_changing = True
    while is_changing:
        is_changing = False
        intersection = rfa.intersect(res)
        closure = intersection.transitive_closure()
        for i, j, _ in zip(*closure.to_lists()):
            # Split each intersection index into (automaton state, graph
            # vertex) using the graph's state count as the stride.
            rfa_from, graph_from = divmod(i, res.states_amount)
            rfa_to, graph_to = divmod(j, res.states_amount)
            if (rfa_from, rfa_to) not in rfa_heads:
                continue
            head_label = rfa_heads[(rfa_from, rfa_to)]
            # Allocate a matrix only when the label is genuinely new,
            # rather than building a throwaway default per closure entry.
            matrix = res.matrices.get(head_label)
            if matrix is None:
                matrix = empty_matrix()
            if matrix.get(graph_from, graph_to) is None:
                is_changing = True
                matrix[graph_from, graph_to] = True
                res.matrices[head_label] = matrix

    start_symbol = grammar.start_symbol
    start_label = start_symbol.value if start_symbol is not None else None
    answer = res.matrices.get(start_label)
    return answer if answer is not None else empty_matrix()
def closure_benchmark(): path_to_data = os.path.join(os.getcwd(), 'benchmarks/refinedDataForRPQ') tests = ['LUBM1.9M'] for test in tests: path_to_test = os.path.join(path_to_data, test) graph = BMGraph.from_edges_file('{}/{}.txt'.format(path_to_test, test)) output = open('{}/output.csv'.format(path_to_test), 'w+') regex_dir = os.path.join(path_to_test, 'regexes') for filename in os.listdir(regex_dir): closure = None res = None regex_str = os.path.join(regex_dir, filename) regex = BMGraph.from_regex_file(regex_str, False) sum = 0 for _ in range(5): start = time.monotonic() res = graph.intersect(regex) closure = transitive_closure_sq(res) end = time.monotonic() sum += end - start sq_pairs = closure.nvals sq_time = sum / 5 start = time.monotonic() for (value, matrix) in res.matrices.items(): v, n = value, matrix.nvals end = time.monotonic() sq_pairs_time = end - start sum = 0 for _ in range(5): start = time.monotonic() res = graph.intersect(regex) closure = transitive_closure_mp(res) end = time.monotonic() sum += end - start mp_pairs = closure.nvals mp_time = sum / 5 start = time.monotonic() for (value, matrix) in res.matrices.items(): v, n = value, matrix.nvals end = time.monotonic() mp_pairs_time = end - start if (sq_pairs != mp_pairs): print('{}, {}, {}'.format(sq_pairs, mp_pairs, filename)) assert sq_pairs == mp_pairs output.write( '{:s},{:s},{:d},{:.3f},{:.3f},{:d},{:.3f},{:.3f}\r\n'.format( test, filename, sq_pairs, sq_time, sq_pairs_time, mp_pairs, mp_time, mp_pairs_time)) output.close()
def cfpq_benchmark(): path_to_data = os.path.join(os.getcwd(), 'benchmarks/dataForCFPQ') tests = ['FullGraph', 'MemoryAliases', 'WorstCase'] for test in tests: path_to_test = os.path.join(path_to_data, test) output = open('{}/output.csv'.format(path_to_test), 'w+') output.write('test,graph,grammar,algo_name,algo_time\r\n') graph_dir = os.path.join(path_to_test, 'graphs') # for graph_name in sorted(os.listdir(graph_dir), key=lambda s: int(s.split('_')[1])): for graph_name in os.listdir(graph_dir): graph_path = os.path.join(graph_dir, graph_name) graph = BMGraph.from_edges_file(graph_path) grammar_dir = os.path.join(path_to_test, 'grammars') for grammar_name in os.listdir(grammar_dir): grammar_path = os.path.join(grammar_dir, grammar_name) grammar = GrammarAlgos.from_grammar_file(grammar_path) start = time.monotonic() hellings_res = GrammarAlgos.Hellings(grammar, graph) end = time.monotonic() algo_name = 'hellings' algo_time = end - start res_str = '{:s},{:s},{:s},{:s},{:.3f}\r\n'.format( test, graph_name, grammar_name, algo_name, algo_time) print(res_str) output.write(res_str) start = time.monotonic() mult_res = GrammarAlgos.cfpq_matrix_multiplication( grammar, graph) end = time.monotonic() algo_name = 'mult' algo_time = end - start res_str = '{:s},{:s},{:s},{:s},{:.3f}\r\n'.format( test, graph_name, grammar_name, algo_name, algo_time) print(res_str) output.write(res_str) start = time.monotonic() tensor_res = GrammarAlgos.cfpq_tensor_product(grammar, graph) end = time.monotonic() algo_name = 'tensor' algo_time = end - start res_str = '{:s},{:s},{:s},{:s},{:.3f}\r\n'.format( test, graph_name, grammar_name, algo_name, algo_time) print(res_str) output.write(res_str) wcnf = GrammarAlgos.to_wcnf(grammar) start = time.monotonic() tensor_wcnf_res = GrammarAlgos.cfpq_tensor_product(wcnf, graph) end = time.monotonic() algo_name = 'tensor_wcnf' algo_time = end - start res_str = '{:s},{:s},{:s},{:s},{:.3f}\r\n'.format( test, graph_name, grammar_name, algo_name, algo_time) 
print(res_str) output.write(res_str) assert (hellings_res == mult_res) assert (mult_res == tensor_res) assert (tensor_res == tensor_wcnf_res) output.close()