def hellings(g: LabelGraph, gr: WeakCNF):
    """Answer a context-free path query by naive fixpoint iteration.

    One boolean matrix is kept per grammar variable.  The matrices are
    seeded from the graph edges through the unit productions, optionally
    from the empty word, and then grown with the binary productions until
    a full pass over the grammar adds nothing new.

    Args:
        g: Graph exposing ``size``, ``labels`` and a boolean adjacency
            matrix per label through ``g[label]``.
        gr: Grammar exposing ``variables``, ``productions``,
            ``start_symbol`` and ``generate_epsilon()``.  Only productions
            whose body has length 1 or 2 are used.

    Returns:
        The set of ``(i, j)`` index pairs stored in the matrix of
        ``gr.start_symbol`` once the iteration has converged.
    """

    def nonzero_pairs(matrix):
        # (row, column) index lists of the non-zero entries, zipped lazily.
        return zip(*matrix.select(lib.GxB_NONZERO).to_lists()[:2])

    result = LabelGraph(g.size)
    for variable in gr.variables:
        result.dict[variable] = Matrix.sparse(BOOL, g.size, g.size)

    # Seed: an edge (i, j) labelled `a` gives A(i, j) for every A -> a.
    for label in g.labels:
        terminal = Terminal(label)
        result[terminal] = g[label].dup()
        # The matching heads do not depend on the edge, so find them once
        # per label instead of rescanning the grammar for every edge.
        unit_heads = [
            production.head
            for production in gr.productions
            if len(production.body) == 1 and production.body[0] == terminal
        ]
        for i, j, _ in zip(*result[terminal].select(lib.GxB_NONZERO).to_lists()):
            for head in unit_heads:
                result.dict[head][i, j] = True

    # The empty word relates every vertex to itself.
    if gr.generate_epsilon():
        for i in range(g.size):
            result.dict[gr.start_symbol][i, i] = True

    binary_productions = [p for p in gr.productions if len(p.body) == 2]

    changing = True
    while changing:
        changing = False
        for p in binary_productions:
            # Snapshot both operands before touching the head matrix.
            # Entries added during this pass are picked up by the next
            # pass, which is guaranteed to run because `changing` is set.
            left_pairs = list(nonzero_pairs(result.dict[p.body[0]]))
            targets_by_source = {}
            for l, j in nonzero_pairs(result.dict[p.body[1]]):
                targets_by_source.setdefault(l, []).append(j)

            head_matrix = result.dict[p.head]
            # Constant-time membership instead of re-listing the matrix
            # for every candidate pair.
            known = set(nonzero_pairs(head_matrix))

            for i, k in left_pairs:
                for j in targets_by_source.get(k, ()):
                    if (i, j) not in known:
                        known.add((i, j))
                        head_matrix[i, j] = True
                        changing = True

    return set(zip(*result.dict[gr.start_symbol].to_lists()[:2]))
def tensor_rsa_cfpq(g: LabelGraph, gr: RegexCFG):
    """Answer a context-free path query using a recursive state automaton.

    All boxes of ``gr`` are laid out in one automaton graph with globally
    numbered states.  The graph/automaton intersection is then closed
    transitively over and over; each time a closure entry links a start
    state to a final state of a box, the corresponding nonterminal edge is
    added to the working copy of the graph.  The loop ends when the closure
    stops gaining entries.

    Args:
        g: Input graph; it is duplicated, the original is not modified here.
        gr: Grammar exposing ``boxes`` (nonterminal name -> automata) and
            ``start_symbol``.

    Returns:
        The set of ``(i, j)`` index pairs stored for ``gr.start_symbol``.
    """
    m = g.dup()

    total_states = sum(
        len(box.states) for nonterminal in gr.boxes for box in gr.boxes[nonterminal]
    )
    rsa = LabelGraph(total_states)

    # (global start state, global final state) -> nonterminal of the box
    heads = {}
    next_id = 0
    for nonterminal in gr.boxes:
        for box in gr.boxes[nonterminal]:
            # Map the box-local states to consecutive global numbers.
            ids = {}
            for state in box.states:
                if state not in ids:
                    ids[state] = next_id
                    next_id += 1
                if state in box.final_states:
                    rsa.final_states.add(ids[state])

            start_id = ids[box.start_state]
            rsa.start_states.add(start_id)

            # A box that accepts the empty word relates each vertex to itself.
            if box.start_state in box.final_states:
                for vertex in m.vertices:
                    m[Variable(nonterminal)][vertex, vertex] = True

            for final_state in box.final_states:
                heads[(start_id, ids[final_state])] = Variable(nonterminal)

            transitions = box._transition_function._transitions
            for source in transitions:
                for symbol in transitions[source]:
                    target = transitions[source][symbol]
                    text = symbol.value
                    # Lower-case symbols are stored under their plain
                    # value, anything else under a Variable of that name.
                    if text == text.lower():
                        rsa[text][ids[source], ids[target]] = True
                    else:
                        rsa[Variable(text)][ids[source], ids[target]] = True

    closure = m.get_intersection(rsa).get_transitive_closure()
    while True:
        entries_before = closure.nvals

        for row, col, _ in zip(*closure.select(lib.GxB_NONZERO).to_lists()):
            # A closure index is split into (graph vertex, automaton state).
            src_vertex, src_state = divmod(row, rsa.size)
            dst_vertex, dst_state = divmod(col, rsa.size)
            if not (src_vertex in m.start_states and src_state in rsa.start_states):
                continue
            if not (dst_vertex in m.final_states and dst_state in rsa.final_states):
                continue
            m[heads[(src_state, dst_state)]][src_vertex, dst_vertex] = True

        intersection = m.get_intersection(rsa)
        for label in intersection.labels:
            closure += intersection[label]
        closure = transitive_closure(closure)

        if closure.nvals == entries_before:
            break

    return set(zip(*m[gr.start_symbol].to_lists()[:2]))
def tensor_cfg_cfpq(g: LabelGraph, gr: CFG):
    """Answer a context-free path query using one linear automaton per rule.

    Every production becomes a chain of ``len(body) + 1`` states whose
    edges spell the body.  The graph/automaton intersection is then closed
    transitively over and over; each closure entry linking the first state
    of a chain to its last state adds an edge labelled with the production
    head to the working copy of the graph.  The loop ends when the closure
    stops gaining entries.

    Args:
        g: Input graph; it is duplicated, the original is not modified here.
        gr: Grammar exposing ``productions`` and ``start_symbol``.

    Returns:
        The set of ``(i, j)`` index pairs stored for ``gr.start_symbol``.
    """
    m = g.dup()

    total_states = sum(len(production.body) + 1 for production in gr.productions)
    rsa = LabelGraph(total_states)

    # (first chain state, last chain state) -> head of the production
    heads = {}
    next_state = 0
    for production in gr.productions:
        body = production.body
        first = next_state
        last = first + len(body)

        rsa.start_states.add(first)
        heads[(first, last)] = production.head

        # An empty body relates every vertex to itself.
        if len(body) == 0:
            for vertex in m.vertices:
                m[production.head][vertex, vertex] = True

        for offset, unit in enumerate(body):
            # Terminals are stored under their value, variables as-is.
            edge_label = unit.value if isinstance(unit, Terminal) else unit
            rsa[edge_label][first + offset, first + offset + 1] = True

        rsa.final_states.add(last)
        next_state = last + 1

    closure = m.get_intersection(rsa).get_transitive_closure()
    while True:
        entries_before = closure.nvals

        for row, col, _ in zip(*closure.select(lib.GxB_NONZERO).to_lists()):
            # A closure index is split into (graph vertex, automaton state).
            src_vertex, src_state = divmod(row, rsa.size)
            dst_vertex, dst_state = divmod(col, rsa.size)
            if not (src_vertex in m.start_states and src_state in rsa.start_states):
                continue
            if not (dst_vertex in m.final_states and dst_state in rsa.final_states):
                continue
            m[heads[(src_state, dst_state)]][src_vertex, dst_vertex] = True

        intersection = m.get_intersection(rsa)
        for label in intersection.labels:
            closure += intersection[label]
        closure = transitive_closure(closure)

        if closure.nvals == entries_before:
            break

    return set(zip(*m[gr.start_symbol].to_lists()[:2]))
def test_1(tmp_path):
    """On the chain 0-a->1-a->2-a->3, the regex ``(a)(a)`` matches (0, 2) and (1, 3)."""
    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(['0 a 1', '1 a 2', '2 a 3']))
    regex_path = tmp_path / 'regex.txt'
    regex_path.write_text('(a)(a)')

    graph = LabelGraph.from_txt(graph_path)
    query = LabelGraph.from_regex(regex_path)
    actual = rpq(graph, query)

    expected = Matrix.sparse(BOOL, 4, 4)
    for source, target in ((0, 2), (1, 3)):
        expected[source, target] = True

    assert expected.iseq(actual)
def test_auto(automatic_suite, tmp_path):
    """Compare ``rpq`` with a brute-force oracle on a generated suite.

    The oracle collects, for each vertex pair, a set of label strings
    obtained by concatenating strings through every intermediate vertex in
    turn.  A pair is expected in the answer when the regex automaton
    accepts at least one of its strings.
    """
    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(automatic_suite['edges']))
    regex_path = tmp_path / 'regex.txt'
    regex_path.write_text(automatic_suite['regex'])

    g = LabelGraph.from_txt(graph_path)
    r = LabelGraph.from_regex(regex_path)
    actual = rpq(g, r)

    # (source, target) -> set of label strings seen between the two vertices
    words = {}
    for label in g.labels:
        for src, dst, _ in zip(*g[label].select(lib.GxB_NONZERO).to_lists()):
            words.setdefault((src, dst), set()).add(label)

    for mid in range(g.size):
        for src in range(g.size):
            for dst in range(g.size):
                if (src, mid) not in words or (mid, dst) not in words:
                    continue
                joined = {
                    prefix + suffix
                    for prefix, suffix in product(words[src, mid], words[mid, dst])
                }
                words.setdefault((src, dst), set()).update(joined)

    expected = Matrix.sparse(BOOL, g.size, g.size)
    for src in range(g.size):
        for dst in range(g.size):
            if any(r.accepts(word) for word in words.get((src, dst), ())):
                expected[src, dst] = True

    assert expected.iseq(actual)
def test_2(tmp_path):
    """Check ``rpq`` with ``(a+)(d|b)(a)(b)`` on a fixed 11-vertex graph."""
    edge_lines = [
        '0 a 1', '1 e 4', '8 c 4', '7 a 8',
        '4 e 7', '1 a 5', '5 a 7', '1 b 2',
        '2 d 5', '0 b 3', '2 a 3', '3 b 6',
        '6 a 7', '7 b 9', '9 b 10', '5 d 6',
    ]
    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(edge_lines))
    regex_path = tmp_path / 'regex.txt'
    regex_path.write_text('(a+)(d|b)(a)(b)')

    graph = LabelGraph.from_txt(graph_path)
    query = LabelGraph.from_regex(regex_path)
    actual = rpq(graph, query)

    expected = Matrix.sparse(BOOL, 11, 11)
    for source, target in ((0, 6), (1, 9), (2, 9)):
        expected[source, target] = True

    assert expected.iseq(actual)
def test_manual_cfpq(manual_suite_cfpq, cfpq_algo, tmp_path):
    """Run one CFPQ algorithm on a hand-written suite and compare the answer.

    ``tensor_rsa_cfpq`` receives the grammar as parsed; every other
    algorithm receives the result of ``to_cnf()``.
    """
    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(manual_suite_cfpq['edges']))
    graph = LabelGraph.from_txt(graph_path)

    grammar = RegexCFG.from_text(manual_suite_cfpq['cnf'])
    if cfpq_algo.__name__ != 'tensor_rsa_cfpq':
        grammar = grammar.to_cnf()

    actual = cfpq_algo(graph, grammar)
    assert actual == manual_suite_cfpq['expected']
def test_benchmark_rpq(algo, graph, grammar):
    """Time one CFPQ algorithm on one graph/grammar pair and log it to CSV.

    One row is appended to ``./benchmark_cfpq/results/<graph name>.csv``;
    the header row is written first when the file does not exist yet.

    Args:
        algo: Mapping with ``'name'`` and the callable under ``'algo'``.
        graph: Mapping with ``'graph'`` (passed to ``LabelGraph.from_txt``),
            ``'name'`` and ``'filename'``.
        grammar: Mapping with ``'grammar'`` (passed to
            ``RegexCFG.from_txt``) and ``'name'``.
    """
    algo_name = algo['name']
    g = LabelGraph.from_txt(graph['graph'])
    g_name = graph['name']
    g_filename = graph['filename']

    # Only tensor_rsa_cfpq takes the grammar as parsed; the rest get to_cnf().
    r = RegexCFG.from_txt(grammar['grammar'])
    if algo_name != 'tensor_rsa_cfpq':
        r = r.to_cnf()
    r_name = grammar['name']

    result_file_path = f'./benchmark_cfpq/results/{g_name}.csv'
    # A fresh checkout may lack the results directory; without this the
    # open() calls below fail before anything is measured.
    os.makedirs(os.path.dirname(result_file_path), exist_ok=True)

    headers = [
        'Algorithm',
        'Graph',
        'Graph filename',
        'Grammar',
        'Time (in microseconds)',
        'Control sum',
    ]
    if not os.path.exists(result_file_path):
        with open(result_file_path, mode='w+', newline='\n') as f:
            csv_writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_NONNUMERIC, escapechar=' ')
            csv_writer.writerow(headers)

    # perf_counter_ns is monotonic; time_ns follows the wall clock and can
    # jump when the system time is adjusted, corrupting the measurement.
    start_time = time.perf_counter_ns()
    res = algo['algo'](g, r)
    end_time = time.perf_counter_ns()
    result_time = (end_time - start_time) // (10**3)  # nanoseconds -> microseconds

    with open(result_file_path, mode='a+', newline='\n', buffering=1) as f:
        csv_writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_NONNUMERIC, escapechar=' ')
        csv_writer.writerow([algo_name, g_name, g_filename, r_name, result_time, len(res)])
def mxm_cfpq(g: LabelGraph, gr: WeakCNF):
    """Answer a context-free path query by repeated matrix multiplication.

    Unit productions seed the matrix of their head from the graph matrix
    of the terminal's value.  Binary productions are then applied as
    ``head += left @ right`` until a full pass changes no entry count.

    Args:
        g: Graph exposing ``size`` and a matrix per label through ``g[label]``.
        gr: Grammar exposing ``productions``, ``start_symbol`` and
            ``generate_epsilon()``.

    Returns:
        The set of ``(i, j)`` index pairs stored for ``gr.start_symbol``.
    """
    m = LabelGraph(g.size)

    binary_rules = set()
    for rule in gr.productions:
        body_length = len(rule.body)
        if body_length == 1:
            m[rule.head] += g[rule.body[0].value]
        elif body_length == 2:
            binary_rules.add(rule)

    # The empty word relates every vertex to itself.
    if gr.generate_epsilon():
        for vertex in range(g.size):
            m[gr.start_symbol][vertex, vertex] = True

    keep_going = True
    while keep_going:
        keep_going = False
        for rule in binary_rules:
            before = m[rule.head].nvals
            m[rule.head] += m[rule.body[0]] @ m[rule.body[1]]
            after = m[rule.head].nvals
            if after != before:
                keep_going = True

    return set(zip(*m[gr.start_symbol].to_lists()[:2]))
help='path to graph.txt file') parser.add_argument('--regex', required=True, type=str, help='path to regex.txt file') parser.add_argument('--sources', required=False, type=str, help='path to sources.txt file') parser.add_argument('--destinations', required=False, type=str, help='path to destinations.txt file') args = parser.parse_args() g = LabelGraph.from_txt(args.graph) r = LabelGraph.from_regex(args.regex) print(str(args.graph) + " " + str(args.regex)) time_sum_1 = 0 time_sum_2 = 0 for i in range(5): time_1 = timeit.default_timer() res_1 = rpq(g, r) time_sum_1 += timeit.default_timer() - time_1 time_2 = timeit.default_timer() res_2 = rpq_with_linear_tc(g, r) time_sum_2 += timeit.default_timer() - time_2