def cfpq_hellings(g: LabelGraph, cfg: GrammarCNF):
    """Answer a context-free path query on ``g`` with a worklist closure.

    A fresh ``LabelGraph`` receives one boolean ``num_vert x num_vert`` matrix
    per grammar variable plus a copy of every terminal matrix of ``g``.  The
    matrices are seeded from the epsilon rule and the single-symbol
    productions, and then closed under ``cfg.pair_productions``: whenever
    edges ``B: i -> k`` and ``C: k -> j`` are both known and ``A -> B C`` is a
    pair production, the edge ``A: i -> j`` is added.

    Args:
        g: Graph to query; ``g.graph_dict`` maps each label to its matrix and
            ``g.get_edges(label)`` yields ``(from, to)`` vertex pairs.
        cfg: Grammar supplying ``start_symbol``, ``variables``,
            ``productions``, ``pair_productions`` and ``generate_epsilon()``.

    Returns:
        The matrix stored for the grammar's start symbol.
    """
    num_vert = g.num_vert
    start_sym = cfg.start_symbol

    result = LabelGraph()
    result.num_vert = num_vert
    for variable in cfg.variables:
        result.graph_dict[variable] = Matrix.sparse(BOOL, num_vert, num_vert)

    # If the grammar generates the empty word, every vertex reaches itself.
    if cfg.generate_epsilon():
        for v in range(num_vert):
            result.graph_dict[start_sym][v, v] = True

    # Copy terminal matrices and apply single-symbol productions.
    for label in g.graph_dict:
        term = Terminal(label)
        result.graph_dict[term] = g.graph_dict[label].dup()
        # The matching heads do not depend on the edge, so find them once
        # per label instead of once per edge.
        unit_heads = [
            production.head
            for production in cfg.productions
            if len(production.body) == 1 and production.body[0] == term
        ]
        for v_from, v_to in g.get_edges(label):
            for head in unit_heads:
                result.graph_dict[head][v_from, v_to] = True

    # ``known`` mirrors the contents of the result matrices as
    # (label, from, to) triples so that "is this edge already present?" is a
    # set lookup rather than a fresh scan of the label's edge list.
    known = set()
    m = deque()
    for label in result.graph_dict:
        for i, j in result.get_edges(label):
            fact = (label, i, j)
            known.add(fact)
            m.append(fact)

    def add_fact(head, i, j):
        """Record edge ``head: i -> j`` and enqueue it if it is new."""
        fact = (head, i, j)
        if fact not in known:
            known.add(fact)
            result.graph_dict[head][i, j] = True
            m.append(fact)

    pair_productions = cfg.pair_productions
    while m:
        var, v, u = m.popleft()
        # Filter the productions once per popped edge, not once per
        # neighbouring edge.
        var_on_right = [p for p in pair_productions if p.body[1] == var]
        var_on_left = [p for p in pair_productions if p.body[0] == var]

        # Extend to the left: (var_left: v_new -> v) followed by (var: v -> u).
        if var_on_right:
            for var_left in result.graph_dict:
                heads = [p.head for p in var_on_right
                         if p.body[0] == var_left]
                if not heads:
                    continue
                # Collect the matching sources before writing, so the matrix
                # being scanned is never modified mid-iteration.
                starts = [v_new
                          for v_new, v_ in result.get_edges(var_left)
                          if v_ == v]
                for v_new in starts:
                    for head in heads:
                        add_fact(head, v_new, u)

        # Extend to the right: (var: v -> u) followed by (var_right: u -> u_new).
        if var_on_left:
            for var_right in result.graph_dict:
                heads = [p.head for p in var_on_left
                         if p.body[1] == var_right]
                if not heads:
                    continue
                ends = [u_new
                        for u_, u_new in result.get_edges(var_right)
                        if u_ == u]
                for u_new in ends:
                    for head in heads:
                        add_fact(head, v, u_new)

    return result.graph_dict[start_sym]
def test_cfpq_grammar_2():
    """Compare ``cfpq_matrix_mult`` on the second test grammar with fixtures.

    For every ``graph_{i}.txt`` in ``DATA_DIR`` the reachable pairs computed
    for ``TEST_GRAMMARS[1]`` must equal the pairs listed in
    ``expected_1_{i}.txt`` (one "from to" pair per line).
    """
    # The grammar is the same for every graph, so parse it once instead of
    # in a separate loop that also shadowed the graph index.
    g_2 = GrammarCNF.from_text(TEST_GRAMMARS[1])

    for i in range(NUM_GRAPHS):
        graph = LabelGraph().from_txt(
            os.path.join(DATA_DIR, f'graph_{i}.txt'))
        result = cfpq_matrix_mult(graph, g_2)

        expected = set()
        with open(os.path.join(DATA_DIR, f'expected_1_{i}.txt'), 'r') as f:
            for line in f:
                # Whitespace-agnostic split: tolerates trailing spaces and
                # blank lines that ``split(' ')`` would choke on.
                fields = line.split()
                if not fields:
                    continue
                v, to = fields
                expected.add((int(v), int(to)))

        edges = set(LabelGraph.get_reachable(result))
        assert edges == expected
def test_cfpq_brackets():
    """Compare ``cfpq_hellings`` on the first test grammar with fixtures.

    For every ``graph_{i}.txt`` in ``DATA_DIR`` the reachable pairs computed
    for ``TEST_GRAMMARS[0]`` must equal the pairs listed in
    ``expected_0_{i}.txt`` (one "from to" pair per line).
    """
    # The grammar is the same for every graph, so parse it once instead of
    # in a separate loop that also shadowed the graph index.
    brackets_cnf = GrammarCNF.from_text(TEST_GRAMMARS[0])

    for i in range(NUM_GRAPHS):
        graph = LabelGraph().from_txt(
            os.path.join(DATA_DIR, f'graph_{i}.txt'))
        result = cfpq_hellings(graph, brackets_cnf)

        expected = set()
        with open(os.path.join(DATA_DIR, f'expected_0_{i}.txt'), 'r') as f:
            for line in f:
                # Whitespace-agnostic split: tolerates trailing spaces and
                # blank lines that ``split(' ')`` would choke on.
                fields = line.split()
                if not fields:
                    continue
                v, to = fields
                expected.add((int(v), int(to)))

        edges = set(LabelGraph.get_reachable(result))
        assert edges == expected
def cfpq_matrix_mult(g: LabelGraph, cfg: GrammarCNF):
    """Answer a context-free path query on ``g`` by iterated matrix products.

    One boolean matrix per grammar variable is seeded from the single-symbol
    productions and the epsilon rule.  Then, for every pair production
    ``A -> B C``, the product of the ``B`` and ``C`` matrices is added into
    the ``A`` matrix, repeating full passes until a pass leaves every
    ``nvals`` count unchanged.

    Args:
        g: Graph to query; ``g.graph_dict`` maps each label to its matrix.
        cfg: Grammar supplying ``start_symbol``, ``variables``,
            ``productions``, ``pair_productions`` and ``generate_epsilon()``.

    Returns:
        The matrix stored for the grammar's start symbol, or an empty
        ``0 x 0`` boolean matrix when the graph has no vertices.
    """
    size = g.num_vert
    if size == 0:
        return Matrix.sparse(BOOL, size, size)

    closure = LabelGraph()
    start = cfg.start_symbol
    closure.num_vert = size
    matrices = closure.graph_dict

    for nonterminal in cfg.variables:
        matrices[nonterminal] = Matrix.sparse(BOOL, size, size)

    # Copy terminal matrices and apply single-symbol productions.
    for edge_label in g.graph_dict:
        terminal = Terminal(edge_label)
        matrices[terminal] = g.graph_dict[edge_label].dup()
        deriving_heads = [
            rule.head
            for rule in cfg.productions
            if len(rule.body) == 1 and rule.body[0] == terminal
        ]
        for src, dst in g.get_edges(edge_label):
            for head in deriving_heads:
                matrices[head][src, dst] = True

    # If the grammar generates the empty word, every vertex reaches itself.
    if cfg.generate_epsilon():
        for vertex in g.vertices:
            matrices[start][vertex, vertex] = True

    with semiring.LOR_LAND_BOOL:
        changed = True
        while changed:
            changed = False
            for rule in cfg.pair_productions:
                head = rule.head
                left, right = rule.body[0], rule.body[1]
                nvals_before = matrices[head].nvals
                product = matrices[left] @ matrices[right]
                matrices[head] = matrices[head] + product
                # A grown entry count means another pass is required.
                if matrices[head].nvals != nvals_before:
                    changed = True

    return matrices[start]
def test_cfpq_empty_graph():
    """``cfpq_matrix_mult`` on a default ``LabelGraph()`` yields no pairs."""
    grammar = GrammarCNF.from_text(TEST_GRAMMARS[0])
    empty_graph = LabelGraph()

    answer = cfpq_matrix_mult(empty_graph, grammar)

    reachable = set(LabelGraph.get_reachable(answer))
    assert reachable == set()
import os from src.antlr_utils import parse from src.grammar_cnf import GrammarCNF import pytest @pytest.mark.parametrize("grammar", [GrammarCNF.from_txt("dbql_grammar.txt")]) @pytest.mark.parametrize( "test_input, expected", [ (''' connect "azat/home/db" ; select edges from query term("s")*|term("b")+.term("c")?; ''', True), (''' select edges from name "sparsegraph" ; ''', True), (''' connect "azat/home/db" ; ''', True), (''' connect "azat/home/db" ; select edges from name "sparsegraph_256.txt" ; ''', True), (''' connect "azat/home/db" ; select edges from startAndFinal(set(1, 2, 3), set (4, 5, 6)) of name "sparsegraph" ; ''', True),
def test_1_word_accepts():
    """The first test grammar reports ``True`` for the word ``aabb``."""
    grammar = GrammarCNF.from_text(TEST_GRAMMARS[0])
    verdict = grammar.contains('aabb')
    assert verdict is True
def test_2_epsilon_not_accepts():
    """The second test grammar reports ``False`` for the empty word."""
    grammar = GrammarCNF.from_text(TEST_GRAMMARS[1])
    verdict = grammar.contains('')
    assert verdict is False
def test_3_accepts():
    """The grammar ``S -> \\( S \\) | eps`` accepts ``()`` and ``(())``."""
    # Raw string: ``\(`` and ``\)`` are not valid Python escape sequences, so
    # a plain literal emits an invalid-escape warning.  The text handed to
    # ``from_text`` is byte-for-byte the same as before.
    cnf = GrammarCNF.from_text(r'S -> \( S \) | eps')
    assert cnf.contains('()')
    assert cnf.contains('(())')
def test_2_accepts():
    """The second test grammar accepts the words ``abab`` and ``ab``."""
    grammar = GrammarCNF.from_text(TEST_GRAMMARS[1])
    for word in ('abab', 'ab'):
        assert grammar.contains(word)
def test_1_epsilon_accepts():
    """The first test grammar reports ``True`` for the empty word."""
    grammar = GrammarCNF.from_text(TEST_GRAMMARS[0])
    verdict = grammar.contains('')
    assert verdict is True