Ejemplo n.º 1
0
def hellings(g: LabelGraph, gr: WeakCNF):
    """Compute the vertex pairs derivable from the start symbol of ``gr`` in ``g``.

    One boolean matrix per grammar variable is filled in three stages:

    1. every stored edge ``(i, j)`` of label ``t`` sets ``head[i, j]`` for
       each production ``head -> t``;
    2. if ``gr.generate_epsilon()`` is true, the diagonal of the start
       symbol's matrix is set;
    3. productions ``head -> A B`` are applied repeatedly, setting
       ``head[i, j]`` whenever ``A[i, k]`` and ``B[k, j]`` are set, until a
       full pass over the productions adds nothing.

    Args:
        g: Graph whose per-label matrices are read through ``g[label]``.
        gr: Grammar providing ``variables``, ``productions``,
            ``start_symbol`` and ``generate_epsilon()``.

    Returns:
        Set of ``(i, j)`` index pairs stored in the start symbol's matrix.
    """
    result = LabelGraph(g.size)

    for variable in gr.variables:
        result.dict[variable] = Matrix.sparse(BOOL, g.size, g.size)

    for label in g.labels:
        terminal = Terminal(label)
        result[terminal] = g[label].dup()
        # The heads deriving this terminal do not depend on the edge, so they
        # are collected once per label instead of once per edge.
        heads = [
            production.head
            for production in gr.productions
            if len(production.body) == 1 and production.body[0] == terminal
        ]
        for i, j, _ in zip(*result[terminal].select(lib.GxB_NONZERO).to_lists()):
            for head in heads:
                result.dict[head][i, j] = True

    if gr.generate_epsilon():
        for i in range(g.size):
            result.dict[gr.start_symbol][i, i] = True

    changing = True
    while changing:
        changing = False
        for p in gr.productions:
            if len(p.body) != 2:
                continue
            head_matrix = result.dict[p.head]
            # Snapshot of the pairs already stored for the head. It is kept
            # in sync with the writes below, which are the only writes made
            # while this production is processed, so membership tests no
            # longer re-extract the whole matrix for every candidate pair.
            known = set(zip(*head_matrix.select(lib.GxB_NONZERO).to_lists()[:2]))
            for i, k in zip(*result.dict[p.body[0]].select(lib.GxB_NONZERO).to_lists()[:2]):
                # The right-hand matrix is re-read for every left pair because
                # it may be the head matrix that is being extended.
                for l, j in zip(*result.dict[p.body[1]].select(lib.GxB_NONZERO).to_lists()[:2]):
                    if k == l and (i, j) not in known:
                        known.add((i, j))
                        changing = True
                        head_matrix[i, j] = True

    return set(zip(*result.dict[gr.start_symbol].to_lists()[:2]))
Ejemplo n.º 2
0
def tensor_rsa_cfpq(g: LabelGraph, gr: RegexCFG):
    """Answer a context-free path query on ``g`` using the boxes of ``gr``.

    All boxes in ``gr.boxes`` are laid out in one ``LabelGraph`` (``rsa``)
    with consecutively numbered states.  The closure of the intersection of
    the working graph with ``rsa`` is then recomputed until its number of
    stored values stops changing; each closure entry that links a start
    configuration to a final configuration adds an edge labelled with the
    owning nonterminal to the working graph.

    Args:
        g: Input graph; it is duplicated and the copy is extended.
        gr: Grammar exposing ``boxes`` (nonterminal name -> automata) and
            ``start_symbol``.

    Returns:
        Set of ``(i, j)`` index pairs stored under ``gr.start_symbol`` in
        the extended graph.
    """
    m = g.dup()

    # Flatten the boxes once; the RSA has one state per box state.
    box_list = [(x, box) for x in gr.boxes for box in gr.boxes[x]]
    rsa = LabelGraph(sum(len(box.states) for _, box in box_list))

    heads = {}
    next_index = 0
    for x, box in box_list:
        # Local state -> global RSA state index for this box.
        index_of = {}
        for state in box.states:
            if state not in index_of:
                index_of[state] = next_index
                next_index += 1
            if state in box.final_states:
                rsa.final_states.add(index_of[state])

        start = index_of[box.start_state]
        rsa.start_states.add(start)

        # A box that accepts in its start state contributes a self-loop
        # labelled with the nonterminal on every vertex.
        if box.start_state in box.final_states:
            for vertex in m.vertices:
                m[Variable(x)][vertex, vertex] = True

        for final in box.final_states:
            heads[(start, index_of[final])] = Variable(x)

        transitions = box._transition_function._transitions
        for src in transitions:
            for label in transitions[src]:
                dst = transitions[src][label]
                symbol = label.value
                # Symbols equal to their lower-cased form are used as plain
                # labels; any other symbol is wrapped in Variable.
                key = symbol if symbol == symbol.lower() else Variable(symbol)
                rsa[key][index_of[src], index_of[dst]] = True

    tc = m.get_intersection(rsa).get_transitive_closure()

    while True:
        nvals_before = tc.nvals

        for row, col, _ in zip(*tc.select(lib.GxB_NONZERO).to_lists()):
            # Closure indices are decoded as (graph vertex, RSA state).
            row_m, row_rsa = divmod(row, rsa.size)
            col_m, col_rsa = divmod(col, rsa.size)
            if (row_m in m.start_states and row_rsa in rsa.start_states
                    and col_m in m.final_states and col_rsa in rsa.final_states):
                m[heads[(row_rsa, col_rsa)]][row_m, col_m] = True

        product_graph = m.get_intersection(rsa)
        for label in product_graph.labels:
            tc += product_graph[label]
        tc = transitive_closure(tc)

        if tc.nvals == nvals_before:
            break

    return set(zip(*m[gr.start_symbol].to_lists()[:2]))
Ejemplo n.º 3
0
def tensor_cfg_cfpq(g: LabelGraph, gr: CFG):
    """Answer a context-free path query on ``g`` from the productions of ``gr``.

    Every production becomes a chain of ``len(body) + 1`` states in one
    ``LabelGraph`` (``rsa``), with one transition per body symbol.  The
    closure of the intersection of the working graph with ``rsa`` is then
    recomputed until its number of stored values stops changing; each
    closure entry that links a start configuration to a final configuration
    adds an edge labelled with the production head to the working graph.

    Args:
        g: Input graph; it is duplicated and the copy is extended.
        gr: Grammar exposing ``productions`` and ``start_symbol``.

    Returns:
        Set of ``(i, j)`` index pairs stored under ``gr.start_symbol`` in
        the extended graph.
    """
    m = g.dup()

    rsa = LabelGraph(sum(len(p.body) + 1 for p in gr.productions))

    heads = {}
    first = 0
    for p in gr.productions:
        body_len = len(p.body)
        last = first + body_len

        rsa.start_states.add(first)
        heads[(first, last)] = p.head

        # An empty body contributes a self-loop labelled with the head on
        # every vertex.
        if body_len == 0:
            for vertex in m.vertices:
                m[p.head][vertex, vertex] = True

        for offset, unit in enumerate(p.body):
            src = first + offset
            # Terminals are keyed by their value, other symbols by themselves.
            key = unit.value if isinstance(unit, Terminal) else unit
            rsa[key][src, src + 1] = True

        rsa.final_states.add(last)
        first = last + 1

    tc = m.get_intersection(rsa).get_transitive_closure()

    while True:
        nvals_before = tc.nvals

        for row, col, _ in zip(*tc.select(lib.GxB_NONZERO).to_lists()):
            # Closure indices are decoded as (graph vertex, RSA state).
            row_m, row_rsa = divmod(row, rsa.size)
            col_m, col_rsa = divmod(col, rsa.size)
            if (row_m in m.start_states and row_rsa in rsa.start_states
                    and col_m in m.final_states and col_rsa in rsa.final_states):
                m[heads[(row_rsa, col_rsa)]][row_m, col_m] = True

        product_graph = m.get_intersection(rsa)
        for label in product_graph.labels:
            tc += product_graph[label]
        tc = transitive_closure(tc)

        if tc.nvals == nvals_before:
            break

    return set(zip(*m[gr.start_symbol].to_lists()[:2]))
Ejemplo n.º 4
0
def test_1(tmp_path):
    """Check ``rpq`` for three ``a`` edges against the regex ``(a)(a)``.

    The graph and the regex are written to files under ``tmp_path`` and
    loaded back through ``LabelGraph``; the result must equal a 4x4 boolean
    matrix holding exactly the entries (0, 2) and (1, 3).
    """
    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(['0 a 1', '1 a 2', '2 a 3']))

    regex_path = tmp_path / 'regex.txt'
    regex_path.write_text('(a)(a)')

    actual = rpq(LabelGraph.from_txt(graph_path),
                 LabelGraph.from_regex(regex_path))

    expected = Matrix.sparse(BOOL, 4, 4)
    for src, dst in ((0, 2), (1, 3)):
        expected[src, dst] = True

    assert expected.iseq(actual)
Ejemplo n.º 5
0
def test_auto(automatic_suite, tmp_path):
    """Compare ``rpq`` with a brute-force oracle built from label strings.

    For each vertex pair the oracle collects the labels of direct edges,
    then, in a triple loop over intermediate vertices, adds every
    concatenation of a word for ``(i, k)`` with a word for ``(k, j)``.  A
    pair is expected in the answer when the regex automaton accepts at
    least one collected word.

    Args:
        automatic_suite: Mapping with the keys ``'edges'`` (lines of the
            graph file) and ``'regex'`` (contents of the regex file).
        tmp_path: Directory in which the two input files are created.
    """
    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(automatic_suite['edges']))

    regex_path = tmp_path / 'regex.txt'
    regex_path.write_text(automatic_suite['regex'])

    g = LabelGraph.from_txt(graph_path)
    r = LabelGraph.from_regex(regex_path)

    actual = rpq(g, r)

    # (i, j) -> set of words collected for that pair.
    words = {}

    for label in g.labels:
        for src, dst, _ in zip(*g[label].select(lib.GxB_NONZERO).to_lists()):
            words.setdefault((src, dst), set()).add(label)

    size = g.size
    for mid in range(size):
        for src in range(size):
            for dst in range(size):
                if (src, mid) not in words or (mid, dst) not in words:
                    continue
                target = words.setdefault((src, dst), set())
                # Built as a separate set before merging, because `target`
                # may be one of the two sets being combined.
                joined = {
                    left + right
                    for left, right in product(words[src, mid], words[mid, dst])
                }
                target |= joined

    expected = Matrix.sparse(BOOL, size, size)
    for src in range(size):
        for dst in range(size):
            if any(r.accepts(word) for word in words.get((src, dst), ())):
                expected[src, dst] = True

    assert expected.iseq(actual)
Ejemplo n.º 6
0
def test_2(tmp_path):
    """Check ``rpq`` for a 16-edge graph against ``(a+)(d|b)(a)(b)``.

    The graph and the regex are written to files under ``tmp_path`` and
    loaded back through ``LabelGraph``; the result must equal an 11x11
    boolean matrix holding exactly the entries (0, 6), (1, 9) and (2, 9).
    """
    graph_lines = [
        '0 a 1', '1 e 4', '8 c 4', '7 a 8', '4 e 7', '1 a 5', '5 a 7', '1 b 2',
        '2 d 5', '0 b 3', '2 a 3', '3 b 6', '6 a 7', '7 b 9', '9 b 10', '5 d 6'
    ]

    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(graph_lines))

    regex_path = tmp_path / 'regex.txt'
    regex_path.write_text('(a+)(d|b)(a)(b)')

    actual = rpq(LabelGraph.from_txt(graph_path),
                 LabelGraph.from_regex(regex_path))

    expected = Matrix.sparse(BOOL, 11, 11)
    for src, dst in ((0, 6), (1, 9), (2, 9)):
        expected[src, dst] = True

    assert expected.iseq(actual)
Ejemplo n.º 7
0
def test_manual_cfpq(manual_suite_cfpq, cfpq_algo, tmp_path):
    """Run one CFPQ algorithm on a hand-written case and compare the result.

    Args:
        manual_suite_cfpq: Mapping with the keys ``'edges'`` (lines of the
            graph file), ``'cnf'`` (grammar text) and ``'expected'``.
        cfpq_algo: Algorithm under test, called as ``cfpq_algo(graph, grammar)``.
        tmp_path: Directory in which the graph file is created.
    """
    graph_path = tmp_path / 'graph.txt'
    graph_path.write_text('\n'.join(manual_suite_cfpq['edges']))

    graph = LabelGraph.from_txt(graph_path)

    # Only `tensor_rsa_cfpq` receives the grammar as parsed; every other
    # algorithm gets the result of `to_cnf()`.
    grammar = RegexCFG.from_text(manual_suite_cfpq['cnf'])
    if cfpq_algo.__name__ != 'tensor_rsa_cfpq':
        grammar = grammar.to_cnf()

    assert cfpq_algo(graph, grammar) == manual_suite_cfpq['expected']
Ejemplo n.º 8
0
def test_benchmark_rpq(algo, graph, grammar):
    """Time one algorithm on one graph/grammar pair and append a CSV row.

    Results go to ``./benchmark_cfpq/results/<graph name>.csv``.  The
    directory is created if needed and the header row is written when the
    file does not exist yet.

    Args:
        algo: Mapping with the keys ``'name'`` and ``'algo'`` (the callable,
            invoked as ``algo['algo'](graph, grammar)``).
        graph: Mapping with the keys ``'graph'`` (path passed to
            ``LabelGraph.from_txt``), ``'name'`` and ``'filename'``.
        grammar: Mapping with the keys ``'grammar'`` (path passed to
            ``RegexCFG.from_txt``) and ``'name'``.
    """
    algo_name = algo['name']

    g = LabelGraph.from_txt(graph['graph'])
    g_name = graph['name']
    g_filename = graph['filename']

    # Only `tensor_rsa_cfpq` receives the grammar as parsed; every other
    # algorithm gets the result of `to_cnf()`.
    r = RegexCFG.from_txt(grammar['grammar'])
    if algo_name != 'tensor_rsa_cfpq':
        r = r.to_cnf()
    r_name = grammar['name']

    results_dir = './benchmark_cfpq/results'
    result_file_path = f'{results_dir}/{g_name}.csv'

    # Without this, a fresh checkout fails with FileNotFoundError on open().
    os.makedirs(results_dir, exist_ok=True)

    headers = [
        'Algorithm', 'Graph', 'Graph filename', 'Grammar',
        'Time (in microseconds)', 'Control sum'
    ]

    # Shared by the header writer and the row writer so both stay in sync.
    writer_options = {
        'delimiter': ',',
        'quoting': csv.QUOTE_NONNUMERIC,
        'escapechar': ' ',
    }

    if not os.path.exists(result_file_path):
        with open(result_file_path, mode='w+', newline='\n') as f:
            csv.writer(f, **writer_options).writerow(headers)

    # A monotonic counter is used so that system clock adjustments cannot
    # distort the measured interval.
    start_time = time.perf_counter_ns()
    res = algo['algo'](g, r)
    end_time = time.perf_counter_ns()

    result_time = (end_time - start_time) // (10**3)

    results = [
        algo_name, g_name, g_filename, r_name, result_time,
        len(res)
    ]

    with open(result_file_path, mode='a+', newline='\n', buffering=1) as f:
        csv.writer(f, **writer_options).writerow(results)
Ejemplo n.º 9
0
def mxm_cfpq(g: LabelGraph, gr: WeakCNF):
    """Answer a context-free path query with repeated matrix products.

    Productions with a one-symbol body seed the head's matrix with the graph
    matrix of that symbol's ``value``.  Productions with a two-symbol body
    are then applied as ``head += left @ right`` until a full pass leaves
    every head's number of stored values unchanged.

    Args:
        g: Graph whose per-label matrices are read through ``g[label]``.
        gr: Grammar providing ``productions``, ``start_symbol`` and
            ``generate_epsilon()``.

    Returns:
        Set of ``(i, j)`` index pairs stored in the start symbol's matrix.
    """
    m = LabelGraph(g.size)

    binary_rules = set()

    for rule in gr.productions:
        body_len = len(rule.body)
        if body_len == 1:
            m[rule.head] += g[rule.body[0].value]
        elif body_len == 2:
            binary_rules.add(rule)

    if gr.generate_epsilon():
        for vertex in range(g.size):
            m[gr.start_symbol][vertex, vertex] = True

    while True:
        grew = False
        for rule in binary_rules:
            nvals_before = m[rule.head].nvals
            m[rule.head] += m[rule.body[0]] @ m[rule.body[1]]
            if m[rule.head].nvals != nvals_before:
                grew = True
        if not grew:
            break

    return set(zip(*m[gr.start_symbol].to_lists()[:2]))
Ejemplo n.º 10
0
                        help='path to graph.txt file')
    parser.add_argument('--regex',
                        required=True,
                        type=str,
                        help='path to regex.txt file')
    parser.add_argument('--sources',
                        required=False,
                        type=str,
                        help='path to sources.txt file')
    parser.add_argument('--destinations',
                        required=False,
                        type=str,
                        help='path to destinations.txt file')
    args = parser.parse_args()

    g = LabelGraph.from_txt(args.graph)
    r = LabelGraph.from_regex(args.regex)

    print(str(args.graph) + " " + str(args.regex))

    time_sum_1 = 0
    time_sum_2 = 0

    for i in range(5):
        time_1 = timeit.default_timer()
        res_1 = rpq(g, r)
        time_sum_1 += timeit.default_timer() - time_1

        time_2 = timeit.default_timer()
        res_2 = rpq_with_linear_tc(g, r)
        time_sum_2 += timeit.default_timer() - time_2