Ejemplo n.º 1
0
def test_mutagenicity_with_deleted_nodes(mutagenicity_graphs, dataframe_mutagenicity, graph_name_source_target):
    """Check the edit distance after adding and then removing extra nodes.

    A copy of the source graph is built with room for two additional nodes.
    Both extra nodes are wired to random existing nodes and then removed
    again; the distance from that copy to the target graph must equal the
    reference value stored in ``dataframe_mutagenicity``.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_mutagenicity: table of reference distances, indexed with
            ``.loc[row, column]`` by ``'mutagen/<name>'`` labels.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    def _graph_named(wanted):
        # Indexing an empty match list raises IndexError for unknown names.
        return [g for g in mutagenicity_graphs if g.name == wanted][0]

    gr_name_src, gr_name_trgt = ['mutagen/' + name for name in graph_name_source_target]
    graph_name_source, graph_name_target = graph_name_source_target
    graph_source = _graph_named(graph_name_source)
    graph_target = _graph_named(graph_name_target)

    cst_cost_node = 11.0
    cst_cost_edge = 1.1
    edit_cost = EditCostMutagenicity(cst_cost_node, cst_cost_node,
                                     cst_cost_edge, cst_cost_edge, 'dirac')
    ged = GED(edit_cost)

    # Index of the first extra node == number of nodes in the source graph.
    first_extra = len(graph_source)
    second_extra = first_extra + 1

    # Reproduce the source graph, sized for two more nodes.
    new_gr_src = Graph(gr_name_src, 'gr.xls', first_extra + 2)
    for original_node in graph_source.nodes:
        new_gr_src.add_node(original_node)

    for _, edge_list in graph_source.get_edges().items():
        for existing_edge in edge_list:
            if existing_edge is not None:
                new_gr_src.add_edge(existing_edge)

    new_gr_src.add_node(Node(first_extra, LabelNodeMutagenicity('C')))
    new_gr_src.add_node(Node(second_extra, LabelNodeMutagenicity('N')))

    # Random edges: 4 from the first extra node to original nodes, then 6
    # from the second extra node to any original node or the first extra one.
    for extra_idx, n_edges, max_neighbour in ((first_extra, 4, first_extra - 1),
                                              (second_extra, 6, first_extra)):
        for _ in range(n_edges):
            neighbour = random.randint(0, max_neighbour)
            new_gr_src.add_edge(Edge(extra_idx, neighbour, LabelEdge(0)))

    # The same index is removed twice.
    # NOTE(review): presumably the second extra node shifts down to this index
    # after the first removal - confirm against Graph.remove_node_by_idx.
    for _ in range(2):
        new_gr_src.remove_node_by_idx(first_extra)

    results = ged.compute_edit_distance(new_gr_src, graph_target)
    expected = dataframe_mutagenicity.loc[gr_name_src, gr_name_trgt]

    print(f'###### diff {results - expected}')
    print(f'{graph_name_source_target}: new dist {results} - old dist {expected}')
    print(f'exp {expected}')
    assert results == expected
Ejemplo n.º 2
0
def define_graphs():
    """Build a small source/target graph pair and a matching GED instance.

    The source graph has 4 nodes and 4 edges, the target graph 3 nodes and
    2 edges. Node labels are ``LabelNodeLetter`` pairs and every edge carries
    ``LabelEdge(0)``.

    Returns:
        tuple: ``(ged, graph_source, graph_target)`` where ``ged`` is a
        ``GED`` built from ``EditCostLetter(1., 1., 1., 1., 'manhattan')``.
    """
    # Node label arguments, listed in node-index order.
    source_labels = ((1, 0), (2, 0), (1, 0), (3, 0))
    target_labels = ((3, 0), (2, 0), (2, 0))
    # Edge endpoints as (start, end) node indices.
    source_links = ((0, 1), (1, 2), (1, 3), (2, 3))
    target_links = ((0, 1), (1, 2))

    ged = GED(EditCostLetter(1., 1., 1., 1., 'manhattan'))

    n, m = len(source_labels), len(target_labels)
    graph_source = Graph('gr_source', 'gr_source.gxl', n)
    graph_target = Graph('gr_target', 'gr_targe.gxl', m)

    # Fill each graph in turn: all nodes first, then all edges.
    for graph, labels, links in ((graph_source, source_labels, source_links),
                                 (graph_target, target_labels, target_links)):
        for idx, (first, second) in enumerate(labels):
            graph.add_node(Node(idx, LabelNodeLetter(first, second)))
        for start, end in links:
            graph.add_edge(Edge(start, end, LabelEdge(0)))

    return ged, graph_source, graph_target
Ejemplo n.º 3
0
def test_NCI1(NCI1_graphs, graph_name_source_target, expected):
    """Compare the heuristic edit distance of two NCI1 graphs to a reference.

    Args:
        NCI1_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source_target: pair ``(source_name, target_name)``.
        expected: reference distance; the computed value is rounded to
            5 decimals before comparison.
    """
    def _graph_named(wanted):
        # Indexing an empty match list raises IndexError for unknown names.
        return [g for g in NCI1_graphs if g.name == wanted][0]

    graph_name_source, graph_name_target = graph_name_source_target
    graph_source = _graph_named(graph_name_source)
    graph_target = _graph_named(graph_name_target)

    node_cost = 1.0
    edge_cost = 1.0
    edit_cost = EditCostNCI1(node_cost, node_cost,
                             edge_cost, edge_cost, 'dirac', alpha=0.9)
    ged = GED(edit_cost)

    results = ged.compute_edit_distance(graph_source, graph_target, heuristic=True)

    print(f'ged : {results}')

    assert round(results, 5) == expected
Ejemplo n.º 4
0
def test_heuristic_inverse(mutagenicity_graphs, graph_name_source_target):
    """Check that the heuristic edit distance is the same in both directions.

    The distance source -> target and the distance target -> source are both
    computed with ``heuristic=True`` and must be exactly equal.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    def _graph_named(wanted):
        # Indexing an empty match list raises IndexError for unknown names.
        return [g for g in mutagenicity_graphs if g.name == wanted][0]

    graph_name_source, graph_name_target = graph_name_source_target
    graph_source = _graph_named(graph_name_source)
    graph_target = _graph_named(graph_name_target)

    node_cost, edge_cost = 11.0, 1.1
    edit_cost = EditCostMutagenicity(node_cost, node_cost,
                                     edge_cost, edge_cost, 'dirac')
    ged = GED(edit_cost)

    results = ged.compute_edit_distance(graph_source, graph_target, heuristic=True)
    results_inv = ged.compute_edit_distance(graph_target, graph_source, heuristic=True)

    print(f'result: {results}')
    print(f'result_inv: {results_inv}')

    assert results == results_inv
Ejemplo n.º 5
0
def test_aids(aids_graphs, dataframe_aids, graph_name_source, graph_name_target, gr_name_src, gr_name_trgt):
    """Compare the edit distance of two AIDS graphs with a reference value.

    Args:
        aids_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_aids: table of reference distances, indexed with
            ``.loc[row, column]``.
        graph_name_source: name of the source graph in ``aids_graphs``.
        graph_name_target: name of the target graph in ``aids_graphs``.
        gr_name_src: row label of the source graph in ``dataframe_aids``.
        gr_name_trgt: column label of the target graph in ``dataframe_aids``.
    """
    graph_source = [graph for graph in aids_graphs if graph.name == graph_name_source][0]
    graph_target = [graph for graph in aids_graphs if graph.name == graph_name_target][0]

    cst_cost_node = 1.1
    cst_cost_edge = 0.1
    ged = GED(EditCostAIDS(cst_cost_node, cst_cost_node,
                           cst_cost_edge, cst_cost_edge, 'dirac'))

    results = ged.compute_edit_distance(graph_source, graph_target)
    expected = dataframe_aids.loc[gr_name_src, gr_name_trgt]

    # Debug output of the cost matrices; pytest shows captured stdout only
    # when the test fails (or when run with ``-s`` / ``-rP``).
    print(ged.C.base)
    print(ged.C_star.base)

    # The former unconditional ``assert False`` was removed: it made the test
    # fail even when the computed distance matched the reference.
    assert results == expected
Ejemplo n.º 6
0
def test_mutagenicity_alpha(mutagenicity_graphs, graph_name_source_target):
    """Check that ``alpha=0.5`` yields half the distance computed without alpha.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    def _graph_named(wanted):
        # Indexing an empty match list raises IndexError for unknown names.
        return [g for g in mutagenicity_graphs if g.name == wanted][0]

    graph_name_source, graph_name_target = graph_name_source_target
    graph_source = _graph_named(graph_name_source)
    graph_target = _graph_named(graph_name_target)

    node_cost, edge_cost = 11.0, 1.1
    # Identical positional arguments for both cost functions; only alpha differs.
    cost_args = (node_cost, node_cost, edge_cost, edge_cost, 'dirac')
    edit_cost = EditCostMutagenicity(*cost_args)
    edit_cost_alpha = EditCostMutagenicity(*cost_args, alpha=0.5)

    ged = GED(edit_cost)
    ged_alpha = GED(edit_cost_alpha)

    results = ged_alpha.compute_edit_distance(graph_source, graph_target)
    plain_distance = ged.compute_edit_distance(graph_source, graph_target)
    expected = plain_distance / 2.

    assert results == expected
Ejemplo n.º 7
0
def test_aids_alpha(aids_graphs, graph_name_source, graph_name_target):
    """Check that ``alpha=0.5`` yields half the distance computed without alpha.

    Args:
        aids_graphs: iterable of graphs exposing a ``name`` attribute.
        graph_name_source: name of the source graph in ``aids_graphs``.
        graph_name_target: name of the target graph in ``aids_graphs``.
    """
    def _graph_named(wanted):
        # Indexing an empty match list raises IndexError for unknown names.
        return [g for g in aids_graphs if g.name == wanted][0]

    graph_source = _graph_named(graph_name_source)
    graph_target = _graph_named(graph_name_target)

    node_cost, edge_cost = 1.1, 0.1
    # Identical positional arguments for both cost functions; only alpha differs.
    cost_args = (node_cost, node_cost, edge_cost, edge_cost, 'dirac')
    edit_cost = EditCostAIDS(*cost_args)
    edit_cost_alpha = EditCostAIDS(*cost_args, alpha=0.5)

    ged = GED(edit_cost)
    ged_alpha = GED(edit_cost_alpha)

    results = ged_alpha.compute_edit_distance(graph_source, graph_target)
    plain_distance = ged.compute_edit_distance(graph_source, graph_target)
    expected = plain_distance / 2.

    assert results == expected
Ejemplo n.º 8
0
def test_with_verified_data(letter_graphs, dataframe_letter, graph_source_target, accuracy):
    """Compare the edit distance of two letter graphs with a reference value.

    Args:
        letter_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_letter: table of reference distances, indexed with
            ``.loc[row, column]`` by ``'<first letter>/<name>'`` labels.
        graph_source_target: pair of graph names ``(source, target)``.
        accuracy: maximum allowed absolute difference between the computed
            and the reference distance.
    """
    # Reference labels are the graph name prefixed by its first character.
    gr_name_src, gr_name_trgt = [name[0] + '/' + name for name in graph_source_target]
    # NOTE(review): the two graphs come back in the order they appear in
    # ``letter_graphs``, not in the order of ``graph_source_target`` - confirm
    # that this is intended.
    graph_source, graph_target = [graph for graph in letter_graphs if graph.name in graph_source_target]

    cst_cost_node = 0.9
    cst_cost_edge = 2.3
    ged = GED(EditCostLetter(cst_cost_node, cst_cost_node,
                             cst_cost_edge, cst_cost_edge, 'euclidean'))

    results = ged.compute_edit_distance(graph_source, graph_target)
    expected = dataframe_letter.loc[gr_name_src, gr_name_trgt]

    # Debug output of the cost matrices. Note that set_printoptions changes
    # NumPy's print settings for the rest of the process.
    np.set_printoptions(precision=2)
    print(np.asarray(ged.C))
    print(np.asarray(ged.C_star))

    print(f'res {results}')
    print(f'exp {expected}')
    print(f'###### diff {results - expected}')

    # Compare the absolute difference: without abs() any result smaller than
    # the reference would pass, however far off it is.
    assert abs(results - expected) < accuracy
Ejemplo n.º 9
0
def test_mutagenicity(mutagenicity_graphs, dataframe_mutagenicity, graph_name_source_target):
    """Compare the edit distance of two mutagenicity graphs with a reference.

    Args:
        mutagenicity_graphs: iterable of graphs exposing a ``name`` attribute.
        dataframe_mutagenicity: table of reference distances, indexed with
            ``.loc[row, column]`` by ``'mutagen/<name>'`` labels.
        graph_name_source_target: pair ``(source_name, target_name)``.
    """
    def _graph_named(wanted):
        # Indexing an empty match list raises IndexError for unknown names.
        return [g for g in mutagenicity_graphs if g.name == wanted][0]

    gr_name_src, gr_name_trgt = ['mutagen/' + name for name in graph_name_source_target]
    graph_name_source, graph_name_target = graph_name_source_target
    graph_source = _graph_named(graph_name_source)
    graph_target = _graph_named(graph_name_target)

    node_cost, edge_cost = 11.0, 1.1
    edit_cost = EditCostMutagenicity(node_cost, node_cost,
                                     edge_cost, edge_cost, 'dirac')
    ged = GED(edit_cost)

    results = ged.compute_edit_distance(graph_source, graph_target)
    expected = dataframe_mutagenicity.loc[gr_name_src, gr_name_trgt]

    assert results == expected
Ejemplo n.º 10
0
def test_simple_alpha(define_graphs):
    """Check cost matrices and distance of the small graph pair for ``alpha=0.5``.

    The expected ``C`` and ``C_star`` matrices are written down unscaled and
    then halved before being compared with the matrices exposed by the GED
    instance.

    Args:
        define_graphs: pair ``(graph_source, graph_target)``.
    """
    graph_source, graph_target = define_graphs

    ged_alpha = GED(EditCostLetter(1., 1., 1., 1., 'euclidean', alpha=0.5))
    cost_alpha = ged_alpha.compute_edit_distance(graph_source, graph_target)

    expected_cost_alpha = 2.

    inf = np.inf  # short alias to keep the matrices readable
    expected_C = np.array([
        [2., 1., 1., 1., inf, inf, inf],
        [1., 0., 0., inf, 1., inf, inf],
        [2., 1., 1., inf, inf, 1., inf],
        [0., 1., 1., inf, inf, inf, 1.],
        [1., inf, inf, 0., 0., 0., 0.],
        [inf, 1., inf, 0., 0., 0., 0.],
        [inf, inf, 1., 0., 0., 0., 0.],
    ])
    expected_C_star = np.array([
        [2., 2., 1., 2., inf, inf, inf],
        [3., 1., 2., inf, 4., inf, inf],
        [3., 1., 2., inf, inf, 3., inf],
        [1., 1., 2., inf, inf, inf, 3.],
        [2., inf, inf, 0., 0., 0., 0.],
        [inf, 3., inf, 0., 0., 0., 0.],
        [inf, inf, 2., 0., 0., 0., 0.],
    ])
    # Halving is exact in floating point, so strict equality is used below.
    expected_C_alpha = expected_C * 0.5
    expected_C_star_alpha = expected_C_star * 0.5

    print(ged_alpha.C.base)

    actual_C = np.asarray(ged_alpha.C)
    actual_C_star = np.asarray(ged_alpha.C_star)
    assert np.array_equal(actual_C, expected_C_alpha)
    assert np.array_equal(actual_C_star, expected_C_star_alpha)
    assert cost_alpha == expected_cost_alpha
Ejemplo n.º 11
0
def test_letter_alpha_0_5(graphs, graph_source_target, accuracy):
    """Check that ``alpha=0.5`` yields about half the distance computed without alpha.

    Args:
        graphs: iterable of graphs exposing a ``name`` attribute.
        graph_source_target: collection holding the two graph names.
        accuracy: maximum allowed absolute difference between the alpha
            distance and half of the distance computed without alpha.
    """
    # NOTE(review): the two graphs come back in the order they appear in
    # ``graphs``, not in the order of ``graph_source_target`` - confirm that
    # this is intended.
    graph_source, graph_target = [
        graph for graph in graphs if graph.name in graph_source_target
    ]

    cst_cost_node = 0.9
    cst_cost_edge = 2.3
    edit_cost = EditCostLetter(cst_cost_node, cst_cost_node, cst_cost_edge,
                               cst_cost_edge, 'euclidean')
    edit_cost_alpha = EditCostLetter(cst_cost_node,
                                     cst_cost_node,
                                     cst_cost_edge,
                                     cst_cost_edge,
                                     'euclidean',
                                     alpha=0.5)
    ged = GED(edit_cost)
    ged_alpha = GED(edit_cost_alpha)

    results = ged_alpha.compute_edit_distance(graph_source, graph_target)
    expected = ged.compute_edit_distance(graph_source, graph_target) / 2.

    # Compare the absolute difference: without abs() any result smaller than
    # the expected value would pass, however far off it is.
    assert abs(results - expected) < accuracy
Ejemplo n.º 12
0
def test_alpha_0_25(define_graphs):
    """Check cost matrices and distance of the small graph pair for ``alpha=0.25``.

    The expected ``C`` matrix is the unscaled matrix multiplied by 0.25; the
    expected ``C_star`` matrix is written out explicitly.

    Args:
        define_graphs: pair ``(graph_source, graph_target)``.
    """
    graph_source, graph_target = define_graphs

    edit_cost = EditCostLetter(1., 1., 1., 1., 'euclidean')
    edit_cost_alpha = EditCostLetter(1., 1., 1., 1., 'euclidean', alpha=0.25)
    ged = GED(edit_cost)
    ged_alpha = GED(edit_cost_alpha)

    # The distance without alpha is computed first; its value is not checked.
    _plain_cost = ged.compute_edit_distance(graph_source, graph_target)
    cost_alpha = ged_alpha.compute_edit_distance(graph_source, graph_target)

    expected_cost_alpha = 2.

    inf = np.inf  # short alias to keep the matrices readable
    expected_C = np.array([
        [2., 1., 1., 1., inf, inf, inf],
        [1., 0., 0., inf, 1., inf, inf],
        [2., 1., 1., inf, inf, 1., inf],
        [0., 1., 1., inf, inf, inf, 1.],
        [1., inf, inf, 0., 0., 0., 0.],
        [inf, 1., inf, 0., 0., 0., 0.],
        [inf, inf, 1., 0., 0., 0., 0.],
    ])
    # Multiplying by 0.25 is exact in floating point, same as dividing by 4.
    expected_C_alpha = expected_C * 0.25

    expected_C_star_alpha = np.array([
        [0.5, 1., 0.25, 1., inf, inf, inf],
        [1.75, 0.75, 1.5, inf, 2.5, inf, inf],
        [1.25, 0.25, 1., inf, inf, 1.75, inf],
        [0.75, 0.25, 1., inf, inf, inf, 1.75],
        [1., inf, inf, 0., 0., 0., 0.],
        [inf, 1.75, inf, 0., 0., 0., 0.],
        [inf, inf, 1., 0., 0., 0., 0.],
    ])

    print(ged_alpha.C_star.base)

    actual_C = np.asarray(ged_alpha.C)
    actual_C_star = np.asarray(ged_alpha.C_star)
    assert np.array_equal(actual_C, expected_C_alpha)
    assert np.array_equal(actual_C_star, expected_C_star_alpha)
    assert cost_alpha == expected_cost_alpha
Ejemplo n.º 13
0
def ged():
    """Return a ``GED`` built from ``EditCostAIDS(1.1, 1.1, 0.1, 0.1, 'dirac')``.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    that is not visible here - confirm.
    """
    return GED(EditCostAIDS(1.1, 1.1, 0.1, 0.1, 'dirac'))