Ejemplo n.º 1
0
def test_core_framework():
    """Check that fitted Core Framework kernels can be pickled.

    Exercises the ``CoreFramework`` estimator directly and the
    ``GraphKernel`` generic wrapper configured by kernel names.
    """
    fit_graphs, _ = generate_dataset(
        n_graphs=100,
        n_graphs_test=40,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        features=('nl', 4),
        random_state=rs)

    # Options shared by both estimators under test.
    shared = dict(verbose=verbose, normalize=normalize)

    # Direct estimator: core framework over Weisfeiler-Lehman, which in turn
    # is parametrised with the vertex histogram kernel.
    wl_spec = (WeisfeilerLehman, dict(base_graph_kernel=VertexHistogram))
    direct = CoreFramework(base_graph_kernel=wl_spec, **shared)

    # Generic wrapper: the kernel chain is given as a list of name dicts.
    chain = [{"name": kernel_name}
             for kernel_name in ("core_framework",
                                 "weisfeiler_lehman",
                                 "vertex_histogram")]
    wrapped = GraphKernel(kernel=chain, **shared)

    # Fit and check each estimator in turn (direct first, wrapper second).
    for estimator in (direct, wrapped):
        estimator.fit(fit_graphs)
        assert is_picklable(estimator)
Ejemplo n.º 2
0
def test_graphlet_sampling():
    """Check that fitted Graphlet Sampling kernels can be pickled.

    Exercises the ``GraphletSampling`` estimator directly and the
    ``GraphKernel`` generic wrapper, both sampling 50 graphlets.
    """
    fit_graphs, _ = generate_dataset(
        n_graphs=100,
        n_graphs_test=40,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        features=('nl', 3),
        random_state=rs)

    # Options shared by both estimators under test.
    shared = dict(verbose=verbose, normalize=normalize)

    direct = GraphletSampling(sampling=dict(n_samples=50), **shared)

    # The wrapper takes the kernel name and its sampling options in one dict.
    wrapper_spec = {"name": "graphlet_sampling", "sampling": {"n_samples": 50}}
    wrapped = GraphKernel(kernel=wrapper_spec, **shared)

    # Fit and check each estimator in turn (direct first, wrapper second).
    for estimator in (direct, wrapped):
        estimator.fit(fit_graphs)
        assert is_picklable(estimator)
Ejemplo n.º 3
0
def test_pyramid_match():
    """Check that fitted Pyramid Match kernels can be pickled.

    Exercises the ``PyramidMatch`` estimator directly and the
    ``GraphKernel`` generic wrapper selected by name.
    """
    fit_graphs, _ = generate_dataset(
        n_graphs=100,
        n_graphs_test=40,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        features=('nl', 3),
        random_state=rs)

    # Options shared by both estimators under test.
    shared = dict(verbose=verbose, normalize=normalize)

    direct = PyramidMatch(**shared)
    wrapped = GraphKernel(kernel={"name": "pyramid_match"}, **shared)

    # Fit and check each estimator in turn (direct first, wrapper second).
    for estimator in (direct, wrapped):
        estimator.fit(fit_graphs)
        assert is_picklable(estimator)
Ejemplo n.º 4
0
def test_edge_histogram():
    """Check that fitted Edge Histogram kernels can be pickled.

    Exercises the ``EdgeHistogram`` estimator directly and the
    ``GraphKernel`` generic wrapper selected by name. The dataset is
    generated with the ``('el', 4)`` feature specification.
    """
    fit_graphs, _ = generate_dataset(
        n_graphs=100,
        n_graphs_test=40,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        features=('el', 4),
        random_state=rs)

    # Options shared by both estimators under test.
    shared = dict(verbose=verbose, normalize=normalize)

    direct = EdgeHistogram(**shared)
    wrapped = GraphKernel(kernel={"name": "edge_histogram"}, **shared)

    # Fit and check each estimator in turn (direct first, wrapper second).
    for estimator in (direct, wrapped):
        estimator.fit(fit_graphs)
        assert is_picklable(estimator)
Ejemplo n.º 5
0
def test_odd_sth():
    """Check that fitted ODD-STh kernels can be pickled.

    Exercises the ``OddSth`` estimator directly and the ``GraphKernel``
    generic wrapper selected by name.
    """
    fit_graphs, _ = generate_dataset(
        n_graphs=100,
        n_graphs_test=40,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        features=('nl', 4),
        random_state=rs)

    # Options shared by both estimators under test.
    shared = dict(verbose=verbose, normalize=normalize)

    direct = OddSth(**shared)
    wrapped = GraphKernel(kernel={"name": "odd_sth"}, **shared)

    # Fit and check each estimator in turn (direct first, wrapper second).
    for estimator in (direct, wrapped):
        estimator.fit(fit_graphs)
        assert is_picklable(estimator)
Ejemplo n.º 6
0
def test_neighborhood_subgraph_pairwise_distance():
    """Check that fitted NSPD kernels can be pickled.

    Exercises the ``NeighborhoodSubgraphPairwiseDistance`` estimator
    directly and the ``GraphKernel`` generic wrapper selected by name. The
    dataset is generated with the ``('nl', 5, 'el', 4)`` feature
    specification and a vertex range of ``(5, 10)``.
    """
    fit_graphs, _ = generate_dataset(
        n_graphs=100,
        n_graphs_test=40,
        r_vertices=(5, 10),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        features=('nl', 5, 'el', 4),
        random_state=rs)

    # Options shared by both estimators under test.
    shared = dict(verbose=verbose, normalize=normalize)

    direct = NeighborhoodSubgraphPairwiseDistance(**shared)
    wrapped = GraphKernel(
        kernel={"name": "neighborhood_subgraph_pairwise_distance"}, **shared)

    # Fit and check each estimator in turn (direct first, wrapper second).
    for estimator in (direct, wrapped):
        estimator.fit(fit_graphs)
        assert is_picklable(estimator)
Ejemplo n.º 7
0
def untangle(graph,
             k_hop,
             with_data: bool = True,
             with_call: bool = True,
             with_name: bool = True):
    """Cluster the sub-graphs of ``graph`` and tag its coloured nodes.

    The graph is split with ``deltaPDG_to_list_of_Graphs`` into seeds and
    one sub-graph per entry of ``list_of_graphs``. Every pair of sub-graphs
    is compared with a normalised Weisfeiler-Lehman subtree kernel, the
    similarities are turned into distances (``1 - similarity``) and the
    sub-graphs are grouped by complete-linkage agglomerative clustering with
    a distance threshold of 0.5.

    Args:
        graph: Graph to process; it must support ``nodes(data=True)`` and
            ``add_node`` (presumably a networkx graph -- confirm with caller).
        k_hop: Forwarded as ``khop_k`` to ``deltaPDG_to_list_of_Graphs``.
        with_data: Forwarded to ``graph_to_grakel``.
        with_call: Forwarded to ``graph_to_grakel``.
        with_name: Forwarded to ``graph_to_grakel``.

    Returns:
        The same ``graph`` object, modified in place: every node carrying a
        ``'color'`` attribute has its ``'label'`` prefixed with
        ``'<cluster>: '`` when the node is a seed and clustering ran, or
        with ``'-1: '`` otherwise.
    """
    seeds, list_of_graphs = deltaPDG_to_list_of_Graphs(graph, khop_k=k_hop)
    wl_subtree = GraphKernel(kernel=[{
        "name": "weisfeiler_lehman",
        "n_iter": 10
    }, {
        "name": "subtree_wl"
    }],
                             normalize=True)
    if len(list_of_graphs) > 0:
        # Condensed pairwise distance vector. itertools.combinations yields
        # pairs in the order (0, 1), (0, 2), ..., (n-2, n-1), which is the
        # layout squareform() expects, so distances are appended directly
        # instead of going through an index-keyed dict and a thread pool
        # (the former pool size, min(cpu_count - 1, 1), was 0 on single-core
        # machines and made ThreadPool raise).
        distances = []
        for g1, g2 in itertools.combinations(list_of_graphs, 2):
            # The graph has to be converted to {Graph, Node_Labels, Edge_Labels}
            wl_subtree.fit(
                [graph_to_grakel(g1, with_data, with_call, with_name)])
            similarity = wl_subtree.transform(
                [graph_to_grakel(g2, with_data, with_call, with_name)])[0][0]
            distances.append(1 - similarity)  # affinity is distance! so (1 - sim)
        affinity = np.asarray(distances, dtype=float)

        # NOTE(review): recent scikit-learn releases renamed the `affinity`
        # argument to `metric` -- confirm against the pinned version.
        cluster = AgglomerativeClustering(n_clusters=None,
                                          distance_threshold=0.5,
                                          affinity='precomputed',
                                          linkage='complete')
        if len(affinity) == 0:
            # Exactly one sub-graph: it forms a single cluster.
            labels = np.asarray([0])
        elif len(affinity) == 1:
            # Exactly two sub-graphs: apply the 0.5 threshold by hand.
            if affinity[0] <= 0.5:
                labels = np.asarray([0, 0])
            else:
                labels = np.asarray([0, 1])
        else:
            labels = cluster.fit_predict(
                scipy.spatial.distance.squareform(affinity))
    else:
        labels = None

    for node, data in graph.nodes(data=True):
        if 'color' not in data:
            continue

        i = seeds.index(node) if node in seeds else -1
        if labels is not None and i != -1:
            data['label'] = '%d: ' % labels[i] + data['label']
        else:
            # Not a seed, or nothing was clustered.
            data['label'] = '-1: ' + data['label']
        graph.add_node(node, **data)

    return graph
Ejemplo n.º 8
0
    def worker(work):
        """Cluster, time and evaluate every graph file listed in ``work``.

        For each path the ``.dot`` graph is loaded, filtered with
        ``remove_all_except`` and, unless empty, clustered ``times`` times
        with a normalised Weisfeiler-Lehman subtree kernel followed by
        complete-linkage agglomerative clustering (distance threshold 0.5).
        The annotated graph is written next to the input file and one CSV
        row (chain, q, accuracy, overlap, mean time per run) is appended to
        the results file.

        ``edges_kept``, ``times``, ``k_hop``, ``with_data``, ``with_call``,
        ``with_name``, ``repository_name`` and ``suffix`` are read from the
        enclosing scope, which is not visible here.

        Args:
            work: Iterable of graph file paths. The parent directory name is
                parsed as the integer ``q`` and the grandparent directory
                name is used as the chain identifier.
        """
        for graph_location in tqdm(work, leave=False):
            chain = os.path.basename(
                os.path.dirname(os.path.dirname(graph_location)))
            q = int(os.path.basename(os.path.dirname(graph_location)))
            graph = obj_dict_to_networkx(read_graph_from_dot(graph_location))
            graph = remove_all_except(graph, edges_kept)

            if len(graph.nodes) == 0:
                continue

            t0 = time.perf_counter()
            for _ in range(times):
                seeds, list_of_graphs = deltaPDG_to_list_of_Graphs(
                    graph, khop_k=k_hop)
                wl_subtree = GraphKernel(kernel=[{
                    "name": "weisfeiler_lehman",
                    "n_iter": 10
                }, {
                    "name": "subtree_wl"
                }],
                                         normalize=True)
                if len(list_of_graphs) > 0:
                    # Condensed pairwise distance vector. The pair order of
                    # itertools.combinations is the layout squareform()
                    # expects, so distances are appended directly instead of
                    # going through an index-keyed dict and a thread pool
                    # (the former pool size, min(cpu_count - 1, 1), was 0 on
                    # single-core machines and made ThreadPool raise).
                    distances = []
                    for g1, g2 in itertools.combinations(list_of_graphs, 2):
                        # The graph has to be converted to {Graph, Node_Labels, Edge_Labels}
                        wl_subtree.fit([
                            graph_to_grakel(g1, with_data, with_call,
                                            with_name)
                        ])
                        similarity = wl_subtree.transform([
                            graph_to_grakel(g2, with_data, with_call,
                                            with_name)
                        ])[0][0]
                        # affinity is distance! so (1 - sim)
                        distances.append(1 - similarity)
                    affinity = np.asarray(distances, dtype=float)

                    # NOTE(review): recent scikit-learn releases renamed the
                    # `affinity` argument to `metric` -- confirm against the
                    # pinned version.
                    cluster = AgglomerativeClustering(n_clusters=None,
                                                      distance_threshold=0.5,
                                                      affinity='precomputed',
                                                      linkage='complete')
                    if len(affinity) == 0:
                        # Exactly one sub-graph: it forms a single cluster.
                        labels = np.asarray([0])
                    elif len(affinity) == 1:
                        # Exactly two sub-graphs: apply the threshold by hand.
                        if affinity[0] <= 0.5:
                            labels = np.asarray([0, 0])
                        else:
                            labels = np.asarray([0, 1])
                    else:
                        labels = cluster.fit_predict(
                            scipy.spatial.distance.squareform(affinity))
                else:
                    labels = None
            t1 = time.perf_counter()
            # Mean wall-clock time of one clustering run.
            time_ = (t1 - t0) / times

            truth = list()
            label = list()
            for node, data in graph.nodes(data=True):
                # Only coloured nodes with a ground-truth community count.
                if 'color' in data and 'community' in data:
                    truth.append(int(data['community']))
                    i = seeds.index(node) if node in seeds else -1

                    if labels is not None and i != -1:
                        data['label'] = '%d: ' % labels[i] + data['label']
                        label.append(labels[i])
                    else:
                        # Not a seed, or nothing was clustered.
                        data['label'] = '-1: ' + data['label']
                        label.append(-1)
                    graph.add_node(node, **data)

            # graph_location[:-4] drops the last four characters of the path
            # (presumably the '.dot' extension -- confirm with caller).
            nx.drawing.nx_pydot.write_dot(
                graph, graph_location[:-4] + '_output_wl_%d.dot' % k_hop)

            truth = np.asarray(truth)
            label = np.asarray(label)
            # Nodes labelled -1 are excluded from the evaluation.
            acc, overlap = evaluate(truth[label > -1],
                                    label[label > -1],
                                    q=1 if len(label) == 0 else np.max(label) +
                                    1)
            with open(
                    './out/%s/wl_%s_%d_results_%s.csv' %
                (repository_name, edges_kept, k_hop, suffix), 'a') as f:
                f.write(chain + ',' + str(q) + ',' + str(acc) + ',' +
                        str(overlap) + ',' + str(time_) + '\n')