Example #1
def test_core_framework():
    """Random input test for the Core kernel Framework [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 4))

    base_kernel = (WeisfeilerLehman, dict(base_kernel=VertexHistogram))
    core_framework = CoreFramework(verbose=verbose,
                                   normalize=normalize,
                                   base_kernel=base_kernel)

    kernel = ["CORE", "WL"]
    gk = GraphKernel(kernel=kernel, verbose=verbose, normalize=normalize)
    try:
        core_framework.fit_transform(train)
        core_framework.transform(test)
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #2
def test_random_walk_labels_pd():
    """Random input test for the Labelled Random Walk kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(0.01, 12.0),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))

    gk = GraphKernel(
        kernel={
            "name": "RW",
            "with_labels": True
        },
        verbose=verbose,
        normalize=normalize,
    )

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #3
def test_pyramid_match_no_labels():
    """Random input test for the Pyramid Match kernel with no labels [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=None)

    pm_kernel = PyramidMatch(verbose=verbose,
                             normalize=normalize,
                             with_labels=False)
    gk = GraphKernel(kernel={
        "name": "PM",
        "with_labels": False
    },
                     verbose=verbose,
                     normalize=normalize)

    try:
        pm_kernel.fit_transform(train)
        pm_kernel.transform(test)
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #4
def test_multiscale_laplacian_fast_pd():
    """Random input test for the Fast Multiscale Laplacian kernel [n_jobs=-1/generic-wrapper]."""
    # Initialise kernel
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('na', 5))

    gk = GraphKernel(kernel={
        "name": "ML",
        "which": "fast"
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #5
def test_graphlet_sampling():
    """Random input test for the Graphlet Sampling Kernel [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))

    gs_kernel = GraphletSampling(verbose=verbose,
                                 normalize=normalize,
                                 sampling=dict(n_samples=50))
    gk = GraphKernel(kernel={
        "name": "GR",
        "sampling": {
            "n_samples": 50
        }
    },
                     verbose=verbose,
                     normalize=normalize)

    try:
        gs_kernel.fit_transform(train)
        gs_kernel.transform(test)
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #6
def test_weisfeiler_lehman_pd():
    """Random input test for the Weisfeiler Lehman kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))

    gk = GraphKernel(kernel="WL", verbose=verbose, normalize=normalize)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #7
def test_svm_theta_pd():
    """Random input test for the SVM-theta distance kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=None)

    gk = GraphKernel(kernel="svm_theta", verbose=verbose, normalize=normalize)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #8
def test_propagation_pd():
    """Random input test for the Propagation kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1e-5, 10),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 4))

    gk = GraphKernel(kernel="PR",
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1e-5, 10),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('na', 5))

    gk = GraphKernel(kernel={
        "name": "PR",
        "with_attributes": True
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #9
def cross_validation_with_and_without_manifold(X, y, n_neighbors, n_components, k):
    # Split indices according to K-fold with the given k
    kf = KFold(n_splits=k)

    # initialize scores lists
    scores = []
    scores2 = []
    for train_index, test_index in kf.split(X):
        kernel = GraphKernel(kernel={"name": "shortest_path", "with_labels": False}, normalize=True)

        # split train and test of K-fold
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Calculate the kernel matrix.
        K_train = kernel.fit_transform(X_train)
        K_test = kernel.transform(X_test)

        # Initialise an SVM and fit.
        clf = svm.SVC(kernel='precomputed', C=4)
        clf.fit(K_train, y_train)

        # Predict and test.
        y_pred = clf.predict(K_test)

        # Calculate accuracy of classification.
        acc = accuracy_score(y_test, y_pred)
        scores.append(acc)

        # Compute distance matrix
        D_train = compute_distance_matrix(K_train)
        D_test = compute_distance_matrix(K_test)

        # Initialize Isomap embedding object, embed train and test data
        embedding = manifold.Isomap(n_neighbors=n_neighbors,
                                    n_components=n_components,
                                    metric="precomputed")
        E_train = embedding.fit_transform(D_train)
        E_test = embedding.transform(D_test)

        # Initialise a second, linear SVM on the embedded data
        clf2 = svm.SVC(kernel='linear', C=4)
        clf2.fit(E_train, y_train)

        # Predict and test.
        y_pred = clf2.predict(E_test)

        # Calculate accuracy of classification.
        acc = accuracy_score(y_test, y_pred)
        scores2.append(acc)
    # Convert accuracies to percentages
    scores = [s * 100 for s in scores]
    scores2 = [s * 100 for s in scores2]
    return scores, scores2
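The compute_distance_matrix helper called above (and again in the Isomap examples below) is not shown in any of these snippets. A minimal sketch, assuming the kernels are built with normalize=True so that k(x, x) = 1 and the kernel-induced distance reduces to d(x, z)^2 = 2 - 2 k(x, z):

import numpy as np

def compute_distance_matrix(K):
    """Kernel-induced distance matrix, assuming a normalized kernel.

    In general d(x, z)^2 = k(x, x) + k(z, z) - 2 k(x, z); with normalize=True
    both self-similarities are 1, so d^2 = 2 - 2 k. The same formula applies
    to the rectangular matrix returned by kernel.transform(X_test).
    """
    return np.sqrt(np.maximum(2.0 - 2.0 * K, 0.0))  # clip round-off negatives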
Example #10
def test_shortest_path_pd():
    """Random input test for the Shortest Path kernel [n_jobs=-1 (for attributed)/decorator]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))

    gk = GraphKernel(kernel="SP", verbose=verbose, normalize=normalize)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('na', 5))

    gk = GraphKernel(kernel={
        "name": "SP",
        "as_attributes": True
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #11
def test_odd_sth():
    """Random input test for the ODD-STh kernel [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 4))

    odd_sth_kernel = OddSth(verbose=verbose, normalize=normalize)
    gk = GraphKernel(kernel="ODD", verbose=verbose, normalize=normalize)

    try:
        odd_sth_kernel.fit_transform(train)
        odd_sth_kernel.transform(test)
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #12
def test_edge_histogram():
    """Random input test for the Edge Histogram kernel [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('el', 4))

    eh_kernel = EdgeHistogram(verbose=verbose, normalize=normalize)
    gk = GraphKernel(kernel="EH", verbose=verbose, normalize=normalize)

    try:
        eh_kernel.fit_transform(train)
        eh_kernel.transform(test)
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #13
def getMethodSim(pairMethodGraph):
    sim = {}
    for filekey in pairMethodGraph.keys():
        _sim = {}
        file = pairMethodGraph[filekey]
        # Case where no file pair exists
        keytype = list(file.keys())
        if keytype[0] == "change":
            if file["change"] == "nomethod":
                # Interface file, no methods to match
                sim[filekey] = {"sim": "1.0"}
            elif file["change"] == "nomatch":
                # No methods matched, everything needs scanning
                sim[filekey] = {"sim": "0.0"}
            elif file["change"] == "addfile":
                # Added method or file
                sim[filekey] = {"sim": "2.0"}
            else:
                # Deleted file or method
                sim[filekey] = {"sim": "-1.0"}

        else:
            # Case where a file pair exists

            for keytupe in file.keys():
                # keytupe: the (good_1.0, good_1.1) method pair
                keytupe = tuple(keytupe)
                # If both sides of the method pair exist
                if keytupe[0] != "" and keytupe[1] != "":
                    # method holds the pair of function graphs: method[0] is base, method[1] is target
                    method = file[keytupe]
                    basegraph = method[0]
                    targetgraph = method[1]
                    adj1, node_label1, edge_label1 = getadjlist(basegraph)
                    adj2, node_label2, edge_label2 = getadjlist(targetgraph)
                    # If either graph has no nodes:
                    if adj2.shape[0] == 0 or adj1.shape[0] == 0:
                        _sim[keytupe] = [[1.0]]
                    # Both adjacency matrices are non-empty
                    else:
                        sp_kernal = GraphKernel(
                            kernel=["weisfeiler_lehman", "subtree_wl"],
                            normalize=True)
                        g1 = Graph(adj1, node_label1, edge_label1)
                        g2 = Graph(adj2, node_label2, edge_label2)
                        tp = sp_kernal.fit_transform([g1])
                        tsim = sp_kernal.transform([g2])
                        _sim[keytupe] = tsim.tolist()
                else:
                    # No method pair exists, so set the similarity to 0 directly
                    _sim[keytupe] = [[0.0]]
            sim[filekey] = _sim
    return sim
Example #14
def test_neighborhood_subgraph_pairwise_distance():
    """Random input test for the Neighborhood Subgraph Pairwise Distance kernel [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 5, 'el', 4))

    nspd_kernel = NeighborhoodSubgraphPairwiseDistance(verbose=verbose,
                                                       normalize=normalize)
    gk = GraphKernel(kernel="NSPD", verbose=verbose, normalize=normalize)

    try:
        nspd_kernel.fit_transform(train)
        nspd_kernel.transform(test)
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #15
def Csvm_SPK(X, y, Csvm_start, Csvm_end, k, fun=lambda x: x):
    Csvm_range = Csvm_end - Csvm_start + 1
    res = []
    x_points = []
    for c in range(Csvm_range):
        Csvm = fun(c+Csvm_start)
        # initialize scores list for this C value
        scores = []

        kf = KFold(n_splits=k)
        for train_index, test_index in kf.split(X):
            kernel = GraphKernel(kernel={"name": "shortest_path", "with_labels": False}, normalize=True)

            # split train and test of K-fold
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Calculate the kernel matrix.
            K_train = kernel.fit_transform(X_train)
            K_test = kernel.transform(X_test)

            # Initialise an SVM and fit.
            clf = svm.SVC(kernel='precomputed', C=Csvm)
            clf.fit(K_train, y_train)

            # Predict and test.
            y_pred = clf.predict(K_test)

            # Calculate accuracy of classification.
            acc = accuracy_score(y_test, y_pred)
            scores.append(acc)

        res.append(np.mean(scores))
        x_points.append(fun(c + Csvm_start))
        print("{0:.2%} done".format((c+1.0)/Csvm_range))

    pyplot.plot(x_points, res, 'ro')
    pyplot.title("%d - fold avg. accuracy of SVM over C without ML step" %(k))
    pyplot.xlabel('C')
    pyplot.ylabel('Avg. accuracy')
    pyplot.show()
Example #16
def getMethodSim(pairMethodGraph):
    sim = {}
    for filekey in pairMethodGraph.keys():
        file = pairMethodGraph[filekey]
        _sim = {}
        for keytupe in file.keys():
            keytupe = tuple(keytupe)
            method = file[keytupe]
            basegraph = method[0]
            targetgraph = method[1]
            adj1, node_label1, edge_label1 = getadjlist(basegraph)
            adj2, node_label2, edge_label2 = getadjlist(targetgraph)
            sp_kernal = GraphKernel(kernel={"name": "shortest_path"},
                                    normalize=True)
            g1 = Graph(adj1, node_label1, edge_label1)
            g2 = Graph(adj2, node_label2, edge_label2)
            tp = sp_kernal.fit_transform([g1])
            tsim = sp_kernal.transform([g2])
            _sim[keytupe] = tsim
        sim[filekey] = _sim
    return sim
Example #17
def run_wl_kernel(args, dataloader, model, edge_ratio):
    dataset = dataloader.dataset
    pos_graph_list = []
    neg_graph_list = []
    meta_test_edge_ratio = 1 - args.meta_val_edge_ratio - args.meta_train_edge_ratio
    i = 0
    for graph in dataset:
        print("Graph : %d" % (i))
        i += 1
        try:
            x, train_pos_edge_index = graph.x.to(args.dev), \
                graph.train_pos_edge_index.to(args.dev)
            data = graph
        except AttributeError:
            # The graph has not been split yet, so split it here
            data = model.split_edges(graph, val_ratio=args.meta_val_edge_ratio,
                                     test_ratio=meta_test_edge_ratio)
        nx_graph = create_masked_networkx_graph(data)
        neg_graph = erdos_renyi_graph(len(nx_graph), edge_ratio)
        pos_edge_list = list(nx_graph.edges())
        neg_edge_list = list(neg_graph.edges())
        pos_node_dict = {}
        neg_node_dict = {}
        ''' Pos Samples '''
        for node_id in nx_graph.nodes:
            nx_graph.nodes[node_id]['label'] = nx_graph.degree[node_id]
            pos_node_dict[node_id] = nx_graph.degree[node_id]
        pos_grakel_graph = [pos_edge_list, pos_node_dict]
        pos_graph_list.append(pos_grakel_graph)
        '''Neg Samples '''
        for node_id in nx_graph.nodes:
            nx_graph.nodes[node_id]['label'] = nx_graph.degree[node_id]
            neg_node_dict[node_id] = nx_graph.degree[node_id]
        neg_grakel_graph = [neg_edge_list, neg_node_dict]
        neg_graph_list.append(neg_grakel_graph)
    wl_kernel = GraphKernel(kernel=[{"name": "weisfeiler_lehman", "n_iter": 5},
                                    {"name": "subtree_wl"}], Nystroem=len(dataset))
    kernel_mat = wl_kernel.fit_transform(pos_graph_list)
    neg_kernel_mat = wl_kernel.transform(neg_graph_list)
    return kernel_mat, neg_kernel_mat
Example #18
    G, y = np.asarray(dataset_d.data), np.asarray(dataset_d.target)

    stats = {m: {"acc": list(), "time": list()} for m in Methods}

    kfold = KFold(n_splits=10, random_state=50, shuffle=True)

    for train_idx, test_idx in kfold.split(G, y):
        train_g, train_y = G[train_idx], y[train_idx]
        test_g, test_y = G[test_idx], y[test_idx]

        for i, k in enumerate(Methods):
            gk = GraphKernel(kernel=kernels[k], normalize=True)

            start = time.time()
            k_train = gk.fit_transform(train_g)
            k_test = gk.transform(test_g)
            end = time.time()

            clf = svm.SVC(kernel='precomputed')
            clf.fit(k_train, train_y)

            pred_y = clf.predict(k_test)

            stats[k]["acc"].append(accuracy_score(test_y, pred_y))
            stats[k]["time"].append(end - start)

    for m in Methods:
        print("kernel: ", m, "time: ", np.round(np.mean(stats[m]["time"]), 2),
              "~", np.round(np.std(stats[m]["time"]), 2), "acc: ",
              np.round(np.mean(stats[m]["acc"]), 2), "~",
              np.round(np.std(stats[m]["acc"]), 2))
Example #19
    graph = tfe_obj.generate_graph_from_text(
        text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)

    inp = nx.to_dict_of_lists(graph)
    x_val_title_graphs.append([inp])

x_test_title_graphs = list()
for text in dl_obj.x_test['title']:
    graph = tfe_obj.generate_graph_from_text(
        text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)

    inp = nx.to_dict_of_lists(graph)
    x_test_title_graphs.append([inp])

print(len(x_train_title_graphs))
print(len(x_val_title_graphs))

K_train = sp_kernel.fit_transform(x_train_title_graphs)
K_val = sp_kernel.transform(x_val_title_graphs)

# clf = SVC(kernel='precomputed')
clf = LogisticRegression()

clf.fit(K_train, y_train_one_hot)

y_pred = clf.predict(K_val)

from sklearn.metrics import accuracy_score

print("%2.2f %%" % (round(accuracy_score(y_val_one_hot, y_pred) * 100)))
Example #20
    for iter in range(3):
        print("Iter: ", iter)
        # Train-test split of graph data
        G_train_rw, G_test_rw, y_train_rw, y_test_rw = prepare_data(G_rw, y, random_state=iter)
        G_train_sm, G_test_sm, y_train_sm, y_test_sm = prepare_data(G_sm, y, random_state=iter)

        print("Data Set prepared")
        for (i, k) in enumerate(rows):
            print(k, end=" ")
            gk = GraphKernel(kernel=kernels[k], normalize=True)
            print("", end=".")

            # Calculate the kernel matrix for raw data
            start = time.time()
            K_train_rw = gk.fit_transform(G_train_rw)
            K_test_rw = gk.transform(G_test_rw)
            end = time.time()
            print("", end=".")

            # Initialise an SVM and fit.
            clf = svm.SVC(kernel='precomputed')
            clf.fit(K_train_rw, y_train_rw)
            print("", end=". ")

            # Predict and test.
            y_pred_rw = clf.predict(K_test_rw)
            print("Confusion Matrix: \n", confusion_matrix(y_test_rw, y_pred_rw))
            plot_confusion_matrix(y_test_rw, y_pred_rw, labels, title="Confusion Matrix Before Smoothing")

            # Calculate accuracy of classification.
            data_kernel_rw.append(
Example #21
    graph = tfe_obj.generate_graph_from_text(
        text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)

    inp = nx.to_dict_of_lists(graph)
    x_val_abstract_graphs.append([inp])

x_test_abstract_graphs = list()
for text in dl_obj.x_test['abstract']:
    graph = tfe_obj.generate_graph_from_text(
        text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)

    inp = nx.to_dict_of_lists(graph)
    x_test_abstract_graphs.append([inp])

K_train = sp_kernel.fit_transform(x_train_abstract_graphs)
K_val = sp_kernel.transform(x_val_abstract_graphs)
K_test = sp_kernel.transform(x_test_abstract_graphs)

######################################################################################################
#####################################################################################################
#####################################################################################################
x_train_static = np.concatenate(
    (x_train_citation_metrics.values, x_train_citations_emb, x_train_comm,
     x_train_authors_communities, K_train),
    axis=1)

x_val_static = np.concatenate(
    (x_val_citation_metrics.values, x_val_citations_emb, x_val_comm,
     x_val_authors_communities, K_val),
    axis=1)
Example #22
def spk_isomap(X, y, k, KNNstart, KNNend, Dstart, Dend, svmC):

    filename = "accuracy.txt"

    myfile = open(filename, 'a')

    # Add info to file
    myfile.write('SP Isomap accuracy: K = %d-%d, D = %d-%d, C = %d, K-fold = %d\n'
                 % (KNNstart, KNNend, Dstart, Dend, svmC, k))

    KNN = []
    KNNrange = KNNend - KNNstart + 1
    D = []
    Drange = Dend - Dstart + 1

    for knn in range(KNNrange):
        KNN.append(knn + KNNstart)

    for d in range(Drange):
        D.append(d + Dstart)


    kf = KFold(n_splits=k)

    Z = np.ndarray(shape=(len(D), len(KNN)))

    for knn in range(len(KNN)):
        for d in range(len(D)):
            # Reset scores for this (knn, d) cell so the mean below covers
            # only the current k folds
            scores = []

            for train_index, test_index in kf.split(X):

                kernel = GraphKernel(kernel={"name": "shortest_path", "with_labels": False}, normalize=True)

                # split train and test of K-fold
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                # Calculate the kernel matrix.
                K_train = kernel.fit_transform(X_train)
                K_test = kernel.transform(X_test)

                # Compute distance matrix
                D_train = compute_distance_matrix(K_train)
                D_test = compute_distance_matrix(K_test)

                # Initialize Isomap embedding object, embed train and test data
                embedding = manifold.Isomap(n_neighbors=KNN[knn], n_components=D[d], metric="precomputed")
                E_train = embedding.fit_transform(D_train)
                E_test = embedding.transform(D_test)

                # Initialise a second, linear SVM on the embedded data
                clf2 = svm.SVC(kernel='linear', C=svmC)
                clf2.fit(E_train, y_train)

                # Predict and test.
                y_pred = clf2.predict(E_test)

                # Append accuracy of classification.
                scores.append(accuracy_score(y_test, y_pred))

            val = np.mean(scores)
            Z[d][knn] = val
            myfile.write("%f " % (val))
            print("knn = ", KNN[knn], "d = ", D[d], " accuracy = ", Z[d][knn])
            print("{0:.2%} done".format((Drange*knn+d+1.0)/(Drange*KNNrange)))
            # print("{0:.2%} done".format((D*k+d + 1.0)/(D*KNN) ))
        myfile.write("\n")
    # Close the file
    myfile.close()
    return Z
Example #23
'''
K_test is the feature representation of the test set.
This illustrates one way to use a kernel, based on the idea of a "basis decomposition":
   Informally, the training set serves as the basis; each sample's coordinates with
   respect to the training set form that sample's feature vector,
   so every feature vector has length equal to the size of the training set.
   Once the feature vectors are obtained, an SVM performs the classification.
Internally, wl_kernel.fit_transform computes the kernel matrix in blocks and then merges them.

Given a dataset and a kernel, how can classification be done?
 i). Use the "basis decomposition" idea described above.
 ii). Use other methods such as KNN: once a kernel is defined, there is a similarity
      measure, and hence a distance.

Of course, a distance can be expressed through a kernel, but it can also be measured by a
neural network, which is where Metric Learning comes in!
'''
K_train = wl_kernel.fit_transform(X_train)
K_test = wl_kernel.transform(X_test)
# K_test = wl_kernel.fit(X_train).transform(X_test)

y = mutag.target
y_train, y_test = y[:split_point], y[split_point:]

from sklearn.svm import SVC

clf = SVC(kernel='precomputed')

clf.fit(K_train, y_train)
y_pred = clf.predict(K_test)

from sklearn.metrics import accuracy_score

print("%2.2f %%" % (round(accuracy_score(y_test, y_pred) * 100)))
Example #24
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='WL subtree kernel')
    parser.add_argument('--dataset',
                        type=str,
                        default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx',
        type=int,
        default=0,
        help='the index of fold in 10-fold validation. Should be less than 10.'
    )
    parser.add_argument('--iter',
                        type=int,
                        default=5,
                        help='Number of iteration for the WL')
    parser.add_argument('--normalize',
                        action="store_true",
                        help='normalize the feature or not')
    parser.add_argument('--filename', type=str, default="", help='output file')
    args = parser.parse_args()

    np.random.seed(0)
    graphs, num_classes = load_data(args.dataset, False)

    # 10-fold cross-validation; consider the particular fold.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

    # SVM hyper-parameter to tune
    C_list = [0.01, 0.1, 1, 10, 100]
    X_train, y_train = convert(train_graphs)
    X_test, y_test = convert(test_graphs)

    wl_kernel = GraphKernel(kernel=[{
        "name": "weisfeiler_lehman",
        "niter": args.iter
    }, {
        "name": "subtree_wl"
    }],
                            normalize=args.normalize)
    K_train = wl_kernel.fit_transform(X_train)
    K_test = wl_kernel.transform(X_test)

    train_acc = []
    test_acc = []
    for C in C_list:
        clf = SVC(kernel='precomputed', C=C)
        clf.fit(K_train, y_train)
        y_pred_test = clf.predict(K_test)
        y_pred_train = clf.predict(K_train)
        train_acc.append(accuracy_score(y_train, y_pred_train) * 100)
        test_acc.append(accuracy_score(y_test, y_pred_test) * 100)

    print(train_acc)
    print(test_acc)

    if not args.filename == "":
        np.savetxt(args.filename, np.array([train_acc, test_acc]).transpose())
Example #25
                                                    shuffle=True,
                                                    random_state=42)

randomWalkKernel = GraphKernel(kernel={
    "name": "random_walk",
    "with_labels": False
},
                               normalize=True)
graphletKernel = GraphKernel(kernel={"name": "graphlet_sampling"},
                             normalize=True)
shortestPathKernel = GraphKernel(kernel={"name": "shortest_path"},
                                 normalize=True)

# Calculate the kernel matrix for random Walk Kernel.
K_train = randomWalkKernel.fit_transform(X_train)
K_test = randomWalkKernel.transform(X_test)
'''nanel = 0

print (K_train[0][79-5])
print(len(K_train))
print(len(K_train[0]))
for i in K_train:
    for el in i:
        if np.isnan(el):
            nanel += 1
print("\n How many nan elements are there? Are there exactly len(K_train) elements? ",nanel, nanel == len(K_train))
'''
# There are 158 nan elements in K_train
# https://github.com/ysig/GraKeL/issues/6
# I transform each nan element into a number
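A minimal way to do that last step, assuming NumPy's nan_to_num defaults (nan becomes 0.0, infinities become large finite values) are acceptable for these kernel matrices:

import numpy as np

K_train = np.nan_to_num(K_train)  # replace nan entries with 0.0
K_test = np.nan_to_num(K_test)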
Example #26
kernel_names = ["lovasz_theta", "svm_theta"]
stats = {k: {"acc": list(), "time": list()} for k in kernel_names}

for i in range(niter):
    # Train-test split of graph data
    G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1)


    for kernel_name in kernel_names:
        start = time()
        # Initialise the kernel for the current kernel name.
        gk = GraphKernel(kernel={"name": kernel_name}, normalize=True)

        # Calculate the kernel matrix.
        K_train = gk.fit_transform(G_train)
        K_test = gk.transform(G_test)
        end = time()

        # Cross validation on C, variable
        acc = 0
        for c in C_grid:
            # Initialise an SVM and fit.
            clf = svm.SVC(kernel='precomputed', C=c)

            # Fit on the train Kernel
            clf.fit(K_train, y_train)

            # Predict and test.
            y_pred = clf.predict(K_test)

            # Calculate accuracy of classification.
Example #27
def test_subgraph_matching_pd():
    """Random input test for the Subgraph Matching kernel [n_jobs=-1/generic-wrapper]."""
    # node-label/edge-label
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3, 'el', 4))

    gk = GraphKernel(kernel={"name": "SM"},
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    # node-label/edge-attribute
    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('nl', 3, 'ea', 5))

    gk = GraphKernel(kernel={
        "name": "SM",
        "ke": np.dot
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    # node-attribute/edge-label
    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('na', 4, 'el', 3))

    gk = GraphKernel(kernel={
        "name": "SM",
        "kv": np.dot
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    # node-attribute/edge-attribute
    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('na', 4, 'ea', 6))

    gk = GraphKernel(kernel={
        "name": "SM",
        "kv": np.dot,
        "ke": np.dot
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #28
from Utils import *
from numpy import array
from grakel import graph_from_networkx
if __name__ == '__main__':
    low_version = r"F:\GraphSim\jsondata\V1.0"
    high_version = r"F:\GraphSim\jsondata\V1.1"
    base_file_list = []
    target_file_list = []
    pairfileList = []
    getfilePath(low_version, base_file_list)
    getfilePath(high_version, target_file_list)

    pairfileList = getpairFile(base_file_list, target_file_list)
    for pair in pairfileList:
        basefile = pair[0]
        targetfile = pair[1]
        g1 = ParseFile(basefile)
        g2 = ParseFile(targetfile)
        # basefileGraph and targetfileGraph are the graphs whose nodes will be compared
        _basefileGraph = g1.connectFile()
        _targetfileGraph = g2.connectFile()
        adj1, node_label1, edge_label1 = getadjlist(_basefileGraph)
        adj2, node_label2, edge_label2 = getadjlist(_targetfileGraph)
        sp_kernal = GraphKernel(kernel={"name": "shortest_path"},
                                normalize=True)
        g1 = Graph(adj1, node_label1, edge_label1)
        g2 = Graph(adj2, node_label2, edge_label2)
        tp = sp_kernal.fit_transform([g1])
        sim = sp_kernal.transform([g2])
    print("kernal_Done!")
Example #29
k = 10
kf = KFold(n_splits=k)

# initialize scores lists
scores1 = []
scores2 = []

for train_index, test_index in kf.split(X):

    # split train and test of K-fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit and transform train and test with the graph kernel
    K_train = spk.fit_transform(X_train)
    K_test = spk.transform(X_test)

    # Initialize and fit a classifier on the non-embedded (kernel) features
    clf1 = svm.SVC(kernel='linear', C=1)
    clf1.fit(K_train, y_train)

    # make prediction and calculate accuracy
    y_pred = clf1.predict(K_test)
    acc = accuracy_score(y_test, y_pred)
    scores1.append(acc)
    '''
    D_train = compute_distance_matrix(K_train)
    D_test = compute_test_distance_matrix(K_train, K_test)
    embedding = manifold.Isomap(n_neighbors=5, n_components=10, metric="precomputed")
    E_train = embedding.fit_transform(D_train)
    E_test = embedding.fit(D_test)  # does not exist yet
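The commented-out block above also calls a compute_test_distance_matrix helper that never appears in these snippets. A minimal sketch, assuming a normalized kernel (for an unnormalized kernel, K_train would supply the training self-similarities, but the test self-similarities are not recoverable from K_test alone):

import numpy as np

def compute_test_distance_matrix(K_train, K_test):
    # With a normalized kernel every self-similarity is 1, so the
    # (n_test, n_train) distances reduce to d^2 = 2 - 2 k, exactly as in
    # the square training case; K_train is kept only for signature parity.
    return np.sqrt(np.maximum(2.0 - 2.0 * K_test, 0.0))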
Example #30
if __name__ == '__main__':

    H2O = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], {0: 'O', 1: 'H', 2: 'H'})
    H3O = Graph([[0, 1, 1, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]], {
        0: 'O',
        1: 'H',
        2: 'H',
        3: 'H'
    })
    H2Od = dict()
    H2Od[0] = Graph({'a': {'b': 1., 'c': 1.}, 'b': {'a': 1}, 'c': {'a': 1}})
    H2Od[1] = Graph({
        ('a', 'b'): 1.,
        ('a', 'c'): 1.,
        ('c', 'a'): 1.,
        ('b', 'a'): 1.
    })
    H2Ot = array([[0, 1, 1], [1, 0, 0], [1, 0, 0]])
    H2O_labels = {0: 'O', 1: 'H', 2: 'H'}
    H2O_edge_labels = {
        (0, 1): 'pcb',
        (1, 0): 'pcb',
        (0, 2): 'pcb',
        (2, 0): 'pcb'
    }
    adj_graph = Graph(H2Ot, H2O_labels, H2O_edge_labels, "all")
    #==============================================================================
    sp_kernal = GraphKernel(kernel={"name": "shortest_path"}, normalize=True)
    kernal_m = sp_kernal.fit_transform([adj_graph])
    Sim = sp_kernal.transform([H3O])
    print("the kernal_m is :{m}\n the sim is :{s}".format(m=kernal_m, s=Sim))