def test_core_framework():
    """Random input test for the Core kernel Framework [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 4))
    # Exercise both the class-based API and the generic GraphKernel wrapper.
    wl_base = (WeisfeilerLehman, dict(base_kernel=VertexHistogram))
    estimators = (
        CoreFramework(verbose=verbose, normalize=normalize, base_kernel=wl_base),
        GraphKernel(kernel=["CORE", "WL"], verbose=verbose, normalize=normalize),
    )
    try:
        for estimator in estimators:
            estimator.fit_transform(train)
            estimator.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def test_random_walk_labels_pd():
    """Random input test for the Labelled Random Walk kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(0.01, 12.0),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))
    # BUGFIX: the docstring advertises n_jobs=-1 but it was never passed, so
    # the parallel code path was not exercised; pass it like the sibling
    # *_pd tests (e.g. the Multiscale Laplacian / Propagation tests) do.
    gk = GraphKernel(
        kernel={
            "name": "RW",
            "with_labels": True
        },
        verbose=verbose,
        normalize=normalize,
        n_jobs=-1,
    )
    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def test_multiscale_laplacian_fast_pd():
    """Random input test for the Fast Multiscale Laplacian kernel [n_jobs=-1/generic-wrapper]."""
    # Initialise kernel input: attributed graphs ('na' features).
    train, test = generate_dataset(n_graphs=100, r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=40,
                                   random_state=rs, features=('na', 5))
    params = {"name": "ML", "which": "fast"}
    gk = GraphKernel(kernel=params, verbose=verbose, normalize=normalize,
                     n_jobs=-1)
    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def test_pyramid_match_no_labels():
    """Random input test for the Pyramid Match kernel with no labels [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100, r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=40,
                                   random_state=rs, features=None)
    # Exercise both the dedicated estimator and the generic wrapper.
    estimators = (
        PyramidMatch(verbose=verbose, normalize=normalize, with_labels=False),
        GraphKernel(kernel={"name": "PM", "with_labels": False},
                    verbose=verbose, normalize=normalize),
    )
    try:
        for est in estimators:
            est.fit_transform(train)
            est.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def test_graphlet_sampling():
    """Random input test for the Graphlet Sampling Kernel [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100, r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=40,
                                   random_state=rs, features=('nl', 3))
    # Exercise both the dedicated estimator and the generic wrapper.
    estimators = (
        GraphletSampling(verbose=verbose, normalize=normalize,
                         sampling=dict(n_samples=50)),
        GraphKernel(kernel={"name": "GR", "sampling": {"n_samples": 50}},
                    verbose=verbose, normalize=normalize),
    )
    try:
        for est in estimators:
            est.fit_transform(train)
            est.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def _compute_kernel_matrix_grakel(self, event_graphs, kernel_params):
    """Generic wrapper for GraKeL's graph kernel interface.

    Builds a ``GraphKernel`` configured by ``kernel_params``, fits it on
    ``event_graphs`` and returns the resulting kernel matrix.
    """
    grakel_kernel = GraphKernel(kernel_params)
    return grakel_kernel.fit_transform(event_graphs)
def test_weisfeiler_lehman_pd():
    """Random input test for the Weisfeiler Lehman kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))
    # BUGFIX: the docstring advertises n_jobs=-1 but it was never passed;
    # pass it so the parallel path is exercised, as in the other *_pd tests.
    gk = GraphKernel(kernel="WL", verbose=verbose, normalize=normalize,
                     n_jobs=-1)
    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def test_svm_theta_pd():
    """Random input test for the SVM-theta distance kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=None)
    # BUGFIX: the docstring advertises n_jobs=-1 but it was never passed;
    # pass it so the parallel path is exercised, as in the other *_pd tests.
    gk = GraphKernel(kernel="svm_theta", verbose=verbose, normalize=normalize,
                     n_jobs=-1)
    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def test_propagation_pd():
    """Random input test for the Propagation kernel [n_jobs=-1/generic-wrapper]."""
    # First pass: discrete node labels; second pass: continuous node
    # attributes via the with_attributes variant.
    cases = [
        (('nl', 4), "PR"),
        (('na', 5), {"name": "PR", "with_attributes": True}),
    ]
    for features, kernel in cases:
        train, test = generate_dataset(n_graphs=100,
                                       r_vertices=(10, 20),
                                       r_connectivity=(0.4, 0.8),
                                       r_weight_edges=(float("1e-5"), 10),
                                       n_graphs_test=40,
                                       random_state=rs,
                                       features=features)
        gk = GraphKernel(kernel=kernel, verbose=verbose, normalize=normalize,
                         n_jobs=-1)
        try:
            gk.fit_transform(train)
            gk.transform(test)
            assert True
        except Exception as exception:
            assert False, exception
def cross_validation_with_and_without_manifold(X, y, n_neighbors, n_components, k):
    """K-fold comparison of an SVM on the raw shortest-path kernel versus an
    SVM on an Isomap embedding of the kernel-induced distance matrix.

    Returns (scores, scores2): per-fold accuracy percentages without and
    with the manifold-embedding step, respectively.
    """
    # Split indexes according to Kfold with the given k
    kf = KFold(n_splits=k)
    scores = []
    scores2 = []
    for train_index, test_index in kf.split(X):
        kernel = GraphKernel(kernel={"name": "shortest_path",
                                     "with_labels": False},
                             normalize=True)
        # split train and test of K-fold
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Calculate the kernel matrix.
        K_train = kernel.fit_transform(X_train)
        K_test = kernel.transform(X_test)
        # Initialise an SVM on the precomputed kernel and fit.
        clf = svm.SVC(kernel='precomputed', C=4)
        clf.fit(K_train, y_train)
        # Predict, test and record accuracy.
        scores.append(accuracy_score(y_test, clf.predict(K_test)))
        # Compute distance matrices from the kernel matrices.
        D_train = compute_distance_matrix(K_train)
        D_test = compute_distance_matrix(K_test)
        # BUGFIX: pass the Isomap arguments by keyword — recent scikit-learn
        # releases made these parameters keyword-only, so the positional call
        # breaks (and keywords are unambiguous in any version).
        embedding = manifold.Isomap(n_neighbors=n_neighbors,
                                    n_components=n_components,
                                    metric="precomputed")
        E_train = embedding.fit_transform(D_train)
        E_test = embedding.transform(D_test)
        # Second SVM on the embedded coordinates.
        clf2 = svm.SVC(kernel='linear', C=4)
        clf2.fit(E_train, y_train)
        scores2.append(accuracy_score(y_test, clf2.predict(E_test)))
    # Report accuracies as percentages.
    scores = [s * 100 for s in scores]
    scores2 = [s * 100 for s in scores2]
    return scores, scores2
def test_shortest_path_pd():
    """Random input test for the Shortest Path kernel [n_jobs=-1 (for attributed)/decorator]."""
    def _smoke(gk, train, test):
        # Fit/transform must run without raising.
        try:
            gk.fit_transform(train)
            gk.transform(test)
            assert True
        except Exception as exception:
            assert False, exception

    # Labeled variant, serial.
    train, test = generate_dataset(n_graphs=100, r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=40,
                                   random_state=rs, features=('nl', 3))
    _smoke(GraphKernel(kernel="SP", verbose=verbose, normalize=normalize),
           train, test)
    # Attributed variant, parallel (n_jobs=-1) on a smaller dataset.
    train, test = generate_dataset(n_graphs=50, r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=20,
                                   random_state=rs, features=('na', 5))
    _smoke(GraphKernel(kernel={"name": "SP", "as_attributes": True},
                       verbose=verbose, normalize=normalize, n_jobs=-1),
           train, test)
def test_odd_sth():
    """Random input test for the ODD-STh kernel [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100, r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=40,
                                   random_state=rs, features=('nl', 4))
    # Exercise both the dedicated estimator and the generic wrapper.
    estimators = (
        OddSth(verbose=verbose, normalize=normalize),
        GraphKernel(kernel="ODD", verbose=verbose, normalize=normalize),
    )
    try:
        for est in estimators:
            est.fit_transform(train)
            est.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def test_edge_histogram():
    """Random input test for the Edge Histogram kernel [+ generic-wrapper]."""
    # Edge-labelled graphs ('el' features) are required by this kernel.
    train, test = generate_dataset(n_graphs=100, r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=40,
                                   random_state=rs, features=('el', 4))
    # Exercise both the dedicated estimator and the generic wrapper.
    estimators = (
        EdgeHistogram(verbose=verbose, normalize=normalize),
        GraphKernel(kernel="EH", verbose=verbose, normalize=normalize),
    )
    try:
        for est in estimators:
            est.fit_transform(train)
            est.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def getMethodSim(pairMethodGraph):
    """Compute per-method-pair graph similarities for every file pair.

    ``pairMethodGraph`` maps a file-pair key either to a change marker
    (``{"change": ...}``) or to a dict of (base, target) method-pair keys
    mapped to their two method graphs.  For marker entries a sentinel
    similarity string is recorded ("1.0" interface / no methods, "0.0" no
    match, "2.0" added, "-1.0" deleted); otherwise each method pair gets a
    1x1 similarity matrix from a Weisfeiler-Lehman subtree kernel.
    """
    sim = {}
    for filekey in pairMethodGraph.keys():
        _sim = {}
        file = pairMethodGraph[filekey]
        # Case 1: no real file pair exists — the entry only carries a marker.
        keytype = [key for key in file.keys()]
        if keytype[0] == "change":
            if file["change"] == "nomethod":
                # Interface file: there are no methods to match.
                sim[filekey] = {"sim": "1.0"}
            elif file["change"] == "nomatch":
                # No method matched; everything needs to be scanned.
                sim[filekey] = {"sim": "0.0"}
            elif file["change"] == "addfile":
                # A method or a whole file was added.
                sim[filekey] = {"sim": "2.0"}
            else:
                # A file or a method was deleted.
                sim[filekey] = {"sim": "-1.0"}
        else:
            # Case 2: the file pair exists — compare each matched method pair.
            for keytupe in file.keys():
                # keytupe: the (good_1.0, good_1.1) method-pair key.
                keytupe = tuple(keytupe)
                # Both names non-empty: a genuine method pair.
                if keytupe[0] != "" and keytupe[1] != "":
                    # method holds the two graphs:
                    # method[0] = base, method[1] = target.
                    method = file[keytupe]
                    basegraph = method[0]
                    targetgraph = method[1]
                    adj1, node_label1, edge_label1 = getadjlist(basegraph)
                    adj2, node_label2, edge_label2 = getadjlist(targetgraph)
                    # If either graph has no nodes, treat as fully similar.
                    if adj2.shape[0] == 0 or adj1.shape[0] == 0:
                        _sim[keytupe] = [[1.0]]
                    # Both adjacency matrices are non-empty.
                    else:
                        sp_kernal = GraphKernel(
                            kernel=["weisfeiler_lehman", "subtree_wl"],
                            normalize=True)
                        g1 = Graph(adj1, node_label1, edge_label1)
                        g2 = Graph(adj2, node_label2, edge_label2)
                        tp = sp_kernal.fit_transform([g1])
                        tsim = sp_kernal.transform([g2])
                        _sim[keytupe] = tsim.tolist()
                else:
                    # No counterpart method: similarity is defined as 0.
                    _sim[keytupe] = [[0.0]]
            sim[filekey] = _sim
    return sim
def test_neighborhood_subgraph_pairwise_distance():
    """Random input test for the Neighborhood Subgraph Pairwise Distance kernel [+ generic-wrapper]."""
    # Graphs carry both node labels and edge labels for this kernel.
    train, test = generate_dataset(n_graphs=100, r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1), n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 5, 'el', 4))
    # Exercise both the dedicated estimator and the generic wrapper.
    estimators = (
        NeighborhoodSubgraphPairwiseDistance(verbose=verbose,
                                             normalize=normalize),
        GraphKernel(kernel="NSPD", verbose=verbose, normalize=normalize),
    )
    try:
        for est in estimators:
            est.fit_transform(train)
            est.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
def Csvm_SPK(X, y, Csvm_start, Csvm_end, k, fun=lambda x: x):
    """Plot the k-fold mean SVM accuracy on the shortest-path kernel for each
    C value fun(Csvm_start), ..., fun(Csvm_end) (no manifold-learning step).
    """
    Csvm_range = Csvm_end - Csvm_start + 1
    res = []
    x_points = []
    for c in range(Csvm_range):
        Csvm = fun(c + Csvm_start)
        # Fresh scores list and splitter for this C value.
        scores = []
        kf = KFold(n_splits=k)
        for tr_idx, te_idx in kf.split(X):
            spk = GraphKernel(kernel={"name": "shortest_path",
                                      "with_labels": False},
                              normalize=True)
            # Train/test split of this fold.
            X_train, X_test = X[tr_idx], X[te_idx]
            y_train, y_test = y[tr_idx], y[te_idx]
            # Precomputed kernel matrices for the SVM.
            K_train = spk.fit_transform(X_train)
            K_test = spk.transform(X_test)
            clf = svm.SVC(kernel='precomputed', C=Csvm)
            clf.fit(K_train, y_train)
            scores.append(accuracy_score(y_test, clf.predict(K_test)))
        res.append(np.mean(scores))
        x_points.append(fun(c + Csvm_start))
        print("{0:.2%} done".format((c + 1.0) / Csvm_range))
    pyplot.plot(x_points, res, 'ro')
    pyplot.title("%d - fold avg. accuracy of SVM over C without ML step" % (k))
    pyplot.xlabel('C')
    pyplot.ylabel('Avg. accuracy')
    pyplot.show()
def getMethodSim(pairMethodGraph):
    """Compute shortest-path-kernel similarity for every method pair of every
    file pair.

    Returns a dict mapping each file key to a dict of
    method-pair key -> 1x1 similarity matrix.
    """
    sim = {}
    for filekey in pairMethodGraph.keys():
        file = pairMethodGraph[filekey]
        _sim = {}
        for keytupe in file.keys():
            keytupe = tuple(keytupe)
            # method holds the two graphs: method[0] = base, method[1] = target.
            method = file[keytupe]
            basegraph = method[0]
            targetgraph = method[1]
            adj1, node_label1, edge_label1 = getadjlist(basegraph)
            adj2, node_label2, edge_label2 = getadjlist(targetgraph)
            sp_kernal = GraphKernel(kernel={"name": "shortest_path"},
                                    normalize=True)
            g1 = Graph(adj1, node_label1, edge_label1)
            g2 = Graph(adj2, node_label2, edge_label2)
            tp = sp_kernal.fit_transform([g1])
            # BUGFIX: the transform result used to be assigned to the outer
            # ``sim`` variable, clobbering the result dict; store it in the
            # per-file dict instead.
            _sim[keytupe] = sp_kernal.transform([g2])
        # BUGFIX: key the result by the hashable file key, not by the
        # (unhashable) per-file dict ``file``.
        sim[filekey] = _sim
    # BUGFIX: the original built the result but never returned it.
    return sim
def run_wl_kernel(args, dataloader, model, edge_ratio):
    """Build positive/negative GraKeL graphs from a dataset and return the
    Weisfeiler-Lehman (Nystroem) kernel matrices for both.

    Negative samples are Erdos-Renyi graphs with edge probability
    ``edge_ratio`` over the same node count as each positive graph.
    """
    dataset = dataloader.dataset
    pos_graph_list = []
    neg_graph_list = []
    meta_test_edge_ratio = 1 - args.meta_val_edge_ratio - args.meta_train_edge_ratio
    for i, graph in enumerate(dataset):
        print("Graph : %d" % (i))
        try:
            # Probe whether the graph already carries split edge tensors;
            # the bound names themselves are unused.
            x, train_pos_edge_index = graph.x.to(args.dev), \
                graph.train_pos_edge_index.to(args.dev)
            data = graph
        # BUGFIX: narrow the bare ``except:`` — only a missing attribute
        # (graph not yet split into train/val/test edges) should trigger the
        # re-split; a bare except also swallowed KeyboardInterrupt and real
        # device errors.
        except AttributeError:
            data = model.split_edges(graph, val_ratio=args.meta_val_edge_ratio,
                                     test_ratio=meta_test_edge_ratio)
        nx_graph = create_masked_networkx_graph(data)
        neg_graph = erdos_renyi_graph(len(nx_graph), edge_ratio)
        pos_edge_list = list(nx_graph.edges())
        neg_edge_list = list(neg_graph.edges())
        pos_node_dict = {}
        neg_node_dict = {}
        ''' Pos Samples '''
        # Label every node with its degree.
        for node_id in nx_graph.nodes:
            nx_graph.node[node_id]['label'] = nx_graph.degree[node_id]
            pos_node_dict[node_id] = nx_graph.degree[node_id]
        pos_grakel_graph = [pos_edge_list, pos_node_dict]
        pos_graph_list.append(pos_grakel_graph)
        '''Neg Samples '''
        # NOTE(review): negative samples reuse the *positive* graph's degree
        # labels (not the Erdos-Renyi graph's) — confirm this is intentional.
        for node_id in nx_graph.nodes:
            nx_graph.node[node_id]['label'] = nx_graph.degree[node_id]
            neg_node_dict[node_id] = nx_graph.degree[node_id]
        neg_grakel_graph = [neg_edge_list, neg_node_dict]
        neg_graph_list.append(neg_grakel_graph)
    wl_kernel = GraphKernel(kernel=[{"name": "weisfeiler_lehman", "n_iter": 5},
                                    {"name": "subtree_wl"}],
                            Nystroem=len(dataset))
    kernel_mat = wl_kernel.fit_transform(pos_graph_list)
    neg_kernel_mat = wl_kernel.transform(neg_graph_list)
    return kernel_mat, neg_kernel_mat
# NOTE(review): fragment — the dataset-fetch call this keyword closes starts
# above this chunk, and the final print(...) continues past its end.
produce_labels_nodes=True)
# Graphs and class labels as arrays so KFold index arrays can slice them.
G, y = np.asarray(dataset_d.data), np.asarray(dataset_d.target)
# Per-method accuracy/runtime accumulators across the 10 folds.
stats = {m: {"acc": list(), "time": list()} for m in Methods}
kfold = KFold(n_splits=10, random_state=50, shuffle=True)
for train_idx, test_idx in kfold.split(G, y):
    train_g, train_y = G[train_idx], y[train_idx]
    test_g, test_y = G[test_idx], y[test_idx]
    for i, k in enumerate(Methods):
        gk = GraphKernel(kernel=kernels[k], normalize=True)
        # Time only the kernel-matrix computation, not the SVM.
        start = time.time()
        k_train = gk.fit_transform(train_g)
        k_test = gk.transform(test_g)
        end = time.time()
        clf = svm.SVC(kernel='precomputed')
        clf.fit(k_train, train_y)
        pred_y = clf.predict(k_test)
        stats[k]["acc"].append(accuracy_score(test_y, pred_y))
        stats[k]["time"].append(end - start)
# Report mean ~ std of runtime and accuracy per kernel.
for m in Methods:
    print("kernel: ", m,
          "time: ", np.round(np.mean(stats[m]["time"]), 2), "~",
          np.round(np.std(stats[m]["time"]), 2),
          "acc: ", np.round(np.mean(stats[m]["acc"]), 2), "~",
# NOTE(review): fragment — these first statements are the body of a loop over
# dl_obj.x_val['title'] whose header lies above this chunk.
graph = tfe_obj.generate_graph_from_text(
    text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)
inp = nx.to_dict_of_lists(graph)
x_val_title_graphs.append([inp])
# Build the word graphs for the test-set titles.
x_test_title_graphs = list()
for text in dl_obj.x_test['title']:
    graph = tfe_obj.generate_graph_from_text(
        text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)
    inp = nx.to_dict_of_lists(graph)
    x_test_title_graphs.append([inp])
print(len(x_train_title_graphs))
print(len(x_val_title_graphs))
# Shortest-path-kernel features of the title graphs against the train set.
K_train = sp_kernel.fit_transform(x_train_title_graphs)
K_val = sp_kernel.transform(x_val_title_graphs)
# clf = SVC(kernel='precomputed')
clf = LogisticRegression()
clf.fit(K_train, y_train_one_hot)
y_pred = clf.predict(K_val)
from sklearn.metrics import accuracy_score
print("%2.2f %%" % (round(accuracy_score(y_val_one_hot, y_pred) * 100)))
# NOTE(review): fragment — the inner loop body continues past this chunk
# (the smoothed-data evaluation and accuracy bookkeeping follow).
sm_ac = []
# NOTE(review): ``iter`` shadows the builtin of the same name.
for iter in range(3):
    print("Iter: ", iter)
    # Train-test split of graph data (raw and smoothed variants).
    G_train_rw, G_test_rw, y_train_rw, y_test_rw = prepare_data(G_rw, y, random_state=iter)
    G_train_sm, G_test_sm, y_train_sm, y_test_sm = prepare_data(G_sm, y, random_state=iter)
    print("Data Set prepared")
    for (i, k) in enumerate(rows):
        print(k, end=" ")
        gk = GraphKernel(kernel=kernels[k], normalize=True)
        print("", end=".")
        # Calculate the kernel matrix for raw data
        start = time.time()
        K_train_rw = gk.fit_transform(G_train_rw)
        K_test_rw = gk.transform(G_test_rw)
        end = time.time()
        print("", end=".")
        # Initialise an SVM and fit.
        clf = svm.SVC(kernel='precomputed')
        clf.fit(K_train_rw, y_train_rw)
        print("", end=". ")
        # Predict and test.
        y_pred_rw = clf.predict(K_test_rw)
        print("Confusion Matrix: \n", confusion_matrix(y_test_rw, y_pred_rw))
        plot_confusion_matrix(y_test_rw, y_pred_rw, labels,
                              title="Confusion Matrix Before Smoothing")
        # Calculate accuracy of classification.
# NOTE(review): fragment — sp_kernel, the x_*_abstract_graphs lists and the
# static feature arrays concatenated below are defined above this chunk.
for text in dl_obj.x_val['abstract']:
    graph = tfe_obj.generate_graph_from_text(
        text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)
    inp = nx.to_dict_of_lists(graph)
    x_val_abstract_graphs.append([inp])
# Build the word graphs for the test-set abstracts.
x_test_abstract_graphs = list()
for text in dl_obj.x_test['abstract']:
    graph = tfe_obj.generate_graph_from_text(
        text=text, remove_stopwords=REMOVE_STOP_WORDS, directed=DIRECTED)
    inp = nx.to_dict_of_lists(graph)
    x_test_abstract_graphs.append([inp])
# Shortest-path-kernel features of the abstract graphs against the train set.
K_train = sp_kernel.fit_transform(x_train_abstract_graphs)
K_val = sp_kernel.transform(x_val_abstract_graphs)
K_test = sp_kernel.transform(x_test_abstract_graphs)
######################################################################################################
#####################################################################################################
#####################################################################################################
# Concatenate the kernel features with the static citation/community features.
x_train_static = np.concatenate(
    (x_train_citation_metrics.values, x_train_citations_emb, x_train_comm,
     x_train_authors_communities, K_train), axis=1)
x_val_static = np.concatenate(
    (x_val_citation_metrics.values, x_val_citations_emb, x_val_comm,
     x_val_authors_communities, K_val), axis=1)
K_train is the feature representation of the training set,
K_test is the feature representation of the test set.
This demonstrates one way to use a kernel, based on the idea of
"basis decomposition":
Informally, the training set serves as a basis; each sample's coordinates
with respect to the training set form that sample's feature vector.
So every feature vector has length equal to the size of the sample set.
Once the feature vectors are obtained, an SVM is used for classification.
Inside wl_kernel.fit_transform, the kernel matrix is computed block-wise and
the blocks are then merged.
Given a single dataset and this one kernel, how would we classify?
i)  Use the "basis decomposition" idea above.
ii) Other methods such as KNN also work: defining a kernel gives a
    similarity measure and therefore a distance.  Distance can be expressed
    through a kernel, but it can also be learned by a neural network —
    hence Metric Learning!
'''
# NOTE(review): fragment — wl_kernel, X_train/X_test, mutag and split_point
# are defined above this chunk (the commentary string also opens there).
# Represent train/test samples by their kernel values against the train set.
K_train = wl_kernel.fit_transform(X_train)
K_test = wl_kernel.transform(X_test)
# K_test = wl_kernel.fit(X_train).transform(X_test)
y = mutag.target
y_train, y_test = y[:split_point], y[split_point:]
from sklearn.svm import SVC
clf = SVC(kernel='precomputed')
clf.fit(K_train, y_train)
y_pred = clf.predict(K_test)
from sklearn.metrics import accuracy_score
def spk_isomap(X, y, k, KNNstart, KNNend, Dstart, Dend, svmC):
    """Grid-search Isomap hyper-parameters (n_neighbors x n_components) on
    top of the shortest-path kernel, scoring a linear SVM with k-fold CV.

    Appends the accuracy grid to "accuracy.txt" and returns Z, where
    Z[d][knn] is the mean CV accuracy for that hyper-parameter pair.
    """
    filename = "accuracy.txt"
    myfile = open(filename, 'a')
    # Add info to file
    myfile.write('SP Isomap accuracy: K = %d-%d, D = %d-%d, C = %d, K-fold = %d\n'
                 % (KNNstart, KNNend, Dstart, Dend, svmC, k))
    KNN = []
    KNNrange = KNNend - KNNstart + 1
    D = []
    Drange = Dend - Dstart + 1
    for knn in range(KNNrange):
        KNN.append(knn + KNNstart)
    for d in range(Drange):
        D.append(d + Dstart)
    kf = KFold(n_splits=k)
    Z = np.ndarray(shape=(len(D), len(KNN)))
    for knn in range(len(KNN)):
        for d in range(len(D)):
            # BUGFIX: reset the fold scores for every (knn, d) pair — the
            # list was previously created once before the grid loops, so each
            # cell of Z averaged in all earlier cells' folds as well.
            scores = []
            for train_index, test_index in kf.split(X):
                kernel = GraphKernel(kernel={"name": "shortest_path",
                                             "with_labels": False},
                                     normalize=True)
                # split train and test of K-fold
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                # Calculate the kernel matrix.
                K_train = kernel.fit_transform(X_train)
                K_test = kernel.transform(X_test)
                # Compute distance matrix
                D_train = compute_distance_matrix(K_train)
                D_test = compute_distance_matrix(K_test)
                # Initialize Isomap embedding object, embed train and test data
                embedding = manifold.Isomap(n_neighbors=KNN[knn],
                                            n_components=D[d],
                                            metric="precomputed")
                E_train = embedding.fit_transform(D_train)
                E_test = embedding.transform(D_test)
                # initialize second svm (not necessary? search documentation)
                clf2 = svm.SVC(kernel='linear', C=svmC)
                clf2.fit(E_train, y_train)
                # Predict and test.
                y_pred = clf2.predict(E_test)
                # Append accuracy of classification.
                scores.append(accuracy_score(y_test, y_pred))
            val = np.mean(scores)
            Z[d][knn] = val
            myfile.write("%f " % (val))
            print("knn = ", KNN[knn], "d = ", D[d], " accuracy = ", Z[d][knn])
            print("{0:.2%} done".format((Drange * knn + d + 1.0) / (Drange * KNNrange)))
        # print("{0:.2%} done".format((D*k+d + 1.0)/(D*KNN) ))
        myfile.write("\n")
    # Close the file
    myfile.close()
    return Z
# NOTE(review): fragment — these keywords close a train_test_split(...) call
# that begins above this chunk.
test_size=test_size, shuffle=True, random_state=42)
# Three candidate graph kernels, all normalized.
randomWalkKernel = GraphKernel(kernel={
    "name": "random_walk",
    "with_labels": False
}, normalize=True)
graphletKernel = GraphKernel(kernel={"name": "graphlet_sampling"}, normalize=True)
shortestPathKernel = GraphKernel(kernel={"name": "shortest_path"}, normalize=True)
# Calculate the kernel matrix for random Walk Kernel.
K_train = randomWalkKernel.fit_transform(X_train)
K_test = randomWalkKernel.transform(X_test)
'''nanel = 0
print (K_train[0][79-5])
print(len(K_train))
print(len(K_train[0]))
for i in K_train:
    for el in i:
        if np.isnan(el):
            nanel += 1
print("\n How many nan elements are there? Are there exactly len(K_train) elements? ",nanel, nanel == len(K_train))
'''
# There are 158 nan elements in K_train
# https://github.com/ysig/GraKeL/issues/6
# I transform each nan element into a number
def main():
    """WL-subtree-kernel baseline: build the WL kernel matrices for one CV
    fold and report train/test SVM accuracy over a grid of C values."""
    # Training settings
    parser = argparse.ArgumentParser(description='WL subtree kernel')
    parser.add_argument('--dataset', type=str, default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument(
        '--seed', type=int, default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx', type=int, default=0,
        help='the index of fold in 10-fold validation. Should be less then 10.'
    )
    parser.add_argument('--iter', type=int, default=5,
                        help='Number of iteration for the WL')
    parser.add_argument('--normalize', action="store_true",
                        help='normalize the feature or not')
    parser.add_argument('--filename', type=str, default="", help='output file')
    args = parser.parse_args()
    # NOTE(review): seeds numpy with the constant 0 rather than args.seed;
    # args.seed is only passed to separate_data below — confirm intended.
    np.random.seed(0)
    graphs, num_classes = load_data(args.dataset, False)
    # 10-fold cross validation, consider the particular fold.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)
    # SVM hyper-parameter to tune
    C_list = [0.01, 0.1, 1, 10, 100]
    X_train, y_train = convert(train_graphs)
    X_test, y_test = convert(test_graphs)
    # BUGFIX: GraKeL's Weisfeiler-Lehman parameter is spelled "n_iter", not
    # "niter" (see the other GraphKernel call in this file); with the wrong
    # key the requested iteration count was ignored or rejected.
    wl_kernel = GraphKernel(kernel=[{
        "name": "weisfeiler_lehman",
        "n_iter": args.iter
    }, {
        "name": "subtree_wl"
    }], normalize=args.normalize)
    K_train = wl_kernel.fit_transform(X_train)
    K_test = wl_kernel.transform(X_test)
    train_acc = []
    test_acc = []
    for C in C_list:
        clf = SVC(kernel='precomputed', C=C)
        clf.fit(K_train, y_train)
        y_pred_test = clf.predict(K_test)
        y_pred_train = clf.predict(K_train)
        train_acc.append(accuracy_score(y_train, y_pred_train) * 100)
        test_acc.append(accuracy_score(y_test, y_pred_test) * 100)
    print(train_acc)
    print(test_acc)
    if not args.filename == "":
        np.savetxt(args.filename, np.array([train_acc, test_acc]).transpose())
# NOTE(review): fragment — G, y, C_grid and the accuracy bookkeeping after
# the prediction are defined outside this chunk.
niter = 10
kernel_names = ["lovasz_theta", "svm_theta"]
# Per-kernel accuracy/runtime accumulators over the repetitions.
stats = {k: {"acc": list(), "time": list()} for k in kernel_names}
for i in range(niter):
    # Train-test split of graph data
    G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1)
    for kernel_name in kernel_names:
        start = time()
        # Initialise the theta kernel under test.
        gk = GraphKernel(kernel={"name": kernel_name}, normalize=True)
        # Calculate the kernel matrix.
        K_train = gk.fit_transform(G_train)
        K_test = gk.transform(G_test)
        end = time()
        # Cross validation on C, variable
        acc = 0
        for c in C_grid:
            # Initialise an SVM and fit.
            clf = svm.SVC(kernel='precomputed', C=c)
            # Fit on the train Kernel
            clf.fit(K_train, y_train)
            # Predict and test.
            y_pred = clf.predict(K_test)
from Utils import *
from numpy import array
from grakel import graph_from_networkx

if __name__ == '__main__':
    # BUGFIX: raw strings for the Windows paths — sequences like "\j" and
    # "\V" are invalid escapes (DeprecationWarning today, a SyntaxError in a
    # future Python); the resulting path values are unchanged.
    low_version = r"F:\GraphSim\jsondata\V1.0"
    high_version = r"F:\GraphSim\jsondata\V1.1"
    base_file_list = []
    target_file_list = []
    pairfileList = []
    getfilePath(low_version, base_file_list)
    getfilePath(high_version, target_file_list)
    pairfileList = getpairFile(base_file_list, target_file_list)
    for pair in pairfileList:
        basefile = pair[0]
        targetfile = pair[1]
        g1 = ParseFile(basefile)
        g2 = ParseFile(targetfile)
        # _basefileGraph / _targetfileGraph: the graphs of the nodes to compare
        _basefileGraph = g1.connectFile()
        _targetfileGraph = g2.connectFile()
        adj1, node_label1, edge_label1 = getadjlist(_basefileGraph)
        adj2, node_label2, edge_label2 = getadjlist(_targetfileGraph)
        sp_kernal = GraphKernel(kernel={"name": "shortest_path"},
                                normalize=True)
        g1 = Graph(adj1, node_label1, edge_label1)
        g2 = Graph(adj2, node_label2, edge_label2)
        tp = sp_kernal.fit_transform([g1])
        sim = sp_kernal.transform([g2])
        print("kernal_Done!")
def test_subgraph_matching_pd():
    """Random input test for the Subgraph Matching kernel [n_jobs=-1/generic-wrapper]."""
    # Feature combinations, in order: node-label/edge-label,
    # node-label/edge-attribute, node-attribute/edge-label,
    # node-attribute/edge-attribute.  Attribute variants get np.dot as the
    # vertex (kv) / edge (ke) comparison function.
    cases = [
        (dict(n_graphs=100, r_vertices=(10, 20), n_graphs_test=40,
              features=('nl', 3, 'el', 4)),
         {"name": "SM"}),
        (dict(n_graphs=50, r_vertices=(5, 10), n_graphs_test=20,
              features=('nl', 3, 'ea', 5)),
         {"name": "SM", "ke": np.dot}),
        (dict(n_graphs=50, r_vertices=(5, 10), n_graphs_test=20,
              features=('na', 4, 'el', 3)),
         {"name": "SM", "kv": np.dot}),
        (dict(n_graphs=50, r_vertices=(5, 10), n_graphs_test=20,
              features=('na', 4, 'ea', 6)),
         {"name": "SM", "kv": np.dot, "ke": np.dot}),
    ]
    for data_kwargs, kernel in cases:
        train, test = generate_dataset(r_connectivity=(0.4, 0.8),
                                       r_weight_edges=(1, 1),
                                       random_state=rs,
                                       **data_kwargs)
        gk = GraphKernel(kernel=kernel, verbose=verbose,
                         normalize=normalize, n_jobs=-1)
        try:
            gk.fit_transform(train)
            gk.transform(test)
            assert True
        except Exception as exception:
            assert False, exception
# Split indexes according to Kfold with k = 10 k = 10 kf = KFold(n_splits=k) # initialize scores lists scores1 = [] scores2 = [] for train_index, test_index in kf.split(X): # split train and test of K-fold X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] # Fit and transform train and test with the graph kernel K_train = spk.fit_transform(X_train) K_test = spk.transform(X_test) # Initialize and fit classifier for non-embedded graph with test data clf1 = svm.SVC(kernel='linear', C=1) clf1.fit(K_train, y_train) # make prediction and calculate accuracy y_pred = clf1.predict(K_test) acc = accuracy_score(y_test, y_pred) scores1.append(acc) ''' D_train = compute_distance_matrix(K_train) D_test = compute_test_distance_matrix (K_train, K_test) embedding = manifold.Isomap(n_neighbors=5, n_components=10, metric="precomputed") E_train = embedding.fit_transform(D_train)