def test_weisfeiler_lehman():
    """Picklability test for the Weisfeiler Lehman kernel."""
    train, _ = generate_dataset(n_graphs=100,
                                r_vertices=(10, 20),
                                r_connectivity=(0.4, 0.8),
                                r_weight_edges=(1, 1),
                                n_graphs_test=40,
                                random_state=rs,
                                features=('nl', 3))

    wl_st_kernel = WeisfeilerLehman(verbose=verbose, normalize=normalize,
                                    base_graph_kernel=VertexHistogram)
    wl_st_kernel.fit(train)
    assert is_picklable(wl_st_kernel)
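# A minimal sketch of what the picklability assertion above checks: a fitted
# WeisfeilerLehman kernel should survive a pickle round trip and still be usable
# for transform(). The two toy graphs are illustrative, not from the test suite.
import pickle

from grakel import Graph
from grakel.kernels import WeisfeilerLehman, VertexHistogram

g1 = Graph([(0, 1), (1, 0), (1, 2), (2, 1)], node_labels={0: 'a', 1: 'b', 2: 'a'})
g2 = Graph([(0, 1), (1, 0), (1, 2), (2, 1), (2, 0), (0, 2)],
           node_labels={0: 'a', 1: 'a', 2: 'b'})

wl = WeisfeilerLehman(n_iter=2, normalize=True, base_graph_kernel=VertexHistogram)
wl.fit([g1])

restored = pickle.loads(pickle.dumps(wl))   # round trip through pickle
print(restored.transform([g2]))             # the restored kernel is still usable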
def similarity(self, g1adj, g2adj):
    """Normalized WL similarity between the dominant-set subgraphs of two adjacency matrices."""
    ds1list = [i for i, x in enumerate(list(dominant_set(g1adj))) if x > 0]
    ds2list = [i for i, x in enumerate(list(dominant_set(g2adj))) if x > 0]

    # Restrict each adjacency matrix to the vertices in its dominant set.
    ds1adj = []
    ds2adj = []
    for i in ds1list:
        a = []
        for j in ds1list:
            a += [g1adj[i][j]]
        ds1adj += [a]
    for i in ds2list:
        a = []
        for j in ds2list:
            a += [g2adj[i][j]]
        ds2adj += [a]

    a, b, c = from_adj_to_set(ds1adj)
    d, e, f = from_adj_to_set(ds2adj)

    # tmp = ShortestPath(normalize=True).fit_transform([[a, b, c], [d, e, f]])[0][1]
    tmp = WeisfeilerLehman(n_iter=self.n_iter,
                           normalize=True).fit_transform([[a, b, c], [d, e, f]])[0][1]
    return tmp
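# A hedged, self-contained sketch of the comparison step above: given two small
# adjacency matrices, build grakel Graph objects (here with degree-based node labels,
# since dominant_set and from_adj_to_set are helpers of the original project and are
# not reimplemented) and read the normalized WL similarity off the Gram matrix.
import numpy as np

from grakel import Graph
from grakel.kernels import WeisfeilerLehman

adj1 = np.array([[0, 1, 1],
                 [1, 0, 1],
                 [1, 1, 0]])
adj2 = np.array([[0, 1, 0, 0],
                 [1, 0, 1, 1],
                 [0, 1, 0, 1],
                 [0, 1, 1, 0]])

def to_graph(adj):
    # Label every vertex by its degree so the WL kernel has discrete node labels.
    labels = {i: int(adj[i].sum()) for i in range(adj.shape[0])}
    return Graph(adj, node_labels=labels)

K = WeisfeilerLehman(n_iter=3, normalize=True).fit_transform([to_graph(adj1),
                                                              to_graph(adj2)])
print(K[0][1])  # normalized similarity of the two graphs, analogous to `tmp` above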
def test_weisfeiler_lehman():
    """Eigenvalue test for the Weisfeiler Lehman kernel."""
    wl_st_kernel = WeisfeilerLehman(verbose=verbose, normalize=normalize,
                                    base_graph_kernel=VertexHistogram)
    if verbose:
        print_kernel("WL/Subtree", wl_st_kernel, dataset_tr, dataset_te)
    else:
        positive_eig(wl_st_kernel, dataset)
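# A minimal sketch of the kind of check positive_eig performs: a valid kernel
# (Gram) matrix must be positive semi-definite, i.e. its smallest eigenvalue
# should be non-negative up to numerical tolerance. The toy graphs are illustrative.
import numpy as np

from grakel import Graph
from grakel.kernels import WeisfeilerLehman, VertexHistogram

graphs = [
    Graph([(0, 1), (1, 0), (1, 2), (2, 1)], node_labels={0: 'a', 1: 'b', 2: 'a'}),
    Graph([(0, 1), (1, 0)], node_labels={0: 'a', 1: 'a'}),
    Graph([(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1)],
          node_labels={0: 'b', 1: 'b', 2: 'b'}),
]

K = WeisfeilerLehman(n_iter=3, base_graph_kernel=VertexHistogram).fit_transform(graphs)
min_eig = np.min(np.linalg.eigvalsh(K))   # eigvalsh: K is symmetric by construction
assert min_eig > -1e-8, min_eig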
def gk_function(algorithm, graphs, par):
    """Run one graph kernel for a single point of the parameter grid.

    Different kernels take different numbers of parameters, so the dispatch
    on the algorithm name happens here.
    """
    print("parameters", par)
    if algorithm == "SP_gkl":
        gk = ShortestPath(with_labels=True).fit_transform(graphs)
    elif algorithm == "EH_gkl":
        gk = EdgeHistogram().fit_transform(graphs)
    elif algorithm == "WL_gkl":
        gk = WeisfeilerLehman(n_iter=par).fit_transform(graphs)
    elif algorithm == "RW_gkl":
        lam, p = par
        gk = RandomWalkLabeled(lamda=lam, p=p).fit_transform(graphs)
    elif algorithm == "CSM_gkl":
        c, k = par
        # testing lambda function. c should reset for each iteration
        gk = SubgraphMatching(
            k=k,
            ke=lambda p1, p2: ke_kernel(p1, p2, c),  # inline lambda
            kv=kv_kernel
        ).fit_transform(graphs)
    return gk
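# A hedged sketch of how the "WL_gkl" branch above is typically driven over a
# parameter grid: one Gram matrix per number of WL iterations. The toy graphs stand
# in for the project's `graphs`; the real gk_function additionally handles the
# SP/EH/RW/CSM branches.
from grakel import Graph
from grakel.kernels import WeisfeilerLehman

graphs = [
    Graph([(0, 1), (1, 0), (1, 2), (2, 1)], node_labels={0: 'a', 1: 'b', 2: 'a'}),
    Graph([(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1)],
          node_labels={0: 'a', 1: 'a', 2: 'b'}),
]

param_grid = [1, 2, 3, 4, 5]
gram_matrices = {par: WeisfeilerLehman(n_iter=par).fit_transform(graphs)
                 for par in param_grid}
for par, K in gram_matrices.items():
    print(par, K.shape)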
def __init__(self, kernel, detector, labeled=True, WL_iter=5, PK_bin_width=1,
             LOF_n_neighbors=20, LOF_n_leaf=30, **kwargs):
    kernels = {
        'WL': WeisfeilerLehman(n_iter=WL_iter, normalize=True,
                               base_graph_kernel=VertexHistogram),
        'PK': Propagation(t_max=WL_iter, w=PK_bin_width, normalize=True) if labeled
              else PropagationAttr(t_max=WL_iter, w=PK_bin_width, normalize=True),
    }
    detectors = {
        'OCSVM': OneClassSVM(kernel='precomputed', nu=0.1),
        'LOF': LocalOutlierFactor(n_neighbors=LOF_n_neighbors, leaf_size=LOF_n_leaf,
                                  metric='precomputed', contamination=0.1),
        # 'IF': current similarity forest also has problem
    }
    assert kernel in kernels
    assert detector in detectors
    self.kernel = kernels[kernel]
    self.detector = detectors[detector]
    self.kernel_name = kernel
    self.detector_name = detector
    self.labeled = labeled
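# A minimal, hypothetical sketch (not the original class) of how the 'WL' kernel and
# the 'OCSVM' detector configured above fit together: compute a precomputed WL Gram
# matrix on the training graphs, fit the one-class SVM on it, and score new graphs
# via the rectangular test-vs-train kernel from transform().
from grakel import Graph
from grakel.kernels import WeisfeilerLehman, VertexHistogram
from sklearn.svm import OneClassSVM

train_graphs = [
    Graph([(0, 1), (1, 0), (1, 2), (2, 1)], node_labels={0: 'a', 1: 'b', 2: 'a'}),
    Graph([(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1)],
          node_labels={0: 'a', 1: 'b', 2: 'b'}),
    Graph([(0, 1), (1, 0)], node_labels={0: 'a', 1: 'a'}),
]
test_graphs = [
    Graph([(0, 1), (1, 0), (1, 2), (2, 1), (2, 3), (3, 2)],
          node_labels={0: 'a', 1: 'b', 2: 'a', 3: 'b'}),
]

wl = WeisfeilerLehman(n_iter=5, normalize=True, base_graph_kernel=VertexHistogram)
K_train = wl.fit_transform(train_graphs)   # square Gram matrix, train x train
K_test = wl.transform(test_graphs)         # rectangular matrix, test x train

ocsvm = OneClassSVM(kernel='precomputed', nu=0.1).fit(K_train)
print(ocsvm.decision_function(K_test))     # higher scores = more "normal" graphs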
def test_weisfeiler_lehman():
    """Random input test for the Weisfeiler Lehman kernel."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))

    wl_st_kernel = WeisfeilerLehman(verbose=verbose, normalize=normalize,
                                    base_graph_kernel=VertexHistogram)
    try:
        wl_st_kernel.fit_transform(train)
        wl_st_kernel.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram`
"""
from __future__ import print_function
print(__doc__)

import numpy as np

from grakel.datasets import fetch_dataset
from grakel.utils import cross_validate_Kfold_SVM
from grakel.kernels import WeisfeilerLehman, VertexHistogram

# Loads the MUTAG dataset
MUTAG = fetch_dataset("MUTAG", verbose=False)
G, y = MUTAG.data, MUTAG.target

# Generates a list of kernel matrices using the Weisfeiler-Lehman subtree kernel.
# Each kernel matrix is generated by setting the number of iterations of the
# kernel to a different value (from 1 to 6).
Ks = list()
for i in range(1, 7):
    gk = WeisfeilerLehman(n_iter=i, base_graph_kernel=VertexHistogram, normalize=True)
    K = gk.fit_transform(G)
    Ks.append(K)

# Performs 10-fold cross-validation over the different kernels and the parameter C
# of the SVM, and repeats the experiment 10 times with different folds
accs = cross_validate_Kfold_SVM([Ks], y, n_iter=10)
print("Average accuracy:", str(round(np.mean(accs[0]) * 100, 2)) + "%")
print("Standard deviation:", str(round(np.std(accs[0]) * 100, 2)) + "%")
clf = SVC(kernel='precomputed', C=1)  # Initialize SVM
clf.fit(K_train, y_train)             # Train SVM
y_pred = clf.predict(K_test)          # Predict
print("Classification accuracy using ShortestPath",
      accuracy_score(y_test, y_pred))

gk = PyramidMatch(with_labels=True)
K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)

clf = SVC(kernel='precomputed', C=1)  # Initialize SVM
clf.fit(K_train, y_train)             # Train SVM
y_pred = clf.predict(K_test)          # Predict
print("Classification accuracy using PyramidMatch",
      accuracy_score(y_test, y_pred))

gk = WeisfeilerLehman(base_graph_kernel=VertexHistogram)
K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)

clf = SVC(kernel='precomputed', C=1)  # Initialize SVM
clf.fit(K_train, y_train)             # Train SVM
y_pred = clf.predict(K_test)          # Predict
print("Classification accuracy using WeisfeilerLehman",
      accuracy_score(y_test, y_pred))
print('>>> 10-fold cross-validation --- fold %d' % curr_fold)

kf2 = StratifiedKFold(n_splits=9, shuffle=False)
train_val_data = [dataset.data[i] for i in train_val_idxs]
train_val_targets = [dataset.target[i] for i in train_val_idxs]
for train_idxs, _ in kf2.split(train_val_data, train_val_targets):
    print(len(train_idxs), len(dataset.data))
    train_dataset_data = [train_val_data[i] for i in train_idxs]
    train_dataset_target = [train_val_targets[i] for i in train_idxs]
    break

test_data = [dataset.data[i] for i in test_idxs]
test_targets = [dataset.target[i] for i in test_idxs]

# Uses the Weisfeiler-Lehman subtree kernel to generate the kernel matrices
gk = WeisfeilerLehman(n_iter=4, base_graph_kernel=VertexHistogram, normalize=True)
K_train = gk.fit_transform(train_dataset_data)
K_test = gk.transform(test_data)

# Uses the SVM classifier to perform classification
clf = SVC(kernel="precomputed")
clf.fit(K_train, train_dataset_target)
y_pred = clf.predict(K_test)

# Computes and prints the classification accuracy
acc = accuracy_score(test_targets, y_pred)
print("Accuracy:", str(round(acc * 100, 2)) + "%")
def segk(nodes, edgelist, radius, dim, kernel):
    n = len(nodes)

    if kernel == 'shortest_path':
        gk = [ShortestPath(normalize=True, with_labels=True) for i in range(radius)]
    elif kernel == 'weisfeiler_lehman':
        gk = [WeisfeilerLehman(n_iter=4, normalize=True,
                               base_graph_kernel=VertexHistogram)
              for i in range(radius)]
    else:
        raise ValueError('Use a valid kernel!!')

    idx = np.random.permutation(n)
    sampled_nodes = [nodes[idx[i]] for i in range(dim)]
    remaining_nodes = [nodes[idx[i]] for i in range(dim, len(nodes))]

    egonet_edges, egonet_node_labels = extract_egonets(edgelist, radius)

    E = np.zeros((n, dim))

    # Embed the sampled nodes: accumulate kernels of egonet subgraphs of growing radius.
    K = np.zeros((dim, dim))
    K_prev = np.ones((dim, dim))
    for i in range(1, radius + 1):
        Gs = list()
        for node in sampled_nodes:
            node_labels = {v: egonet_node_labels[node][v]
                           for v in egonet_node_labels[node]
                           if egonet_node_labels[node][v] <= i}
            edges = list()
            for edge in egonet_edges[node]:
                if edge[0] in node_labels and edge[1] in node_labels:
                    edges.append((edge[0], edge[1]))
                    edges.append((edge[1], edge[0]))
            Gs.append(Graph(edges, node_labels=node_labels))
        K_i = gk[i - 1].fit_transform(Gs)
        K_i = np.multiply(K_prev, K_i)
        K += K_i
        K_prev = K_i

    # SVD-based normalization of the sampled-node kernel matrix (Nyström-style),
    # so that the embeddings of the sampled nodes reproduce the kernel values.
    U, S, V = svd(K)
    S = np.maximum(S, 1e-12)
    Norm = np.dot(U * 1. / np.sqrt(S), V)
    E[idx[:dim], :] = np.dot(K, Norm.T)

    # Embed the remaining nodes against the sampled ones using transform().
    K = np.zeros((n - dim, dim))
    K_prev = np.ones((n - dim, dim))
    for i in range(1, radius + 1):
        Gs = list()
        for node in remaining_nodes:
            node_labels = {v: egonet_node_labels[node][v]
                           for v in egonet_node_labels[node]
                           if egonet_node_labels[node][v] <= i}
            edges = list()
            for edge in egonet_edges[node]:
                if edge[0] in node_labels and edge[1] in node_labels:
                    edges.append((edge[0], edge[1]))
                    edges.append((edge[1], edge[0]))
            Gs.append(Graph(edges, node_labels=node_labels))
        K_i = gk[i - 1].transform(Gs)
        K_i = np.multiply(K_prev, K_i)
        K += K_i
        K_prev = K_i

    E[idx[dim:], :] = np.dot(K, Norm.T)

    return E
split = 10
f = open('Accuracy_mean_origin.txt', 'a')
temp_accs = [None] * 6
for iter_number in [2]:
    f.write("origin " + str(split) + "-fold cross-validation\n")
    for key, value in test_dataset.items():
        dataset = fetch_dataset(value, verbose=False)
        G, y = dataset.data, dataset.target
        temp_accs[int(key) - 1] = []
        for i in range(split):
            G_train, G_test, y_train, y_test = K_Flod_spilt(
                split, i, np.array(G), np.array(y),
                random_state_list[int(key) - 1])

            gk = WeisfeilerLehman(n_iter=iter_number,
                                  base_graph_kernel=VertexHistogram,
                                  normalize=True)
            K_train = gk.fit_transform(G_train)
            K_test = gk.transform(G_test)

            # Uses the SVM classifier to perform classification
            # clf = RandomForestClassifier(n_estimators=35, random_state=39)
            # clf = AdaBoostClassifier(n_estimators=35, random_state=44)
            # SVC(kernel="precomputed")
            # Note: kernel="poly" treats each row of the precomputed Gram matrix as a
            # feature vector rather than as a precomputed kernel.
            clf = SVC(kernel="poly")
            clf.fit(K_train, y_train)
            y_pred = clf.predict(K_test)

            # Computes and prints the classification accuracy
            acc = accuracy_score(y_test, y_pred)
            temp_accs[int(key) - 1].append(acc)
tokens_to_ids = dict()
for token in sent:
    if token not in tokens_to_ids:
        tokens_to_ids[token] = len(tokens_to_ids)
        node_labels[tokens_to_ids[token]] = token

edges = list()
for i in range(len(sent) - 1):
    edges.append((tokens_to_ids[sent[i]], tokens_to_ids[sent[i + 1]]))

word_networks.append(Graph(edges, node_labels=node_labels))

query_sent_id = 54
query_sent = [word_networks[query_sent_id]]

# Initialize the Weisfeiler-Lehman subtree kernel
gk = WeisfeilerLehman(n_iter=2, normalize=True, base_graph_kernel=VertexHistogram)

print("Computing similarities\n")
t0 = time.time()
gk.fit(query_sent)
K = gk.transform(word_networks)
print("done in %0.3fs\n" % (time.time() - t0))

print("Query sentence")
print("--------------")
print(" ".join(sents[query_sent_id]))
print()

print("Most similar sentence")
print("---------------------")
# K[:, 0] holds the normalized similarity of every sentence to the query; the
# highest entry is the query itself, so take the second highest.
print(" ".join(sents[np.argsort(K[:, 0])[-2]]))
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

from grakel.datasets import fetch_dataset
from grakel.kernels import ShortestPath, WeisfeilerLehman

# Loads the PROTEINS dataset
PROTEINS = fetch_dataset("PROTEINS", verbose=True)
G, y = PROTEINS.data, PROTEINS.target
print(G, ' ', y)

# Splits the dataset into a training and a test set
G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.3,
                                                    random_state=42)

# Uses the Weisfeiler-Lehman kernel to generate the kernel matrices
gk = WeisfeilerLehman()
K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)

# Uses the SVM classifier to perform classification
clf = SVC(kernel="precomputed")
clf.fit(K_train, y_train)
y_pred = clf.predict(K_test)

# Computes and prints the classification accuracy
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", str(round(acc * 100, 2)) + "%")
        for score_type, score_field in zip(scoring, score_fields)
    }
    data["method"] = method_id
    data["time"] = graphs[f"timings_{kernel_set}_{level}"].sum()
    results = results.append(pd.DataFrame(data), ignore_index=True)
    return results


GRAKEL_KERNELS = {
    "GK-SPath": lambda: ShortestPath(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-EHist": lambda: EdgeHistogram(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-VHist": lambda: VertexHistogram(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-GSamp": lambda: GraphletSampling(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-WL-1": lambda: WeisfeilerLehman(
        n_iter=1, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-2": lambda: WeisfeilerLehman(
        n_iter=2, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-3": lambda: WeisfeilerLehman(
        n_iter=3, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-4": lambda: WeisfeilerLehman(
        n_iter=4, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-5": lambda: WeisfeilerLehman(
        n_iter=5, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-NH": lambda: NeighborhoodHash(
        n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
}