def compute_kernel_distance_matrices(slice_subgraphs, kernel_params):
    # Relabel based on requested graph kernels
    kernel_label_pair_to_relabeled_graphs = get_relabeled_graphs(
        slice_subgraphs, kernel_params)
    # Actually compute the kernel distance matrices
    kernel_to_distance_matrix = {}
    for kp in kernel_params:
        kernel = kp["name"]
        params = kp["params"]
        # Compute Weisfeiler-Lehman subtree pattern kernel
        if kernel == "wlst":
            n_iters = params["n_iters"]
            label = params["label"]
            kernel_label_pair = (kernel, label)
            relabeled_graphs = kernel_label_pair_to_relabeled_graphs[
                kernel_label_pair]
            kernel_mat = gk.CalculateWLKernel(relabeled_graphs, n_iters)
            distance_mat = convert_to_distance_matrix(kernel_mat)
            kernel_params_key = (kernel, label, n_iters)
            kernel_to_distance_matrix[kernel_params_key] = distance_mat
        # Compute edge-histogram kernel
        elif kernel == "eh":
            label = params["label"]
            kernel_label_pair = (kernel, label)
            relabeled_graphs = kernel_label_pair_to_relabeled_graphs[
                kernel_label_pair]
            kernel_mat = gk.CalculateEdgeHistKernel(relabeled_graphs)
            distance_mat = convert_to_distance_matrix(kernel_mat)
            kernel_key = (kernel, label)
            kernel_to_distance_matrix[kernel_key] = distance_mat
        # Compute vertex-histogram kernel
        elif kernel == "vh":
            label = params["label"]
            key = (kernel, label)
            relabeled_graphs = kernel_label_pair_to_relabeled_graphs[key]
            kernel_mat = gk.CalculateVertexHistKernel(relabeled_graphs)
            distance_mat = convert_to_distance_matrix(kernel_mat)
            kernel_to_distance_matrix[key] = distance_mat
        else:
            raise NotImplementedError(
                "Kernel: {} not supported".format(kernel))
    return kernel_to_distance_matrix
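# A minimal sketch of the `convert_to_distance_matrix` helper used above, together
# with an example `kernel_params` request list. Both are assumptions for illustration:
# the real helper and parameter schema are not shown here. The distance below is the
# standard kernel-induced metric d(i, j) = sqrt(K(i, i) + K(j, j) - 2 * K(i, j)).
import numpy as np

def convert_to_distance_matrix(kernel_mat):
    # Squared feature-space distances, clipped to guard against small
    # negative values introduced by floating-point error.
    diag = np.diag(kernel_mat)
    sq_dists = diag[:, None] + diag[None, :] - 2.0 * kernel_mat
    return np.sqrt(np.clip(sq_dists, 0.0, None))

# Hypothetical request: one WL-subtree kernel (2 iterations) and one
# vertex-histogram kernel, both using a "degree" vertex labeling.
example_kernel_params = [
    {"name": "wlst", "params": {"n_iters": 2, "label": "degree"}},
    {"name": "vh", "params": {"label": "degree"}},
]
# distance_matrices = compute_kernel_distance_matrices(subgraphs, example_kernel_params)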
def compute_all_kernels(graphs):
    return (
        gk.CalculateEdgeHistKernel(graphs),
        gk.CalculateVertexHistKernel(graphs),
        gk.CalculateVertexEdgeHistKernel(graphs),
        gk.CalculateVertexVertexEdgeHistKernel(graphs),
        gk.CalculateEdgeHistGaussKernel(graphs),
        gk.CalculateVertexHistGaussKernel(graphs),
        gk.CalculateVertexEdgeHistGaussKernel(graphs),
        gk.CalculateGeometricRandomWalkKernel(graphs),
        # gk.CalculateExponentialRandomWalkKernel(graphs),
        gk.CalculateKStepRandomWalkKernel(graphs, [1.0]),
        gk.CalculateWLKernel(graphs),
        gk.CalculateConnectedGraphletKernel(graphs, 3),
        gk.CalculateConnectedGraphletKernel(graphs, 4),
        gk.CalculateConnectedGraphletKernel(graphs, 5),
        gk.CalculateGraphletKernel(graphs, 3),
        gk.CalculateGraphletKernel(graphs, 4),
        gk.CalculateShortestPathKernel(graphs),
    )
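# Usage sketch for `compute_all_kernels`, assuming graphs are loaded with
# python-igraph and the graphkernels package is imported as `gk` (as in the
# rest of this file). The file names are hypothetical placeholders.
import igraph as ig
import graphkernels.kernels as gk

graphs = [ig.read(f) for f in ["g0.graphml", "g1.graphml", "g2.graphml"]]
kernel_matrices = compute_all_kernels(graphs)
# Each entry is an n x n Gram matrix over the n input graphs.
for k_mat in kernel_matrices:
    assert k_mat.shape == (len(graphs), len(graphs))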
def calculate_kernel_matrices(self, igraph_list):
    kernel_matrices = []
    kernel_matrices.append(gk.CalculateEdgeHistKernel(igraph_list))
    kernel_matrices.append(gk.CalculateVertexHistKernel(igraph_list))
    kernel_matrices.append(
        gk.CalculateVertexEdgeHistKernel(igraph_list))
    kernel_matrices.append(
        gk.CalculateVertexVertexEdgeHistKernel(igraph_list))
    kernel_matrices.append(gk.CalculateVertexHistGaussKernel(igraph_list))
    kernel_matrices.append(gk.CalculateEdgeHistGaussKernel(igraph_list))
    kernel_matrices.append(
        gk.CalculateVertexEdgeHistGaussKernel(igraph_list))
    #kernel_matrices.append(gk.CalculateGeometricRandomWalkKernel(igraph_list))
    #kernel_matrices.append(gk.CalculateExponentialRandomWalkKernel(igraph_list))
    #kernel_matrices.append(gk.CalculateKStepRandomWalkKernel(igraph_list))
    #kernel_matrices.append(gk.CalculateShortestPathKernel(igraph_list))
    kernel_matrices.append(gk.CalculateWLKernel(igraph_list))
    #kernel_matrices.append(gk.CalculateGraphletKernel(igraph_list))
    #kernel_matrices.append(gk.CalculateConnectedGraphletKernel(igraph_list))
    return kernel_matrices
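# Usage sketch: `calculate_kernel_matrices` never touches `self`, so for a quick
# check it can be called as a plain function with `self=None`. The graph file
# names are hypothetical placeholders.
test_graphs = [ig.read(f) for f in ["a.graphml", "b.graphml", "c.graphml"]]
matrices = calculate_kernel_matrices(None, test_graphs)
print(len(matrices))  # 8 matrices, with the commented-out kernels disabled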
def evaluate_vertex_histogram_kernel(graphs, graph_labels, label_requests,
                                     n_folds=10, seed=None):
    """Evaluate SVR models built on the vertex-histogram kernel for each
    requested vertex labeling, using k-fold cross-validation."""
    # Print progress
    print("Evaluating Vertex-Histogram Kernel")
    print()
    # Just a few sanity checks
    assert len(graphs) == len(graph_labels)
    # Mapping from vertex labeling to results
    vertex_labeling_to_results = {}
    # Sweep over vertex label options
    for lr in label_requests:
        # Unpack
        requested_vertex_label = lr["vertex"]
        # Print progress
        print("Vertex Label: {}".format(requested_vertex_label))
        print()
        # Convert the base graphs into representations with
        # the requested vertex and edge labels, if any.
        relabeled_graphs = [
            relabel_for_wlst_kernel(g, label=requested_vertex_label)
            for g in graphs
        ]
        # Define lists to track non-determinism fraction prediction results
        # over multiple folds
        true_nd_vals = []
        pred_nd_vals = []
        # Compute kernel matrix
        k_mat = gk.CalculateVertexHistKernel(relabeled_graphs)
        # Define training and testing sets
        graph_indices = list(range(len(graph_labels)))
        kf = KFold(n_splits=n_folds, random_state=seed, shuffle=True)
        for split_idx, (train_indices, test_indices) in enumerate(
                kf.split(graph_indices)):
            # Print progress
            print("Running split {}/{}".format(split_idx + 1, n_folds))
            # Get training and testing graphs
            g_train = [relabeled_graphs[i] for i in train_indices]
            g_test = [relabeled_graphs[i] for i in test_indices]
            # Get the non-determinism fraction values for the training and
            # testing graphs
            y_train = [graph_labels[i] for i in train_indices]
            y_test = [graph_labels[i] for i in test_indices]
            # Retrieve embeddings of training and test graphs
            k_train = np.zeros((len(train_indices), len(train_indices)))
            k_test = np.zeros((len(test_indices), len(train_indices)))
            for i in range(len(train_indices)):
                for j in range(len(train_indices)):
                    k_train[i][j] = k_mat[train_indices[i]][train_indices[j]]
            for i in range(len(test_indices)):
                for j in range(len(train_indices)):
                    k_test[i][j] = k_mat[test_indices[i]][train_indices[j]]
            # Train SVM regressor using precomputed kernel matrix
            model = svm.SVR(kernel="precomputed")
            model.fit(k_train, y_train)
            # Evaluate model against the embeddings of the test graphs
            y_pred = model.predict(k_test)
            # Print progress
            print("Done with split {}/{}".format(split_idx + 1, n_folds))
            print()
            # Aggregate results for this fold
            true_nd_vals += list(y_test)
            pred_nd_vals += list(y_pred)
        # Aggregate results for this vertex labeling
        vertex_labeling_to_results[requested_vertex_label] = {
            "true": true_nd_vals,
            "pred": pred_nd_vals
        }
    return vertex_labeling_to_results
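# Hypothetical invocation of `evaluate_vertex_histogram_kernel`. The graphs, the
# non-determinism fraction labels, and the "degree"/"none" vertex-label options
# are placeholder assumptions; `relabel_for_wlst_kernel` must understand whichever
# labels are requested. Assumes numpy is imported as `np`.
label_requests = [{"vertex": "degree"}, {"vertex": "none"}]
results = evaluate_vertex_histogram_kernel(
    graphs, graph_labels, label_requests, n_folds=10, seed=42)
for vertex_label, res in results.items():
    mae = np.mean(np.abs(np.array(res["true"]) - np.array(res["pred"])))
    print("Vertex label {}: MAE = {:.4f}".format(vertex_label, mae))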
def main(args, logger):
    graphs = [ig.read(filename) for filename in args.FILES]
    labels = read_labels(args.labels)

    # Set the label to be uniform over all graphs in case no labels are
    # available. This essentially changes our iteration to degree-based
    # checks.
    for graph in graphs:
        if 'label' not in graph.vs.attributes():
            graph.vs['label'] = [0] * len(graph.vs)

    logger.info('Read {} graphs and {} labels'.format(len(graphs), len(labels)))
    assert len(graphs) == len(labels)

    # Calculate graph kernel
    gram_matrix = gk.CalculateVertexHistKernel(graphs)

    y = LabelEncoder().fit_transform(labels)
    np.random.seed(42)

    mean_accuracies = []
    params = ['balanced']
    cv_results = []
    entry = {}

    for param in params:
        entry[param] = args.__dict__[param]
    entry['dataset'] = dirname(args.FILES[0]).split('/')[1]
    entry['baseline'] = 'vertex hist kernel'

    for i in range(10):
        # Contains accuracy scores for each cross validation step; the
        # means of this list will be used later on.
        accuracy_scores = []
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)

        for n, indices in enumerate(cv.split(graphs, y)):
            entry_fold = copy.copy(entry)
            train_index = indices[0]
            test_index = indices[1]

            pipeline = Pipeline(
                [('clf', SVC(class_weight='balanced' if args.balanced else None,
                             random_state=42,
                             kernel='precomputed'))],
            )

            grid_params = {'clf__C': [1e1]}

            X_train, X_test = gram_matrix[train_index][:, train_index], \
                gram_matrix[test_index][:, train_index]
            y_train, y_test = y[train_index], y[test_index]

            kgscv = KernelGridSearchCV(pipeline,
                                       param_grid=grid_params,
                                       cv=cv,
                                       random_state=42)
            kgscv.fit(X_train, y_train)
            p = kgscv._best_params
            sc = kgscv._best_score
            clf = kgscv._best_estimator

            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            accuracy_scores.append(acc)

            for param, param_val in kgscv._best_params.items():
                entry_fold[param] = param_val
                entry[param] = ''

            entry_fold['fold'] = n + 1
            entry_fold['it'] = i
            entry_fold['acc'] = acc * 100
            entry_fold['std'] = 0.0
            cv_results.append(entry_fold)

            logger.info('Best classifier for this fold: {}'.format(
                kgscv._best_params))

        mean_accuracies.append(np.mean(accuracy_scores))
        logger.info(
            ' - Mean 10-fold accuracy: {:2.2f} [running mean over all folds: {:2.2f}]'
            .format(mean_accuracies[-1] * 100, np.mean(mean_accuracies) * 100))

    entry['fold'] = 'all'
    entry['it'] = 'all'
    entry['acc'] = np.mean(mean_accuracies) * 100
    entry['std'] = np.std(mean_accuracies) * 100
    cv_results.append(entry)

    logger.info('Accuracy: {:2.2f} +- {:2.2f}'.format(
        np.mean(mean_accuracies) * 100,
        np.std(mean_accuracies) * 100))

    if exists(args.result_file):
        with open(args.result_file, 'a') as f:
            pd.DataFrame(cv_results).to_csv(f, index=False, header=None)
    else:
        pd.DataFrame(cv_results).to_csv(args.result_file, index=False)
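# Side note on the precomputed-kernel indexing above: `gram_matrix[train_index][:, train_index]`
# extracts the train/train block of the Gram matrix. An equivalent spelling uses
# `np.ix_`; the toy matrix below only illustrates that equivalence.
import numpy as np

K = np.arange(25.0).reshape(5, 5)
rows = np.array([0, 2, 4])
assert np.array_equal(K[rows][:, rows], K[np.ix_(rows, rows)])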
catagrateryGraphList = []
for i in range(len(DataSet)):  # 10 classes of data
    pointsGraphList = GetGraphList(DataSet[i])  # Get the graph list for a single class
    catagrateryGraphList.append(pointsGraphList)
print(catagrateryGraphList)

sum_Graphlist = []
for i in range(len(catagrateryGraphList)):
    for j in range(len(catagrateryGraphList[i])):
        sum_Graphlist.append(catagrateryGraphList[i][j])
mutag_list = np.array(sum_Graphlist)

### ALL KERNELS COMPUTE
K1 = gk.CalculateEdgeHistKernel(mutag_list)
K2 = gk.CalculateVertexHistKernel(mutag_list)
K3 = gk.CalculateVertexEdgeHistKernel(mutag_list)
K4 = gk.CalculateVertexVertexEdgeHistKernel(mutag_list)
K5 = gk.CalculateEdgeHistGaussKernel(mutag_list)
K6 = gk.CalculateVertexHistGaussKernel(mutag_list)
K7 = gk.CalculateVertexEdgeHistGaussKernel(mutag_list)
# K8 = gk.CalculateGeometricRandomWalkKernel(mutag_list)
# K9 = gk.CalculateExponentialRandomWalkKernel(mutag_list)
K10 = gk.CalculateKStepRandomWalkKernel(mutag_list, [1.0])
K11 = gk.CalculateWLKernel(mutag_list)
K12 = gk.CalculateConnectedGraphletKernel(mutag_list, 4)
K13 = gk.CalculateGraphletKernel(mutag_list, 4)
# K14 = gk.CalculateShortestPathKernel(mutag_list)
# Obtained 10 classes, 50 graphs; compute pairwise similarity
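# A hedged follow-up sketch for the similarity comparison mentioned in the final
# comment above. One common way to make the raw Gram matrices comparable is cosine
# normalization, K'(i, j) = K(i, j) / sqrt(K(i, i) * K(j, j)). The helper below is
# an assumption, not part of the original script.
def normalize_kernel_matrix(K):
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)

K11_normalized = normalize_kernel_matrix(K11)  # e.g. the WL kernel matrix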