def compute_edgehist_kernel(mols, params):
    """Compute the edge-histogram kernel for a collection of molecules.

    Arguments:
        mols {list[Molecule]} -- molecules to compare; each one is converted
            with ``mol2graph_igraph`` before the kernel is evaluated
        params {dict} -- kernel settings; ``params["cont_par"]`` is forwarded
            to the kernel as its ``par`` argument

    Returns:
        The result of ``gk.CalculateEdgeHistKernel`` on the converted graphs.
    """
    # Read the parameter first so a missing key fails before any conversion.
    cont_par = params["cont_par"]

    graphs = []
    for molecule in mols:
        graphs.append(mol2graph_igraph(molecule))

    return gk.CalculateEdgeHistKernel(graphs, par=cont_par)
def compute_kernel_distance_matrices(slice_subgraphs, kernel_params):
    """Compute one distance matrix per requested graph kernel.

    Args:
        slice_subgraphs: graphs handed to ``get_relabeled_graphs`` for
            relabeling.
        kernel_params: iterable of dicts, each with a ``"name"`` entry
            (``"wlst"``, ``"eh"`` or ``"vh"``) and a ``"params"`` dict that
            holds ``"label"`` (plus ``"n_iters"`` for ``"wlst"``).

    Returns:
        dict mapping ``(kernel, label, n_iters)`` for ``"wlst"`` and
        ``(kernel, label)`` for ``"eh"``/``"vh"`` to the output of
        ``convert_to_distance_matrix`` for that kernel matrix.

    Raises:
        NotImplementedError: if a kernel name other than the three above is
            requested.
    """
    # Relabel once up front; results are looked up by (kernel, label).
    relabeled_by_pair = get_relabeled_graphs(slice_subgraphs, kernel_params)

    distance_matrices = {}
    for spec in kernel_params:
        name = spec["name"]
        options = spec["params"]

        # Reject unknown kernels before touching their parameters.
        if name not in ("wlst", "eh", "vh"):
            raise NotImplementedError(
                "Kernel: {} not supported".format(name))

        if name == "wlst":
            # Weisfeiler-Lehman subtree kernel: the iteration count is part
            # of the result key.
            n_iters = options["n_iters"]
            label = options["label"]
            graphs = relabeled_by_pair[(name, label)]
            gram = gk.CalculateWLKernel(graphs, n_iters)
            result_key = (name, label, n_iters)
        else:
            label = options["label"]
            result_key = (name, label)
            graphs = relabeled_by_pair[result_key]
            if name == "eh":
                # Edge-histogram kernel
                gram = gk.CalculateEdgeHistKernel(graphs)
            else:
                # Vertex-histogram kernel
                gram = gk.CalculateVertexHistKernel(graphs)

        distance_matrices[result_key] = convert_to_distance_matrix(gram)

    return distance_matrices
def compute_all_kernels(graphs):
    """Evaluate every enabled ``gk`` kernel on ``graphs``.

    Args:
        graphs: the graph collection passed unchanged to each kernel call.

    Returns:
        A 16-tuple of kernel results in this order: edge hist, vertex hist,
        vertex-edge hist, vertex-vertex-edge hist, edge hist Gauss, vertex
        hist Gauss, vertex-edge hist Gauss, geometric random walk, k-step
        random walk (``[1.0]``), WL, connected graphlet (3, 4, 5),
        graphlet (3, 4), shortest path.
    """
    # Histogram kernels
    edge_hist = gk.CalculateEdgeHistKernel(graphs)
    vertex_hist = gk.CalculateVertexHistKernel(graphs)
    vertex_edge_hist = gk.CalculateVertexEdgeHistKernel(graphs)
    vertex_vertex_edge_hist = gk.CalculateVertexVertexEdgeHistKernel(graphs)

    # Gaussian histogram kernels
    edge_hist_gauss = gk.CalculateEdgeHistGaussKernel(graphs)
    vertex_hist_gauss = gk.CalculateVertexHistGaussKernel(graphs)
    vertex_edge_hist_gauss = gk.CalculateVertexEdgeHistGaussKernel(graphs)

    # Random-walk kernels.
    # NOTE: gk.CalculateExponentialRandomWalkKernel is disabled here.
    geometric_rw = gk.CalculateGeometricRandomWalkKernel(graphs)
    k_step_rw = gk.CalculateKStepRandomWalkKernel(graphs, [1.0])

    weisfeiler_lehman = gk.CalculateWLKernel(graphs)

    # Graphlet kernels for several graphlet sizes
    connected_graphlet_3 = gk.CalculateConnectedGraphletKernel(graphs, 3)
    connected_graphlet_4 = gk.CalculateConnectedGraphletKernel(graphs, 4)
    connected_graphlet_5 = gk.CalculateConnectedGraphletKernel(graphs, 5)
    graphlet_3 = gk.CalculateGraphletKernel(graphs, 3)
    graphlet_4 = gk.CalculateGraphletKernel(graphs, 4)

    shortest_path = gk.CalculateShortestPathKernel(graphs)

    return (
        edge_hist,
        vertex_hist,
        vertex_edge_hist,
        vertex_vertex_edge_hist,
        edge_hist_gauss,
        vertex_hist_gauss,
        vertex_edge_hist_gauss,
        geometric_rw,
        k_step_rw,
        weisfeiler_lehman,
        connected_graphlet_3,
        connected_graphlet_4,
        connected_graphlet_5,
        graphlet_3,
        graphlet_4,
        shortest_path,
    )
def calculate_kernel_matrices(self, igraph_list):
    """Compute the enabled graph-kernel matrices for ``igraph_list``.

    Args:
        igraph_list: graph collection passed unchanged to every ``gk``
            kernel function.

    Returns:
        list of 8 kernel results, in this order: edge hist, vertex hist,
        vertex-edge hist, vertex-vertex-edge hist, vertex hist Gauss,
        edge hist Gauss, vertex-edge hist Gauss, WL.
    """
    # The third slot used to repeat CalculateVertexVertexEdgeHistKernel, so
    # the same matrix appeared twice and the vertex-edge histogram kernel was
    # never computed. The list length and the other positions are unchanged.
    #
    # Disabled kernels (not computed): geometric / exponential / k-step
    # random walk, shortest path, graphlet and connected graphlet.
    kernel_functions = (
        gk.CalculateEdgeHistKernel,
        gk.CalculateVertexHistKernel,
        gk.CalculateVertexEdgeHistKernel,
        gk.CalculateVertexVertexEdgeHistKernel,
        gk.CalculateVertexHistGaussKernel,
        gk.CalculateEdgeHistGaussKernel,
        gk.CalculateVertexEdgeHistGaussKernel,
        gk.CalculateWLKernel,
    )
    return [kernel(igraph_list) for kernel in kernel_functions]
def main(args, logger):
    """Evaluate an SVM on the precomputed edge-histogram kernel.

    Reads the graphs and labels, computes the edge-histogram Gram matrix and
    runs 10 repetitions of 10-fold stratified cross-validation. Within each
    fold, ``KernelGridSearchCV`` selects ``clf__C`` and the chosen estimator
    is refit on the training part and scored on the test part. One row per
    fold plus one summary row is written to ``args.result_file``.

    Args:
        args: parsed arguments; this function reads ``FILES`` (graph file
            names), ``labels`` (passed to ``read_labels``), ``balanced``
            (truthy selects ``class_weight='balanced'``) and
            ``result_file`` (CSV path to create or append to).
        logger: object whose ``info`` method receives progress messages.

    Raises:
        AssertionError: if the number of graphs and labels differ.
        IndexError: if the directory of ``args.FILES[0]`` has fewer than two
            ``'/'``-separated components (see the ``dataset`` entry below).
    """
    graphs = [ig.read(filename) for filename in args.FILES]
    labels = read_labels(args.labels)

    # Set the label to be uniform over all graphs in case no labels are
    # available. This essentially changes our iteration to degree-based
    # checks.
    for graph in graphs:
        if 'label' not in graph.vs.attributes():
            graph.vs['label'] = [0] * len(graph.vs)

    logger.info('Read {} graphs and {} labels'.format(len(graphs), len(labels)))

    assert len(graphs) == len(labels)

    # Calculate graph kernel
    gram_matrix = gk.CalculateEdgeHistKernel(graphs)
    y = LabelEncoder().fit_transform(labels)

    mean_accuracies = []
    cv_results = []

    # Template row shared by every fold row and by the final summary row.
    params = ['balanced']
    entry = {param: vars(args)[param] for param in params}
    # The dataset name is the second path component of the first file's
    # directory, e.g. 'data/MUTAG/x.gml' -> 'MUTAG'.
    entry['dataset'] = dirname(args.FILES[0]).split('/')[1]
    entry['baseline'] = 'edge hist kernel'

    class_weight = 'balanced' if args.balanced else None

    for i in range(10):
        # Contains accuracy scores for each cross validation step; the
        # means of this list will be used later on.
        accuracy_scores = []
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)

        for n, (train_index, test_index) in enumerate(cv.split(graphs, y)):
            entry_fold = copy.copy(entry)

            pipeline = Pipeline(
                [('clf',
                  SVC(class_weight=class_weight,
                      random_state=42,
                      kernel='precomputed'))],
            )
            grid_params = {'clf__C': [1e-1, 1e0, 1e1]}

            # Precomputed-kernel slices: rows are the samples being fitted
            # or predicted, columns are always the training samples.
            X_train = gram_matrix[train_index][:, train_index]
            X_test = gram_matrix[test_index][:, train_index]
            y_train, y_test = y[train_index], y[test_index]

            # The same splitter object is reused for the inner grid search.
            kgscv = KernelGridSearchCV(
                pipeline,
                param_grid=grid_params,
                cv=cv,
            )
            kgscv.fit(X_train, y_train)

            best_params = kgscv._best_params
            clf = kgscv._best_estimator
            clf.fit(X_train, y_train)

            y_pred = clf.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            accuracy_scores.append(acc)

            for param, param_val in best_params.items():
                entry_fold[param] = param_val
                # Blank placeholder so the summary row has the same columns.
                entry[param] = ''

            entry_fold['fold'] = n + 1
            entry_fold['it'] = i
            entry_fold['acc'] = acc * 100
            entry_fold['std'] = 0.0

            cv_results.append(entry_fold)

            logger.info('Best classifier for this fold:{}'.format(best_params))

        mean_accuracies.append(np.mean(accuracy_scores))
        logger.info(
            ' - Mean 10-fold accuracy: {:2.2f} [running mean over all folds: {:2.2f}]'
            .format(mean_accuracies[-1] * 100,
                    np.mean(mean_accuracies) * 100))

    overall_mean = np.mean(mean_accuracies) * 100
    overall_std = np.std(mean_accuracies) * 100

    entry['fold'] = 'all'
    entry['it'] = 'all'
    entry['acc'] = overall_mean
    entry['std'] = overall_std
    cv_results.append(entry)

    logger.info('Accuracy: {:2.2f} +- {:2.2f}'.format(overall_mean,
                                                      overall_std))

    # Append to an existing result file without repeating the header, or
    # create it with a header. Letting pandas open the path in both cases
    # keeps encoding and newline handling the same for created and appended
    # rows.
    write_header = not exists(args.result_file)
    pd.DataFrame(cv_results).to_csv(
        args.result_file,
        mode='a',
        index=False,
        header=write_header,
    )
if __name__ == "__main__":
    # Build one graph list per class in DataSet (10 classes of data).
    # Index-based access is kept on purpose: DataSet is defined elsewhere
    # and may not iterate over its values.
    catagrateryGraphList = []
    for i in range(len(DataSet)):
        # Graph list for a single class
        catagrateryGraphList.append(GetGraphList(DataSet[i]))
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(catagrateryGraphList)

    # Flatten the per-class lists into one list holding every graph.
    # NOTE(review): assumes GetGraphList returns a sequence whose iteration
    # order matches its index order -- confirm.
    sum_Graphlist = []
    for class_graphs in catagrateryGraphList:
        sum_Graphlist.extend(class_graphs)
    mutag_list = np.array(sum_Graphlist)

    ### ALL KERNELS COMPUTE
    # With the 10 classes / 50 graphs collected above, compute similarities.
    # Disabled (not computed): K8 geometric random walk, K9 exponential
    # random walk, K14 shortest path.
    K1 = gk.CalculateEdgeHistKernel(mutag_list)
    K2 = gk.CalculateVertexHistKernel(mutag_list)
    K3 = gk.CalculateVertexEdgeHistKernel(mutag_list)
    K4 = gk.CalculateVertexVertexEdgeHistKernel(mutag_list)
    K5 = gk.CalculateEdgeHistGaussKernel(mutag_list)
    K6 = gk.CalculateVertexHistGaussKernel(mutag_list)
    K7 = gk.CalculateVertexEdgeHistGaussKernel(mutag_list)
    K10 = gk.CalculateKStepRandomWalkKernel(mutag_list)
    K11 = gk.CalculateWLKernel(mutag_list)
    K12 = gk.CalculateConnectedGraphletKernel(mutag_list, 4)
    K13 = gk.CalculateGraphletKernel(mutag_list, 4)