def compute_edgehist_kernel(mols, params):
    """
    Compute the edge-histogram kernel for a collection of molecules.

    Every molecule is converted with ``mol2graph_igraph`` and the resulting
    graphs are handed to ``gk.CalculateEdgeHistKernel``.

    Arguments:
            mols {list[Molecule]} -- molecules to convert and compare
            params {dict} -- must contain the key ``"cont_par"``; its value
                is forwarded to the kernel as ``par``

    Returns:
            The value returned by ``gk.CalculateEdgeHistKernel``.
    """
    # Read the kernel parameter first so a missing key fails before any
    # molecule is converted.
    kernel_par = params["cont_par"]
    graphs = list(map(mol2graph_igraph, mols))
    return gk.CalculateEdgeHistKernel(graphs, par=kernel_par)
def compute_kernel_distance_matrices(slice_subgraphs, kernel_params):
    """Compute one distance matrix per requested graph kernel.

    Arguments:
        slice_subgraphs -- graphs passed to ``get_relabeled_graphs``
        kernel_params -- iterable of dicts, each with a ``"name"`` entry
            (``"wlst"``, ``"eh"`` or ``"vh"``) and a ``"params"`` dict that
            holds ``"label"`` (plus ``"n_iters"`` for ``"wlst"``)

    Returns:
        dict mapping ``(name, label, n_iters)`` for ``"wlst"`` and
        ``(name, label)`` for ``"eh"`` / ``"vh"`` to the result of
        ``convert_to_distance_matrix`` applied to the kernel matrix.

    Raises:
        NotImplementedError -- for any other kernel name
    """
    # Relabel once up front; the result is indexed by (kernel name, label).
    relabeled_lookup = get_relabeled_graphs(slice_subgraphs, kernel_params)

    distance_matrices = {}
    for spec in kernel_params:
        name = spec["name"]
        options = spec["params"]

        if name == "wlst":
            # Weisfeiler-Lehman subtree pattern kernel
            n_iters = options["n_iters"]
            result_key = (name, options["label"], n_iters)
            graphs = relabeled_lookup[result_key[:2]]
            gram = gk.CalculateWLKernel(graphs, n_iters)
        elif name == "eh":
            # Edge-histogram kernel
            result_key = (name, options["label"])
            graphs = relabeled_lookup[result_key]
            gram = gk.CalculateEdgeHistKernel(graphs)
        elif name == "vh":
            # Vertex-histogram kernel
            result_key = (name, options["label"])
            graphs = relabeled_lookup[result_key]
            gram = gk.CalculateVertexHistKernel(graphs)
        else:
            raise NotImplementedError(
                "Kernel: {} not supported".format(name))

        distance_matrices[result_key] = convert_to_distance_matrix(gram)

    return distance_matrices
# Example #3
# 0
def compute_all_kernels(graphs):
    """Evaluate every enabled ``gk`` kernel on ``graphs``.

    Returns a 16-tuple of kernel results in this order: edge histogram,
    vertex histogram, vertex-edge histogram, vertex-vertex-edge histogram,
    the Gaussian edge / vertex / vertex-edge histogram variants, geometric
    random walk, k-step random walk (parameter ``[1.0]``),
    Weisfeiler-Lehman, connected graphlets of size 3, 4 and 5, graphlets of
    size 3 and 4, and shortest path.
    """
    edge_hist = gk.CalculateEdgeHistKernel(graphs)
    vertex_hist = gk.CalculateVertexHistKernel(graphs)
    vertex_edge_hist = gk.CalculateVertexEdgeHistKernel(graphs)
    vertex_vertex_edge_hist = gk.CalculateVertexVertexEdgeHistKernel(graphs)

    edge_hist_gauss = gk.CalculateEdgeHistGaussKernel(graphs)
    vertex_hist_gauss = gk.CalculateVertexHistGaussKernel(graphs)
    vertex_edge_hist_gauss = gk.CalculateVertexEdgeHistGaussKernel(graphs)

    # NOTE: gk.CalculateExponentialRandomWalkKernel is left disabled here.
    geometric_walk = gk.CalculateGeometricRandomWalkKernel(graphs)
    k_step_walk = gk.CalculateKStepRandomWalkKernel(graphs, [1.0])

    weisfeiler_lehman = gk.CalculateWLKernel(graphs)

    connected_graphlet_3 = gk.CalculateConnectedGraphletKernel(graphs, 3)
    connected_graphlet_4 = gk.CalculateConnectedGraphletKernel(graphs, 4)
    connected_graphlet_5 = gk.CalculateConnectedGraphletKernel(graphs, 5)
    graphlet_3 = gk.CalculateGraphletKernel(graphs, 3)
    graphlet_4 = gk.CalculateGraphletKernel(graphs, 4)

    shortest_path = gk.CalculateShortestPathKernel(graphs)

    return (
        edge_hist,
        vertex_hist,
        vertex_edge_hist,
        vertex_vertex_edge_hist,
        edge_hist_gauss,
        vertex_hist_gauss,
        vertex_edge_hist_gauss,
        geometric_walk,
        k_step_walk,
        weisfeiler_lehman,
        connected_graphlet_3,
        connected_graphlet_4,
        connected_graphlet_5,
        graphlet_3,
        graphlet_4,
        shortest_path,
    )
 def calculate_kernel_matrices(self, igraph_list):
     """Compute the enabled graph kernels for ``igraph_list``.

     Arguments:
         igraph_list -- graphs passed unchanged to every ``gk`` kernel

     Returns:
         list of 8 kernel results, in this order: edge histogram, vertex
         histogram, vertex-edge histogram, vertex-vertex-edge histogram,
         Gaussian vertex histogram, Gaussian edge histogram, Gaussian
         vertex-edge histogram, Weisfeiler-Lehman.
     """
     # The third entry used to be a second CalculateVertexVertexEdgeHistKernel
     # call, so the same kernel appeared twice and the vertex-edge histogram
     # kernel was never computed.
     #
     # Still disabled (as before): the geometric / exponential / k-step
     # random-walk kernels, the shortest-path kernel and both graphlet
     # kernels.
     return [
         gk.CalculateEdgeHistKernel(igraph_list),
         gk.CalculateVertexHistKernel(igraph_list),
         gk.CalculateVertexEdgeHistKernel(igraph_list),
         gk.CalculateVertexVertexEdgeHistKernel(igraph_list),
         gk.CalculateVertexHistGaussKernel(igraph_list),
         gk.CalculateEdgeHistGaussKernel(igraph_list),
         gk.CalculateVertexEdgeHistGaussKernel(igraph_list),
         gk.CalculateWLKernel(igraph_list),
     ]
# Example #5
# 0
def main(args, logger):
    """Evaluate the edge-histogram kernel with repeated stratified 10-fold CV.

    Reads the graphs and labels, computes the edge-histogram Gram matrix
    once, then runs 10 repetitions of a shuffled 10-fold split. In every
    fold ``clf__C`` of a precomputed-kernel SVC is selected with
    ``KernelGridSearchCV`` and the accuracy on the held-out part is
    recorded. One row per fold plus one summary row is written to
    ``args.result_file`` (appended without a header if the file already
    exists).

    Arguments:
        args -- namespace providing ``FILES`` (graph file names),
            ``labels`` (passed to ``read_labels``), ``balanced`` (enables
            ``class_weight='balanced'``) and ``result_file`` (CSV path)
        logger -- object with an ``info`` method used for progress output
    """
    graphs = [ig.read(filename) for filename in args.FILES]
    labels = read_labels(args.labels)

    # Set the label to be uniform over all graphs in case no labels are
    # available. This essentially changes our iteration to degree-based
    # checks.
    for graph in graphs:
        if 'label' not in graph.vs.attributes():
            graph.vs['label'] = [0] * len(graph.vs)

    logger.info('Read {} graphs and {} labels'.format(len(graphs),
                                                      len(labels)))

    assert len(graphs) == len(labels)

    # Calculate graph kernel
    gram_matrix = gk.CalculateEdgeHistKernel(graphs)

    y = LabelEncoder().fit_transform(labels)

    mean_accuracies = []
    cv_results = []

    # Template row. Key insertion order matters: it determines the column
    # order of the CSV written at the end.
    params = ['balanced']
    entry = {param: vars(args)[param] for param in params}
    entry['dataset'] = dirname(args.FILES[0]).split('/')[1]
    entry['baseline'] = 'edge hist kernel'

    for i in range(10):
        # Contains accuracy scores for each cross validation step; the
        # means of this list will be used later on.
        accuracy_scores = []
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)
        for fold, (train_index, test_index) in enumerate(
                cv.split(graphs, y), start=1):

            entry_fold = copy.copy(entry)

            pipeline = Pipeline(
                [('clf',
                  SVC(class_weight='balanced' if args.balanced else None,
                      random_state=42,
                      kernel='precomputed'))], )

            grid_params = {'clf__C': [1e-1, 1e0, 1e1]}

            # Precomputed kernel: rows are the samples being scored,
            # columns are always the training samples.
            X_train = gram_matrix[train_index][:, train_index]
            X_test = gram_matrix[test_index][:, train_index]
            y_train, y_test = y[train_index], y[test_index]

            # The same splitter instance is reused for the inner search.
            kgscv = KernelGridSearchCV(
                pipeline,
                param_grid=grid_params,
                cv=cv,
            )
            kgscv.fit(X_train, y_train)
            best_params = kgscv._best_params
            clf = kgscv._best_estimator
            clf.fit(X_train, y_train)

            y_pred = clf.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            accuracy_scores.append(acc)

            for param, param_val in best_params.items():
                entry_fold[param] = param_val
                # Blank the tuned parameter in the template so the summary
                # row carries an empty value in that column.
                entry[param] = ''
            entry_fold['fold'] = fold
            entry_fold['it'] = i
            entry_fold['acc'] = acc * 100
            entry_fold['std'] = 0.0
            cv_results.append(entry_fold)

            logger.info('Best classifier for this fold:{}'.format(
                best_params))

        mean_accuracies.append(np.mean(accuracy_scores))
        logger.info(
            '  - Mean 10-fold accuracy: {:2.2f} [running mean over all folds: {:2.2f}]'
            .format(mean_accuracies[-1] * 100,
                    np.mean(mean_accuracies) * 100))

    overall_mean = np.mean(mean_accuracies) * 100
    overall_std = np.std(mean_accuracies) * 100

    # Summary row over all repetitions.
    entry['fold'] = 'all'
    entry['it'] = 'all'
    entry['acc'] = overall_mean
    entry['std'] = overall_std
    cv_results.append(entry)
    logger.info('Accuracy: {:2.2f} +- {:2.2f}'.format(overall_mean,
                                                      overall_std))

    results = pd.DataFrame(cv_results)
    if exists(args.result_file):
        # Append to an existing file without repeating the header line.
        with open(args.result_file, 'a') as f:
            results.to_csv(f, index=False, header=False)
    else:
        results.to_csv(args.result_file, index=False)
if __name__ == "__main__":
    # Build one graph list per entry of DataSet (10 classes of data).
    # Indexed access is kept because DataSet's type is not visible here.
    catagrateryGraphList = [
        GetGraphList(DataSet[i])  # graph list for a single class
        for i in range(len(DataSet))
    ]
    # Parenthesised form so the statement parses on Python 3 as well as
    # Python 2 (a bare `print x` is a SyntaxError on Python 3).
    print(catagrateryGraphList)

    # Flatten the per-class lists into one list of graphs.
    sum_Graphlist = [
        graph
        for class_graphs in catagrateryGraphList
        for graph in class_graphs
    ]

    mutag_list = np.array(sum_Graphlist)
    ### ALL KERNELS COMPUTE
    K1 = gk.CalculateEdgeHistKernel(mutag_list)
    K2 = gk.CalculateVertexHistKernel(mutag_list)
    K3 = gk.CalculateVertexEdgeHistKernel(mutag_list)
    K4 = gk.CalculateVertexVertexEdgeHistKernel(mutag_list)
    K5 = gk.CalculateEdgeHistGaussKernel(mutag_list)
    K6 = gk.CalculateVertexHistGaussKernel(mutag_list)
    K7 = gk.CalculateVertexEdgeHistGaussKernel(mutag_list)
    # K8 = gk.CalculateGeometricRandomWalkKernel(mutag_list)
    # K9 = gk.CalculateExponentialRandomWalkKernel(mutag_list)
    K10 = gk.CalculateKStepRandomWalkKernel(mutag_list)
    K11 = gk.CalculateWLKernel(mutag_list)
    K12 = gk.CalculateConnectedGraphletKernel(mutag_list, 4)
    K13 = gk.CalculateGraphletKernel(mutag_list, 4)
    # K14 = gk.CalculateShortestPathKernel(mutag_list)
    # Goal: obtain 10 classes / 50 kinds of graphs and compute their
    # similarity.