Example #1
def test_multiscale_laplacian_pd():
    """Random input test for the Multiscale Laplacian kernel [n_jobs=-1/generic-wrapper]."""
    # Initialise the kernel
    train, test = generate_dataset(n_graphs=30,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=10,
                                   random_state=rs,
                                   features=('na', 5))

    gk = GraphKernel(kernel="ML",
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
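A note on the recurring try/except pattern in these tests: under pytest an uncaught exception already fails the test, so the wrapper's main effect is to attach the caught exception to the assertion message via "assert False, exception".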
Example #2
def test_neighborhood_subgraph_pairwise_distance():
    """Random input test for the Neighborhood Subgraph Pairwise Distance kernel [+ generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 5, 'el', 4))

    nspd_kernel = NeighborhoodSubgraphPairwiseDistance(verbose=verbose,
                                                       normalize=normalize)
    gk = GraphKernel(kernel="NSPD", verbose=verbose, normalize=normalize)

    try:
        nspd_kernel.fit_transform(train)
        nspd_kernel.transform(test)
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #3
adjacency_matrices = np.array([[cell['am']] for cell in dataset["G"][0]])
labels = np.array([label[0] for label in dataset["labels"]])

X = adjacency_matrices
y = labels

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    train_size=train_size,
                                                    test_size=test_size,
                                                    shuffle=True,
                                                    random_state=42)

randomWalkKernel = GraphKernel(kernel={
    "name": "random_walk",
    "with_labels": False
},
                               normalize=True)
graphletKernel = GraphKernel(kernel={"name": "graphlet_sampling"},
                             normalize=True)
shortestPathKernel = GraphKernel(kernel={"name": "shortest_path"},
                                 normalize=True)

# Calculate the kernel matrices for the random walk kernel.
K_train = randomWalkKernel.fit_transform(X_train)
K_test = randomWalkKernel.transform(X_test)
# Inspect the resulting kernel matrices.
print(K_train[0][79 - 5])
print(len(K_train))
print(len(K_train[0]))
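The snippet stops after inspecting the kernel matrices. A minimal continuation sketch, assuming scikit-learn's SVC with a precomputed kernel (this continuation is not part of the original example):

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

clf = SVC(kernel='precomputed')  # Gram matrices stand in for feature vectors
clf.fit(K_train, y_train)        # K_train has shape (n_train, n_train)
y_pred = clf.predict(K_test)     # K_test has shape (n_test, n_train)
print("accuracy:", accuracy_score(y_test, y_pred))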
Example #4
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='WL subtree kernel')
    parser.add_argument('--dataset',
                        type=str,
                        default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument(
        '--seed',
        type=int,
        default=0,
        help='random seed for splitting the dataset into 10 (default: 0)')
    parser.add_argument(
        '--fold_idx',
        type=int,
        default=0,
        help='the index of the fold in 10-fold cross-validation; should be less than 10.'
    )
    parser.add_argument('--iter',
                        type=int,
                        default=5,
                        help='number of WL iterations')
    parser.add_argument('--normalize',
                        action="store_true",
                        help='whether to normalize the kernel matrix')
    parser.add_argument('--filename', type=str, default="", help='output file')
    args = parser.parse_args()

    np.random.seed(0)
    graphs, num_classes = load_data(args.dataset, False)

    # 10-fold cross-validation; consider only the selected fold.
    train_graphs, test_graphs = separate_data(graphs, args.seed, args.fold_idx)

    # SVM hyper-parameter to tune
    C_list = [0.01, 0.1, 1, 10, 100]
    X_train, y_train = convert(train_graphs)
    X_test, y_test = convert(test_graphs)

    wl_kernel = GraphKernel(kernel=[{
        "name": "weisfeiler_lehman",
        "niter": args.iter
    }, {
        "name": "subtree_wl"
    }],
                            normalize=args.normalize)
    K_train = wl_kernel.fit_transform(X_train)
    K_test = wl_kernel.transform(X_test)

    train_acc = []
    test_acc = []
    for C in C_list:
        clf = SVC(kernel='precomputed', C=C)
        clf.fit(K_train, y_train)
        y_pred_test = clf.predict(K_test)
        y_pred_train = clf.predict(K_train)
        train_acc.append(accuracy_score(y_train, y_pred_train) * 100)
        test_acc.append(accuracy_score(y_test, y_pred_test) * 100)

    print(train_acc)
    print(test_acc)

    if args.filename != "":
        np.savetxt(args.filename, np.array([train_acc, test_acc]).transpose())
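convert is a project-local helper. A hypothetical sketch, assuming the S2V-style graph objects returned by load_data (a networkx graph in g, integer tags in node_tags, a class id in label); the real code may differ:

def convert(graph_list):
    X, y = [], []
    for graph in graph_list:
        edges = {}
        for u, v in graph.g.edges():
            edges[(u, v)] = 1
            edges[(v, u)] = 1  # edge dicts are directed; add both orientations
        node_labels = {n: t for n, t in zip(graph.g.nodes(), graph.node_tags)}
        X.append([edges, node_labels])
        y.append(graph.label)
    return X, y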
Example #5
import numpy as np

from time import time

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

from grakel import datasets
from grakel import GraphKernel

# Loads the Mutag dataset from:
# https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
# the biggest collection of benchmark datasets for graph kernels.
mutag = datasets.fetch_dataset("MUTAG", verbose=False)
G, y = mutag.data, mutag.target
C_grid = (10.**np.arange(1, 10, 1) / len(G)).tolist()
n_folds = 10

estimator = make_pipeline(
    GraphKernel(kernel=dict(name="shortest_path"), normalize=True),
    GridSearchCV(svm.SVC(kernel='precomputed'),
                 dict(C=C_grid),
                 scoring='accuracy'))

acc = accuracy_score(y, cross_val_predict(estimator, G, y, cv=n_folds))
print("Accuracy:", str(round(acc * 100, 2)) + "%")
Example #6
def evaluate_kernel(graphs,
                    graph_labels,
                    kernel_def,
                    label_requests,
                    n_folds=10,
                    seed=None):
    """
    """

    # Print progress
    print("Kernel: {}".format(kernel_def))

    # Just a few sanity checks
    assert len(graphs) == len(graph_labels)

    # Initialize graph kernel
    gk = GraphKernel(kernel=kernel_def, normalize=True)

    # Train kernel on each set of relabeled graphs
    results = []
    for lr in label_requests:

        # Print progress
        print("Vertex/Edge Labeling: {}".format(lr))

        # Convert the base graphs into GraKeL-compatible representations with
        # the requested vertex and edge labels, if any.
        relabeled_graphs = convert_graphs(graphs, lr)
        print("# relabeled graphs: {}".format(len(relabeled_graphs)))

        # Define lists to track non-determinism fraction prediction results
        # over multiple folds
        true_nd_vals = []
        pred_nd_vals = []
        mse_vals = []

        # Define training and testing sets
        graph_indices = list(range(len(graph_labels)))
        kf = KFold(n_splits=n_folds, random_state=seed, shuffle=True)
        for split_idx, (train_indices,
                        test_indices) in enumerate(kf.split(graph_indices)):

            # Print progress
            print("Running split {}/{}".format(split_idx, n_folds))

            # Get training and testing graphs
            g_train = [relabeled_graphs[i] for i in train_indices]
            g_test = [relabeled_graphs[i] for i in test_indices]
            print("# training graphs: {}".format(len(g_train)))
            print("# test graphs: {}".format(len(g_test)))

            # Get the non-determinism fraction values for the training and
            # testing graphs
            y_train = [graph_labels[i] for i in train_indices]
            y_test = [graph_labels[i] for i in test_indices]

            # Compute the graph kernel matrix
            k_train, k_test = compute_kernel_matrix(g_train, g_test, gk)

            print("K-train shape: {}".format(k_train.shape))
            print("K-test shape: {}".format(k_test.shape))

            # Train SVM regressor using precomputed kernel matrix
            y_pred = train_model(k_train, k_test, y_train)

            # Print progress
            print("Done with split {}/{}".format(split_idx, n_folds))
            print()

            # Aggregate results for this fold
            true_nd_vals += list(y_test)
            pred_nd_vals += list(y_pred)

        # Aggregate results for this labeling
        results.append({"true": true_nd_vals, "pred": pred_nd_vals})
    return results
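compute_kernel_matrix and train_model are project-local helpers. A plausible sketch consistent with the calls above (names and bodies are assumptions, not the original code):

def compute_kernel_matrix(g_train, g_test, gk):
    # Fit on the training graphs, then evaluate the test graphs against them:
    # shapes (n_train, n_train) and (n_test, n_train) respectively.
    k_train = gk.fit_transform(g_train)
    k_test = gk.transform(g_test)
    return k_train, k_test

def train_model(k_train, k_test, y_train):
    # SVM regression on the precomputed kernel, mirroring the SVC pattern
    # used elsewhere in these examples.
    from sklearn.svm import SVR
    reg = SVR(kernel='precomputed')
    reg.fit(k_train, y_train)
    return reg.predict(k_test)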
Example #7
def untangle(graph,
             k_hop,
             with_data: bool = True,
             with_call: bool = True,
             with_name: bool = True):
    seeds, list_of_graphs = deltaPDG_to_list_of_Graphs(graph, khop_k=k_hop)
    wl_subtree = GraphKernel(kernel=[{
        "name": "weisfeiler_lehman",
        "n_iter": 10
    }, {
        "name": "subtree_wl"
    }],
                             normalize=True)
    if len(list_of_graphs) > 0:
        similarities = defaultdict(lambda: (0, 0.0))
        for g1, g2 in itertools.combinations(list_of_graphs, 2):
            # The graph has to be converted to {Graph, Node_Labels, Edge_Labels}
            wl_subtree.fit(
                [graph_to_grakel(g1, with_data, with_call, with_name)])
            similarity = wl_subtree.transform(
                [graph_to_grakel(g2, with_data, with_call, with_name)])[0][0]
            similarities[(list_of_graphs.index(g1),
                          list_of_graphs.index(g2))] = similarity

        n = len(list_of_graphs)
        affinity = np.zeros(shape=(scipy.special.comb(n, 2, exact=True), ))
        args = list(enumerate(itertools.combinations(range(n), 2)))
        # Use all but one core, but never fewer than one worker.
        with ThreadPool(processes=max(os.cpu_count() - 1, 1)) as wp:
            for k, value in wp.imap_unordered(
                    lambda i: (i[0], similarities[(i[-1][0], i[-1][1])]),
                    args):
                affinity[k] += (1 - value)  # affinity is a distance, so (1 - similarity)

        cluster = AgglomerativeClustering(n_clusters=None,
                                          distance_threshold=0.5,
                                          affinity='precomputed',
                                          linkage='complete')
        if len(affinity) < 2:
            if len(affinity) == 1:
                labels = np.asarray(
                    [0, 0]) if affinity[0] <= 0.5 else np.asarray([0, 1])
            else:
                labels = np.asarray([0])
        else:
            labels = cluster.fit_predict(
                scipy.spatial.distance.squareform(affinity))
    else:
        labels = None

    label = list()
    for node, data in graph.nodes(data=True):
        if 'color' in data.keys():
            i = seeds.index(node) if node in seeds else -1

            if labels is not None and i != -1:
                data['label'] = '%d: ' % labels[i] + data['label']
                label.append(labels[i])
                graph.add_node(node, **data)
            else:
                data['label'] = '-1: ' + data['label']
                label.append(-1)
                graph.add_node(node, **data)

    return graph
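graph_to_grakel is project-specific. A hedged sketch of a converter producing the [edges, node labels, edge labels] triple that GraKeL accepts (the attribute names are assumptions, and with_call is ignored for brevity):

def graph_to_grakel(g, with_data=True, with_call=True, with_name=True):
    edges, edge_labels = {}, {}
    for u, v, data in g.edges(data=True):
        edges[(u, v)] = 1
        edge_labels[(u, v)] = data.get('style', '') if with_data else ''
    node_labels = {n: (d.get('label', '') if with_name else '')
                   for n, d in g.nodes(data=True)}
    return [edges, node_labels, edge_labels]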
Example #8
import networkx as nx
from grakel import GraphKernel
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

from rpcc.create_features import TextFeaturesExtractor
from rpcc.load_data import DataLoader

sp_kernel = GraphKernel(kernel={
    "name": "shortest_path",
    'with_labels': False
},
                        normalize=True)

dl_obj = DataLoader()
dl_obj.run_data_preparation()

# creating a label encoder instance in order to convert the classes to integer ids
lb = LabelEncoder()

# extracting the train targets
y_train = dl_obj.y_train

# encoding the train targets as integer labels
y_train_one_hot = lb.fit_transform(y_train)

# extracting the validation targets
y_val = dl_obj.y_val

# encoding the validation targets as integer labels
y_val_one_hot = lb.transform(y_val)
Example #9
               bone_atoms_list={
                   'Ph': ['1a'],
                   'C': ['2a'],
                   'O': ['3a']
               },
               side_atoms_list={'H': ['2a']},
               additional_or_special_bonds_list=[['2a', '3a', 'double']])
y20 = 98

#%%
from kernelSVR import kernelSVR
ks = kernelSVR()
gk = GraphKernel(kernel={
    "name": "multiscale_laplacian",
    "which": "fast",
    "L": 1,
    "P": 10,
    "N": 10
})
#ks.add_kernel(gk)
ignoreH = False
expandPh = True
mx_use = toGraKelList(mx_train, ignoreH, expandPh)
#mx_use = toGraKelList(mx_full, ignoreH, expandPh)
ks.fit_kernel(mx_use)  #, my_train)
ks.fit_SVRs(my_train)

mx_use_test = toGraKelList(mx_test, ignoreH, expandPh)
#mx_use_test = mx_use
ks_pred_all = ks.predict(mx_use_test, 'all')
ks_pred = ks.predict(mx_use_test)
Example #10
from datasets_utils import load_shock_dataset, load_ppi_dataset
from utils import compute_distance_matrix

X, y = load_shock_dataset()
# X, y = load_ppi_dataset()

## KFOLD

# Shuffle data
idx = np.random.permutation(len(X))
X, y = X[idx], y[idx]

# Initialize chosen Kernel
spk = GraphKernel(kernel={
    "name": "shortest_path",
    "with_labels": False
},
                  normalize=True)

# Split indexes according to Kfold with k = 10
k = 10
kf = KFold(n_splits=k)

# initialize scores lists
scores1 = []
scores2 = []

for train_index, test_index in kf.split(X):

    # split train and test of K-fold
    X_train, X_test = X[train_index], X[test_index]
Example #11
                kf = StratifiedKFold(n_splits=10, shuffle=True)
                accs = []
                for train_index, test_index in kf.split(G, y):

                    start = time()

                    G_train = [G[idx] for idx in train_index]
                    y_train = [y[idx] for idx in train_index]
                    G_test = [G[idx] for idx in test_index]
                    y_test = [y[idx] for idx in test_index]

                    # Initialise a Weisfeiler-Lehman kernel with a dirac base kernel.
                    gk = GraphKernel(kernel=[{
                        "name": "weisfeiler_lehman",
                        "niter": niter
                    }, {
                        "name": "subtree_wl"
                    }],
                                     normalize=True)

                    # Calculate the kernel matrix.
                    K_train = gk.fit_transform(G_train)
                    K_test = gk.transform(G_test)

                    # Initialise an SVM and fit.
                    clf = svm.SVC(kernel='precomputed', C=1)
                    params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
                    clf = GridSearchCV(svm.SVC(kernel='precomputed'),
                                       params,
                                       cv=10,
                                       scoring='accuracy',
Example #12
# Loads the Mutag dataset from:
# https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
# the biggest collection of benchmark datasets for graph kernels.
mutag = datasets.fetch_dataset("MUTAG", verbose=False)
G, y = mutag.data, mutag.target

# Train-test split of graph data
G_train, G_test, y_train, y_test = train_test_split(G,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=42)

start = time()
# Initialise a Weisfeiler-Lehman kernel with a dirac base kernel.
gk = GraphKernel(kernel=[{"name": "WL", "n_iter": 5}, "ST-WL"], normalize=True)

# Calculate the kernel matrix.
K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)
end = time()

# Initialise an SVM and fit.
clf = svm.SVC(kernel='precomputed', C=1)
clf.fit(K_train, y_train)

# Predict and test.
y_pred = clf.predict(K_test)

# Calculate accuracy of classification.
acc = accuracy_score(y_test, y_pred)
Example #13
def compute_kernel_matrix_grakel(event_graphs, kernel_params):
    kernel = GraphKernel(kernel_params)
    kernel_mat = kernel.fit_transform(event_graphs)
    return kernel_mat
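A usage sketch; the parameter dict is illustrative, and since fit_transform sees the whole collection, the result is the symmetric Gram matrix over all event graphs:

params = {"name": "shortest_path", "with_labels": False}
K = compute_kernel_matrix_grakel(event_graphs, params)  # shape (n, n)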
Example #14
    G_train, G_test = list(), list()
    y_train, y_test = list(), list()
    for (i, (g, t)) in enumerate(zip(G, y)):
        if len(tri) and i == tri[0]:
            G_train.append(g)
            y_train.append(t)
            tri.pop(0)
        elif len(tei) and i == tei[0]:
            G_test.append(g)
            y_test.append(t)
            tei.pop(0)

    start = time()
    gk = GraphKernel(kernel={"name": "multiscale_laplacian",
                             "which": "fast",
                             "L": 1,
                             "P": 10,
                             "n_samples": 10})

    # Calculate the kernel matrix.
    K_train = gk.fit_transform(G_train)
    K_test = gk.transform(G_test)
    end = time()

    # Cross validation on C, variable
    acc = 0
    for c in C_grid:
        # Initialise an SVM and fit.
        clf = svm.SVC(kernel='precomputed', C=c)

        # Fit on the train Kernel
Example #15
if __name__ == '__main__':

    H2O = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], {0: 'O', 1: 'H', 2: 'H'})
    H3O = Graph([[0, 1, 1, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]], {
        0: 'O',
        1: 'H',
        2: 'H',
        3: 'H'
    })
    H2Od = dict()
    H2Od[0] = Graph({'a': {'b': 1., 'c': 1.}, 'b': {'a': 1}, 'c': {'a': 1}})
    H2Od[1] = Graph({
        ('a', 'b'): 1.,
        ('a', 'c'): 1.,
        ('c', 'a'): 1.,
        ('b', 'a'): 1.
    })
    H2Ot = array([[0, 1, 1], [1, 0, 0], [1, 0, 0]])
    H2O_labels = {0: 'O', 1: 'H', 2: 'H'}
    H2O_edge_labels = {
        (0, 1): 'pcb',
        (1, 0): 'pcb',
        (0, 2): 'pcb',
        (2, 0): 'pcb'
    }
    adj_graph = Graph(H2Ot, H2O_labels, H2O_edge_labels, "all")
    #==============================================================================
    sp_kernel = GraphKernel(kernel={"name": "shortest_path"}, normalize=True)
    kernel_m = sp_kernel.fit_transform([adj_graph])
    Sim = sp_kernel.transform([H3O])
    print("the kernel matrix is: {m}\nthe similarity is: {s}".format(m=kernel_m, s=Sim))
Example #16
G, y = mutag.data, mutag.target
C_grid = (10. ** np.arange(4,10,1) / len(G)).tolist()

niter = 10
kernel_names = ["lovasz_theta", "svm_theta"]
stats = {k: {"acc": list(), "time": list()} for k in kernel_names}

for i in range(niter):
    # Train-test split of graph data
    G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1)


    for kernel_name in kernel_names:
        start = time()
        # Initialise the graph kernel.
        gk = GraphKernel(kernel={"name": kernel_name}, normalize=True)

        # Calculate the kernel matrix.
        K_train = gk.fit_transform(G_train)
        K_test = gk.transform(G_test)
        end = time()

        # Cross validation on C, variable
        acc = 0
        for c in C_grid:
            # Initialise an SVM and fit.
            clf = svm.SVC(kernel='precomputed', C=c)

            # Fit on the train Kernel
            clf.fit(K_train, y_train)
Example #17
def test_subgraph_matching_pd():
    """Random input test for the Subgraph Matching kernel [n_jobs=-1/generic-wrapper]."""
    # node-label/edge-label
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3, 'el', 4))

    gk = GraphKernel(kernel={"name": "SM"},
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    # node-label/edge-attribute
    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('nl', 3, 'ea', 5))

    gk = GraphKernel(kernel={
        "name": "SM",
        "ke": np.dot
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    # node-attribute/edge-label
    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('na', 4, 'el', 3))

    gk = GraphKernel(kernel={
        "name": "SM",
        "kv": np.dot
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    # node-attribute/edge-attribute
    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('na', 4, 'ea', 6))

    gk = GraphKernel(kernel={
        "name": "SM",
        "kv": np.dot,
        "ke": np.dot
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
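In the Subgraph Matching kernel, kv and ke are the vertex and edge kernel functions; passing np.dot compares the attribute vectors generated by features=('na', ...) and ('ea', ...), while the labeled cases fall back to the default discrete comparison.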
Example #18
from Utils import *
from numpy import array
from grakel import graph_from_networkx
if __name__ == '__main__':
    low_version = r"F:\GraphSim\jsondata\V1.0"
    high_version = r"F:\GraphSim\jsondata\V1.1"
    base_file_list = []
    target_file_list = []
    pairfileList = []
    getfilePath(low_version, base_file_list)
    getfilePath(high_version, target_file_list)

    pairfileList = getpairFile(base_file_list, target_file_list)
    for pair in pairfileList:
        basefile = pair[0]
        targetfile = pair[1]
        g1 = ParseFile(basefile)
        g2 = ParseFile(targetfile)
        # basefileGraph and targetfileGraph are the graphs whose nodes are to be compared
        _basefileGraph = g1.connectFile()
        _targetfileGraph = g2.connectFile()
        adj1, node_label1, edge_label1 = getadjlist(_basefileGraph)
        adj2, node_label2, edge_label2 = getadjlist(_targetfileGraph)
        sp_kernel = GraphKernel(kernel={"name": "shortest_path"},
                                normalize=True)
        g1 = Graph(adj1, node_label1, edge_label1)
        g2 = Graph(adj2, node_label2, edge_label2)
        tp = sp_kernel.fit_transform([g1])
        sim = sp_kernel.transform([g2])
    print("kernel done!")
Example #19
def test_propagation_pd():
    """Random input test for the Propagation kernel [n_jobs=-1/generic-wrapper]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(float("1e-5"), 10),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 4))

    gk = GraphKernel(kernel="PR",
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(float("1e-5"), 10),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('na', 5))

    gk = GraphKernel(kernel={
        "name": "PR",
        "with_attributes": True
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #20
def test_shortest_path_pd():
    """Random input test for the Shortest Path kernel [n_jobs=-1 (for attributed)/decorator]."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))

    gk = GraphKernel(kernel="SP", verbose=verbose, normalize=normalize)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception

    train, test = generate_dataset(n_graphs=50,
                                   r_vertices=(5, 10),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=20,
                                   random_state=rs,
                                   features=('na', 5))

    gk = GraphKernel(kernel={
        "name": "SP",
        "as_attributes": True
    },
                     verbose=verbose,
                     normalize=normalize,
                     n_jobs=-1)

    try:
        gk.fit_transform(train)
        gk.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #21
    def worker(work):
        for graph_location in tqdm(work, leave=False):
            chain = os.path.basename(
                os.path.dirname(os.path.dirname(graph_location)))
            q = int(os.path.basename(os.path.dirname(graph_location)))
            graph = obj_dict_to_networkx(read_graph_from_dot(graph_location))
            graph = remove_all_except(graph, edges_kept)

            if len(graph.nodes) == 0:
                continue

            t0 = time.perf_counter()
            for i in range(times):
                seeds, list_of_graphs = deltaPDG_to_list_of_Graphs(
                    graph, khop_k=k_hop)
                wl_subtree = GraphKernel(kernel=[{
                    "name": "weisfeiler_lehman",
                    "n_iter": 10
                }, {
                    "name": "subtree_wl"
                }],
                                         normalize=True)
                if len(list_of_graphs) > 0:
                    similarities = defaultdict(lambda: (0, 0.0))
                    for g1, g2 in itertools.combinations(list_of_graphs, 2):
                        # The graph has to be converted to {Graph, Node_Labels, Edge_Labels}
                        wl_subtree.fit([
                            graph_to_grakel(g1, with_data, with_call,
                                            with_name)
                        ])
                        similarity = wl_subtree.transform([
                            graph_to_grakel(g2, with_data, with_call,
                                            with_name)
                        ])[0][0]
                        similarities[(list_of_graphs.index(g1),
                                      list_of_graphs.index(g2))] = similarity

                    n = len(list_of_graphs)
                    affinity = np.zeros(
                        shape=(scipy.special.comb(n, 2, exact=True), ))
                    args = list(enumerate(itertools.combinations(range(n), 2)))
                    # Use all but one core, but never fewer than one worker.
                    with ThreadPool(processes=max(os.cpu_count() - 1, 1)) as wp:
                        for k, value in wp.imap_unordered(
                                lambda i: (i[0], similarities[
                                    (i[-1][0], i[-1][1])]), args):
                            affinity[k] += (1 - value)  # affinity is a distance, so (1 - similarity)

                    cluster = AgglomerativeClustering(n_clusters=None,
                                                      distance_threshold=0.5,
                                                      affinity='precomputed',
                                                      linkage='complete')
                    if len(affinity) < 2:
                        if len(affinity) == 1:
                            labels = np.asarray([
                                0, 0
                            ]) if affinity[0] <= 0.5 else np.asarray([0, 1])
                        else:
                            labels = np.asarray([0])
                    else:
                        labels = cluster.fit_predict(
                            scipy.spatial.distance.squareform(affinity))
                else:
                    labels = None
            t1 = time.perf_counter()
            time_ = (t1 - t0) / times

            truth = list()
            label = list()
            for node, data in graph.nodes(data=True):
                if 'color' in data.keys():
                    if 'community' in data.keys():
                        truth.append(int(data['community']))
                        i = seeds.index(node) if node in seeds else -1

                        if labels is not None and i != -1:
                            data['label'] = '%d: ' % labels[i] + data['label']
                            label.append(labels[i])
                            graph.add_node(node, **data)
                        else:
                            data['label'] = '-1: ' + data['label']
                            label.append(-1)
                            graph.add_node(node, **data)

            nx.drawing.nx_pydot.write_dot(
                graph, graph_location[:-4] + '_output_wl_%d.dot' % k_hop)

            truth = np.asarray(truth)
            label = np.asarray(label)
            acc, overlap = evaluate(truth[label > -1],
                                    label[label > -1],
                                    q=1 if len(label) == 0 else np.max(label) + 1)
            with open(
                    './out/%s/wl_%s_%d_results_%s.csv' %
                (repository_name, edges_kept, k_hop, suffix), 'a') as f:
                f.write(chain + ',' + str(q) + ',' + str(acc) + ',' +
                        str(overlap) + ',' + str(time_) + '\n')
Example #22
There are many kinds of graph kernels. The common ones fall into three classes:
  tree-based,
  path-based,
  subgraph-based.
  The WL kernel can be built on trees, on paths, or on subgraphs.
  Informally, a kernel is a function of two objects that quantifies their similarity.
  Mathematically, it corresponds to an inner product in a reproducing kernel Hilbert space.

  Graph kernels are all rooted in the R-convolution kernel framework: Convolution Kernels on Discrete Structures, David Haussler, 1999.

'''
from grakel import GraphKernel, datasets

wl_kernel = GraphKernel(kernel=[{
    "name": "weisfeiler_lehman"
}, {
    "name": "subtree_wl"
}])
H2O = [[[[0, 1, 1], [1, 0, 0], [1, 0, 0]], {0: 'O', 1: 'H', 2: 'H'}]]
H3O = [[[[0, 1, 1, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]], {
    0: 'O',
    1: 'H',
    2: 'H',
    3: 'H'
}]]
two = [H2O[0], H3O[0]]
# k1 = wl_kernel.fit_transform(H2O)
# print(k1)
# k2 = wl_kernel.transform(H3O)
# print(k2)
# k3 = wl_kernel.fit_transform(two)
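A sketch of what the commented-out calls would return, following GraKeL's fit/transform semantics (the shapes are the point here, not the exact values):

K_self = wl_kernel.fit_transform(H2O)   # 1x1 matrix: H2O against itself
K_cross = wl_kernel.transform(H3O)      # 1x1 matrix: H3O against the fitted H2O
K_pair = wl_kernel.fit_transform(two)   # 2x2 matrix over both molecules
print(K_self, K_cross, K_pair)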
Example #23
def spk_isomap(X, y, k, KNNstart, KNNend, Dstart, Dend, svmC):

    filename = "accuracy.txt"

    myfile = open(filename, 'a')

    # Add info to file
    myfile.write('SP Isomap accuracy: K = %d-%d, D = %d-%d, C = %d, K-fold = %d\n'
                 % (KNNstart, KNNend, Dstart, Dend, svmC, k))

    KNNrange = KNNend - KNNstart + 1
    Drange = Dend - Dstart + 1
    KNN = [KNNstart + i for i in range(KNNrange)]
    D = [Dstart + i for i in range(Drange)]


    kf = KFold(n_splits=k)

    Z = np.ndarray(shape=(len(D), len(KNN)))

    for knn in range(len(KNN)):
        for d in range(len(D)):
            # Reset the fold scores for each (knn, d) pair so that the mean
            # below covers only the current parameter combination.
            scores = []

            for train_index, test_index in kf.split(X):

                kernel = GraphKernel(kernel={"name": "shortest_path", "with_labels": False}, normalize=True)

                # split train and test of K-fold
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                # Calculate the kernel matrix.
                K_train = kernel.fit_transform(X_train)
                K_test = kernel.transform(X_test)

                # Compute distance matrix
                D_train = compute_distance_matrix(K_train)
                D_test = compute_distance_matrix(K_test)

                # Initialize Isomap embedding object, embed train and test data
                embedding = manifold.Isomap(n_neighbors=KNN[knn], n_components=D[d], metric="precomputed")
                E_train = embedding.fit_transform(D_train)
                E_test = embedding.transform(D_test)

                # Initialise a second, linear SVM on the embedded features.
                clf2 = svm.SVC(kernel='linear', C=svmC)
                clf2.fit(E_train, y_train)

                # Predict and test.
                y_pred = clf2.predict(E_test)

                # Append accuracy of classification.
                scores.append(accuracy_score(y_test, y_pred))

            val = np.mean(scores)
            Z[d][knn] = val
            myfile.write("%f " % (val))
            print("knn = ", KNN[knn], "d = ", D[d], " accuracy = ", Z[d][knn])
            print("{0:.2%} done".format((Drange*knn+d+1.0)/(Drange*KNNrange)))
            # print("{0:.2%} done".format((D*k+d + 1.0)/(D*KNN) ))
        myfile.write("\n")
    # Close the file
    myfile.close()
    return Z
Example #24
    G_rw, G_sm, y = read_data(3)
    N = len(G_rw[0])

    labels = {'1': 'NC', '2': 'MCI', '3': 'AD'}
    rw_ac = []
    sm_ac = []
    for it in range(3):
        print("Iter: ", it)
        # Train-test split of graph data
        G_train_rw, G_test_rw, y_train_rw, y_test_rw = prepare_data(G_rw, y, random_state=it)
        G_train_sm, G_test_sm, y_train_sm, y_test_sm = prepare_data(G_sm, y, random_state=it)

        print("Data Set prepared")
        for (i, k) in enumerate(rows):
            print(k, end=" ")
            gk = GraphKernel(kernel=kernels[k], normalize=True)
            print("", end=".")

            # Calculate the kernel matrix for raw data
            start = time.time()
            K_train_rw = gk.fit_transform(G_train_rw)
            K_test_rw = gk.transform(G_test_rw)
            end = time.time()
            print("", end=".")

            # Initialise an SVM and fit.
            clf = svm.SVC(kernel='precomputed')
            clf.fit(K_train_rw, y_train_rw)
            print("", end=". ")

            # Predict and test.
Example #25
# Loads the Mutag dataset from:
# https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
# the biggest collection of benchmark datasets for graph kernels.
mutag = datasets.fetch_dataset("MUTAG", verbose=False)
G, y = mutag.data, mutag.target

# Train-test split of graph data
G_train, G_test, y_train, y_test = train_test_split(G,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=42)

start = time()
# Initialise a Weisfeiler-Lehman kernel with a dirac base kernel.
gk = GraphKernel(kernel=[{
    "name": "weisfeiler_lehman",
    "niter": 5
}, {
    "name": "subtree_wl"
}],
                 normalize=True)

# Calculate the kernel matrix.
K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)
end = time()

# Initialise an SVM and fit.
clf = svm.SVC(kernel='precomputed', C=1)
clf.fit(K_train, y_train)

# Predict and test.
y_pred = clf.predict(K_test)
Example #26
    dataset_d = datasets.fetch_dataset(d,
                                       verbose=False,
                                       data_home="../dataset",
                                       produce_labels_nodes=True)
    G, y = np.asarray(dataset_d.data), np.asarray(dataset_d.target)

    stats = {m: {"acc": list(), "time": list()} for m in Methods}

    kfold = KFold(n_splits=10, random_state=50, shuffle=True)

    for train_idx, test_idx in kfold.split(G, y):
        train_g, train_y = G[train_idx], y[train_idx]
        test_g, test_y = G[test_idx], y[test_idx]

        for i, k in enumerate(Methods):
            gk = GraphKernel(kernel=kernels[k], normalize=True)

            start = time.time()
            k_train = gk.fit_transform(train_g)
            k_test = gk.transform(test_g)
            end = time.time()

            clf = svm.SVC(kernel='precomputed')
            clf.fit(k_train, train_y)

            pred_y = clf.predict(k_test)

            stats[k]["acc"].append(accuracy_score(test_y, pred_y))
            stats[k]["time"].append(end - start)

    for m in Methods:
Example #27
def cross_validation_with_and_without_manifold(X, y, n_neighbors, n_components,
                                               k, C):
    # Split indexes according to Kfold with k = 10
    kf = KFold(n_splits=k)

    # initialize scores lists
    scores = []
    scores2 = []
    for train_index, test_index in kf.split(X):
        kernel = GraphKernel(kernel={
            "name": "shortest_path",
            "with_labels": False
        },
                             normalize=True)

        # split train and test of K-fold
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Calculate the kernel matrix.
        K_train = kernel.fit_transform(X_train)
        K_test = kernel.transform(X_test)

        # Initialise an SVM and fit.
        clf = svm.SVC(kernel='precomputed', C=C)
        clf.fit(K_train, y_train)

        # Predict and test.
        y_pred = clf.predict(K_test)

        # Calculate accuracy of classification.
        acc = accuracy_score(y_test, y_pred)
        scores.append(acc)

        # Compute distance matrix
        D_train = compute_distance_matrix(K_train)
        D_test = compute_distance_matrix(K_test)

        # Initialize Isomap embedding object, embed train and test data
        embedding = manifold.Isomap(n_neighbors=n_neighbors,
                                    n_components=n_components,
                                    metric="precomputed")
        E_train = embedding.fit_transform(D_train)
        E_test = embedding.transform(D_test)

        # Initialise a second, linear SVM on the embedded features.
        clf2 = svm.SVC(kernel='linear', C=C)
        clf2.fit(E_train, y_train)

        # Predict and test.
        y_pred = clf2.predict(E_test)

        # Calculate accuracy of classification.
        acc = accuracy_score(y_test, y_pred)
        scores2.append(acc)
    # Convert accuracies to percentages.
    scores = [s * 100 for s in scores]
    scores2 = [s * 100 for s in scores2]
    return scores, scores2
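compute_distance_matrix is imported from a project-local utils module (see Example #10 and Example #23). A common construction under normalize=True, offered as a hedged sketch rather than the project's actual code:

import numpy as np

def compute_distance_matrix(K):
    # Kernel-induced distance: d(x, y)^2 = k(x, x) + k(y, y) - 2 * k(x, y).
    # With normalize=True every self-similarity equals 1, so this reduces to
    # sqrt(2 - 2 * K); the clip guards against small negative round-off.
    return np.sqrt(np.maximum(2.0 - 2.0 * K, 0.0))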