Ejemplo n.º 1
0
############
# Step 1
# Decompose the graphs of MUTAG into anonymous walk patterns
############
# Sorted list of the .gexf graph files found in the corpus directory.
graph_files = utils.get_files(corpus_data_dir, ".gexf", max_files=0)
graph_files = sorted(graph_files)

# awe_corpus is handed the corpus directory itself (not graph_files) and
# returns five values that the later steps read from.
(corpus,
 vocabulary,
 prob_map,
 num_graphs,
 graph_map) = awe_corpus(corpus_data_dir,
                         awe_length=10,
                         label_setting="nodes",
                         neighborhood_size=10)

############
# Step 2
# Build the per-graph pattern probability matrix and the kernel derived
# from it, for use with a kernel method classifier
############
# Simple MLE kernel: no substructure embeddings are involved.
# Fix a column order for the patterns by sorting the vocabulary.
vocabulary = sorted(vocabulary)
vocab_size = len(vocabulary)

# P[g, p] holds the probability recorded for pattern p in graph g,
# or 0 when prob_map has no entry for that pattern.
P = np.zeros((num_graphs, vocab_size))
for graph_idx in range(num_graphs):
    # prob_map is keyed from 1 while the rows of P start at 0.
    P[graph_idx, :] = [prob_map[graph_idx + 1].get(pattern, 0)
                       for pattern in vocabulary]

# Linear kernel: inner products between the rows of P.
K = np.dot(P, P.T)

# Class labels are read from "data/<dataset>.Labels".
class_labels_fname = "".join(("data/", dataset, ".Labels"))

# Label tuples, ordered in place by their first element.
xylabels = utils.get_class_labels_tuples(graph_files, class_labels_fname)
xylabels.sort(key=lambda pair: pair[0])

# Split the ordered tuples into parallel sequences: first elements and
# second elements (the latter are passed to the classifier as targets).
kernel_row_x_id, kernel_row_y_id = zip(*xylabels)

# Cross-validated classification on the probability matrix P.
acc, std = cross_val_accuracy_rbf_bag_of_words(P, kernel_row_y_id)
summary = '#... Accuracy score: %0.4f, Standard deviation: %0.4f' % (acc, std)
print(summary)
Ejemplo n.º 2
0
    adj_matrix = nx.to_numpy_matrix(graph)
    return graph, adj_matrix


# Dataset selection and the paths derived from it.
path_to_gexf_data = "data/"
dataset = "MUTAG"
# Location of the dataset's graph files: "data/MUTAG".
dataset_path = "".join((path_to_gexf_data, dataset))
# Path of the class-label file: "data/MUTAG.Labels" (a path string, not
# an open file handle, despite the "_fh" suffix).
graph_class_labels_fh = dataset_path + ".Labels"

# Yanardag style dataset
# `data` collects the loaded dataset; only its 'labels' entry is filled here.
data = {}
labels = []

# Graph files of the dataset.
# NOTE(review): max_files=0 presumably means "no limit" -- confirm against
# utils.get_files.
graph_files = utils.get_files(dataset_path, extension=".gexf", max_files=0)
label_tuples = utils.get_class_labels_tuples(graph_files,
                                             graph_class_labels_fh)
# Keep only the second element of each tuple (the class label), ordered by
# the first element.
graph_classes = np.array(
    [label for _, label in sorted(label_tuples, key=lambda pair: pair[0])])
data['labels'] = graph_classes
# NOTE(review): sample file only; raises IndexError when no graph files
# were found, and `gf` is rebound by the loop over graph_files below.
gf = graph_files[0]

# Per-graph records, keyed by the index derived from each file name.
graph_data = {}

for gf in graph_files:
    # The part of the file name before the first "." is parsed as an integer
    # and shifted down by one (presumably 1-based file numbering mapped to
    # 0-based indices -- confirm against the dataset layout).
    gindex = int(os.path.basename(gf).split(".")[0]) - 1
    # load_graph yields the graph together with its adjacency matrix.
    nx_graph, adj_matrix = load_graph(gf)

    graph_data[gindex] = {}
    for node_string in nx_graph.nodes():
        # Both the node's 'Label' attribute and the node key itself are
        # converted to integers.
        node_label = int(nx_graph.nodes[node_string]['Label'])
        node_id = int(node_string)