def test_reconstruct_graph(self):
    """
    Test the reconstruction of an input layer.
    """
    graph = gb.create_directed_barbell(10, 10)
    random.seed(2)
    for u in graph.nodes(data=True):
        u[1]['label1'] = int(u[0])
        u[1]['label2'] = random.uniform(0.0, 1.0)
    gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                           dims=[3, 5, 7, 6, 2], batch_size=12,
                           max_total_steps=100, verbose=True)
    l1_struct, graph2 = gae.get_l1_structure(15, show_graph=False)

    # check if the number of nodes in the reconstructed graph equals 5
    self.assertEqual(
        graph2.number_of_nodes(), 5,
        "Number of nodes in reconstructed graph does not match with expectations"
    )

    # check if the returned nodes are correct by summing the node values
    sum_values = np.sum(l1_struct, 1)
    self.assertAlmostEqual(
        sum_values[0, 1], 120, 4,
        "sum of node ids in reconstructed graph does not match with expectations"
    )
    self.assertAlmostEqual(
        sum_values[0, 0], 2.399999, 4,
        "sum of edges in reconstructed graph does not match with expectations"
    )
def train_model(params):
    """
    Function to create and train the model. This is called by hyperopt.
    It returns the loss (the optimisation metric), the status and a dict
    with supporting information.
    """
    dims = [int(params['dim0']), int(params['dim1']),
            int(params['dim2']), int(params['dim3']), 2]
    gae = GraphAutoEncoder(G, learning_rate=0.01, support_size=[5, 5],
                           dims=dims, batch_size=12, max_total_steps=250)
    train_res = {}
    for i in range(len(gae.dims)):
        train_res["l" + str(i + 1)] = gae.train_layer(i + 1, act=tf.nn.relu)
    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)

    # average the last three validation losses to smooth out noise
    loss_val = train_res['all']['val_l'][-3:]
    print(f"loss val {loss_val}")
    loss = sum(loss_val) / len(loss_val)
    train_res['loss'] = loss
    return {'loss': loss, 'status': STATUS_OK, 'hist': train_res}
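# A minimal sketch of wiring train_model into a hyperopt search. The search
# space bounds and the max_evals budget below are illustrative assumptions,
# not taken from the original code.
from hyperopt import fmin, tpe, hp, Trials

# hypothetical search space: one quantised-uniform range per layer dimension
space = {f'dim{i}': hp.quniform(f'dim{i}', 2, 8, 1) for i in range(4)}

trials = Trials()
best = fmin(fn=train_model,    # the objective defined above
            space=space,
            algo=tpe.suggest,  # tree-structured Parzen estimator
            max_evals=50,      # assumed evaluation budget
            trials=trials)
print(f"best dims found: {best}")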
def test_train_layer3(self):
    """
    Test sampling with 3 hubs, using a different support size per layer.
    """
    graph = gb.create_directed_barbell(4, 4)
    gae = GraphAutoEncoder(graph, support_size=[3, 4, 5], dims=[2, 3, 3, 3, 3, 2],
                           batch_size=3, max_total_steps=1, verbose=False, seed=2,
                           act=tf.nn.relu)
    exp = [153.83647, 309.56152, 311.00153, 459.34726, 484.33817, 504.59387]
    for i in range(6):
        res = gae.train_layer(i + 1)
        self.assertAlmostEqual(
            res['l'][0], exp[i], 4,
            f"loss of layer {i+1} does not match with expectations")

    res = gae.train_layer(6, all_layers=True)
    self.assertAlmostEqual(
        res['l'][0], 504.55478, 4,
        "loss of layer 6 all-layers training does not match with expectations")
def test_fit(self):
    """
    Test if the fit function gives the same results as training the
    layers separately.
    """
    graph = gb.create_directed_barbell(4, 4)
    gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                           dims=[3, 5, 7, 6, 2], batch_size=12,
                           max_total_steps=50, verbose=True)
    train_res = {}
    for i in range(len(gae.dims)):
        train_res["l" + str(i + 1)] = gae.train_layer(i + 1)
    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True, dropout=None)
    embed = gae.calculate_embeddings()

    gae2 = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                            dims=[3, 5, 7, 6, 2], batch_size=12,
                            max_total_steps=50, verbose=True)
    gae2.fit(graph)
    embed2 = gae2.calculate_embeddings()

    embed3 = np.subtract(embed, embed2)
    self.assertAlmostEqual(
        np.sum(embed3), 0, 4,
        "fit method results in a different model than training the layers separately")
def test_train_layer5(self):
    """
    Test using a final combination layer. Checks that training works and
    that the embeddings are calculated correctly.
    """
    graph = gb.create_directed_barbell(4, 4)
    for in_node, out_node, lbl in graph.edges(data=True):
        lbl['edge_lbl1'] = in_node / (out_node + 0.011) + 0.22
    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2, 2],
                           batch_size=3, max_total_steps=10, verbose=False, seed=2,
                           weight_label='edge_lbl1', act=tf.nn.relu)
    for i in range(len(gae.dims)):
        res = gae.train_layer(i + 1, act=tf.nn.relu)
    self.assertAlmostEqual(
        res['l'][0], 134.9637, 4,
        "loss of the last layer does not match with expectations using a "
        "final combination layer")

    res = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)
    embed = gae.calculate_embeddings()
    self.assertAlmostEqual(
        embed[0][2], 38.221458435058594, 4,
        "embedding of the first batch node differs from expected value")
def create_embedding(self, mdl, date_range=range(1, 25)):
    gae = None
    combined_feat = None
    for dag in date_range:
        print(f"processing dag {dag}")
        node, edge = self.proces_month(dag)
        cnt = node.shape[0]
        G = self.create_graph(node, edge)

        if gae is None:
            # parse the hyperparameters back out of the run id
            dims = self.get_dims(int(mdl.split("_")[1]))
            act = tf.nn.sigmoid if mdl.split("_")[7] == 'sigm' else tf.nn.tanh
            do = float(mdl.split("_")[5])  # dropout level; assumes it was stored as a number
            gae = GraphAutoEncoder(
                G,
                support_size=AmlSimPreprocessor.support_size,
                dims=dims,
                batch_size=AmlSimPreprocessor.batch_size,
                hub0_feature_with_neighb_dim=AmlSimPreprocessor.hub0_feature_with_neighb_dim,
                useBN=AmlSimPreprocessor.useBN,
                verbose=False,
                seed=1,
                learning_rate=0.01,
                act=act,
                dropout=do)
            gae.load_weights(self.out_dir + 'mdl_' + mdl)

        embed = gae.calculate_embeddings(G)

        # combine the embeddings with the node features
        if AmlSimPreprocessor.hub0_feature_with_neighb_dim is None:
            pd_embed = pd.DataFrame(
                data=embed[:cnt, 1:], index=embed[:cnt, 0],
                columns=[f'embed_{i}' for i in range(dims[-1] * 2)])
        else:
            pd_embed = pd.DataFrame(
                data=embed[:cnt, 1:], index=embed[:cnt, 0],
                columns=[f'embed_{i}' for i in range(dims[-1])])
        feat = pd.merge(node, pd_embed, left_index=True, right_index=True, how='inner')

        if combined_feat is None:
            combined_feat = feat
        else:
            combined_feat = pd.concat([combined_feat, feat])

    feat_file = self.out_dir + "features_" + str(dims[-1])
    combined_feat.to_parquet(feat_file)

    # return the feature column list, excluding target and id columns
    excl_cols = ['is_sar', 'dag', 'orig_id', 'id']
    feat_cols = [c for c in combined_feat.columns if c not in excl_cols]
    return feat_file, feat_cols
def gs_graphcase(self, G, dim_size):
    gs_res = {}
    dims = self.get_dims(dim_size)
    for lr in AmlSimPreprocessor.learning_rates:
        for do in AmlSimPreprocessor.dropout_rates:
            for act in AmlSimPreprocessor.act_functions:
                gae = GraphAutoEncoder(
                    G,
                    support_size=AmlSimPreprocessor.support_size,
                    dims=dims,
                    batch_size=AmlSimPreprocessor.batch_size,
                    hub0_feature_with_neighb_dim=AmlSimPreprocessor.hub0_feature_with_neighb_dim,
                    useBN=AmlSimPreprocessor.useBN,
                    verbose=True,
                    seed=1,
                    learning_rate=lr,
                    act=act,
                    dropout=do)
                train_res = gae.fit(epochs=AmlSimPreprocessor.epochs, layer_wise=False)

                # save the training history and the model weights
                act_str = 'tanh' if act == tf.nn.tanh else 'sigm'
                run_id = f'dim_{dim_size}_lr_{lr}_do_{do}_act_{act_str}_layers_{self.layers}'
                pickle.dump(train_res[None].history, open(self.out_dir + 'res_' + run_id, "wb"))
                gae.save_weights(self.out_dir + 'mdl_' + run_id)

                # print and store the result
                val_los = sum(train_res[None].history['val_loss'][-2:]) / 2
                gs_res[run_id] = val_los
                print(f'dims:{dim_size}, lr:{lr}, dropout lvl:{do}, '
                      f'act func:{act_str} resulting val loss {val_los}')

    # print all results, save them and return the best model (lowest validation loss)
    for k, v in gs_res.items():
        print(f'run: {k} with result {v}')
    pickle.dump(gs_res,
                open(self.out_dir + f'graphcase_gs_results_dim_{dim_size}', "wb"))
    return min(gs_res, key=gs_res.get)
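# A hedged sketch of chaining the grid search and the embedding creation.
# The AmlSimPreprocessor constructor argument and the variables pre, node
# and edge are assumptions for illustration.
pre = AmlSimPreprocessor(out_dir='./output/')  # hypothetical construction
node, edge = pre.proces_month(1)               # frames for one month/dag
G = pre.create_graph(node, edge)

# run the grid search; the returned run id encodes the winning hyperparameters
best_run = pre.gs_graphcase(G, dim_size=2)

# reload the winning weights and build the per-node feature file
feat_file, feat_cols = pre.create_embedding(best_run)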
def test_train_layer2(self):
    """
    Test if the loss is reduced during training.
    """
    graph = gb.create_directed_barbell(4, 4)
    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2],
                           batch_size=3, max_total_steps=10, verbose=False, seed=2,
                           act=tf.nn.relu)
    for layer in range(1, 5):
        res = gae.train_layer(layer, learning_rate=0.0001)
        self.assertTrue(res['val_l'][0] > res['val_l'][-1],
                        f"loss has not decreased while training layer {layer}")
def test_consistency_checks(self):
    """
    Test the checks during initialization.
    """
    graph = gb.create_directed_barbell(10, 10)
    with self.assertRaises(AssertionError):
        gae = GraphAutoEncoder(graph, support_size=[5, 5], dims=[2, 6, 6],
                               batch_size=1024, max_total_steps=10,
                               verbose=True, seed=2)
def test_train_layer4(self):
    """
    Test using multiple edge labels in combination with a custom weight label.
    The test checks if the weights are calculated correctly.
    """
    graph = gb.create_directed_barbell(4, 4)
    for in_node, out_node, lbl in graph.edges(data=True):
        lbl['edge_lbl1'] = in_node / (out_node + 0.011) + 0.22
    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2],
                           batch_size=3, max_total_steps=10, verbose=False, seed=2,
                           weight_label='edge_lbl1', act=tf.nn.relu)
    res = gae.train_layer(1, learning_rate=0.0001)
    self.assertAlmostEqual(
        res['l'][0], 49.392754, 4,
        "loss of layer 1 does not match with expectations using a "
        "custom edge label")
def test_train_layer(self):
    """
    Test if the loss of the initial setup is correct.
    """
    graph = gb.create_directed_barbell(4, 4)
    # add the node ids to the graph as a label
    labels3 = {i: i for i in range(13)}
    nx.set_node_attributes(graph, labels3, 'label3')
    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2],
                           batch_size=3, max_total_steps=1, verbose=False, seed=2,
                           act=tf.nn.relu)
    res = gae.train_layer(1)
    self.assertAlmostEqual(
        res['l'][0], 2158.0686, 4,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(2)
    self.assertAlmostEqual(
        res['l'][0], 2613.2725, 4,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(3)
    self.assertAlmostEqual(
        res['l'][0], 2693.6736, 4,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(4)
    self.assertAlmostEqual(
        res['l'][0], 2842.3582, 3,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(4, all_layers=True)
    self.assertAlmostEqual(
        res['l'][0], 2842.1409, 4,
        "loss of the initial setup does not match with expectations")
#%% create graph
graph = gb.create_directed_barbell(10, 10)
graph.remove_edge(21, 20)
graph.add_edge(29, 20, weight=1)  # correct the edge weight for node 20
ndic = graph.nodes(data='label1')
for u, v, d in graph.edges(data=True):
    if 9 < v < 21:
        d['weight'] = 1
    else:
        d['weight'] = ndic[u] * ndic[v]

#%% create and train model
gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                       dims=[3, 5, 7, 6, 2], batch_size=30,
                       max_total_steps=1000, verbose=True, act=tf.nn.tanh)
if TRAIN:
    train_res = {}
    for i in range(len(gae.dims)):
        if i in [1, 2]:
            train_res["l" + str(i + 1)] = gae.train_layer(i + 1, dropout=0.1)
        else:
            train_res["l" + str(i + 1)] = gae.train_layer(i + 1)
    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True)
    pickle.dump(train_res, open(RESULTS_FILE, "wb"))
    gae.save_model(MODEL_FILENAME)
else:
    gae.load_model(MODEL_FILENAME, graph)

embed = gae.calculate_embeddings()
from GAE.graph_case_controller import GraphAutoEncoder
import example_graph_bell as gb
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pickle
import random

#%% create the example graph and add node labels
graph = gb.create_directed_barbell(10, 10)
random.seed(2)
for u in graph.nodes(data=True):
    u[1]['label1'] = int(u[0])
    u[1]['label2'] = random.uniform(0.0, 1.0)

gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                       dims=[3, 5, 7, 6, 2], batch_size=12,
                       max_total_steps=10, verbose=True, useBN=True)
gae.fit()
embed = gae.calculate_embeddings()
l1_struct, graph2 = gae.get_l1_structure(15, show_graph=True, node_label='feat0')

#%% alternative: train the layers one by one and save the model
# print(l1_struct)
# train_res = {}
# for i in range(len(gae.dims)):
#     train_res["l" + str(i + 1)] = gae.train_layer(i + 1)
# train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True, dropout=None)
# embed = gae.calculate_embeddings()
# filename = '/Users/tonpoppe/workspace/GraphCase/data/model1'
# gae.save_model(filename)
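#%% a minimal follow-up sketch for inspecting the embeddings, assuming
# calculate_embeddings returns one row per node with the node id in column 0
# followed by the two embedding dimensions (as the commented scatter plot
# further below suggests)
fig, ax = plt.subplots()
ax.scatter(embed[:, 1], embed[:, 2])
for row in embed:
    ax.annotate(int(row[0]), (row[1], row[2]))  # label each point with its node id
ax.set_xlabel("embedding dim 1")
ax.set_ylabel("embedding dim 2")
plt.show()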
# r = G.out_edges(data=True)
# in_edges_dict = {}
# in_weight_dict = {}
# for out_node, in_node, weight in G.in_edges(data=True):
#     in_edges_dict[in_node] = in_edges_dict.get(in_node, list()) + \
#         [(out_node, list(weight.values()))]
#     in_weight_dict[in_node] = in_weight_dict.get(in_node, list()) + \
#         [(out_node, weight['weight'])]
# print(in_edges_dict)
# print(in_weight_dict)

gae = GraphAutoEncoder(G, support_size=[3, 4], dims=[2, 6, 6, 2, 1],
                       batch_size=5, max_total_steps=10, verbose=True, seed=2)
for i in range(len(gae.dims)):
    h = gae.train_layer(i + 1, act=tf.nn.relu)
h = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)
# print(h1['val_l'])

e = gae.calculate_embeddings()
print(f"e: \n {e}")
# fig, ax = plt.subplots()
# ax.scatter(e[:, 1], e[:, 2])
graph = karate.create_karakte_mirror_network({'weight': 'random'},
                                             {'label0': 1, 'label1': 'random'})
for node in graph.nodes(data=True):
    node[1]['label0'] = int(node[0])

gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[3, 3],
                       dims=[3, 5, 7, 6, 2], batch_size=12,
                       max_total_steps=10, verbose=True, useBN=True)


def plot_node(graph, node_id):
    # collect the 2-hop neighbourhood of the node
    local_graph = []
    for neighbor in graph.neighbors(node_id):
        local_graph = local_graph + [n for n in graph.neighbors(neighbor)]
    local_graph = list(set(local_graph))  # make the list unique
    subgraph = graph.subgraph(local_graph)

    # plot the subgraph with pyvis
    nt = net.Network(notebook=True, directed=True)
    nt.from_nx(subgraph)
    return nt  # return the network so the caller can render it
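# Hypothetical usage of plot_node, assuming pyvis was imported as net
# (e.g. from pyvis import network as net) and a notebook context.
nt = plot_node(graph, 5)  # 2-hop neighbourhood of node 5
nt.show('node_5.html')    # pyvis renders the interactive HTML view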
def test_save_load(self):
    """
    Test if saving and loading the model into a new object gives the
    same results.
    """
    filename = os.getcwd() + "/data/test_save_load"
    graph = gb.create_directed_barbell(4, 4)
    gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                           dims=[3, 5, 7, 6, 2], batch_size=12,
                           max_total_steps=50, verbose=True)
    gae.fit(graph)
    embed = gae.calculate_embeddings()
    gae.save_model(filename)

    gae2 = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                            dims=[3, 5, 7, 6, 2], batch_size=12,
                            max_total_steps=50, verbose=True)
    gae2.load_model(filename, graph)
    embed2 = gae2.calculate_embeddings()

    embed3 = np.subtract(embed, embed2)
    self.assertAlmostEqual(
        np.sum(embed3), 0, 4,
        "loaded model gives a different result than the original")