def train_model(params):
    """ Function to create and train the model. This is called by hyperopt.
    It returns the loss (= optimisation metric), the status and a dict with
    supporting information.
    """
    dims = [int(params['dim0']), int(params['dim1']), int(params['dim2']),
            int(params['dim3']), 2]
    gae = GraphAutoEncoder(G, learning_rate=0.01, support_size=[5, 5], dims=dims,
                           batch_size=12, max_total_steps=250)
    train_res = {}
    for i in range(len(gae.dims)):
        train_res["l" + str(i + 1)] = gae.train_layer(i + 1, act=tf.nn.relu)

    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)

    # average the last three validation losses to smooth out noise
    loss_val = train_res['all']['val_l'][-3:]
    print(f"loss val {loss_val}")
    loss = sum(loss_val) / len(loss_val)
    train_res['loss'] = loss
    return {'loss': loss, 'status': STATUS_OK, 'hist': train_res}
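# A minimal sketch of how train_model could be wired into a hyperopt search.
# The search-space bounds and the max_evals value below are illustrative
# assumptions, not taken from the source.
from hyperopt import fmin, tpe, hp, Trials

space = {
    'dim0': hp.quniform('dim0', 2, 10, 1),
    'dim1': hp.quniform('dim1', 2, 10, 1),
    'dim2': hp.quniform('dim2', 2, 10, 1),
    'dim3': hp.quniform('dim3', 2, 10, 1),
}
trials = Trials()
best = fmin(fn=train_model, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
print(f"best hyperparameters: {best}")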
def test_train_layer3(self):
    """ Test with 3 hubs sampling using different support sizes per layer.
    """
    graph = gb.create_directed_barbell(4, 4)
    gae = GraphAutoEncoder(graph, support_size=[3, 4, 5], dims=[2, 3, 3, 3, 3, 2],
                           batch_size=3, max_total_steps=1, verbose=False, seed=2,
                           act=tf.nn.relu)
    exp = [153.83647, 309.56152, 311.00153, 459.34726, 484.33817, 504.59387]
    for i in range(6):
        res = gae.train_layer(i + 1)
        self.assertAlmostEqual(
            res['l'][0], exp[i], 4,
            f"loss of layer {i+1} does not match with expectations")

    res = gae.train_layer(6, all_layers=True)
    self.assertAlmostEqual(
        res['l'][0], 504.55478, 4,
        "loss of layer 6 all-layers training does not match with expectations")
def test_train_layer2(self):
    """ Test if the loss is reduced during training.
    """
    graph = gb.create_directed_barbell(4, 4)
    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2],
                           batch_size=3, max_total_steps=10, verbose=False, seed=2,
                           act=tf.nn.relu)
    for layer in range(1, 5):
        res = gae.train_layer(layer, learning_rate=0.0001)
        self.assertTrue(res['val_l'][0] > res['val_l'][-1],
                        f"loss has not decreased while training layer {layer}")
def test_fit(self):
    """ Test if the fit function gives the same results as training the
    layers separately.
    """
    graph = gb.create_directed_barbell(4, 4)
    gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                           dims=[3, 5, 7, 6, 2], batch_size=12, max_total_steps=50,
                           verbose=True)
    train_res = {}
    for i in range(len(gae.dims)):
        train_res["l" + str(i + 1)] = gae.train_layer(i + 1)

    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True, dropout=None)
    embed = gae.calculate_embeddings()

    gae2 = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                            dims=[3, 5, 7, 6, 2], batch_size=12, max_total_steps=50,
                            verbose=True)
    gae2.fit(graph)
    embed2 = gae2.calculate_embeddings()
    embed3 = np.subtract(embed, embed2)
    self.assertAlmostEqual(
        np.sum(embed3), 0, 4,
        "fit method results in a different model than training the layers separately")
def test_train_layer5(self):
    """ Test using a final combination layer. Checks whether training works
    correctly and whether the embeddings are calculated correctly.
    """
    graph = gb.create_directed_barbell(4, 4)
    for in_node, out_node, lbl in graph.edges(data=True):
        lbl['edge_lbl1'] = in_node / (out_node + 0.011) + 0.22

    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2, 2],
                           batch_size=3, max_total_steps=10, verbose=False, seed=2,
                           weight_label='edge_lbl1', act=tf.nn.relu)
    for i in range(len(gae.dims)):
        res = gae.train_layer(i + 1, act=tf.nn.relu)

    self.assertAlmostEqual(
        res['l'][0], 134.9637, 4,
        "loss of the last layer does not match with expectations using a "
        "final combination layer")

    res = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)
    embed = gae.calculate_embeddings()
    self.assertAlmostEqual(
        embed[0][2], 38.221458435058594, 4,
        "embedding of the first batch node differs from expected value")
def test_train_layer(self):
    """ Test if the loss of the initial setup is correct.
    """
    graph = gb.create_directed_barbell(4, 4)

    # add node ids to the graph as label
    labels3 = [(i, i) for i in range(13)]
    labels3 = dict(labels3)
    nx.set_node_attributes(graph, labels3, 'label3')

    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2],
                           batch_size=3, max_total_steps=1, verbose=False, seed=2,
                           act=tf.nn.relu)
    res = gae.train_layer(1)
    self.assertAlmostEqual(
        res['l'][0], 2158.0686, 4,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(2)
    self.assertAlmostEqual(
        res['l'][0], 2613.2725, 4,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(3)
    self.assertAlmostEqual(
        res['l'][0], 2693.6736, 4,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(4)
    self.assertAlmostEqual(
        res['l'][0], 2842.3582, 3,
        "loss of the initial setup does not match with expectations")

    res = gae.train_layer(4, all_layers=True)
    self.assertAlmostEqual(
        res['l'][0], 2842.1409, 4,
        "loss of the initial setup does not match with expectations")
def test_train_layer4(self):
    """ Test using multiple edge labels in combination with a custom weight
    label. The test checks whether the weights are calculated correctly.
    """
    graph = gb.create_directed_barbell(4, 4)
    for in_node, out_node, lbl in graph.edges(data=True):
        lbl['edge_lbl1'] = in_node / (out_node + 0.011) + 0.22

    gae = GraphAutoEncoder(graph, support_size=[3, 3], dims=[2, 3, 3, 2],
                           batch_size=3, max_total_steps=10, verbose=False, seed=2,
                           weight_label='edge_lbl1', act=tf.nn.relu)
    res = gae.train_layer(1, learning_rate=0.0001)
    self.assertAlmostEqual(
        res['l'][0], 49.392754, 4,
        "loss of layer 1 does not match with expectations using a "
        "custom edge label")
# correction of the edge weight for nodes 10-20
ndic = graph.nodes(data='label1')
for u, v, d in graph.edges(data=True):
    if 9 < v < 21:
        d['weight'] = 1
    else:
        d['weight'] = ndic[u] * ndic[v]

# %% create and train model
gae = GraphAutoEncoder(graph, learning_rate=0.01, support_size=[5, 5],
                       dims=[3, 5, 7, 6, 2], batch_size=30, max_total_steps=1000,
                       verbose=True, act=tf.nn.tanh)
if TRAIN:
    train_res = {}
    for i in range(len(gae.dims)):
        # apply dropout on the middle layers only
        if i in [1, 2]:
            train_res["l" + str(i + 1)] = gae.train_layer(i + 1, dropout=0.1)
        else:
            train_res["l" + str(i + 1)] = gae.train_layer(i + 1)

    train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True)
    pickle.dump(train_res, open(RESULTS_FILE, "wb"))
    gae.save_model(MODEL_FILENAME)
else:
    gae.load_model(MODEL_FILENAME, graph)

embed = gae.calculate_embeddings()

# %% get table with node details
indeg = graph.in_degree()
outdeg = graph.out_degree()
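# A sketch of assembling the node-details table with pandas; the use of
# pandas and the column names are assumptions, not taken from the source.
# Column 0 of the embedding matrix is assumed to hold the node id and
# columns 1 and 2 the embedding values.
import pandas as pd

tbl = pd.DataFrame(embed, columns=['node_id', 'embed_0', 'embed_1'])
tbl['in_degree'] = [indeg[int(n)] for n in tbl['node_id']]
tbl['out_degree'] = [outdeg[int(n)] for n in tbl['node_id']]
print(tbl.head())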
gae = GraphAutoEncoder(G, support_size=[3, 4], dims=[2, 6, 6, 2, 1],
                       batch_size=5, max_total_steps=10, verbose=True, seed=2)

# train each layer separately, then fine-tune all layers together
for i in range(len(gae.dims)):
    h = gae.train_layer(i + 1, act=tf.nn.relu)

h = gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)

e = gae.calculate_embeddings()
print(f"e: \n {e}")
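# Runnable version of the scatter plot that was left commented out above.
# Assumes column 0 of the embedding matrix holds the node id and columns 1
# and 2 hold the embedding values; the legend call from the original
# comments is dropped because no artist is labelled.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.scatter(e[:, 1], e[:, 2])
for i, txt in enumerate(e[:, 0]):
    # annotate every point with its node id
    ax.annotate(txt, (e[i, 1], e[i, 2]))
plt.xlabel("Leprechauns")
plt.ylabel("Gold")
plt.show()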