def test_fit(self):
    """
    Test if fit function results in the same results as when trained separately

    One model is trained layer by layer followed by a joint pass over all
    layers, a second identically configured model is trained through
    ``fit``. The embeddings of both models are expected to be equal.
    """
    graph = gb.create_directed_barbell(4, 4)

    def new_model():
        # Fresh argument lists per instance so the two models share no state.
        return GraphAutoEncoder(graph,
                                learning_rate=0.01,
                                support_size=[5, 5],
                                dims=[3, 5, 7, 6, 2],
                                batch_size=12,
                                max_total_steps=50,
                                verbose=True)

    # Reference model: train every layer separately, then all layers together.
    gae = new_model()
    for layer in range(1, len(gae.dims) + 1):
        gae.train_layer(layer)
    gae.train_layer(len(gae.dims), all_layers=True, dropout=None)
    embed = gae.calculate_embeddings()

    # Model under test: same configuration, trained through fit().
    gae2 = new_model()
    gae2.fit(graph)
    embed2 = gae2.calculate_embeddings()

    # Sum the absolute differences: a signed sum lets positive and negative
    # deviations cancel out and would hide diverging embeddings.
    abs_diff = np.abs(np.subtract(embed, embed2))
    self.assertAlmostEqual(
        np.sum(abs_diff), 0, 4,
        "fit method results in a different model when trained separately")
def test_save_load(self):
    """
    Test if saving and loading the model in a new object gives the same results

    A model is fitted and saved, then a second identically configured model
    loads the saved state. The embeddings of both models are expected to be
    equal.
    """
    # NOTE(review): presumably requires a "data" directory under the current
    # working directory - confirm against the test setup.
    filename = os.getcwd() + "/data/test_save_load"
    graph = gb.create_directed_barbell(4, 4)

    def new_model():
        # Fresh argument lists per instance so the two models share no state.
        return GraphAutoEncoder(graph,
                                learning_rate=0.01,
                                support_size=[5, 5],
                                dims=[3, 5, 7, 6, 2],
                                batch_size=12,
                                max_total_steps=50,
                                verbose=True)

    # Original model: fit, compute embeddings and persist.
    gae = new_model()
    gae.fit(graph)
    embed = gae.calculate_embeddings()
    gae.save_model(filename)

    # New object that only loads the persisted model.
    gae2 = new_model()
    gae2.load_model(filename, graph)
    embed2 = gae2.calculate_embeddings()

    # Sum the absolute differences: a signed sum lets positive and negative
    # deviations cancel out and would hide diverging embeddings.
    abs_diff = np.abs(np.subtract(embed, embed2))
    self.assertAlmostEqual(
        np.sum(abs_diff), 0, 4,
        "loaded model gives different result then original")
def test_train_layer5(self):
    """
    Test using final combination layer. Test if training works
    correctly and if the calculation of the embeddings works correctly.
    """
    graph = gb.create_directed_barbell(4, 4)
    # Attach a numeric edge label that is used as edge weight by the model.
    for in_node, out_node, lbl in graph.edges(data=True):
        lbl['edge_lbl1'] = in_node / (out_node + 0.011) + 0.22

    gae = GraphAutoEncoder(graph,
                           support_size=[3, 3],
                           dims=[2, 3, 3, 2, 2],
                           batch_size=3,
                           max_total_steps=10,
                           verbose=False,
                           seed=2,
                           weight_label='edge_lbl1',
                           act=tf.nn.relu)

    # Train the layers one by one; `res` holds the result of the last layer
    # once the loop has finished.
    for layer in range(1, len(gae.dims) + 1):
        res = gae.train_layer(layer, act=tf.nn.relu)

    # Adjacent string literals instead of a backslash continuation, which
    # would embed the indentation of the next source line in the message.
    self.assertAlmostEqual(
        res['l'][0], 134.9637, 4,
        "loss of the last layer does not match with expectations using a "
        "final combination layer")

    # Final pass over all layers before the embeddings are calculated.
    gae.train_layer(len(gae.dims), all_layers=True, act=tf.nn.relu)
    embed = gae.calculate_embeddings()
    self.assertAlmostEqual(
        embed[0][2], 38.221458435058594, 4,
        "embedding of the first batch node differs from expected value")
def create_embedding(self, mdl, date_range=range(1, 25)):
    """Create embeddings for every period with a stored model and save them.

    For each value in ``date_range`` the node and edge data are retrieved
    with ``self.proces_month``, converted into a graph with
    ``self.create_graph`` and fed to a ``GraphAutoEncoder``. The model is
    built from the first graph only and its weights are loaded from
    ``self.out_dir + 'mdl_' + mdl``. The embeddings are inner-joined with the
    node data on the index, and the features of all periods are written to a
    single parquet file.

    Args:
        mdl: model name, split on ``"_"``. Token 1 is converted to int and
            passed to ``self.get_dims``, token 5 is passed on as dropout and
            token 7 selects sigmoid (``'sigm'``) or tanh as activation.
        date_range: iterable of period identifiers to process.

    Returns:
        Tuple ``(feat_file, feat_cols)``: the path of the parquet file and
        the columns of the combined features excluding ``is_sar``, ``dag``,
        ``orig_id`` and ``id``.

    Raises:
        ValueError: if ``date_range`` yields no periods.
    """
    gae = None
    dims = None
    frames = []
    for dag in date_range:
        print(f"processing dag {dag}")
        node, edge = self.proces_month(dag)
        cnt = node.shape[0]
        G = self.create_graph(node, edge)

        if gae is None:
            # Build the model once, from the first graph, and reuse it for
            # all subsequent periods.
            mdl_parts = mdl.split("_")
            dims = self.get_dims(int(mdl_parts[1]))
            act = tf.nn.sigmoid if mdl_parts[7] == 'sigm' else tf.nn.tanh
            # NOTE(review): dropout is forwarded as the raw string token of
            # the model name - confirm GraphAutoEncoder accepts a string.
            do = mdl_parts[5]
            gae = GraphAutoEncoder(
                G,
                support_size=AmlSimPreprocessor.support_size,
                dims=dims,
                batch_size=AmlSimPreprocessor.batch_size,
                hub0_feature_with_neighb_dim=AmlSimPreprocessor.
                hub0_feature_with_neighb_dim,
                useBN=AmlSimPreprocessor.useBN,
                verbose=False,
                seed=1,
                learning_rate=0.01,
                act=act,
                dropout=do)
            gae.load_weights(self.out_dir + 'mdl_' + mdl)

        embed = gae.calculate_embeddings(G)

        # combine with nodes: the first embedding column is used as index,
        # the remaining columns hold the embedding values. Twice the size of
        # the last dimension is expected when no hub0 dimension is set.
        if AmlSimPreprocessor.hub0_feature_with_neighb_dim is None:
            embed_width = dims[-1] * 2
        else:
            embed_width = dims[-1]
        pd_embed = pd.DataFrame(
            data=embed[:cnt, 1:],
            index=embed[:cnt, 0],
            columns=[f'embed_{i}' for i in range(embed_width)])
        frames.append(
            pd.merge(node, pd_embed,
                     left_index=True, right_index=True, how='inner'))

    if not frames:
        raise ValueError("date_range is empty; no embeddings were created")

    # Concatenate once instead of growing the frame inside the loop, which
    # would copy all previously collected rows on every iteration.
    combined_feat = frames[0] if len(frames) == 1 else pd.concat(frames)

    feat_file = self.out_dir + "features_" + str(dims[-1])
    combined_feat.to_parquet(feat_file)

    # return column list
    excl_cols = ['is_sar', 'dag', 'orig_id', 'id']
    feat_cols = [c for c in combined_feat.columns if c not in excl_cols]
    return feat_file, feat_cols
batch_size=30, max_total_steps=1000, verbose=True, act=tf.nn.tanh) if TRAIN: train_res = {} for i in range(len(gae.dims)): if i in [1, 2]: train_res["l"+str(i+1)] = gae.train_layer(i+1, dropout=0.1) else: train_res["l"+str(i+1)] = gae.train_layer(i+1) train_res['all'] = gae.train_layer(len(gae.dims), all_layers=True) pickle.dump(train_res, open(RESULTS_FILE, "wb")) gae.save_model(MODEL_FILENAME) else: gae.load_model(MODEL_FILENAME, graph) embed = gae.calculate_embeddings() # %% get tabel with node details indeg = graph.in_degree() outdeg = graph.out_degree() tbl = np.array([[y, x['label1'], x['label2'], indeg[y], outdeg[y], embed[y, 1], embed[y, 2]] for y, x in graph.nodes(data=True)]) pd_tbl = pd.DataFrame(tbl[:, 1:], tbl[:, 0], ['label1', 'label2', 'in_degree', 'out_degree', 'embed1', 'embed2']) print(pd_tbl) #%% show graph pos = nx.kamada_kawai_layout(graph, scale=10, weight=None) node_count = graph.number_of_nodes() outdeg = graph.out_degree()