def test_ae_static():
    # Parameters for the stochastic block model graph
    # Total of 100 nodes
    node_num = 100
    # Test with two communities
    community_num = 2
    # At each iteration migrate 2 nodes from one community to the other
    node_change_num = 2
    # Length of total time steps the graph will dynamically change
    length = 7
    # Output directories for results and intermediate files
    outdir = './output'
    intr = './intermediate'
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    if not os.path.exists(intr):
        os.mkdir(intr)
    testDataType = 'sbm_cd'

    # Generate the dynamic graph series
    dynamic_sbm_series = list(
        sbm.get_community_diminish_series_v2(node_num,
                                             community_num,
                                             length,
                                             1,  # community ID to perturb
                                             node_change_num))
    graphs = [g[0] for g in dynamic_sbm_series]

    # Parameters for the dynamic embedding
    # Dimension of the embedding
    dim_emb = 8
    lookback = 2

    # AE Static
    embedding = AE(d=dim_emb,
                   beta=5,
                   nu1=1e-6,
                   nu2=1e-6,
                   K=3,
                   n_units=[500, 300],
                   n_iter=2,
                   xeta=1e-4,
                   n_batch=100,
                   modelfile=['./intermediate/enc_modelsbm.json',
                              './intermediate/dec_modelsbm.json'],
                   weightfile=['./intermediate/enc_weightssbm.hdf5',
                               './intermediate/dec_weightssbm.hdf5'])

    embs = []
    t1 = time()
    # Learn a static embedding for each snapshot in the series
    for temp_var in range(length):
        emb, _ = embedding.learn_embeddings(graphs[temp_var])
        embs.append(emb)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

    viz.plot_static_sbm_embedding(embs[-4:], dynamic_sbm_series[-4:])
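# --- Assumed imports -------------------------------------------------------------
# test_ae_static above relies on os, time, the SBM series generator, the AE model
# and the visualisation helper being imported at module level. The import paths
# below are hypothetical (a dyngraph2vec-style layout) and may need adjusting to
# this repository's actual package structure.
import os
from time import time

from dyngraph2vec.embedding.ae_static import AE                            # assumed path
from dyngraph2vec.graph_generation import dynamic_SBM_graph as sbm         # assumed path
from dyngraph2vec.visualization import plot_dynamic_sbm_embedding as viz   # assumed path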
dynamic_sbm_series = list(
    sbm.get_community_diminish_series_v2(node_num,
                                         community_num,
                                         length,
                                         1,  # community ID to perturb
                                         node_change_num))
graphs = [g[0] for g in dynamic_sbm_series]

# Parameters for the dynamic embedding
# Dimension of the embedding
dim_emb = 128
lookback = 2

# AE Static
embedding = AE(d=dim_emb,
               beta=5,
               nu1=1e-6,
               nu2=1e-6,
               K=3,
               n_units=[500, 300],
               n_iter=200,
               xeta=1e-4,
               n_batch=100,
               modelfile=['./intermediate/enc_modelsbm.json',
                          './intermediate/dec_modelsbm.json'],
               weightfile=['./intermediate/enc_weightssbm.hdf5',
                           './intermediate/dec_weightssbm.hdf5'])

embs = []
t1 = time()
# Learn a static embedding for each snapshot in the series
for temp_var in range(length):
    emb, _ = embedding.learn_embeddings(graphs[temp_var])
    embs.append(emb)
print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

viz.plot_static_sbm_embedding(embs[-4:], dynamic_sbm_series[-4:])
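# --- Using the learned embeddings (sketch) ----------------------------------------
# Minimal sketch, assuming each `emb` returned by learn_embeddings above is a NumPy
# array of shape (node_num, dim_emb). Candidate (u, v) edges can then be scored
# with a sigmoid over the dot product of the endpoint embeddings, a common way of
# turning node embeddings into link-prediction scores.
import numpy as np

def score_edges(emb, edges):
    """Return a score in (0, 1) for each (u, v) pair in `edges`."""
    emb = np.asarray(emb)
    u_idx = [u for u, v in edges]
    v_idx = [v for u, v in edges]
    dots = np.einsum('ij,ij->i', emb[u_idx], emb[v_idx])  # row-wise dot products
    return 1.0 / (1.0 + np.exp(-dots))

# Example: rank a few node pairs using the embedding of the last snapshot.
# score_edges(embs[-1], [(0, 1), (0, 50), (10, 60)])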
def main(args):
    # Set seeds
    np.random.seed(args.seed)
    from tensorflow import set_random_seed
    set_random_seed(args.seed)

    # Set the number of timesteps in the sequence
    num_timesteps = args.seq_len - 1  # one timestep per pair of consecutive graphs
    num_training_loops = num_timesteps - 1  # number of training loops to actually do (keep the last graph for val/test)

    data_loc = os.path.join(args.data_loc, args.dataset)

    # Preload the training graphs into memory. Not very scalable, but it helps with CPU load.
    # Preload all but the last graph, as the last one is used for val/test.
    graphs = []
    for i in range(num_timesteps):
        adj_train, features = third_party_utils.load_adj_graph(
            f'{data_loc}_t{i}.npz')  # Load the input graph
        graphs.append(
            nx.from_scipy_sparse_matrix(adj_train, create_using=nx.DiGraph()))
        print(f'{args.dataset}_t{i} Loaded')
    # Should equal the length of the time series, as the index starts from zero
    assert len(graphs) == num_timesteps
    print("Training graphs loaded into memory")

    # Extract the val/test graph, which is the final one in the sequence
    val_test_graph_previous, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps-1}.npz')
    val_test_graph, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps}.npz')

    val_test_graph_adj, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = third_party_utils.mask_test_edges(
        val_test_graph)
    val_test_graph_adj, train_edges_pre, val_edges_pre, val_edges_false, test_edges_pre, test_edges_false = third_party_utils.mask_test_edges(
        val_test_graph_previous)

    # Edges that appear in the final graph but not in the previous one
    pos_edges = np.concatenate((val_edges, test_edges, train_edges)).tolist()
    pos_edges = set(map(tuple, pos_edges))
    pos_edges_pre = np.concatenate(
        (val_edges_pre, test_edges_pre, train_edges_pre)).tolist()
    pos_edges_pre = set(map(tuple, pos_edges_pre))
    new_edges = np.array(list(pos_edges - pos_edges_pre))
    num_edges = len(new_edges)
    new_edges_false = test_edges[:num_edges]

    print(
        f"Validation and test edges captured from graph {args.dataset}_t{args.seq_len-1} in the sequence"
    )

    # Choose the model to run
    # AE Static ----------------------------------------------------------------------------
    # Non-offset autoencoder, it seems
    if args.model == "AE":
        embedding = AE(d=dim_emb,
                       beta=5,
                       nu1=1e-6,
                       nu2=1e-6,
                       K=3,
                       n_units=[500, 300],
                       n_iter=100,
                       xeta=1e-6,
                       n_batch=100,
                       modelfile=['./intermediate/enc_modelsbm.json',
                                  './intermediate/dec_modelsbm.json'],
                       weightfile=['./intermediate/enc_weightssbm.hdf5',
                                   './intermediate/dec_weightssbm.hdf5'])

        t1 = time()
        # Loop through each of the graphs in the time series and train the model
        print("Starting training AE")
        # for temp_var in range(num_training_loops):
        #     emb, _ = embedding.learn_embeddings(graphs[temp_var])
        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])
        print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
        print(
            third_party_utils.eval_gae(test_edges, test_edges_false, embedding))
        accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
            new_edges, new_edges_false, embedding, use_embeddings=False)
        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)

    # DynAE ------------------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper. Seems to just be an offset dense
    # autoencoder trained to predict the next graph (a minimal sketch of this idea
    # follows main() below).
elif args.model == "DynAE": embedding = DynAE(d=dim_emb, beta=5, n_prev_graphs=lookback, nu1=1e-6, nu2=1e-6, n_units=[ 500, 300, ], rho=0.3, n_iter=150, xeta=1e-5, n_batch=100, modelfile=[ './intermediate/enc_model_dynAE.json', './intermediate/dec_model_dynAE.json' ], weightfile=[ './intermediate/enc_weights_dynAE.hdf5', './intermediate/dec_weights_dynAE.hdf5' ], savefilesuffix="testing") t1 = time() # for temp_var in range(lookback+1, num_training_loops+1): # print(temp_var) # print(graphs[:temp_var]) # emb, _ = embedding.learn_embeddings(graphs[:temp_var]) emb, _ = embedding.learn_embeddings(graphs[:num_training_loops]) if new_edges.size != 0: print("Here yo") accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae( new_edges, new_edges_false, embedding, use_embeddings=False) print( third_party_utils.eval_gae(new_edges, new_edges_false, embedding, use_embeddings=False)) else: accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0 ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae( test_edges, test_edges_false, embedding, use_embeddings=False) print( third_party_utils.eval_gae(test_edges, test_edges_false, embedding, use_embeddings=False)) #dynRNN ------------------------------------------------------------------------------ # As proposed in dyngraph2vec paper. Only seems to use LSTM cells with no compression beforehand. elif args.model == "DynRNN": embedding = DynRNN(d=dim_emb, beta=5, n_prev_graphs=lookback, nu1=1e-6, nu2=1e-6, n_enc_units=[500, 200], n_dec_units=[200, 500], rho=0.3, n_iter=150, xeta=1e-4, n_batch=100, modelfile=[ './intermediate/enc_model_dynRNN.json', './intermediate/dec_model_dynRNN.json' ], weightfile=[ './intermediate/enc_weights_dynRNN.hdf5', './intermediate/dec_weights_dynRNN.hdf5' ], savefilesuffix="testing") t1 = time() # for temp_var in range(lookback+1, num_training_loops+1): # emb, _ = embedding.learn_embeddings(graphs[:temp_var]) emb, _ = embedding.learn_embeddings(graphs[:num_training_loops]) if new_edges.size != 0: accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae( new_edges, new_edges_false, embedding, use_embeddings=False) print( third_party_utils.eval_gae(new_edges, new_edges_false, embedding, use_embeddings=False)) else: accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0 ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae( test_edges, test_edges_false, embedding, use_embeddings=False) print( third_party_utils.eval_gae(test_edges, test_edges_false, embedding, use_embeddings=False)) #dynAERNN ------------------------------------------------------------------------------ # As proposed in dyngraph2vec paper. Use auto encoder before passing to an LSTM cell. 
elif args.model == "DynAERNN": embedding = DynAERNN(d=dim_emb, beta=5, n_prev_graphs=lookback, nu1=1e-6, nu2=1e-6, n_aeunits=[500, 300], n_lstmunits=[300, dim_emb], rho=0.3, n_iter=150, xeta=1e-3, n_batch=100, modelfile=[ './intermediate/enc_model_dynAERNN.json', './intermediate/dec_model_dynAERNN.json' ], weightfile=[ './intermediate/enc_weights_dynAERNN.hdf5', './intermediate/dec_weights_dynAERNN.hdf5' ], savefilesuffix="testing") t1 = time() # for temp_var in range(lookback+1, num_training_loops+1): # emb, _ = embedding.learn_embeddings(graphs[:temp_var]) #lp.expLP(graphs, embedding, 2, 0, 0) emb, _ = embedding.learn_embeddings(graphs[:num_training_loops]) if new_edges.size != 0: accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae( new_edges, new_edges_false, embedding, use_embeddings=False) print( third_party_utils.eval_gae(new_edges, new_edges_false, embedding, use_embeddings=False)) else: accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0 ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae( test_edges, test_edges_false, embedding, use_embeddings=False) print( third_party_utils.eval_gae(test_edges, test_edges_false, embedding, use_embeddings=False)) return accuracy, roc_score, ap_score, tn, fp, fn, tp, ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp
node_num = 100
community_num = 2
node_change_num = args.nodemigration
dynamic_sbm_series = dynamic_SBM_graph.get_community_diminish_series_v2(
    node_num, community_num, length, 1, node_change_num)

embedding = AE(d=dim_emb,
               beta=5,
               nu1=1e-6,
               nu2=1e-6,
               K=3,
               n_units=[500, 300],
               n_iter=epochs,
               xeta=1e-4,
               n_batch=100,
               modelfile=['./intermediate/AE_enc_modelsbm.json',
                          './intermediate/AE_dec_modelsbm.json'],
               weightfile=['./intermediate/AE_enc_weightssbm.hdf5',
                           './intermediate/AE_dec_weightssbm.hdf5'])

graphs = [g[0] for g in dynamic_sbm_series]
embs = []

outdir = args.resultdir
if not os.path.exists(outdir):
testDataType = testDataType + str(dim_emb)
if not os.path.exists(os.path.join(outdir, testDataType)):
    os.mkdir(os.path.join(outdir, testDataType))

# AE Static
# xeta is the learning rate
embedding = AE(d=dim_emb,
               beta=5,
               nu1=1e-6,
               nu2=1e-6,
               K=3,                  # number of hidden layers
               n_units=[500, 300],
               n_iter=300,           # epochs (previously 500)
               xeta=1e-3,            # learning rate (previously 1e-4, 1e-6)
               n_batch=num_nodes,    # batch size (previously 100)
               modelfile=['./intermediate/enc_modelsbm.json',
                          './intermediate/dec_modelsbm.json'],
               weightfile=['./intermediate/enc_weightssbm.hdf5',
                           './intermediate/dec_weightssbm.hdf5'])

embs = []
t1 = time.time()
error = []
# outdir = outdir + testDataType

# ae static
for temp_var in range(num_sample):