# Shared imports assumed by the example scripts below (dynamicgem package layout);
# project-local helpers such as third_party_utils and process are not shown here.
import os
from time import time

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

from dynamicgem.embedding.ae_static import AE
from dynamicgem.embedding.dynAE import DynAE
from dynamicgem.embedding.dynAERNN import DynAERNN
from dynamicgem.embedding.dynRNN import DynRNN
from dynamicgem.graph_generation import dynamic_SBM_graph as sbm
from dynamicgem.visualization import plot_dynamic_sbm_embedding


def main():
    # Parameters for the stochastic block model graph.
    # Total of 1000 nodes.
    node_num = 1000
    # Test with two communities.
    community_num = 2
    # At each iteration, migrate 10 nodes from one community to another.
    node_change_num = 10
    # Number of time steps over which the graph dynamically changes.
    length = 7
    # Output directories for results and intermediate model files.
    outdir = './output'
    intr = './intermediate'
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    if not os.path.exists(intr):
        os.mkdir(intr)
    testDataType = 'sbm_cd'

    # Generate the dynamic graph series.
    dynamic_sbm_series = list(sbm.get_community_diminish_series_v2(
        node_num, community_num, length,
        1,  # community ID to perturb
        node_change_num))
    graphs = [g[0] for g in dynamic_sbm_series]

    # Parameters for the dynamic embedding.
    dim_emb = 8  # dimension of the embedding
    lookback = 2

    # dynAERNN
    embedding = DynAERNN(d=dim_emb,
                         beta=5,
                         n_prev_graphs=lookback,
                         nu1=1e-6,
                         nu2=1e-6,
                         n_aeunits=[500, 300],
                         n_lstmunits=[500, dim_emb],
                         rho=0.3,
                         n_iter=2,
                         xeta=1e-3,
                         n_batch=100,
                         modelfile=['./intermediate/enc_model_dynAERNN.json',
                                    './intermediate/dec_model_dynAERNN.json'],
                         weightfile=['./intermediate/enc_weights_dynAERNN.hdf5',
                                     './intermediate/dec_weights_dynAERNN.hdf5'],
                         savefilesuffix="testing")

    embs = []
    t1 = time()
    for temp_var in range(lookback + 1, length + 1):
        emb, _ = embedding.learn_embeddings(graphs[:temp_var])
        embs.append(emb)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

    plt.figure()
    plt.clf()
    plot_dynamic_sbm_embedding.plot_dynamic_sbm_embedding_v2(
        embs[-5:-1], dynamic_sbm_series[-5:])
    plt.show()
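# The script above creates `outdir` but never writes anything into it. A minimal
# sketch for persisting each snapshot embedding there -- an illustration, not part
# of the original script. It assumes each `emb` returned by learn_embeddings is a
# (node_num x dim_emb) NumPy array, as in the training loop above.
def save_embeddings(embs, outdir='./output', prefix='dynAERNN'):
    # Write one .npy file per trained snapshot, e.g. ./output/dynAERNN_t0.npy.
    os.makedirs(outdir, exist_ok=True)
    for step, emb in enumerate(embs):
        np.save(os.path.join(outdir, '%s_t%d.npy' % (prefix, step)), np.asarray(emb))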
def main(args):
    # Set seeds.
    np.random.seed(args.seed)
    from tensorflow import set_random_seed
    set_random_seed(args.seed)

    # Set the number of timesteps in the sequence.
    num_timesteps = args.seq_len - 1  # one timestep per pair of consecutive graphs
    # Number of training loops to actually do (keep the last graph for test/validation).
    num_training_loops = num_timesteps - 1

    data_loc = os.path.join(args.data_loc, args.dataset)

    # Preload the training graphs into memory. Not very scalable, but it helps with CPU load.
    # Preload all but the last graph, as that one is used for val/test.
    graphs = []
    for i in range(num_timesteps):
        # Load the input graph.
        adj_train, features = third_party_utils.load_adj_graph(f'{data_loc}_t{i}.npz')
        graphs.append(nx.from_scipy_sparse_matrix(adj_train, create_using=nx.DiGraph()))
        print(f'{args.dataset}_t{i} Loaded')
    # Should equal the length of the time series, as the index starts from zero.
    assert len(graphs) == num_timesteps
    print("Training graphs loaded into memory")

    # Extract the val/test graph, which is the final one in the sequence.
    val_test_graph_previous, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps - 1}.npz')
    val_test_graph, _ = third_party_utils.load_adj_graph(
        f'{data_loc}_t{num_timesteps}.npz')
    val_test_graph_adj, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = \
        third_party_utils.mask_test_edges(val_test_graph)
    _, train_edges_pre, val_edges_pre, val_edges_false_pre, test_edges_pre, test_edges_false_pre = \
        third_party_utils.mask_test_edges(val_test_graph_previous)

    pos_edges = np.concatenate((val_edges, test_edges, train_edges)).tolist()
    pos_edges = set(map(tuple, pos_edges))
    pos_edges_pre = np.concatenate((val_edges_pre, test_edges_pre, train_edges_pre)).tolist()
    pos_edges_pre = set(map(tuple, pos_edges_pre))
    # Edges present in the final graph but not in the previous one.
    new_edges = np.array(list(pos_edges - pos_edges_pre))
    num_edges = len(new_edges)
    new_edges_false = test_edges_false[:num_edges]
    print(f"Validation and test edges captured from graph {args.dataset}_t{args.seq_len - 1} in the sequence")

    # Embedding hyperparameters (not defined elsewhere in this excerpt; values assumed).
    dim_emb = 128
    lookback = 2

    # Choose the model to run.
    # AE (static) -----------------------------------------------------------------
    # Seems to be a non-offset (static) autoencoder.
    if args.model == "AE":
        embedding = AE(d=dim_emb,
                       beta=5,
                       nu1=1e-6,
                       nu2=1e-6,
                       K=3,
                       n_units=[500, 300, ],
                       n_iter=100,
                       xeta=1e-6,
                       n_batch=100,
                       modelfile=['./intermediate/enc_modelsbm.json',
                                  './intermediate/dec_modelsbm.json'],
                       weightfile=['./intermediate/enc_weightssbm.hdf5',
                                   './intermediate/dec_weightssbm.hdf5'])
        t1 = time()
        # AE static: train on the graphs in the time series.
        print("Starting training AE")
        # for temp_var in range(num_training_loops):
        #     emb, _ = embedding.learn_embeddings(graphs[temp_var])
        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])
        print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
        print(third_party_utils.eval_gae(test_edges, test_edges_false, embedding))
        accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
            new_edges, new_edges_false, embedding, use_embeddings=False)
        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)

    # dynAE -----------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper. Seems to be an offset dense autoencoder
    # trained to predict the next graph.
    elif args.model == "DynAE":
        embedding = DynAE(d=dim_emb,
                          beta=5,
                          n_prev_graphs=lookback,
                          nu1=1e-6,
                          nu2=1e-6,
                          n_units=[500, 300, ],
                          rho=0.3,
                          n_iter=150,
                          xeta=1e-5,
                          n_batch=100,
                          modelfile=['./intermediate/enc_model_dynAE.json',
                                     './intermediate/dec_model_dynAE.json'],
                          weightfile=['./intermediate/enc_weights_dynAE.hdf5',
                                      './intermediate/dec_weights_dynAE.hdf5'],
                          savefilesuffix="testing")
        t1 = time()
        # for temp_var in range(lookback + 1, num_training_loops + 1):
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])
        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])
        if new_edges.size != 0:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0
        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False))

    # dynRNN ----------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper. Seems to use only LSTM cells, with no
    # compression beforehand.
    elif args.model == "DynRNN":
        embedding = DynRNN(d=dim_emb,
                           beta=5,
                           n_prev_graphs=lookback,
                           nu1=1e-6,
                           nu2=1e-6,
                           n_enc_units=[500, 200],
                           n_dec_units=[200, 500],
                           rho=0.3,
                           n_iter=150,
                           xeta=1e-4,
                           n_batch=100,
                           modelfile=['./intermediate/enc_model_dynRNN.json',
                                      './intermediate/dec_model_dynRNN.json'],
                           weightfile=['./intermediate/enc_weights_dynRNN.hdf5',
                                       './intermediate/dec_weights_dynRNN.hdf5'],
                           savefilesuffix="testing")
        t1 = time()
        # for temp_var in range(lookback + 1, num_training_loops + 1):
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])
        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])
        if new_edges.size != 0:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0
        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False))

    # dynAERNN --------------------------------------------------------------------
    # As proposed in the dyngraph2vec paper. Uses an autoencoder before passing to
    # an LSTM cell.
    elif args.model == "DynAERNN":
        embedding = DynAERNN(d=dim_emb,
                             beta=5,
                             n_prev_graphs=lookback,
                             nu1=1e-6,
                             nu2=1e-6,
                             n_aeunits=[500, 300],
                             n_lstmunits=[300, dim_emb],
                             rho=0.3,
                             n_iter=150,
                             xeta=1e-3,
                             n_batch=100,
                             modelfile=['./intermediate/enc_model_dynAERNN.json',
                                        './intermediate/dec_model_dynAERNN.json'],
                             weightfile=['./intermediate/enc_weights_dynAERNN.hdf5',
                                         './intermediate/dec_weights_dynAERNN.hdf5'],
                             savefilesuffix="testing")
        t1 = time()
        # for temp_var in range(lookback + 1, num_training_loops + 1):
        #     emb, _ = embedding.learn_embeddings(graphs[:temp_var])
        # lp.expLP(graphs, embedding, 2, 0, 0)
        emb, _ = embedding.learn_embeddings(graphs[:num_training_loops])
        if new_edges.size != 0:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False)
            print(third_party_utils.eval_gae(
                new_edges, new_edges_false, embedding, use_embeddings=False))
        else:
            accuracy, roc_score, ap_score, tn, fp, fn, tp = 0, 0, 0, 0, 0, 0, 0
        ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp = third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False)
        print(third_party_utils.eval_gae(
            test_edges, test_edges_false, embedding, use_embeddings=False))

    return (accuracy, roc_score, ap_score, tn, fp, fn, tp,
            ae_accuracy, ae_roc_score, ae_ap_score, ae_tn, ae_fp, ae_fn, ae_tp)
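# A minimal command-line driver for main(args) above -- a sketch, not part of the
# original. The flag names mirror the attributes the function reads (seed, seq_len,
# data_loc, dataset, model); the default values here are assumptions.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='dyngraph2vec-style link prediction on a dynamic graph sequence.')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--seq_len', type=int, default=7)        # number of graphs in the sequence
    parser.add_argument('--data_loc', type=str, default='data')  # directory with <dataset>_t<i>.npz files
    parser.add_argument('--dataset', type=str, default='enron')  # dataset file prefix (assumed)
    parser.add_argument('--model', type=str, default='DynAERNN',
                        choices=['AE', 'DynAE', 'DynRNN', 'DynAERNN'])
    print(main(parser.parse_args()))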
def main():
    # data_list = ['cellphone', 'enron', 'fbmessages', 'HS11', 'HS12', 'primary', 'workplace']
    data_list = ['bitcoin_alpha', 'bitcoin_otc', 'college_msg', 'enron_all', 'enron_all_shuffle']
    funcs = ['AE', 'AERNN']
    for data in data_list:
        graphs = process('data/' + data)
        length = len(graphs)
        dim_emb = 128
        lookback = 3
        for func in funcs:
            MAP_list = []
            for i in range(length - lookback - 1):
                if func == 'AERNN':
                    embedding = DynAERNN(d=dim_emb, beta=5, n_prev_graphs=lookback,
                                         nu1=1e-6, nu2=1e-6,
                                         n_aeunits=[500, 300], n_lstmunits=[500, dim_emb],
                                         rho=0.3, n_iter=250, xeta=1e-3, n_batch=100,
                                         modelfile=None, weightfile=None, savefilesuffix=None)
                elif func == 'RNN':
                    embedding = DynRNN(d=dim_emb, beta=5, n_prev_graphs=lookback,
                                       nu1=1e-6, nu2=1e-6,
                                       n_enc_units=[500, 300], n_dec_units=[500, 300],
                                       rho=0.3, n_iter=250, xeta=1e-3, n_batch=100,
                                       modelfile=None, weightfile=None, savefilesuffix=None)
                else:
                    embedding = DynAE(d=dim_emb, beta=5, n_prev_graphs=lookback,
                                      nu1=1e-6, nu2=1e-6,
                                      n_units=[500, 300, ],
                                      rho=0.3, n_iter=250, xeta=1e-4, n_batch=100,
                                      modelfile=None, weightfile=None, savefilesuffix=None)
                embs = []
                t1 = time()
                # for temp_var in range(lookback + 1, length + 1):
                emb, _ = embedding.learn_embeddings(graphs[i: i + lookback + 1])
                embs.append(emb)
                print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
                # Predict the next adjacency matrix and score it against the true next graph.
                pred_adj = graphify(embedding.predict_next_adj())
                edge_index_pre = evaluation_util.getEdgeListFromAdjMtx(adj=pred_adj)
                MAP = metrics.computeMAP(edge_index_pre, graphs[i + lookback + 1])
                MAP_list.append(MAP)
                print('MAP for timesteps ' + str(i) + '-' + str(i + lookback) + ': ' + str(MAP))
            MAP_list.append(np.mean(MAP_list))
            result = {'MAP': MAP_list}
            label = []
            for i in range(len(MAP_list) - 1):
                label.append('timesteps ' + str(i) + '-' + str(i + lookback))
            label.append('mean_MAP')
            if not os.path.exists('result/' + data):
                os.makedirs('result/' + data)
            csv_path = 'result/' + data + '/' + str(func) + '.csv'
            df = pd.DataFrame(result, index=label)
            df.to_csv(csv_path)
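# `process` and `graphify` are project helpers not shown in this excerpt. Below is
# a plausible stand-in for `graphify` -- an assumption, not the project's actual
# implementation -- that thresholds the predicted weighted adjacency matrix back
# into a directed NetworkX graph:
def graphify_sketch(pred_adj, threshold=0.5):
    # Hypothetical helper: binarise predicted edge weights and drop self-loops.
    adj = (np.asarray(pred_adj) >= threshold).astype(int)
    np.fill_diagonal(adj, 0)
    return nx.from_numpy_array(adj, create_using=nx.DiGraph())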
    length = 7
    lookback = args.lookback
    if args.testDataType == 'sbm_rp':
        node_num = 1000
        community_num = 50
        node_change_num = 10
        dynamic_sbm_series = dynamic_SBM_graph.get_random_perturbation_series(
            node_num, community_num, length, node_change_num)
        dynamic_embedding = DynAERNN(
            d=100,
            beta=100,
            n_prev_graphs=lookback,
            nu1=1e-6,
            nu2=1e-6,
            n_aeunits=[50, 30, ],
            n_lstmunits=[30, 100],
            rho=0.3,
            n_iter=30,
            xeta=0.005,
            n_batch=50,
            modelfile=['./intermediate/enc_model.json',
                       './intermediate/dec_model.json'],
            weightfile=['./intermediate/enc_weights.hdf5',
                        './intermediate/dec_weights.hdf5'],
        )
        dynamic_embedding.learn_embeddings([g[0] for g in dynamic_sbm_series])
        plot_dynamic_sbm_embedding.plot_dynamic_sbm_embedding(
            dynamic_embedding.get_embeddings(), dynamic_sbm_series)
        plt.savefig('result/visualization_DynAERNN_rp.png')
        plt.show()
    elif args.testDataType == 'sbm_cd':
        node_num = 100
        community_num = 2
        node_change_num = args.nodemigration
        dynamic_sbm_series = dynamic_SBM_graph.get_community_diminish_series_v2(
            node_num, community_num, length,
            1,  # community ID to perturb
            node_change_num)
def plot_dynam_graph(title, graph_list):
    # Drop missing snapshots from the series.
    graphs = [graph for graph in graph_list if graph is not None]
    dynamic_sbm_series = graphs
    outdir = os.path.join(os.path.dirname(__file__), "out")
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    testDataType = 'sbm_cd'
    length = len(graphs)
    dim_emb = 128
    lookback = 2

    # dynAERNN
    embedding = DynAERNN(d=dim_emb,
                         beta=5,
                         n_prev_graphs=lookback,
                         nu1=1e-6,
                         nu2=1e-6,
                         n_aeunits=[500, 300],
                         n_lstmunits=[500, dim_emb],
                         rho=0.3,
                         n_iter=250,
                         xeta=1e-3,
                         n_batch=100,
                         modelfile=None,  # ['./intermediate/enc_model_dynAERNN.json',
                                          #  './intermediate/dec_model_dynAERNN.json']
                         weightfile=None,  # ['./intermediate/enc_weights_dynAERNN.hdf5',
                                           #  './intermediate/dec_weights_dynAERNN.hdf5']
                         savefilesuffix=None)  # "testing"
    embs = []
    t1 = time()
    for temp_var in range(lookback + 1, length + 1):
        emb, _ = embedding.learn_embeddings(graphs[:temp_var])
        embs.append(emb)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))

    plt.figure()
    plt.clf()
    plot_dynamic_sbm_embedding.plot_dynamic_sbm_embedding_v2(
        embs[-5:-1], dynamic_sbm_series[-5:])
    plt.title(title)
    plt.show()

    # dynamicTriad
    datafile = dataprep_util.prep_input_dynTriad(graphs, length, testDataType)
    embedding = dynamicTriad(niters=20,
                             starttime=0,
                             datafile=datafile,
                             batchsize=1000,
                             nsteps=length,
                             embdim=dim_emb,
                             stepsize=1,
                             stepstride=1,
                             outdir=outdir,
                             cachefn='/tmp/' + testDataType,
                             lr=0.1,
                             beta=[0.1, 0.1],
                             negdup=1,
                             datasetmod='core.dataset.adjlist',
                             trainmod='dynamicgem.dynamictriad.core.algorithm.dynamic_triad',
                             pretrain_size=length,
                             sampling_args={},
                             validation='link_reconstruction',
                             datatype=testDataType,
                             scale=1,
                             classifier='lr',
                             debug=False,
                             test='link_predict',
                             repeat=1,
                             resultdir=outdir,
                             testDataType=testDataType,
                             clname='lr')
    t1 = time()
    embedding.learn_embedding()
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    embedding.get_embedding()
    embedding.plotresults(dynamic_sbm_series)
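# Hypothetical usage of plot_dynam_graph -- illustration only. A short
# Erdos-Renyi series stands in for a real dynamic-graph dataset here.
example_series = [nx.erdos_renyi_graph(100, 0.05, seed=t) for t in range(7)]
plot_dynam_graph('random_series', example_series)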