def generate(scores, tg_sum, num_graphs):
    """Build ``num_graphs`` NetworkX graphs from a score matrix.

    Every graph comes from its own call to ``utils.graph_from_scores``, so
    the helper is invoked ``num_graphs`` times with the same arguments.

    Parameters
    ----------
    scores:
        Forwarded unchanged to ``utils.graph_from_scores``.
    tg_sum:
        Forwarded unchanged to ``utils.graph_from_scores``
        (presumably the target edge count -- TODO confirm).
    num_graphs: int
        How many graphs to build.

    Returns
    -------
    list of nx.Graph
        One undirected graph per requested sample, each named ``'blah'``.
    """

    def _build_one():
        # presumably a dense adjacency array, since it is handed straight to
        # nx.from_numpy_array -- verify against utils.graph_from_scores
        adjacency = utils.graph_from_scores(scores, tg_sum)
        graph = nx.from_numpy_array(adjacency, create_using=nx.Graph())
        graph.name = 'blah'  # filler - renamed later in graph_models.py
        return graph

    return [_build_one() for _ in range(num_graphs)]
def train(self, A_orig, val_ones, val_zeros, max_iters=50000, stopping=None,
          eval_transitions=15e6, transitions_per_iter=150000, max_patience=5,
          eval_every=500, plot_every=-1, save_directory="../snapshots",
          model_name=None, continue_training=False):
    """
    Run the training loop with periodic evaluation and early stopping.

    Each iteration performs one generator update followed by
    ``self.params['disc_iters']`` discriminator updates. Every *eval_every*
    iterations random walks are sampled, turned into a score matrix and a
    graph, and the early-stopping criterion is checked.

    Parameters
    ----------
    A_orig: sparse matrix, shape: (N,N)
        Adjacency matrix of the original graph to be trained on.
    val_ones: np.array, shape (n_val, 2)
        The indices of the hold-out set of validation edges
    val_zeros: np.array, shape (n_val, 2)
        The indices of the hold-out set of validation non-edges
    max_iters: int, default: 50,000
        The maximum number of training iterations if early stopping does not apply.
    stopping: float in (0,1] or None, default: None
        The early stopping strategy. None means VAL criterion will be used (i.e. evaluation on the
        validation set and stopping after there has not been an improvement for *max_patience* steps).
        Set to a value in the interval (0,1] to stop when the edge overlap exceeds this threshold.
    eval_transitions: int, default: 15e6
        The number of transitions that will be used for evaluating the validation performance, e.g.
        if the random walk length is 5, each random walk contains 4 transitions.
    transitions_per_iter: int, default: 150000
        The number of transitions that will be generated in one batch. Higher means faster
        generation, but more RAM usage.
    max_patience: int, default: 5
        Maximum evaluation steps without improvement of the validation accuracy to tolerate. Only
        applies to the VAL criterion.
    eval_every: int, default: 500
        Evaluate the model every X iterations.
    plot_every: int, default: -1
        Plot the generator/discriminator losses every X iterations. Set to None or a negative number
        to disable plotting.
    save_directory: str, default: "../snapshots"
        The directory to save model snapshots to.
    model_name: str, default: None
        Name of the model (will be used for saving the snapshots).
    continue_training: bool, default: False
        Whether to start training without initializing the weights first. If False, weights will be
        initialized.

    Returns
    -------
    log_dict: dict
        A dictionary with the following values observed during training:
        * The generator and discriminator losses
        * The validation performances (ROC and AP)
        * The edge overlap values between the generated and original graph
        * The sampled graphs for all evaluation steps.

    Raises
    ------
    ValueError
        If *stopping* is neither None nor a real number in (0, 1].
    """
    if stopping is None:  # use VAL criterion
        best_performance = 0.0
        patience = max_patience
        print("**** Using VAL criterion for early stopping ****")
    else:  # use EO criterion
        # Validate explicitly: an `assert` would be stripped under `python -O`,
        # and a type-name string check would reject the integer 1.
        is_real_number = (
            isinstance(stopping, (int, float, np.integer, np.floating))
            and not isinstance(stopping, bool))
        if not is_real_number or not 0 < stopping <= 1:
            raise ValueError(
                "stopping must be None or a number in (0, 1], got {!r}".format(
                    stopping))
        print("**** Using EO criterion of {} for early stopping".format(
            stopping))

    if not os.path.isdir(save_directory):
        os.makedirs(save_directory)

    if model_name is None:
        # Find the file corresponding to the lowest vacant model number to store the snapshots into.
        model_number = 0
        while os.path.exists("{}/model_best_{}.ckpt".format(
                save_directory, model_number)):
            model_number += 1
        save_file = "{}/model_best_{}.ckpt".format(save_directory,
                                                   model_number)
        # Touch the file so that the chosen model number is reserved on disk.
        with open(save_file, 'a'):
            pass
    else:
        save_file = "{}/{}_best.ckpt".format(save_directory, model_name)
    print("**** Saving snapshots into {} ****".format(save_file))

    if not continue_training:
        print("**** Initializing... ****")
        self.session.run(self.init_op)
        print("**** Done.           ****")
    else:
        print(
            "**** Continuing training without initializing weights. ****")

    # Validation labels: ones for the validation edges, zeros for the non-edges.
    actual_labels_val = np.append(np.ones(len(val_ones)),
                                  np.zeros(len(val_zeros)))

    # Some lists to store data into.
    gen_losses = []
    disc_losses = []
    graphs = []
    val_performances = []
    eo = []
    temperature = self.params['temp_start']

    starting_time = time.time()
    saver = tf.train.Saver()

    transitions_per_walk = self.rw_len - 1
    # Sample lots of random walks, used for evaluation of model.
    sample_many_count = int(
        np.round(transitions_per_iter / transitions_per_walk))
    sample_many = self.generate_discrete(sample_many_count, reuse=True)
    n_eval_walks = eval_transitions / transitions_per_walk
    n_eval_iters = int(np.round(n_eval_walks / sample_many_count))

    print("**** Starting training. ****")

    # Pre-bind the loop variable so the final report below does not raise
    # NameError when max_iters == 0 and the loop body never runs.
    _it = 0
    for _it in range(max_iters):

        if _it > 0 and _it % (2500) == 0:
            t = time.time() - starting_time
            print(
                '{:<7}/{:<8} training iterations, took {} seconds so far...'
                .format(_it, max_iters, int(t)))

        # Generator training iteration
        gen_loss, _ = self.session.run([self.gen_cost, self.gen_train_op],
                                       feed_dict={self.tau: temperature})

        _disc_l = []
        # Multiple discriminator training iterations.
        for _ in range(self.params['disc_iters']):
            disc_loss, _ = self.session.run(
                [self.disc_cost, self.disc_train_op],
                feed_dict={self.tau: temperature})
            _disc_l.append(disc_loss)

        gen_losses.append(gen_loss)
        disc_losses.append(np.mean(_disc_l))

        # Evaluate the model's progress.
        if _it > 0 and _it % eval_every == 0:

            # Sample lots of random walks.
            smpls = [
                self.session.run(sample_many, {self.tau: 0.5})
                for _ in range(n_eval_iters)
            ]

            # Compute score matrix
            gr = utils.score_matrix_from_random_walks(
                np.array(smpls).reshape([-1, self.rw_len]), self.N)
            gr = gr.tocsr()

            # The sum over A_orig is used several times below; compute it once.
            num_edges = A_orig.sum()

            # Assemble a graph from the score matrix
            _graph = utils.graph_from_scores(gr, num_edges)
            # Compute edge overlap
            edge_overlap = utils.edge_overlap(A_orig.toarray(), _graph)
            graphs.append(_graph)
            eo.append(edge_overlap)

            edge_scores = np.append(gr[tuple(val_ones.T)].A1,
                                    gr[tuple(val_zeros.T)].A1)

            # Compute Validation ROC-AUC and average precision scores.
            val_performances.append(
                (roc_auc_score(actual_labels_val, edge_scores),
                 average_precision_score(actual_labels_val, edge_scores)))

            # Update Gumbel temperature.
            # NOTE(review): this sits inside the evaluation branch, so the
            # temperature only changes on evaluation steps -- confirm intended.
            temperature = np.maximum(
                self.params['temp_start'] *
                np.exp(-(1 - self.params['temperature_decay']) * _it),
                self.params['min_temperature'])

            print(
                "**** Iter {:<6} Val ROC {:.3f}, AP: {:.3f}, EO {:.3f} ****"
                .format(_it, val_performances[-1][0],
                        val_performances[-1][1], edge_overlap / num_edges))

            if stopping is None:  # Evaluate VAL criterion
                if np.sum(val_performances[-1]) > best_performance:
                    # New "best" model
                    best_performance = np.sum(val_performances[-1])
                    patience = max_patience
                    _ = saver.save(self.session, save_file)
                else:
                    patience -= 1

                if patience == 0:
                    print("**** EARLY STOPPING AFTER {} ITERATIONS ****".
                          format(_it))
                    break
            elif edge_overlap / num_edges >= stopping:  # Evaluate EO criterion
                print(
                    "**** EARLY STOPPING AFTER {} ITERATIONS ****".format(
                        _it))
                break

        # The docstring promises that None disables plotting; guard against it
        # explicitly because `None > 0` raises TypeError on Python 3.
        if (plot_every is not None and plot_every > 0
                and (_it + 1) % plot_every == 0):
            if len(disc_losses) > 10:
                plt.plot(disc_losses[9::], label="Critic loss")
                plt.plot(gen_losses[9::], label="Generator loss")
            else:
                plt.plot(disc_losses, label="Critic loss")
                plt.plot(gen_losses, label="Generator loss")
            plt.legend()
            plt.show()

    print("**** Training completed after {} iterations. ****".format(_it))
    # The final loss plot is always shown, independent of plot_every.
    plt.plot(disc_losses[9::], label="Critic loss")
    plt.plot(gen_losses[9::], label="Generator loss")
    plt.legend()
    plt.show()
    if stopping is None:
        # Under the VAL criterion, reload the snapshot written for the best
        # validation performance.
        saver.restore(self.session, save_file)
    #### Training completed.
    log_dict = {
        "disc_losses": disc_losses,
        'gen_losses': gen_losses,
        'val_performances': val_performances,
        'edge_overlaps': eo,
        'generated_graphs': graphs
    }
    return log_dict
def gen(scores, tg_sum):
    """Return whatever ``utils.graph_from_scores(scores, tg_sum)`` produces.

    Thin wrapper: both arguments are forwarded unchanged and the helper's
    result is returned as-is.
    """
    sampled = utils.graph_from_scores(scores, tg_sum)
    return sampled
# Labels for the test pairs: ones for every entry of test_ones, zeros for
# every entry of test_zeros (presumably held-out edges / non-edges -- confirm).
test_labels = np.concatenate(
    (np.ones(len(test_ones)), np.zeros(len(test_zeros))))
# Scores read from scores_matrix at the same index pairs, in the same order
# as the labels above.
test_scores = np.concatenate((scores_matrix[tuple(test_ones.T)].A1,
                              scores_matrix[tuple(test_zeros.T)].A1))

# In[37]:

print(roc_auc_score(test_labels, test_scores))

# In[38]:

print(average_precision_score(test_labels, test_scores))

A_select = train_graph
print(A_select.sum())

# Graph assembled from the score matrix, sized by the sum over A_select.
sampled_graph = utils.graph_from_scores(scores_matrix, A_select.sum())
np.savetxt('netgan/plots/sampled_graph.txt', sampled_graph)
stats = utils.compute_graph_statistics(sampled_graph)
# Use a context manager so the file is closed even if the write raises.
with open('netgan/plots/stats.txt', "w") as f:
    f.write(str(stats))

# Second graph, built by utils.graph_from_transitions from transition_tensor.
sampled_graph_from_walk = utils.graph_from_transitions(
    transition_tensor, edges, A_select.sum(), _N)
print(type(sampled_graph_from_walk))
print(type(sampled_graph))
np.savetxt('netgan/plots/sampled_graph_from_walk.txt',
           sampled_graph_from_walk)
# Rebinds `stats`; the statistics of the first graph were already written above.
stats = utils.compute_graph_statistics(sampled_graph_from_walk)