def update(opt, sess, feas, i, placeholders):
    """Run one optimisation step on the ``i``-th entry of every list in ``feas``.

    Args:
        opt: Object exposing ``opt_op``, ``cost``, ``struct_cost`` and
            ``temporal_cost``.
        sess: Session whose ``run`` executes the fetches.
        feas: Mapping from the keys in ``feed_keys`` below to sequences
            indexable by ``i``.
        i: Index selecting one entry from each sequence.
        placeholders: Mapping passed to ``construct_feed_dict``; must hold a
            ``'dropout'`` entry.

    Returns:
        Tuple ``(reconstruct_loss, feed_dict, struct_cost, temporal_cost)``.
    """
    # The order is significant: it is the positional argument order expected
    # by construct_feed_dict.
    feed_keys = (
        'struct_adj_norms',
        'struct_adj_origs',
        'struct_features',
        'temporal_adj_origs',
        'struct_pos_weights',
        'struct_norms',
        'temporal_pos_weights',
        'temporal_norms',
    )
    feed_args = [feas[key][i] for key in feed_keys]
    feed_args.append(placeholders)

    feed_dict = construct_feed_dict(*feed_args)
    feed_dict[placeholders['dropout']] = FLAGS.dropout

    fetches = [opt.opt_op, opt.cost, opt.struct_cost, opt.temporal_cost]
    results = sess.run(fetches, feed_dict=feed_dict)
    reconstruct_loss, struct_cost, temporal_cost = results[1:]
    return reconstruct_loss, feed_dict, struct_cost, temporal_cost
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj, distribution, adj_dense):
    """Run one round of autoencoder and adversarial updates and read the embedding.

    Changes from the previous version:
      * The ``np.random.randn`` sample that was overwritten on the very next
        line by ``distribution.sample`` is no longer drawn.
      * ``sess.run`` is no longer wrapped in ``tf.device("/gpu:3")``; device
        scopes apply to ops created inside them, and no ops are created here.

    Args:
        model: Object exposing ``z_mean``.
        opt: Object exposing the optimiser ops and losses fetched below.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with ``'dropout'``, ``'features_dense'`` and
            ``'real_distribution'`` entries (plus whatever
            ``construct_feed_dict`` reads).
        adj: Object whose ``shape[0]`` gives the number of samples to draw.
        distribution: Object whose ``sample(n)`` yields the value fed to the
            ``'real_distribution'`` placeholder.
        adj_dense: Value fed to the ``'features_dense'`` placeholder.

    Returns:
        ``(emb, avg_cost)`` where ``avg_cost`` is the list
        ``[reconstruct_loss, d_loss, g_loss, GD_loss, GG_loss]``.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict.update({
        placeholders['dropout']: FLAGS.dropout,
        placeholders['features_dense']: adj_dense,
        placeholders['real_distribution']: distribution.sample(adj.shape[0]),
    })

    # NOTE(review): the original indentation was lost; only the reconstruction
    # step is assumed to sit inside the 5-iteration loop — confirm.
    for _ in range(5):
        _, reconstruct_loss = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)

    GG_loss, _ = sess.run([opt.generator_loss_z2g, opt.generator_optimizer_z2g], feed_dict=feed_dict)
    sess.run([opt.generator_loss, opt.generator_optimizer], feed_dict=feed_dict)
    sess.run([opt.dc_loss, opt.discriminator_optimizer], feed_dict=feed_dict)
    GD_loss, _ = sess.run([opt.GD_loss, opt.discriminator_optimizer_z2g], feed_dict=feed_dict)

    # The reported generator/discriminator losses are re-evaluated after all
    # optimiser steps above, without applying a further update.
    g_loss = sess.run(opt.generator_loss, feed_dict=feed_dict)
    d_loss = sess.run(opt.dc_loss, feed_dict=feed_dict)
    emb = sess.run(model.z_mean, feed_dict=feed_dict)

    avg_cost = [reconstruct_loss, d_loss, g_loss, GD_loss, GG_loss]
    return emb, avg_cost
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj):
    """Run one training round with a random node-sample mask.

    Changes from the previous version:
      * The sample size is capped at the number of available rows, so inputs
        with fewer than 256 rows no longer make ``random.sample`` raise
        ``ValueError``.
      * The mask is filled with one indexed assignment instead of a Python
        loop.

    Args:
        model: Object exposing ``embeddings_concat`` and ``embeddings_long``.
        opt: Object exposing the optimiser ops and losses fetched below.
        sess: Session used to run the ops.
        adj_norm, adj_label: Forwarded to ``construct_feed_dict``.
        features: Forwarded to ``construct_feed_dict``; ``features[2][0]`` is
            used as the number of rows of the sample mask.
        placeholders: Mapping with ``'dropout'``, ``'sample'`` and
            ``'real_distribution'`` entries.
        adj: Object whose ``shape[0]`` sizes the random prior sample.

    Returns:
        ``(emb_long, emb_concat, AE_cost, attr_loss, pri_loss, link_loss)``.
        Both embeddings are read before any optimiser step of this call.
    """
    num_rows = features[2][0]
    sample_size = min(256, num_rows)

    # Column mask with 1 at the sampled row indices, 0 elsewhere.
    sampled_id = np.zeros((num_rows, 1))
    sampled_id[random.sample(range(num_rows), sample_size)] = 1

    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict[placeholders['dropout']] = FLAGS.dropout
    feed_dict[placeholders['sample']] = sampled_id

    emb_concat, emb_long = sess.run(
        [model.embeddings_concat, model.embeddings_long], feed_dict=feed_dict)

    feed_dict[placeholders['real_distribution']] = np.random.randn(
        adj.shape[0], FLAGS.hidden2)

    # NOTE(review): the original indentation was lost; the two loops are
    # assumed to be sequential (not nested) and each to contain a single
    # sess.run — confirm against the upstream source.
    for _ in range(5):
        sess.run([opt.O_opt_op, opt.cost], feed_dict=feed_dict)
    for _ in range(10):
        sess.run([opt.A_opt_op, opt.pri_loss], feed_dict=feed_dict)

    sess.run([opt.dc_loss, opt.discriminator_optimizer], feed_dict=feed_dict)
    _, _, attr_loss, pri_loss, link_loss = sess.run(
        [opt.generator_loss, opt.generator_optimizer,
         opt.attr_loss, opt.pri_loss, opt.link_cost],
        feed_dict=feed_dict)

    AE_cost = sess.run(opt.cost, feed_dict=feed_dict)
    return emb_long, emb_concat, AE_cost, attr_loss, pri_loss, link_loss
def warm_update(model, opt, sess, num_view, adj_norm, adj_label, features, placeholders, pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop):
    """Run five passes of per-view optimiser steps and collect the losses.

    Args:
        model: Unused here; kept for a uniform call signature.
        opt: Object exposing ``opt_op_list`` and ``cost_list``, both indexable
            by view number.
        sess: Session used to run the ops.
        num_view: Number of views to iterate over.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping holding the placeholders fed below.
        pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop: Values fed to
            the placeholders of the same name.

    Returns:
        List of the cost fetched at every (pass, view) step, in run order.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    extra_feeds = (
        ('dropout', FLAGS.dropout),
        ('attn_drop', attn_drop),
        ('ffd_drop', ffd_drop),
        ('pos_weights', pos_weights),
        ('fea_pos_weights', fea_pos_weights),
        ('norm', norm),
    )
    for key, value in extra_feeds:
        feed_dict[placeholders[key]] = value

    # NOTE(review): the original indentation was lost; the append is assumed
    # to sit inside the inner (per-view) loop — confirm.
    losses = []
    for _ in range(5):
        for view in range(num_view):
            step = sess.run(
                [opt.opt_op_list[view], opt.cost_list[view]],
                feed_dict=feed_dict)
            losses.append(step[1])
    return losses
def update_with_gan(model, opt, sess, adj_norm, adj_label, features, placeholders, adj):
    """Read the current embedding, then run one round of AE and GAN updates.

    Fix: the dropout placeholder used to be set to ``FLAGS.dropout`` and then
    immediately overwritten with 0, and the 0 stayed in the feed dictionary
    for every optimiser step, so the configured dropout rate never took
    effect. The embedding is still read with dropout 0, but the optimiser
    steps now receive ``FLAGS.dropout``.

    Args:
        model: Object exposing ``z_mean``.
        opt: Object exposing ``opt_op``, ``cost``, ``dc_loss``,
            ``discriminator_optimizer``, ``generator_loss`` and
            ``generator_optimizer``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with ``'dropout'`` and ``'real_distribution'``
            entries.
        adj: Object whose ``shape[0]`` sizes the random prior sample.

    Returns:
        ``(emb, avg_cost)``: the embedding read before the updates and the
        reconstruction cost of the last autoencoder step.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    dropout_ph = placeholders['dropout']

    # Embedding is read with dropout switched off.
    feed_dict[dropout_ph] = 0
    emb = sess.run(model.z_mean, feed_dict=feed_dict)

    # Optimiser steps use the configured dropout rate.
    feed_dict[dropout_ph] = FLAGS.dropout
    feed_dict[placeholders['real_distribution']] = np.random.randn(
        adj.shape[0], FLAGS.hidden4)

    # NOTE(review): the original indentation was lost; only the autoencoder
    # step is assumed to sit inside the 5-iteration loop — confirm.
    for _ in range(5):
        _, reconstruct_loss = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)

    # One discriminator step, then one generator step; their losses are not
    # reported to the caller.
    sess.run([opt.dc_loss, opt.discriminator_optimizer], feed_dict=feed_dict)
    sess.run([opt.generator_loss, opt.generator_optimizer], feed_dict=feed_dict)

    avg_cost = reconstruct_loss
    return emb, avg_cost
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj, prior):
    """Read the embedding, then run AE and GAN updates against a fitted prior.

    Changes from the previous version:
      * The dropout placeholder used to be overwritten with 0 before the
        optimiser steps, so ``FLAGS.dropout`` was never applied. The embedding
        is still read with dropout 0; the optimiser steps now receive
        ``FLAGS.dropout``.
      * A first ``np.random.normal`` sample that was never used is no longer
        drawn.

    Args:
        model: Object exposing ``z_mean``.
        opt: Object exposing the optimiser ops and losses fetched below.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with ``'dropout'`` and ``'real_distribution'``
            entries.
        adj: Object whose ``shape[0]`` sizes the random prior sample.
        prior: Array averaged along axis 1; ``norm.fit`` of those averages
            supplies the mean and standard deviation of the sampled prior.

    Returns:
        ``(emb, avg_cost)``: the embedding read before the updates and the
        reconstruction cost of the last autoencoder step.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    dropout_ph = placeholders['dropout']

    # Embedding is read with dropout switched off.
    feed_dict[dropout_ph] = 0
    emb = sess.run(model.z_mean, feed_dict=feed_dict)

    # NOTE(review): `norm` is presumably scipy.stats.norm — confirm against
    # the file's imports.
    mu, sigma = norm.fit(np.mean(prior, axis=1))
    z_real_dist_prior = np.random.normal(mu, sigma, (adj.shape[0], FLAGS.hidden2))

    # Optimiser steps use the configured dropout rate.
    feed_dict[dropout_ph] = FLAGS.dropout
    feed_dict[placeholders['real_distribution']] = z_real_dist_prior

    # NOTE(review): the original indentation was lost; only the autoencoder
    # step is assumed to sit inside the 5-iteration loop — confirm.
    for _ in range(5):
        _, reconstruct_loss = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)

    sess.run([opt.dc_loss, opt.discriminator_optimizer], feed_dict=feed_dict)
    sess.run([opt.generator_loss, opt.generator_optimizer], feed_dict=feed_dict)

    avg_cost = reconstruct_loss
    return emb, avg_cost
def predict(model, opt, sess, adj_norm, adj_label, features, placeholders, adj, prior):
    """Evaluate ``model.z_mean`` with dropout disabled.

    The previous version first fed ``FLAGS.dropout`` and overwrote it with 0
    on the next line; only the effective value is written now.

    Args:
        model: Object exposing ``z_mean``.
        opt, adj, prior: Unused; kept so the signature matches ``update``.
        sess: Session used to run the op.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with a ``'dropout'`` entry.

    Returns:
        The value of ``model.z_mean``.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict[placeholders['dropout']] = 0
    return sess.run(model.z_mean, feed_dict=feed_dict)
def test(saver, adj, features, meta_dir, checkpoints_dir):
    """Build the ``gae_gan`` graph, restore a checkpoint and return ``get_new_adj``'s result.

    NOTE(review): as written this function looks unfinished; the comments
    below flag the statements that appear unable to run. Confirm intent
    before relying on it.

    Args:
        saver: Rebound inside the function before it is ever read.
        adj: Adjacency matrix; used for preprocessing and the weight terms.
        features: Sparse feature matrix (must support ``tocoo()``).
        meta_dir: Path passed to ``tf.train.import_meta_graph``.
        checkpoints_dir: Directory passed to ``tf.train.latest_checkpoint``.

    Returns:
        The value returned by ``get_new_adj``.
    """
    adj_norm, adj_norm_sparse = preprocess_graph(adj)
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }
    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]
    # NOTE(review): `adj_label` is not assigned anywhere in this function;
    # unless it exists at module level this line raises NameError.
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    # Create model
    # The `saver` argument is discarded here (and rebound again below).
    saver = tf.train.Saver(max_to_keep=10)
    model = None
    # NOTE(review): `model_str` is not defined in this function; presumably a
    # module-level name — verify.
    if model_str == "gae_gan":
        model = gaegan(placeholders, num_features, num_nodes, features_nonzero)
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    # NOTE(review): 0 is passed as the first positional argument while `name`
    # is also given by keyword; the sibling train() uses
    # tf.get_variable('global_step', trainable=False, initializer=0) — this
    # call looks wrong, confirm.
    global_steps = tf.get_variable(0, name="globals")
    opt = 0
    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gae_gan':
            opt = Optimizergaegan(preds=model.x_tilde,
                                  labels=tf.reshape(
                                      tf.sparse_tensor_to_dense(
                                          placeholders['adj_orig'],
                                          validate_indices=False), [-1]),
                                  model=model,
                                  num_nodes=num_nodes,
                                  pos_weight=pos_weight,
                                  norm=norm,
                                  global_step=global_steps)
    # session part
    # NOTE(review): this session is never closed; `sess` is rebound by the
    # `with` statement below.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    cost_val = []
    acc_val = []
    # load network
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(meta_dir)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
        # NOTE(review): sess.run() is called without fetches and is expected
        # to raise TypeError.
        sess.run()
        # NOTE(review): elsewhere in this file get_new_adj is called as
        # get_new_adj(feed_dict, sess, model); the original indentation was
        # lost, so whether this line belongs inside the `with` is a guess.
        new_adj = get_new_adj(feed_dict)
    return new_adj
def update_kl(model, opt, sess, adj_norm, adj_label, features, p, placeholders, pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop, idx, label):
    """Run five KL optimiser steps and read the embeddings afterwards.

    Args:
        model: Object exposing ``embeddings``.
        opt: Object exposing ``opt_op_kl`` and ``cost_kl``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        p: Value fed to the ``'p'`` placeholder.
        placeholders: Mapping holding the placeholders fed below.
        pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop: Values fed to
            the placeholders of the same name.
        idx, label: Not used by the active code (they were only referenced by
            a disabled debugging block).

    Returns:
        ``(emb, avg_cost)``: embeddings read after the updates and the KL cost
        of the last step.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    extra_feeds = (
        ('dropout', FLAGS.dropout),
        ('attn_drop', attn_drop),
        ('ffd_drop', ffd_drop),
        ('pos_weights', pos_weights),
        ('fea_pos_weights', fea_pos_weights),
        ('norm', norm),
        ('p', p),
    )
    for key, value in extra_feeds:
        feed_dict[placeholders[key]] = value

    # (A disabled string block that printed every feed_dict entry sat here.)

    # NOTE(review): the original indentation was lost; the loop is assumed to
    # contain only the KL step — confirm.
    for _ in range(5):
        _, kl_loss = sess.run([opt.opt_op_kl, opt.cost_kl], feed_dict=feed_dict)

    # (A disabled string block sat here: it fetched opt.grads_vars and
    # model.cluster_layer_q and wrote per-node gradient norms, q values and
    # labels for cluster `idx` to ./norm_q.txt.)

    emb = sess.run(model.embeddings, feed_dict=feed_dict)
    avg_cost = kl_loss
    return emb, avg_cost
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj):
    """Run a single optimiser step and return the costs it produced.

    Args:
        model, adj: Unused; kept for a uniform call signature.
        opt: Object exposing ``opt_op``, ``cost``, ``structure_cost``,
            ``attribute_cost`` and ``reconstruction_errors``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with a ``'dropout'`` entry.

    Returns:
        ``(train_loss, loss_struc, loss_attr, rec_error)``.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict[placeholders['dropout']] = FLAGS.dropout

    fetches = [
        opt.opt_op,
        opt.cost,
        opt.structure_cost,
        opt.attribute_cost,
        opt.reconstruction_errors,
    ]
    outputs = sess.run(fetches, feed_dict=feed_dict)
    # outputs[0] is the result of the optimiser op and is discarded.
    train_loss, loss_struc, loss_attr, rec_error = outputs[1:]
    return train_loss, loss_struc, loss_attr, rec_error
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj):
    """Run a single optimiser step and return the reconstruction errors and cost.

    Args:
        model, adj: Unused; kept for a uniform call signature.
        opt: Object exposing ``opt_op``, ``cost`` and
            ``reconstruction_errors``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with a ``'dropout'`` entry.

    Returns:
        ``(reconstruction_errors, reconstruct_loss)`` — note the errors come
        first.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict[placeholders['dropout']] = FLAGS.dropout

    outputs = sess.run(
        [opt.opt_op, opt.cost, opt.reconstruction_errors],
        feed_dict=feed_dict)
    reconstruct_loss = outputs[1]
    reconstruction_errors = outputs[2]
    return reconstruction_errors, reconstruct_loss
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj,B):
    """Run a single optimiser step and return every loss term it produced.

    Args:
        model: Object exposing ``B``, which is passed to
            ``construct_feed_dict`` as its first argument.
        opt: Object exposing ``opt_op``, ``cost``, ``re_loss``, ``kl_loss``,
            ``structure_cost``, ``attribute_cost`` and
            ``reconstruction_errors``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with a ``'dropout'`` entry.
        adj: Unused; kept for a uniform call signature.
        B: Passed to ``construct_feed_dict`` as its last argument.

    Returns:
        ``(train_loss, re_loss, kl_loss, loss_stru, loss_attr, rec_error)``.
    """
    feed_dict = construct_feed_dict(
        model.B, adj_norm, adj_label, features, placeholders, B)
    feed_dict[placeholders['dropout']] = FLAGS.dropout

    fetches = [
        opt.opt_op,
        opt.cost,
        opt.re_loss,
        opt.kl_loss,
        opt.structure_cost,
        opt.attribute_cost,
        opt.reconstruction_errors,
    ]
    outputs = sess.run(fetches, feed_dict=feed_dict)
    # outputs[0] is the result of the optimiser op and is discarded.
    train_loss, re_loss, kl_loss, loss_stru, loss_attr, rec_error = outputs[1:]
    return train_loss, re_loss, kl_loss, loss_stru, loss_attr, rec_error
def warm_update_test(model, opt, sess, adj_norm, adj_label, features, placeholders, pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop):
    """Evaluate ``model.embeddings`` without running any optimiser op.

    Args:
        model: Object exposing ``embeddings``.
        opt: Unused; kept for a uniform call signature.
        sess: Session used to run the op.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping holding the placeholders fed below.
        pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop: Values fed to
            the placeholders of the same name.

    Returns:
        The value of ``model.embeddings``.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    # FLAGS.dropout is fed as-is; callers control attn_drop / ffd_drop.
    extra_feeds = (
        ('dropout', FLAGS.dropout),
        ('pos_weights', pos_weights),
        ('fea_pos_weights', fea_pos_weights),
        ('norm', norm),
        ('attn_drop', attn_drop),
        ('ffd_drop', ffd_drop),
    )
    for key, value in extra_feeds:
        feed_dict[placeholders[key]] = value

    return sess.run(model.embeddings, feed_dict=feed_dict)
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj):
    """Run one optimiser step with dropout 0 and return the embedding and cost.

    Changes from the previous version (results are unchanged):
      * The ``FLAGS.dropout`` write that was overwritten with 0 on the next
        line is removed; only the effective value 0 is fed.
      * The ``for j in range(1)`` loop, which ran exactly once, is unrolled.

    NOTE(review): because dropout is fixed at 0 here, ``FLAGS.dropout`` has no
    influence on this training step; the embedding is fetched in the same
    ``sess.run`` call, so changing that would also affect the returned
    embedding — confirm the intended behaviour before altering it.

    Args:
        model: Object exposing ``z_mean``.
        opt: Object exposing ``opt_op`` and ``cost``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with a ``'dropout'`` entry.
        adj: Unused; kept for a uniform call signature.

    Returns:
        ``(emb, avg_cost)`` fetched from the same ``sess.run`` call as the
        optimiser op.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict[placeholders['dropout']] = 0

    _, avg_cost, emb = sess.run(
        [opt.opt_op, opt.cost, model.z_mean], feed_dict=feed_dict)
    return emb, avg_cost
def update_test(model, opt, sess, adj_norm, adj_label, features, placeholders, pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop):
    """Evaluate ``model.embeddings`` without running any optimiser op.

    Args:
        model: Object exposing ``embeddings``.
        opt: Unused; kept for a uniform call signature.
        sess: Session used to run the op.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping holding the placeholders fed below.
        pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop: Values fed to
            the placeholders of the same name.

    Returns:
        The value of ``model.embeddings``.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    for key, value in (
            ('dropout', FLAGS.dropout),
            ('pos_weights', pos_weights),
            ('fea_pos_weights', fea_pos_weights),
            ('norm', norm),
            ('attn_drop', attn_drop),
            ('ffd_drop', ffd_drop)):
        feed_dict[placeholders[key]] = value

    # (A disabled string block that printed every feed_dict entry sat here.)
    emb_ind = sess.run(model.embeddings, feed_dict=feed_dict)
    return emb_ind
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop):
    """Run five optimiser steps and return the cost of the last one.

    Args:
        model: Unused; kept for a uniform call signature.
        opt: Object exposing ``opt_op`` and ``cost``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping holding the placeholders fed below.
        pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop: Values fed to
            the placeholders of the same name.

    Returns:
        The value of ``opt.cost`` from the final step.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    for key, value in (
            ('dropout', FLAGS.dropout),
            ('attn_drop', attn_drop),
            ('ffd_drop', ffd_drop),
            ('pos_weights', pos_weights),
            ('fea_pos_weights', fea_pos_weights),
            ('norm', norm)):
        feed_dict[placeholders[key]] = value

    # NOTE(review): the original indentation was lost; the loop is assumed to
    # contain only the optimiser step — confirm.
    avg_cost = 0
    for _ in range(5):
        avg_cost = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)[1]
    return avg_cost
def load_model(placeholders, model, opt, adj_train, test_edges, test_edges_false, features, sess, name="single_fold"):
    """Restore saved weights into ``sess`` and compute the score matrix.

    Changes from the previous version (results are unchanged):
      * ``preprocess_graph(adj)`` was executed a second time after the feed
        dictionary had been built and its result discarded; the redundant
        call is removed.
      * ``adj.sum()`` and the squared node count are computed once.

    Args:
        placeholders: Mapping with ``'dropout'``, ``'is_training'``, ``'norm'``
            and ``'pos_weight'`` entries (plus whatever
            ``construct_feed_dict`` reads).
        model: Forwarded to ``get_score_matrix``.
        opt, test_edges, test_edges_false: Unused; kept for interface
            compatibility.
        adj_train: Sparse adjacency matrix of the training graph.
        features: Forwarded to ``construct_feed_dict``.
        sess: Session the checkpoint is restored into.
        name: Checkpoint name appended to the module-level ``save_dir``.

    Returns:
        ``(adj_score, z_activated)`` as returned by ``get_score_matrix``.
    """
    adj = adj_train

    print('----------------')

    # Recomputed for every fold.
    num_pairs = adj.shape[0] * adj.shape[0]
    num_entries = adj.sum()
    pos_weight = float(num_pairs - num_entries) / num_entries
    norm = num_pairs / float((num_pairs - num_entries) * 2)

    # Labels are the training adjacency plus the identity.
    adj_label = sparse_to_tuple(adj_train + sp.eye(adj_train.shape[0]))

    # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2)
    adj_norm = preprocess_graph(adj)

    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict.update({
        placeholders['dropout']: FLAGS.dropout,
        placeholders['is_training']: True,
        placeholders['norm']: norm,
        placeholders['pos_weight']: pos_weight,
    })

    saver = tf.train.Saver()
    saver.restore(sess=sess, save_path=(save_dir + name))
    print('Model restored')

    # Decrease MC samples for pubmed
    S = 5 if dataset_str == 'pubmed' else 15

    adj_score, z_activated = get_score_matrix(
        sess, placeholders, feed_dict, model, S=S, save_qual=True)
    return adj_score, z_activated
def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj):
    """Read the current embedding, then run one round of AE and GAN updates.

    Fix: the dropout placeholder used to be set to ``FLAGS.dropout`` and then
    immediately overwritten with 0, and the 0 stayed in the feed dictionary
    for every optimiser step, so the configured dropout rate never took
    effect. The embedding is still read with dropout 0, but the optimiser
    steps now receive ``FLAGS.dropout``.

    Args:
        model: Object exposing ``z_mean``.
        opt: Object exposing ``opt_op``, ``cost``, ``dc_loss``,
            ``discriminator_optimizer``, ``generator_loss`` and
            ``generator_optimizer``.
        sess: Session used to run the ops.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping with ``'dropout'`` and ``'real_distribution'``
            entries.
        adj: Object whose ``shape[0]`` sizes the random prior sample.

    Returns:
        ``(emb, avg_cost)``: the embedding read before the updates and the
        reconstruction cost of the last autoencoder step.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    dropout_ph = placeholders['dropout']

    # Embedding is read with dropout switched off.
    feed_dict[dropout_ph] = 0
    emb = sess.run(model.z_mean, feed_dict=feed_dict)

    # Optimiser steps use the configured dropout rate.
    feed_dict[dropout_ph] = FLAGS.dropout
    feed_dict[placeholders['real_distribution']] = np.random.randn(
        adj.shape[0], FLAGS.hidden2)

    # NOTE(review): the original indentation was lost; only the autoencoder
    # step is assumed to sit inside the 5-iteration loop — confirm.
    for _ in range(5):
        _, reconstruct_loss = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)

    # One discriminator step, then one generator step; their losses are not
    # reported to the caller.
    sess.run([opt.dc_loss, opt.discriminator_optimizer], feed_dict=feed_dict)
    sess.run([opt.generator_loss, opt.generator_optimizer], feed_dict=feed_dict)

    avg_cost = reconstruct_loss
    return emb, avg_cost
def compute_q(model, opt, sess, adj_norm, adj_label, features, placeholders, pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop):
    """Evaluate ``model.cluster_layer_q`` without running any optimiser op.

    Args:
        model: Object exposing ``cluster_layer_q``.
        opt: Unused; kept for a uniform call signature.
        sess: Session used to run the op.
        adj_norm, adj_label, features: Forwarded to ``construct_feed_dict``.
        placeholders: Mapping holding the placeholders fed below.
        pos_weights, fea_pos_weights, norm, attn_drop, ffd_drop: Values fed to
            the placeholders of the same name.

    Returns:
        The value of ``model.cluster_layer_q``.
    """
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    for key, value in (
            ('dropout', FLAGS.dropout),
            ('attn_drop', attn_drop),
            ('ffd_drop', ffd_drop),
            ('pos_weights', pos_weights),
            ('fea_pos_weights', fea_pos_weights),
            ('norm', norm)):
        feed_dict[placeholders[key]] = value

    # (A disabled string block that printed every feed_dict entry sat here.)
    q = sess.run(model.cluster_layer_q, feed_dict=feed_dict)
    return q
return roc_score, ap_score, accuracy cost_val = [] acc_val = [] val_roc_score = [] adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) epoch_time = 0 # Train model for epoch in range(FLAGS.epochs): t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) # global variable feed_dict.update({placeholders['in_drop']: FLAGS.in_drop }) # update is a methold of python dictionary feed_dict.update({placeholders['attn_drop']: FLAGS.attn_drop}) feed_dict.update({placeholders['feat_drop']: FLAGS.feat_drop}) # Run single weight update t0 = time.time() outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict) t1 = time.time() epoch_time += t1 - t0 # write summary if epoch % 5 == 0 and FLAGS.write_summary: # Train set summary summary = sess.run(merged_summary, feed_dict=feed_dict) summary_writer.add_summary(summary, epoch)
pos_weight=pos_weight, norm=norm) logging.info('initialize session') # Initialize session sess = tf.Session() sess.run(tf.global_variables_initializer()) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) logging.info('train model') # Train model for epoch in range(FLAGS.epochs): t = time.time() logging.info('construct dictionary') # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) logging.info('The epoch is: {}'.format(epoch)) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict) # Compute average loss avg_cost = outs[1] avg_accuracy = outs[2] print("Epoch: %d, train_loss = %s, train_acc = %s, time cost = %s" % (epoch, str(avg_cost), str(avg_accuracy), str(time.time() - t))) vae = True # write the embedding to file feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
def train_gcn(features, adj_train, train_edges, train_edges_false, test_edges, test_edges_false):
    """Train a graph (variational) autoencoder and return ``model.z_mean``.

    Changes from the previous version:
      * The embedding is now read with the dropout placeholder set to 0; it
        used to be read with the training value ``FLAGS.dropout`` still fed.
      * The session is closed when the function returns (it used to leak).
      * The feed dictionary, whose inputs never change, is built once instead
        of once per epoch; this also keeps it defined when
        ``FLAGS.epochs`` is 0.
      * An unsupported ``FLAGS.model`` raises ``ValueError`` up front instead
        of failing later on an unbound ``opt``.
      * The unused ``adj_orig`` computation and epoch timer are removed.

    NOTE(review): the flags are defined on every call; a second call in the
    same process presumably fails with a duplicate-flag error — confirm and
    consider moving the definitions to module level.

    Args:
        features: Sparse feature matrix (must support ``tocoo()``); replaced
            by an identity matrix when ``FLAGS.features`` is 0.
        adj_train: Sparse adjacency matrix of the training graph.
        train_edges, train_edges_false: Forwarded to
            ``construct_optimizer_list`` to build the loss mask.
        test_edges, test_edges_false: Unused; kept for interface
            compatibility.

    Returns:
        The value of ``model.z_mean`` after training.

    Raises:
        ValueError: If ``FLAGS.model`` is neither ``'gcn_ae'`` nor
            ``'gcn_vae'``.
    """
    # Settings
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.')
    flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
    flags.DEFINE_integer('hidden1', 96, 'Number of units in hidden layer 1.')
    flags.DEFINE_integer('hidden2', 48, 'Number of units in hidden layer 2.')
    flags.DEFINE_float('weight_decay', 0.,
                       'Weight for L2 loss on embedding matrix.')
    flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
    flags.DEFINE_string('model', 'gcn_vae', 'Model string.')
    flags.DEFINE_integer('features', 1,
                         'Whether to use features (1) or not (0).')

    model_str = FLAGS.model
    if model_str not in ('gcn_ae', 'gcn_vae'):
        raise ValueError("Unsupported model string: %r" % (model_str,))

    # 1-dim index array, used in the cost function to only focus on those
    # interactions with high confidence
    mask_index = construct_optimizer_list(features.shape[0], train_edges,
                                          train_edges_false)

    adj = adj_train

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float64),
        'adj': tf.sparse_placeholder(tf.float64),
        'adj_orig': tf.sparse_placeholder(tf.float64),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Create model
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, features_nonzero)
    else:
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)

    pos_weight = 1
    norm = 1

    # Optimizer
    with tf.name_scope('optimizer'):
        labels = tf.reshape(
            tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                      validate_indices=False), [-1])
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=labels,
                              pos_weight=pos_weight,
                              norm=norm,
                              mask=mask_index)
        else:
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=labels,
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm,
                               mask=mask_index)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # The feed inputs are identical for every epoch, so build them once.
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(FLAGS.epochs):
            # Run single weight update
            outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)
            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(outs[1]))

        print("Optimization Finished!")

        # Return the embedding for each node with dropout disabled.
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

    return emb
val_roc_score = [] adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Train model for epoch in range(FLAGS.epochs): t = time.time() edge_types_iterator = list(range(num_edge_types)) for et in edge_types_iterator: # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders, et, idx2edge_type) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict) # Compute average loss avg_cost = outs[1] avg_accuracy = outs[2] roc_curr, ap_curr = get_roc_score(val_positive, val_negative) val_roc_score.append(roc_curr) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(avg_cost), "train_acc=", "{:.5f}".format(avg_accuracy), "val_roc=", "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr), "time=", "{:.5f}".format(time.time() - t))
def train_gcn(features, adj_train, args, graph_type):
    """Train a graph (variational) autoencoder and return ``model.z_mean``.

    Changes from the previous version:
      * The embedding is now read with the dropout placeholder set to 0; it
        used to be read with the training value ``args.dropout`` still fed.
      * The session is closed when the function returns (it used to leak).
      * The feed dictionary, whose inputs never change, is built once instead
        of once per epoch; this also keeps it defined when the epoch count
        is 0.
      * An unsupported ``args.model`` raises ``ValueError`` up front instead
        of failing later on an unbound ``opt``.
      * The unused ``adj_orig`` computation and epoch timer are removed.

    Args:
        features: Sparse feature matrix (must support ``tocoo()``).
        adj_train: Sparse adjacency matrix of the training graph.
        args: Object with attributes ``model``, ``hidden1``, ``hidden2``,
            ``lr``, ``dropout``, ``epochs_simi`` and ``epochs_ppi``.
        graph_type: ``"sequence_similarity"`` selects ``args.epochs_simi``;
            any other value selects ``args.epochs_ppi``.

    Returns:
        The value of ``model.z_mean`` after training.

    Raises:
        ValueError: If ``args.model`` is neither ``'gcn_ae'`` nor
            ``'gcn_vae'``.
    """
    model_str = args.model
    if model_str not in ('gcn_ae', 'gcn_vae'):
        raise ValueError("Unsupported model string: %r" % (model_str,))

    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float64),
        'adj': tf.sparse_placeholder(tf.float64),
        'adj_orig': tf.sparse_placeholder(tf.float64),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Create model
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, features_nonzero,
                           args.hidden1, args.hidden2)
    else:
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero, args.hidden1, args.hidden2)

    # Optimizer
    with tf.name_scope('optimizer'):
        labels = tf.reshape(
            tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                      validate_indices=False), [-1])
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=labels,
                              pos_weight=1,
                              norm=1,
                              lr=args.lr)
        else:
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=labels,
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=1,
                               norm=1,
                               lr=args.lr)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # use different epochs for ppi and similarity network
    if graph_type == "sequence_similarity":
        epochs = args.epochs_simi
    else:
        epochs = args.epochs_ppi

    # The feed inputs are identical for every epoch, so build them once.
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: args.dropout})

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(epochs):
            # Run single weight update
            outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)
            if epoch % 10 == 0:
                print("Epoch:", '%04d' % (epoch+1), "train_loss=", "{:.5f}".format(outs[1]))

        print("Optimization Finished!")

        # Return the embedding for each node with dropout disabled.
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

    return emb
return roc_score, ap_score cost_val = [] acc_val = [] val_roc_score = [] adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) adj_label = tf.SparseTensorValue(adj_label[0], adj_label[1], adj_label[2]) features_label = sparse_to_tuple(fea_train) features_label = tf.SparseTensorValue(features_label[0], features_label[1], features_label[2]) # Construct feed dictionary feed_dict = construct_feed_dict(Fn_train, adj_label, Fa_train, features_label, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Train model for epoch in range(FLAGS.epochs): t = time.time() # Run single weight update #outs = sess.run([opt.opt_op, opt.cost, opt.accuracy, opt.log_lik, opt.kl], feed_dict=feed_dict) outs = sess.run([ opt.opt_op, opt.cost, opt.accuracy, opt.cost_recon, opt.kl, opt.py, opt.entropy_y, opt.cost_a ], feed_dict=feed_dict) # Compute average loss avg_cost = outs[1]
def train(unused):
    """Build the ``cdattack`` graph, optionally restore/train it, and print result matrices.

    When ``FLAGS.test`` is false the model is trained for ``FLAGS.epochs``
    epochs and checkpoints are written under ``./checkpoints/<timestamp>/``;
    when it is true a checkpoint from ``FLAGS.trained_our_path`` is restored
    and the training loop is skipped.

    NOTE(review): the original indentation of this function was lost; the
    nesting of the branches inside the epoch loop below is a best-effort
    reconstruction and must be confirmed against the upstream source.

    Args:
        unused: Ignored.

    Returns:
        None.
    """
    if_drop_edge = True
    if_save_model = not FLAGS.test
    if_train_dis = False  # if train the community detection while training the generator part
    restore_trained_our = FLAGS.test
    showed_target_idx = 0  # the target index group of targets you want to show
    ##################################
    ### read and process the graph
    model_str = FLAGS.model
    dataset_str = FLAGS.dataset
    # Load data
    # NOTE(review): there is no fallback branch; for any other dataset name
    # `adj`, `features` and `target_list` stay unassigned.
    if FLAGS.dataset == "dblp":
        adj = sp.load_npz("data/dblp/dblp_medium_adj.npz")
        features = np.load("data/dblp/dblp_medium_features.npy")
        features_normlize = normalize(features, axis=0, norm='max')
        features = sp.csr_matrix(features_normlize)
        target_list = np.load("data/dblp/dblp_medium_label.npy")
    elif FLAGS.dataset == "finance":
        adj = sp.load_npz('./data/finance/Finance_large_adj.npz')
        features = np.load("data/finance/Finance_large_features.npy")
        features_normlize = normalize(features, axis=0, norm='max')
        features = sp.csr_matrix(features_normlize)
        target_list = np.load("data/finance/Finance_large_label.npy")
    # Store original adjacency matrix (without diagonal entries) for later
    a = 1
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless
    # Some preprocessing
    adj_norm, adj_norm_sparse = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]
    cost_val = []
    acc_val = []
    cost_val = []
    acc_val = []
    val_roc_score = []
    # Labels are the diagonal-free adjacency plus the identity.
    adj_label = adj_orig + sp.eye(adj.shape[0])
    adj_label_sparse = adj_label
    adj_label = sparse_to_tuple(adj_label)
    if_drop_edge = True
    ## set the checkpoint path
    checkpoints_dir_base = "./checkpoints"
    current_time = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    checkpoints_dir = os.path.join(checkpoints_dir_base, current_time,
                                   current_time)
    tf.reset_default_graph()
    global_steps = tf.get_variable('global_step',
                                   trainable=False,
                                   initializer=0)
    new_learning_rate = tf.train.exponential_decay(FLAGS.learn_rate_init,
                                                   global_step=global_steps,
                                                   decay_steps=10000,
                                                   decay_rate=0.98)
    new_learn_rate_value = FLAGS.learn_rate_init
    ## set the placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
        'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
        'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"),
        'dropout': tf.placeholder_with_default(0., shape=(),
                                               name="ph_dropout"),
    }
    # build models
    # NOTE(review): for any model string other than "cdattack", `model` stays
    # None and `opt` stays 0, so later attribute accesses would fail.
    model = None
    if model_str == "cdattack":
        model = cdattack(placeholders, num_features, num_nodes,
                         features_nonzero, new_learning_rate, target_list,
                         FLAGS.alpha, FLAGS.comm_name)
        model.build_model()
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    opt = 0
    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'cdattack':
            opt = Optimizercdattack(preds=tf.reshape(model.x_tilde, [-1]),
                                    labels=tf.reshape(
                                        tf.sparse_tensor_to_dense(
                                            placeholders['adj_orig'],
                                            validate_indices=False), [-1]),
                                    model=model,
                                    num_nodes=num_nodes,
                                    pos_weight=pos_weight,
                                    norm=norm,
                                    target_list=target_list,
                                    global_step=global_steps,
                                    new_learning_rate=new_learning_rate)
    # init the sess
    # NOTE(review): this session is not closed anywhere in the function.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = ""
    var_list = tf.global_variables()
    saver = tf.train.Saver(var_list, max_to_keep=10)
    if if_save_model:
        os.mkdir(os.path.join(checkpoints_dir_base, current_time))
        saver.save(sess, checkpoints_dir)  # save the graph
    if restore_trained_our:
        checkpoints_dir_our = "./checkpoints"
        checkpoints_dir_our = os.path.join(checkpoints_dir_our,
                                           FLAGS.trained_our_path)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir_our))
        print("model_load_successfully")
    # NOTE(review): assumed to be at function level (the feed dict is used by
    # the training loop, which runs only when restore_trained_our is false).
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict)
    modified_adj = get_new_adj(feed_dict, sess, model)
    modified_adj = sp.csr_matrix(modified_adj)
    #####################################################
    G_loss_min = 1000
    if FLAGS.test == False:
        for epoch in range(FLAGS.epochs):
            t = time.time()
            # restore_trained_our equals FLAGS.test, which is false on this
            # path, so the else branch is the one taken.
            if restore_trained_our:
                sess.run(opt.G_min_op, feed_dict=feed_dict)
            else:  # it is the new model
                # Second half of training: generator steps.
                if epoch >= int(FLAGS.epochs / 2):
                    sess.run(opt.G_min_op, feed_dict=feed_dict)
                    if if_train_dis == True:
                        sess.run(opt.D_min_op, feed_dict=feed_dict)
                # run D optimizer
                # First half of training: clean discriminator steps.
                if epoch < int(FLAGS.epochs / 2):
                    sess.run(opt.D_min_op_clean, feed_dict=feed_dict)
            if epoch % 50 == 0:
                print("Epoch:", '%04d' % (epoch + 1), "time=",
                      "{:.5f}".format(time.time() - t))
                comm_loss_clean, comm_loss, G_loss, new_learn_rate_value = sess.run(
                    [
                        opt.D_mincut_loss_clean, opt.D_mincut_loss,
                        opt.G_comm_loss, new_learning_rate
                    ],
                    feed_dict=feed_dict)
                new_adj = model.new_adj_output.eval(session=sess,
                                                    feed_dict=feed_dict)
                temp_pred = new_adj.reshape(-1)
                temp_ori = adj_label_sparse.todense().A.reshape(-1)
                print(
                    "Step %d:Loss Lu_clean = %.7f , Loss Lu = %.7f Loss Lg: loss=%.7f , LR=%.7f"
                    % (epoch, comm_loss_clean, comm_loss, G_loss,
                       new_learn_rate_value))
                ## check the D_loss_min
                # Checkpoint when the generator loss reaches a new minimum in
                # the second half of training.
                if (G_loss < G_loss_min) and (
                        epoch > int(FLAGS.epochs / 2) + 1) and (if_save_model):
                    saver.save(sess,
                               checkpoints_dir,
                               global_step=epoch,
                               write_meta_graph=False)
                    print("min G_loss new")
                if G_loss < G_loss_min:
                    G_loss_min = G_loss
            if (epoch % 200 == 0) and if_save_model:
                saver.save(sess,
                           checkpoints_dir,
                           global_step=epoch,
                           write_meta_graph=False)
                print("Save the model at epoch:", '%04d' % (epoch + 1))
    # if_save_model is `not FLAGS.test`, so this final save runs exactly when
    # the training loop above ran.
    if if_save_model:
        saver.save(sess,
                   checkpoints_dir,
                   global_step=FLAGS.epochs,
                   write_meta_graph=False)
    print("Optimization Finished!")
    new_adj = get_new_adj(feed_dict, sess, model)
    ##### The final results ######
    # Final evaluation is done with dropout disabled.
    feed_dict.update({placeholders['dropout']: 0})
    pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict)
    print("*" * 15)
    print("The modified matrics")
    print_M1(target_list, pred_dis_res, FLAGS.n_clusters)
    print("*" * 15)
    print_M2(target_list, pred_dis_res, FLAGS.n_clusters)
    print("*" * 15)
    new_adj = get_new_adj(feed_dict, sess, model)
    x_tilde_out = model.new_adj_output.eval(session=sess, feed_dict=feed_dict)
    temp_pred = new_adj.reshape(-1)
    temp_ori = adj_norm_sparse.todense().A.reshape(-1)
    return
def train():
    """Train the ``gae_gan`` generator on feature-flipped data and score it with GCN.

    The function works entirely on module-level state (``adj``,
    ``features_csr``, ``FLAGS``, ``model_str``, ``seed``, ``if_save_model``,
    ``restore_trained_our``, ``run_options`` and the model dimensions).

    Steps, in order:

    1. Strip self loops from ``adj`` and flip features with
       ``randomly_flip_features``.
    2. Run the GCN baselines ("clean", "original", "upper_bound").
    3. Build the ``gaegan`` model and its ``Optimizergaegan`` and run
       ``opt.G_min_op`` for ``FLAGS.epochs`` steps, logging every 50 epochs.
    4. With dropout disabled, read the generated adjacency and features from
       the trained model and re-run GCN on them.

    Returns:
        Tuple ``(new_adj, testacc_clean, testacc, testacc_new, testacc_new2,
        testacc_new3)`` where ``new_adj`` is the dense generated adjacency
        with its diagonal zeroed and the remaining entries are the test
        accuracies reported by ``GCN.run``.

    Raises:
        ValueError: if ``model_str`` is not ``"gae_gan"``.
    """
    if model_str != "gae_gan":
        # Previously this surfaced later as an AttributeError on None / 0.
        raise ValueError(
            "train() only supports model_str == 'gae_gan', got %r" % (model_str,))

    # Delete self loops from the input adjacency.
    adj_orig = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj_new = adj_orig  # the structure itself is left un-noised here

    features_new_csr = randomly_flip_features(features_csr,
                                              k=FLAGS.k,
                                              seed=seed + 5)
    feature_new = sparse_to_tuple(features_new_csr.tocoo())

    ####################### the clean and noised GCN ############################
    testacc_clean, _ = GCN.run(FLAGS.dataset, adj_orig, features_csr,
                               name="clean")
    testacc, _ = GCN.run(FLAGS.dataset, adj_new, features_new_csr,
                         name="original")
    testacc_upper, _ = GCN.run(FLAGS.dataset, adj_new, features_csr,
                               name="upper_bound")
    print(testacc_clean)
    print(testacc)
    print(testacc_upper)

    adj_norm, _ = preprocess_graph(adj_new)
    adj_label_sparse = adj_new + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label_sparse)

    ## set the checkpoint path
    checkpoints_dir_base = "./checkpoints"
    current_time = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    checkpoints_dir = os.path.join(checkpoints_dir_base, current_time,
                                   current_time)

    global_steps = tf.get_variable('global_step',
                                   trainable=False,
                                   initializer=0)
    new_learning_rate = tf.train.exponential_decay(FLAGS.learn_rate_init,
                                                   global_step=global_steps,
                                                   decay_steps=10000,
                                                   decay_rate=0.98)

    ## set the placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
        'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
        'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"),
        'dropout': tf.placeholder_with_default(0., shape=(),
                                               name="ph_dropout"),
    }

    # build models
    model = gaegan(placeholders, num_features, num_nodes, features_nonzero,
                   new_learning_rate)
    model.build_model()

    n_pairs = adj.shape[0] * adj.shape[0]
    pos_weight = float(n_pairs - adj.sum()) / adj.sum()
    norm = n_pairs / float((n_pairs - adj.sum()) * 2)

    # Optimizer
    with tf.name_scope('optimizer'):
        opt = Optimizergaegan(
            preds=tf.reshape(model.x_tilde, [-1]),
            labels=tf.reshape(
                tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                          validate_indices=False), [-1]),
            model=model,
            num_nodes=num_nodes,
            pos_weight=pos_weight,
            norm=norm,
            global_step=global_steps,
            new_learning_rate=new_learning_rate)

    # init the sess
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Only encoder / generator variables are checkpointed.
    var_list = [
        var for var in tf.global_variables()
        if ("encoder" in var.name) or ('generate' in var.name)
    ]
    saver = tf.train.Saver(var_list, max_to_keep=10)

    # The final checkpoint below is written unconditionally, so the target
    # directory must exist even when if_save_model is false.  makedirs also
    # copes with a missing "./checkpoints" parent, which os.mkdir did not.
    os.makedirs(os.path.join(checkpoints_dir_base, current_time),
                exist_ok=True)
    if if_save_model:
        saver.save(sess, checkpoints_dir)  # save the graph

    if restore_trained_our:
        # NOTE(review): the checkpoint path is hard-coded and
        # FLAGS.trained_our_path is not consulted -- confirm this is intended.
        saver.restore(
            sess,
            os.path.join("./checkpoints", "191215231708",
                         "191215231708-1601"))
        print("model_load_successfully")

    feed_dict = construct_feed_dict(adj_norm, adj_label, feature_new,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    #### save new_adj without norm #############
    if restore_trained_our:
        modified_adj = sp.csr_matrix(get_new_adj(feed_dict, sess, model))
        sp.save_npz("transfer_new/transfer_1216_1/qq_5000_gaegan_new.npz",
                    modified_adj)
        sp.save_npz("transfer_new/transfer_1216_1/qq_5000_gaegan_ori.npz",
                    adj_new)
        print("save the loaded adj")

    ## print all trainable variables in the model
    slim.model_analyzer.analyze_vars(tf.trainable_variables(),
                                     print_info=True)

    def _save_step(step, write_meta_graph=False):
        # Write a numbered checkpoint for the given global step.
        saver.save(sess,
                   checkpoints_dir,
                   global_step=step,
                   write_meta_graph=write_meta_graph)

    def _generated_adj():
        # Dense generated adjacency without its diagonal, plus a CSR copy.
        dense = get_new_adj(feed_dict, sess, model)
        dense = dense - np.diag(np.diag(dense))
        return dense, sp.csr_matrix(dense)

    # Loop-invariant flattened labels used for the mutual-information log.
    label_flat = adj_label_sparse.todense().A.reshape(-1)

    #####################################################
    G_loss_min = 1000
    for epoch in range(FLAGS.epochs):
        t = time.time()
        # Both the restored and the fresh model take the same generator step
        # on every epoch (the former branches were identical).
        sess.run(opt.G_min_op, feed_dict=feed_dict, options=run_options)

        if epoch % 50 == 0:
            print("Epoch:", '%04d' % (epoch + 1), "time=",
                  "{:.5f}".format(time.time() - t))
            G_loss, laplacian_para, new_learn_rate_value = sess.run(
                [opt.G_comm_loss, opt.reg, new_learning_rate],
                feed_dict=feed_dict,
                options=run_options)
            new_adj = model.new_adj_output.eval(session=sess,
                                                feed_dict=feed_dict)
            mutual_info = normalized_mutual_info_score(new_adj.reshape(-1),
                                                       label_flat)
            print(
                "Step: %d,G: loss=%.7f ,Lap_para: %f  ,info_score = %.6f, LR=%.7f"
                % (epoch, G_loss, laplacian_para, mutual_info,
                   new_learn_rate_value))

            ## debug output of the model
            new_features, reg_trace, reg_log, reward_ratio, node_per, fea_per = sess.run(
                [
                    model.new_fliped_features, opt.reg_trace, opt.reg_log,
                    opt.percentage_fea, model.node_per, model.fea_per
                ],
                feed_dict=feed_dict)
            print("reg_trace is:")
            print(reg_trace)
            print("reg_log is:")
            print(reg_log)
            print("reward_percentage")
            print(reward_ratio)
            print("New features")
            print(new_features[5, :20])
            print("node_percent")
            print(node_per)
            print("fea_per")
            print(fea_per)

            # G_loss only changes on logging epochs, so a new minimum can
            # only be detected here.
            if G_loss < G_loss_min:
                if (epoch > 1000) and if_save_model:
                    _save_step(epoch)
                    print("min G_loss new")
                G_loss_min = G_loss

        if (epoch % 200 == 1) and if_save_model:
            _save_step(epoch)
            print("Epoch:", '%04d' % (epoch + 1), "time=",
                  "{:.5f}".format(time.time() - t))

    _save_step(FLAGS.epochs, write_meta_graph=True)
    print("Optimization Finished!")

    feed_dict.update({placeholders['dropout']: 0})

    # Read the generated features from the *trained* model with dropout off.
    # Previously the value left over from the last logging epoch was reused
    # (stale, sampled with training dropout, undefined for zero epochs).
    new_features_csr = sp.csr_matrix(
        sess.run(model.new_fliped_features, feed_dict=feed_dict))

    new_adj, new_adj_sparse = _generated_adj()
    print((abs(new_adj_sparse - new_adj_sparse.T) > 1e-10).nnz == 0)
    testacc_new, _ = GCN.run(FLAGS.dataset, new_adj_sparse, features_csr,
                             name="modified")

    # The adjacency is re-read before each run exactly as before, in case
    # get_new_adj is not deterministic -- TODO confirm and drop if it is.
    new_adj, new_adj_sparse = _generated_adj()
    testacc_new2, _ = GCN.run(FLAGS.dataset, adj_new, new_features_csr,
                              name="modified2")

    new_adj, new_adj_sparse = _generated_adj()
    testacc_new3, _ = GCN.run(FLAGS.dataset, new_adj_sparse,
                              new_features_csr, name="modified3")

    ##### The final results ####
    print("*" * 30)
    print("the final results:\n")
    print("*" * 30)
    print("The clean acc is: ")
    print(testacc_clean)
    print("*#" * 15)
    print("The original acc is: ")
    print(testacc)
    print("*#" * 15)
    print("The only modify adj acc is : ")
    print(testacc_new)
    print("*#" * 15)
    print("The only modify feature acc is : ")
    print(testacc_new2)
    print("*#" * 15)
    print("The modify both adj and feature and acc is : ")
    print(testacc_new3)
    return new_adj, testacc_clean, testacc, testacc_new, testacc_new2, testacc_new3
def train(placeholders, model, opt, adj_train, train_edges, val_edges,
          val_edges_false, test_edges, test_edges_false, features, sess,
          name="single_fold"):
    """Train a ``dglfrm`` / ``dglfrm_b`` model and return its score matrix.

    Each epoch runs one optimizer step, scores the validation edges with
    ``get_roc_score`` and, when ``FLAGS.early_stopping`` is non-zero,
    checkpoints the model whenever the rounded validation ROC improves.
    After training, the latent variables of the last epoch are written to
    ``data/qual_<dataset_str>_<model_str>`` with ``np.savez``.

    Args:
        placeholders: dict of TF placeholders; the keys ``dropout``,
            ``is_training``, ``norm``, ``pos_weight``, ``edges_for_loss`` and
            ``num_train`` are fed here.
        model: model object exposing ``a``, ``b`` and ``z_real`` /
            ``z_discrete`` (``dglfrm``) or ``z`` (``dglfrm_b``).
        opt: optimizer object exposing ``opt_op``, ``cost``, ``accuracy`` and
            ``x_loss``.
        adj_train: sparse training adjacency matrix.
        train_edges: accepted for interface compatibility; not used here.
        val_edges, val_edges_false, test_edges, test_edges_false: edge lists
            indexed as ``(row, col)``; all of them are masked out of the loss,
            and the validation pairs are used for model selection.
        features: feature input passed to ``construct_feed_dict``.
        sess: TensorFlow session; its variables are (re)initialised here.
        name: suffix appended to ``save_dir`` for the checkpoint path.

    Returns:
        ``(adj_score, z_activated)`` as returned by ``get_score_matrix``.

    Raises:
        ValueError: if ``model_str`` is not a supported model or
            ``FLAGS.epochs`` is smaller than 1.
    """
    if model_str not in ('dglfrm', 'dglfrm_b'):
        # Previously this surfaced as a NameError on `outs` in the first epoch.
        raise ValueError(
            "train() supports model_str 'dglfrm' or 'dglfrm_b', got %r" %
            (model_str,))
    if FLAGS.epochs < 1:
        # Nothing below is defined without at least one epoch.
        raise ValueError("FLAGS.epochs must be >= 1, got %r" % (FLAGS.epochs,))

    adj = adj_train
    num_nodes = adj.shape[0]
    num_pairs = num_nodes * num_nodes

    pos_weight = float(num_pairs - adj.sum()) / adj.sum()  # N/P
    norm = num_pairs / float((num_pairs - adj.sum()) * 2)  # (N+P) x (N+P) / (2N)

    print(adj_train.shape)
    adj_label = sparse_to_tuple(adj_train + sp.eye(adj_train.shape[0]))

    # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2)
    adj_norm = preprocess_graph(adj)

    # session initialize
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # Mask validation / test pairs (positive and negative) out of the loss.
    edges_for_loss = np.ones((num_pairs), dtype=np.float32)
    edges_to_ignore = np.concatenate(
        (val_edges, val_edges_false, test_edges, test_edges_false), axis=0)
    if edges_to_ignore.size:
        # int64 keeps row * num_nodes from overflowing narrower edge dtypes.
        rows = edges_to_ignore[:, 0].astype(np.int64)
        cols = edges_to_ignore[:, 1].astype(np.int64)
        edges_for_loss[rows * num_nodes + cols] = 0
    num_train = num_pairs - len(edges_to_ignore)

    val_roc_score = []
    best_validation = 0.0
    last_best_epoch = 0
    checkpoint_saved = False
    checkpoint_path = save_dir + name

    # Train model
    for epoch in range(FLAGS.epochs):
        t = time.time()

        # The feed dictionary is rebuilt every epoch, as before, in case the
        # scoring helper modifies it.
        feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                        placeholders)
        feed_dict.update({
            placeholders['dropout']: FLAGS.dropout,
            placeholders['is_training']: True,
            placeholders['norm']: norm,
            placeholders['pos_weight']: pos_weight,
            placeholders['edges_for_loss']: edges_for_loss,
            placeholders['num_train']: num_train,
        })

        if model_str == 'dglfrm':
            _, avg_cost, _, avg_x_cost, _, _, z_real, z_discrete = sess.run(
                [opt.opt_op, opt.cost, opt.accuracy, opt.x_loss, model.a,
                 model.b, model.z_real, model.z_discrete],
                feed_dict=feed_dict)
        else:  # 'dglfrm_b'
            _, avg_cost, _, avg_x_cost, _, _, z_discrete = sess.run(
                [opt.opt_op, opt.cost, opt.accuracy, opt.x_loss, model.a,
                 model.b, model.z],
                feed_dict=feed_dict)
            z_real = None

        adj_rec, z_activated = get_score_matrix(sess, placeholders, feed_dict,
                                                model, S=1)
        roc_curr, ap_curr, _ = get_roc_score(adj_rec, val_edges,
                                             val_edges_false)

        print("Epoch:", '%03d' % (epoch + 1),
              "cost=", "{:.3f}".format(avg_cost),
              "x_recon_loss=", "{:.2f}".format(avg_x_cost),
              "val_roc=", "{:.3f}".format(roc_curr),
              "val_ap=", "{:.3f}".format(ap_curr),
              'activated_z=', "{:.1f}".format(z_activated),
              "time=", "{:.2f}".format(time.time() - t))

        roc_curr = round(roc_curr, 3)
        val_roc_score.append(roc_curr)

        # Look-ahead epochs: (We may need to train for some more epochs due to
        # the nested stochastic nature of the framework.)
        if FLAGS.early_stopping != 0 and roc_curr > best_validation:
            print('Saving model')
            saver.save(sess=sess, save_path=checkpoint_path)
            checkpoint_saved = True
            best_validation = roc_curr
            last_best_epoch = 0

        if FLAGS.early_stopping != 0 and last_best_epoch > FLAGS.early_stopping:
            break
        else:
            last_best_epoch += 1

    print("Optimization Finished!")
    val_max_index = np.argmax(val_roc_score)
    print('---------------------------------')
    print('Validation ROC Max: {:.3f} at Epoch: {:04d}'.format(
        val_roc_score[val_max_index], val_max_index))

    qual_file = 'data/qual_' + dataset_str + '_' + model_str
    if model_str == 'dglfrm':
        np.savez(qual_file,
                 z_discrete=np.asarray(z_discrete),
                 z_real=np.asarray(z_real),
                 z_out=np.asarray(np.multiply(np.round(z_discrete), z_real)),
                 adj_rec=adj_rec)
    else:  # 'dglfrm_b'
        np.savez(qual_file, z_discrete=np.asarray(z_discrete), adj_rec=adj_rec)

    # Only restore when a checkpoint was actually written; otherwise the
    # last-epoch weights are already the best ones available.
    if FLAGS.early_stopping != 0 and checkpoint_saved:
        saver.restore(sess=sess, save_path=checkpoint_path)

    adj_score, z_activated = get_score_matrix(sess, placeholders, feed_dict,
                                              model)
    return adj_score, z_activated
def train():
    """Train (or restore) the ``mask_gvae`` model and evaluate it on the test graphs.

    Works on module-level state (``train_structure_input``,
    ``train_feature_input``, ``train_num_nodes_all``, the matching ``test_*``
    inputs, ``FLAGS``, ``model_str``, ``noise_ratio``, ``n_class``,
    ``if_train`` and ``dataset_str``).

    When ``if_train`` is true, every training graph is passed to
    ``train_one_graph`` once per epoch and a checkpoint is written to
    ``./checkpoints/<dataset_str>.ckpt``; otherwise that checkpoint is
    restored.  Every test graph is then scored with ``test_one_graph``.

    Returns:
        ``(mean_psnr, mean_wls)``: the means of the two values returned by
        ``test_one_graph`` over all test graphs.

    Raises:
        ValueError: if ``model_str`` is not ``"mask_gvae"``.
    """
    if model_str != "mask_gvae":
        # Previously this surfaced later as an AttributeError on None.
        raise ValueError(
            "train() only supports model_str == 'mask_gvae', got %r" %
            (model_str,))

    ## add noise label
    train_adj_list, train_adj_orig_list, train_k_list = add_noises_on_adjs(
        train_structure_input, train_num_nodes_all)
    test_adj_list, test_adj_orig_list, test_k_list = add_noises_on_adjs(
        test_structure_input, test_num_nodes_all)

    # The first training graph defines the model dimensions.
    adj = train_adj_list[0]
    adj_orig = train_adj_orig_list[0]
    features_csr = train_feature_input[0]
    features = sparse_to_tuple(features_csr.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]
    num_nodes = adj.shape[0]
    adj_label = sparse_to_tuple(adj + sp.eye(num_nodes))
    adj_norm, _ = preprocess_graph(adj)

    global_steps = tf.get_variable('global_step',
                                   trainable=False,
                                   initializer=0)
    new_learning_rate_dis = tf.train.exponential_decay(
        FLAGS.learn_rate_init,
        global_step=global_steps,
        decay_steps=100,
        decay_rate=0.95)
    new_learning_rate_gen = tf.train.exponential_decay(
        FLAGS.learn_rate_init_gen,
        global_step=global_steps,
        decay_steps=100,
        decay_rate=0.95)

    # set the placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
        'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
        'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"),
        'dropout': tf.placeholder_with_default(0.3, shape=(),
                                               name="ph_dropout"),
        'clean_mask': tf.placeholder(tf.int32),
        'noised_mask': tf.placeholder(tf.int32),
        'noised_num': tf.placeholder(tf.int32),
        'node_mask': tf.placeholder(tf.float32)
    }

    # build models
    adj_clean = adj_orig.tocoo()
    adj_clean_tensor = tf.SparseTensor(
        indices=np.stack([adj_clean.row, adj_clean.col], axis=-1),
        values=adj_clean.data,
        dense_shape=adj_clean.shape)
    model = mask_gvae(placeholders,
                      num_features,
                      num_nodes,
                      features_nonzero,
                      new_learning_rate_dis,
                      new_learning_rate_gen,
                      adj_clean=adj_clean_tensor,
                      k=int(adj.sum() * noise_ratio))
    model.build_model()

    # Optimizer
    with tf.name_scope('optimizer'):
        opt = Optimizer(preds=tf.reshape(model.x_tilde, [-1]),
                        labels=tf.reshape(
                            tf.sparse_tensor_to_dense(
                                placeholders['adj_orig'],
                                validate_indices=False), [-1]),
                        model=model,
                        num_nodes=num_nodes,
                        global_step=global_steps,
                        new_learning_rate=new_learning_rate_dis,
                        new_learning_rate_gen=new_learning_rate_gen,
                        placeholders=placeholders)

    # init the session
    sess = tf.Session()

    # Build the Saver once.  Constructing it inside the training loop added a
    # fresh set of save/restore ops to the graph on every iteration.
    # NOTE(review): assumes train_one_graph creates no new variables -- the
    # single global initializer run below suggests so; confirm.
    saver = tf.train.Saver()
    checkpoint_path = "./checkpoints/{}.ckpt".format(dataset_str)

    # initial clean and noised_mask (placeholder values for the first feed)
    clean_mask = np.array([1, 2, 3, 4, 5])
    noised_mask = np.array([6, 7, 8, 9, 10])
    # Floor division: the 'noised_num' placeholder is int32, and "/" yields
    # the float 2.5 under Python 3.
    noised_num = noised_mask.shape[0] // 2

    feed_dict = construct_feed_dict(adj_norm, adj_label, features, clean_mask,
                                    noised_mask, noised_num, placeholders)
    node_mask = np.ones([num_nodes, n_class])
    node_mask[train_num_nodes_all[0]:, :] = 0
    feed_dict.update({placeholders['node_mask']: node_mask})
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    if if_train:
        sess.run(tf.global_variables_initializer())
        for epoch in range(FLAGS.epochs):
            for i in tqdm(range(len(train_feature_input))):
                train_one_graph(train_adj_list[i], train_adj_orig_list[i],
                                train_feature_input[i],
                                train_num_nodes_all[i], train_k_list[i],
                                model, opt, placeholders, sess,
                                new_learning_rate_gen, feed_dict, epoch, i)
            # Checkpoint after each pass over the training graphs.
            saver.save(sess, checkpoint_path)
        print("Optimization Finished!")
    else:
        saver.restore(sess, checkpoint_path)

    # Evaluation is identical for the trained and the restored model.
    psnr_list = []
    wls_list = []
    for i, test_features in enumerate(test_feature_input):
        psnr, wls = test_one_graph(test_adj_list[i], test_adj_orig_list[i],
                                   test_features, test_num_nodes_all[i],
                                   test_k_list[i], model, placeholders, sess,
                                   feed_dict)
        psnr_list.append(psnr)
        wls_list.append(wls)
    print(psnr_list)

    ################## the PSRN and WL #########################
    mean_psnr = np.mean(psnr_list)
    mean_wls = np.mean(wls_list)
    print("#" * 15)
    print("The PSNR is:")
    print(mean_psnr)
    print("The WL is :")
    print(mean_wls)
    return mean_psnr, mean_wls