def format_data(data_source): adj, features, labels = load_data(data_source) # Store original adjacency matrix (without diagonal entries) for later # adj_orig = adj # adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) # adj_orig.eliminate_zeros() # adj = adj_orig if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] adj_label = adj + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) items = [ adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label, features, labels ] feas = {} for item in items: # item_name = [ k for k,v in locals().iteritems() if v == item][0]] item_name = retrieve_name(item) feas[item_name] = item return feas
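# format_data() above indexes the result of sparse_to_tuple() as features[2][1]
# (the feature dimension) and features[1].shape[0] (the number of non-zero
# entries). A minimal sketch of that (coords, values, shape) convention,
# assuming the usual GAE-style helper (an assumption, not the project's own code):
import numpy as np
import scipy.sparse as sp


def sparse_to_tuple(sparse_mx):
    sparse_mx = sp.coo_matrix(sparse_mx)
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    return coords, sparse_mx.data, sparse_mx.shape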
def format_data(data_source): adj, features, labels = load_data2(data_source) if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] adj_label = adj + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) items = [ adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label, features, labels ] feas = {} for item in items: # item_name = [ k for k,v in locals().iteritems() if v == item][0]] item_name = retrieve_name(item) feas[item_name] = item return feas
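# The snippets above also rely on a retrieve_name() helper that is not shown in
# this file. A minimal sketch of what such a helper could look like, assuming it
# resolves a value back to the variable name bound to it in the caller's frame
# (this is an assumption, not the project's actual implementation):
import inspect


def retrieve_name(var):
    """Return the first name in the caller's local scope bound to `var`."""
    callers_locals = inspect.currentframe().f_back.f_locals.items()
    names = [name for name, value in callers_locals if value is var]
    return names[0] if names else None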
def format_data_new(adj, features): # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() # Some preprocessing adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) adj_label = adj + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) values = [ adj, num_features, num_nodes, features_nonzero, pos_weight, norm, adj_norm, adj_label, features, adj_orig ] keys = [ 'adj', 'num_features', 'num_nodes', 'features_nonzero', 'pos_weight', 'norm', 'adj_norm', 'adj_label', 'features', 'adj_orig' ] feas = {} feas = dict(zip(keys, values)) return feas
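# The pos_weight / norm terms computed in format_data_new() re-weight the
# reconstruction loss for sparse graphs:
#   pos_weight = (N*N - E) / E          # up-weights the rare positive entries
#   norm       = N*N / (2 * (N*N - E))  # rescales the weighted cross-entropy
# A quick sanity check of the two formulas on a toy 3-node adjacency matrix
# (illustration only, not part of the original code):
import numpy as np
import scipy.sparse as sp

toy_adj = sp.csr_matrix(np.array([[0, 1, 0],
                                  [1, 0, 1],
                                  [0, 1, 0]]))
toy_pos_weight = float(toy_adj.shape[0] * toy_adj.shape[0] - toy_adj.sum()) / toy_adj.sum()
toy_norm = toy_adj.shape[0] * toy_adj.shape[0] / float(
    (toy_adj.shape[0] * toy_adj.shape[0] - toy_adj.sum()) * 2)
print(toy_pos_weight, toy_norm)  # 1.25 0.9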
def format_data(data_name):
    # Load data
    adj, features, y_test, tx, ty, test_mask, true_labels = load_data(data_name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    # Remove the diagonal (self-loop) entries
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train
    adj_dense = adj.toarray()

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features_dense = features.tocoo().toarray()
    features = sparse_to_tuple(features.tocoo())
    # num_features is the feature dimension
    num_features = features[2][1]
    # features_nonzero is the number of non-zero feature entries
    features_nonzero = features[1].shape[0]

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    items = [
        adj, num_features, num_nodes, features_nonzero, pos_weight, norm,
        adj_norm, adj_label, features, true_labels, train_edges, val_edges,
        val_edges_false, test_edges, test_edges_false, adj_orig,
        features_dense, adj_dense
    ]
    feas = {}

    print('num_features is:', num_features)
    print('num_nodes is:', num_nodes)
    print('features_nonzero is:', features_nonzero)
    print('pos_weight is:', pos_weight)
    print('norm is:', norm)

    for item in items:
        # item_name = [k for k, v in locals().iteritems() if v == item][0]
        feas[retrieve_name(item)] = item

    return feas
def format_data(data_name): # Load data #adj, features, y_test, tx, ty, test_maks, true_labels = load_data(data_name) print("&&&&&&&&&&&&&&&&&", data_name) rownetworks, numView, features, truelabels, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data( data_name) adjs_orig = [] for v in range(numView): adj_orig = rownetworks[v] adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) #adj_orig.eliminate_zeros() adjs_orig.append(adj_orig) adjs_label = rownetworks adjs_orig = np.array(adjs_orig) adjs = adjs_orig if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adjs_norm = preprocess_graph(adjs) num_nodes = adjs[0].shape[0] features = features num_features = features.shape[1] #features_nonzero = features[1].shape[0] fea_pos_weights = float(features.shape[0] * features.shape[1] - features.sum()) / features.sum() pos_weights = [] norms = [] for v in range(numView): pos_weight = float(adjs[v].shape[0] * adjs[v].shape[0] - adjs[v].sum()) / adjs[v].sum() norm = adjs[v].shape[0] * adjs[v].shape[0] / float( (adjs[v].shape[0] * adjs[v].shape[0] - adjs[v].sum()) * 2) pos_weights.append(pos_weight) norms.append(norm) true_labels = truelabels feas = { 'adjs': adjs_norm, 'adjs_label': adjs_label, 'num_features': num_features, 'num_nodes': num_nodes, 'true_labels': true_labels, 'pos_weights': pos_weights, 'norms': np.array(norms), 'adjs_norm': adjs_norm, 'features': features, 'fea_pos_weights': fea_pos_weights, 'numView': numView } return feas
def test(saver, adj, features, meta_dir, checkpoints_dir):
    adj_norm, adj_norm_sparse = preprocess_graph(adj)
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }
    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    # Create model
    saver = tf.train.Saver(max_to_keep=10)
    model = None
    if model_str == "gae_gan":
        model = gaegan(placeholders, num_features, num_nodes, features_nonzero)

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    global_steps = tf.get_variable('global_step', trainable=False, initializer=0)
    opt = 0

    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gae_gan':
            opt = Optimizergaegan(preds=model.x_tilde,
                                  labels=tf.reshape(
                                      tf.sparse_tensor_to_dense(
                                          placeholders['adj_orig'],
                                          validate_indices=False), [-1]),
                                  model=model,
                                  num_nodes=num_nodes,
                                  pos_weight=pos_weight,
                                  norm=norm,
                                  global_step=global_steps)

    # session part
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    cost_val = []
    acc_val = []

    # load network and rebuild the adjacency matrix
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(meta_dir)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
        new_adj = get_new_adj(feed_dict, sess, model)
    return new_adj
def format_data(data_name): # Load data adj, features, true_labels = load_data(data_name) # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) adj_label = adj_train + 2 * sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) feas = {} feas['adj'] = adj feas['num_features'] = num_features feas['num_nodes'] = num_nodes feas['features_nonzero'] = features_nonzero feas['pos_weight'] = pos_weight feas['norm'] = norm feas['adj_norm'] = adj_norm feas['adj_label'] = adj_label feas['features'] = features feas['true_labels'] = true_labels feas['train_edges'] = train_edges feas['val_edges'] = val_edges feas['val_edges_false'] = val_edges_false feas['test_edges'] = test_edges feas['test_edges_false'] = test_edges_false feas['adj_orig'] = adj_orig return feas
def load_model(placeholders, model, opt, adj_train, test_edges, test_edges_false,
               features, sess, name="single_fold"):
    adj = adj_train

    # This will be calculated for every fold
    # pos_weight and norm should be tensors
    print('----------------')
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()  # N/P
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)  # (N+P) x (N+P) / (N)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2)
    adj_norm = preprocess_graph(adj)

    # Construct feed dictionary
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    feed_dict.update({placeholders['is_training']: True})
    feed_dict.update({placeholders['norm']: norm})
    feed_dict.update({placeholders['pos_weight']: pos_weight})

    saver = tf.train.Saver()
    saver.restore(sess=sess, save_path=(save_dir + name))
    print('Model restored')

    # Decrease MC samples for pubmed
    if dataset_str == 'pubmed':
        S = 5
    else:
        S = 15

    adj_score, z_activated = get_score_matrix(sess, placeholders, feed_dict,
                                              model, S=S, save_qual=True)

    return adj_score, z_activated
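# The comment in load_model() describes adj_norm as D^(-1/2) x adj x D^(-1/2).
# A minimal sketch of that symmetric normalization (with the usual A + I
# self-loops), assuming preprocess_graph follows the standard GCN recipe; the
# project's own helper may also convert the result to the sparse-tuple format:
import numpy as np
import scipy.sparse as sp


def preprocess_graph_sketch(adj):
    adj_ = sp.coo_matrix(adj) + sp.eye(adj.shape[0])
    rowsum = np.array(adj_.sum(1)).flatten()
    d_inv_sqrt = sp.diags(np.power(rowsum, -0.5))
    return (d_inv_sqrt @ adj_ @ d_inv_sqrt).tocoo()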
def test_one_graph(adj, adj_orig, features_csr, num_node, k_num, model, placeholders, sess, feed_dict): adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) # delete self loop adj_orig.eliminate_zeros() adj_new = adj features = sparse_to_tuple(features_csr.tocoo()) adj_label = adj_new + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) adj_clean = adj_orig.tocsr() k_num = int(k_num * size / noise_ratio) # match the budget size if k_num != 0: adj_norm, adj_norm_sparse = preprocess_graph(adj_new) feed_dict.update({placeholders["adj"]: adj_norm}) feed_dict.update({placeholders["adj_orig"]: adj_label}) feed_dict.update({placeholders["features"]: features}) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) model.k = k_num x_tilde = sess.run(model.realD_tilde, feed_dict=feed_dict, options=run_options) noised_indexes, clean_indexes = get_noised_indexes( x_tilde, adj_new, num_node) feed_dict.update({placeholders["noised_mask"]: noised_indexes}) feed_dict.update({placeholders["clean_mask"]: clean_indexes}) feed_dict.update({placeholders["noised_num"]: len(noised_indexes) / 2}) test1 = model.test_new_indexes.eval(session=sess, feed_dict=feed_dict) test0 = model.test_noised_index.eval(session=sess, feed_dict=feed_dict) new_adj = get_new_adj(feed_dict, sess, model, noised_indexes, adj_new, k_num, num_node) else: # new_adj = adj new_adj = adj.copy() new_adj_sparse = sp.csr_matrix(new_adj) psnr = PSNR(adj_clean[:num_node, :num_node], new_adj_sparse[:num_node, :num_node]) wls = WL_no_label(adj_clean[:num_node, :num_node], new_adj_sparse[:num_node, :num_node]) return psnr, wls
def format_data(data_source):
    adj, features, labels = load_data2(data_source)

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels
    ]
    feas = {}
    for item in items:
        # item_name = [k for k, v in locals().iteritems() if v == item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item

    return feas
def prepare_data_for_model(adj_train, target_adj_train, device):
    """Prepare the given data so it is ready to be fed to the model."""
    # Some preprocessing
    adj_train_norm = preprocess_graph(adj_train)
    adj_train_norm = make_sparse(adj_train_norm)
    adj_train_labels = torch.FloatTensor(
        target_adj_train + sp.eye(target_adj_train.shape[0]).todense())

    # Features are the identity matrix
    features = sp.eye(adj_train.shape[0]).tolil()
    features = make_sparse(features)

    data = {
        'adj_norm': adj_train_norm,
        'adj_labels': adj_train_labels,
        'features': features,
    }

    data['adj_norm'] = data['adj_norm'].to(device)
    data['adj_labels'] = data['adj_labels'].to(device)
    data['features'] = data['features'].to(device)

    return data
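# prepare_data_for_model() above assumes a make_sparse() helper that turns a
# scipy matrix into a torch sparse tensor. A hedged sketch of such a helper
# (the project's real implementation may differ):
import numpy as np
import scipy.sparse as sp
import torch


def make_sparse(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor."""
    sparse_mx = sp.coo_matrix(sparse_mx).astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    return torch.sparse_coo_tensor(indices, values, torch.Size(sparse_mx.shape))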
def format_data(data_source): # adj = load_adj('../data/facebook/0') # features = load_attr('../data/facebook/0') # labels = np.ones(adj.shape[0]) # adj, features, labels = load_data2(data_source) adj, features, labels = load_data('twitter') # print(adj) print(type(adj), type(features)) print(adj.shape, features.shape) features = normalize(features, norm='l1', axis=1) print(features[:5]) if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] adj_label = adj + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) items = [ adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label, features, labels ] feas = {} for item in items: # item_name = [ k for k,v in locals().iteritems() if v == item][0]] item_name = retrieve_name(item) feas[item_name] = item return feas
def train_one_graph(adj, adj_orig, features_csr, num_node, k_num, model, opt, placeholders, sess, new_learning_rate, feed_dict, epoch, graph_index): adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) # delete self loop adj_orig.eliminate_zeros() adj_new = adj features = sparse_to_tuple(features_csr.tocoo()) adj_norm, adj_norm_sparse = preprocess_graph(adj_new) adj_label = adj_new + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) ############ # build models adj_clean = adj_orig.tocoo() adj_clean_tensor = tf.SparseTensor(indices=np.stack( [adj_clean.row, adj_clean.col], axis=-1), values=adj_clean.data, dense_shape=adj_clean.shape) ### initial clean and noised_mask clean_mask = np.array([1, 2, 3, 4, 5]) noised_mask = np.array([6, 7, 8, 9, 10]) noised_num = noised_mask.shape[0] / 2 ################################## # feed_dict.update({placeholders["adj"]: adj_norm}) feed_dict.update({placeholders["adj_orig"]: adj_label}) feed_dict.update({placeholders["features"]: features}) node_mask = np.ones([adj.shape[0], n_class]) node_mask[num_node:, :] = 0 feed_dict.update({placeholders['node_mask']: node_mask}) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) model.k = k_num ##################################################### t = time.time() ######## if epoch > int( FLAGS.epochs / 2): ## here we can control the manner of new model _ = sess.run([opt.G_min_op], feed_dict=feed_dict, options=run_options) else: _, x_tilde = sess.run([opt.D_min_op, model.realD_tilde], feed_dict=feed_dict, options=run_options) if epoch == int(FLAGS.epochs / 2): noised_indexes, clean_indexes = get_noised_indexes( x_tilde, adj_new, num_node) feed_dict.update({placeholders["noised_mask"]: noised_indexes}) feed_dict.update({placeholders["clean_mask"]: clean_indexes}) feed_dict.update( {placeholders["noised_num"]: len(noised_indexes) / 2}) if epoch % 1 == 0 and graph_index == 0: if epoch > int(FLAGS.epochs / 2): print("This is the generation part") else: print("This is the cluster mask part") print("Epoch:", '%04d' % (epoch + 1), "time=", "{:.5f}".format(time.time() - t)) G_loss, D_loss, new_learn_rate_value = sess.run( [opt.G_comm_loss, opt.D_loss, new_learning_rate], feed_dict=feed_dict, options=run_options) print("Step: %d,G: loss=%.7f ,L_u: loss= %.7f, LR=%.7f" % (epoch, G_loss, D_loss + 1, new_learn_rate_value)) ########################################## return
def train(): ## add noise label train_adj_list, train_adj_orig_list, train_k_list = add_noises_on_adjs( train_structure_input, train_num_nodes_all) test_adj_list, test_adj_orig_list, test_k_list = add_noises_on_adjs( test_structure_input, test_num_nodes_all) adj = train_adj_list[0] features_csr = train_feature_input[0] features = sparse_to_tuple(features_csr.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] adj_orig = train_adj_orig_list[0] adj_label = train_adj_list[0] + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) num_nodes = adj.shape[0] adj_norm, adj_norm_sparse = preprocess_graph(adj) ############ global_steps = tf.get_variable('global_step', trainable=False, initializer=0) new_learning_rate_dis = tf.train.exponential_decay( FLAGS.learn_rate_init, global_step=global_steps, decay_steps=100, decay_rate=0.95) new_learning_rate_gen = tf.train.exponential_decay( FLAGS.learn_rate_init_gen, global_step=global_steps, decay_steps=100, decay_rate=0.95) new_learn_rate_value = FLAGS.learn_rate_init # set the placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float32, name="ph_features"), 'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"), 'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"), 'dropout': tf.placeholder_with_default(0.3, shape=(), name="ph_dropout"), 'clean_mask': tf.placeholder(tf.int32), 'noised_mask': tf.placeholder(tf.int32), 'noised_num': tf.placeholder(tf.int32), 'node_mask': tf.placeholder(tf.float32) } # build models model = None adj_clean = adj_orig.tocoo() adj_clean_tensor = tf.SparseTensor(indices=np.stack( [adj_clean.row, adj_clean.col], axis=-1), values=adj_clean.data, dense_shape=adj_clean.shape) if model_str == "mask_gvae": model = mask_gvae(placeholders, num_features, num_nodes, features_nonzero, new_learning_rate_dis, new_learning_rate_gen, adj_clean=adj_clean_tensor, k=int(adj.sum() * noise_ratio)) model.build_model() pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) opt = 0 # Optimizer with tf.name_scope('optimizer'): if model_str == 'mask_gvae': opt = Optimizer(preds=tf.reshape(model.x_tilde, [-1]), labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, global_step=global_steps, new_learning_rate=new_learning_rate_dis, new_learning_rate_gen=new_learning_rate_gen, placeholders=placeholders) # init the session sess = tf.Session() # sess.run(tf.global_variables_initializer()) # initial test # initial clean and noised_mask clean_mask = np.array([1, 2, 3, 4, 5]) noised_mask = np.array([6, 7, 8, 9, 10]) noised_num = noised_mask.shape[0] / 2 # ################################## feed_dict = construct_feed_dict(adj_norm, adj_label, features, clean_mask, noised_mask, noised_num, placeholders) node_mask = np.ones([num_nodes, n_class]) node_mask[train_num_nodes_all[0]:, :] = 0 feed_dict.update({placeholders['node_mask']: node_mask}) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # ################################## if if_train: sess.run(tf.global_variables_initializer()) # initial test for epoch in range(FLAGS.epochs): for i in tqdm(range(len(train_feature_input))): train_one_graph(train_adj_list[i], train_adj_orig_list[i], train_feature_input[i], train_num_nodes_all[i], train_k_list[i], model, opt, placeholders, sess, new_learning_rate_gen, feed_dict, epoch, i) saver = tf.train.Saver() # define 
            saver.save(sess, "./checkpoints/{}.ckpt".format(dataset_str))
        print("Optimization Finished!")

        psnr_list = []
        wls_list = []
        for i in range(len(test_feature_input)):
            psnr, wls = test_one_graph(test_adj_list[i], test_adj_orig_list[i],
                                       test_feature_input[i],
                                       test_num_nodes_all[i], test_k_list[i],
                                       model, placeholders, sess, feed_dict)
            psnr_list.append(psnr)
            wls_list.append(wls)
        print(psnr_list)
    else:
        saver = tf.train.Saver()  # define saver in the loop
        saver.restore(sess, "./checkpoints/{}.ckpt".format(dataset_str))
        psnr_list = []
        wls_list = []
        for i in range(len(test_feature_input)):
            psnr, wls = test_one_graph(test_adj_list[i], test_adj_orig_list[i],
                                       test_feature_input[i],
                                       test_num_nodes_all[i], test_k_list[i],
                                       model, placeholders, sess, feed_dict)
            psnr_list.append(psnr)
            wls_list.append(wls)
        print(psnr_list)

    ##################################################
    ################ the PSNR and WL #################
    print("#" * 15)
    print("The PSNR is:")
    print(np.mean(psnr_list))
    print("The WL is:")
    print(np.mean(wls_list))
    return np.mean(psnr_list), np.mean(wls_list)
def train_gcn(features, adj_train, train_edges, train_edges_false, test_edges, test_edges_false): # Settings flags = tf.app.flags FLAGS = flags.FLAGS flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.') flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.') flags.DEFINE_integer('hidden1', 96, 'Number of units in hidden layer 1.') flags.DEFINE_integer('hidden2', 48, 'Number of units in hidden layer 2.') flags.DEFINE_float('weight_decay', 0., 'Weight for L2 loss on embedding matrix.') flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).') flags.DEFINE_string('model', 'gcn_vae', 'Model string.') flags.DEFINE_integer('features', 1, 'Whether to use features (1) or not (0).') model_str = FLAGS.model #1-dim index array, used in cost function to only focus on those interactions with high confidence mask_index = construct_optimizer_list(features.shape[0], train_edges, train_edges_false) # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj_train adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj = adj_train if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adj_norm = preprocess_graph(adj) # Define placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float64), 'adj': tf.sparse_placeholder(tf.float64), 'adj_orig': tf.sparse_placeholder(tf.float64), 'dropout': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero) pos_weight = 1 norm = 1 #pos_weight = train_edges_false.shape[0] / float(train_edges.shape[0]) #norm = (train_edges.shape[0]+train_edges_false.shape[0]) / float(train_edges_false.shape[0]*train_edges_false.shape[0]) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), pos_weight=pos_weight, norm=norm, mask=mask_index) elif model_str == 'gcn_vae': opt = OptimizerVAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm, mask=mask_index) # Initialize session sess = tf.Session() sess.run(tf.global_variables_initializer()) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Train model for epoch in range(FLAGS.epochs): t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1])) print("Optimization Finished!") #return embedding for each protein emb = sess.run(model.z_mean, feed_dict=feed_dict) return emb
adj, test_percent=10., val_percent=5.) adj = adj_train # This is the adj matrix that masked out all validation and testing entries. #print(adj_train.shape) #import pdb;pdb.set_trace() if FLAGS.features == 0: features = sp.identity( features.shape[0]) # featureless. sparse coo_matrix. # Some preprocessing #adj_norm = preprocess_graph(adj) attn_adj_norm = adj + sp.eye(adj.shape[0]) attn_adj_norm = sparse_to_tuple(attn_adj_norm) # a tuple adj_norm = preprocess_graph( adj) # a tuple. Normalization. Identical matrix is added here. #print(type(adj + sp.eye(adj.shape[0]))) #import pdb;pdb.set_trace() # Define placeholders placeholders = { # this is passed directly to the model to build the graph. 'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'in_drop': tf.placeholder_with_default(0., shape=()), 'attn_drop': tf.placeholder_with_default(0., shape=()), 'feat_drop': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0]
def train(): adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) # delete self loop # adj_orig.eliminate_zeros() # adj_new = randomly_add_edges(adj_orig, k=FLAGS.k) adj_new = adj_orig features_new_csr = randomly_flip_features(features_csr, k=FLAGS.k, seed=seed + 5) feature_new = sparse_to_tuple(features_new_csr.tocoo()) # feature_new = features # features_new_csr =features_csr # features_nonzero = feature_new[1].shape[0] # train GCN first # sizes = [FLAGS.gcn_hidden1, FLAGS.gcn_hidden2, n_class] # surrogate_model = GCN.GCN(sizes, adj_norm_sparse_csr, features_csr, with_relu=True, name="surrogate", gpu_id=gpu_id) # surrogate_model.train(adj_norm_sparse_csr, split_train, split_val, node_labels) # ori_acc = surrogate_model.test(split_unlabeled, node_labels, adj_norm_sparse_csr) ####################### the clean and noised GCN ############################ testacc_clean, valid_acc_clean = GCN.run(FLAGS.dataset, adj_orig, features_csr, name="clean") testacc, valid_acc = GCN.run(FLAGS.dataset, adj_new, features_new_csr, name="original") testacc_upper, valid_acc_upper = GCN.run(FLAGS.dataset, adj_new, features_csr, name="upper_bound") ########### print(testacc_clean) print(testacc) print(testacc_upper) ########### ############################################################################## adj_norm, adj_norm_sparse = preprocess_graph(adj_new) adj_norm_sparse_csr = adj_norm_sparse.tocsr() adj_label = adj_new + sp.eye(adj.shape[0]) adj_label_sparse = adj_label adj_label = sparse_to_tuple(adj_label) if_drop_edge = True ## set the checkpoint path checkpoints_dir_base = "./checkpoints" current_time = datetime.datetime.now().strftime("%y%m%d%H%M%S") checkpoints_dir = os.path.join(checkpoints_dir_base, current_time, current_time) ############ global_steps = tf.get_variable('global_step', trainable=False, initializer=0) new_learning_rate = tf.train.exponential_decay(FLAGS.learn_rate_init, global_step=global_steps, decay_steps=10000, decay_rate=0.98) new_learn_rate_value = FLAGS.learn_rate_init ## set the placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float32, name="ph_features"), 'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"), 'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"), 'dropout': tf.placeholder_with_default(0., shape=(), name="ph_dropout"), # 'node_labels': tf.placeholder(tf.float32, name = "ph_node_labels"), # 'node_ids' : tf.placeholder(tf.float32, name = "ph_node_ids") } # build models model = None if model_str == "gae_gan": model = gaegan(placeholders, num_features, num_nodes, features_nonzero, new_learning_rate) model.build_model() pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) opt = 0 # Optimizer with tf.name_scope('optimizer'): if model_str == 'gae_gan': opt = Optimizergaegan( preds=tf.reshape(model.x_tilde, [-1]), labels=tf.reshape( tf.sparse_tensor_to_dense(placeholders['adj_orig'], validate_indices=False), [-1]), #comm_label=placeholders["comm_label"], model=model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm, global_step=global_steps, new_learning_rate=new_learning_rate) # init the sess sess = tf.Session() sess.run(tf.global_variables_initializer()) saver = "" var_list = tf.global_variables() var_list = [ var for var in var_list if ("encoder" in var.name) or ('generate' in var.name) ] saver = tf.train.Saver(var_list, max_to_keep=10) if if_save_model: 
os.mkdir(os.path.join(checkpoints_dir_base, current_time)) saver.save(sess, checkpoints_dir) # save the graph if restore_trained_our: checkpoints_dir_our = "./checkpoints" checkpoints_dir_our = os.path.join(checkpoints_dir_our, FLAGS.trained_our_path) # checkpoints_dir_meta = os.path.join(checkpoints_dir_base, FLAGS.trained_our_path, # FLAGS.trained_our_path + ".meta") #saver.restore(sess,tf.train.latest_checkpoint(checkpoints_dir_our)) saver.restore( sess, os.path.join("./checkpoints", "191215231708", "191215231708-1601")) print("model_load_successfully") # else: # if not restore the original then restore the base dis one. # checkpoints_dir_base = os.path.join("./checkpoints/base", FLAGS.trained_base_path) # saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir_base)) feed_dict = construct_feed_dict(adj_norm, adj_label, feature_new, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict) #### save new_adj without norm############# if restore_trained_our: modified_adj = get_new_adj(feed_dict, sess, model) modified_adj = sp.csr_matrix(modified_adj) sp.save_npz("transfer_new/transfer_1216_1/qq_5000_gaegan_new.npz", modified_adj) sp.save_npz("transfer_new/transfer_1216_1/qq_5000_gaegan_ori.npz", adj_new) print("save the loaded adj") # print("before training generator") ##################################################### ## get all variables in the model def model_summary(): model_vars = tf.trainable_variables() slim.model_analyzer.analyze_vars(model_vars, print_info=True) model_summary() ##################################################### G_loss_min = 1000 for epoch in range(FLAGS.epochs): t = time.time() # run Encoder's optimizer #sess.run(opt.encoder_min_op, feed_dict=feed_dict) # run G optimizer on trained model if restore_trained_our: sess.run(opt.G_min_op, feed_dict=feed_dict, options=run_options) else: # it is the new model if epoch < FLAGS.epochs: sess.run(opt.G_min_op, feed_dict=feed_dict, options=run_options) # ## ## if epoch % 50 == 0: print("Epoch:", '%04d' % (epoch + 1), "time=", "{:.5f}".format(time.time() - t)) G_loss, laplacian_para, new_learn_rate_value = sess.run( [opt.G_comm_loss, opt.reg, new_learning_rate], feed_dict=feed_dict, options=run_options) #new_adj = get_new_adj(feed_dict, sess, model) new_adj = model.new_adj_output.eval(session=sess, feed_dict=feed_dict) temp_pred = new_adj.reshape(-1) #temp_ori = adj_norm_sparse.todense().A.reshape(-1) temp_ori = adj_label_sparse.todense().A.reshape(-1) mutual_info = normalized_mutual_info_score(temp_pred, temp_ori) print( "Step: %d,G: loss=%.7f ,Lap_para: %f ,info_score = %.6f, LR=%.7f" % (epoch, G_loss, laplacian_para, mutual_info, new_learn_rate_value)) ## here is the debug part of the model################################# new_features, reg_trace, reg_log, reward_ratio, node_per, fea_per = sess.run( [ model.new_fliped_features, opt.reg_trace, opt.reg_log, opt.percentage_fea, model.node_per, model.fea_per ], feed_dict=feed_dict) print("reg_trace is:") print(reg_trace) print("reg_log is:") print(reg_log) print("reward_percentage") print(reward_ratio) print("New features") print(new_features[5, :20]) print("node_percent") print(node_per) print("fea_per") print(fea_per) new_features_csr = sp.csr_matrix(new_features) ########################################## #';# check the D_loss_min if (G_loss < G_loss_min) and (epoch > 1000) and (if_save_model): saver.save(sess, checkpoints_dir, global_step=epoch, write_meta_graph=False) 
print("min G_loss new") if G_loss < G_loss_min: G_loss_min = G_loss if (epoch % 200 == 1) and if_save_model: saver.save(sess, checkpoints_dir, global_step=epoch, write_meta_graph=False) print("Epoch:", '%04d' % (epoch + 1), "time=", "{:.5f}".format(time.time() - t)) saver.save(sess, checkpoints_dir, global_step=FLAGS.epochs, write_meta_graph=True) print("Optimization Finished!") feed_dict.update({placeholders['dropout']: 0}) new_adj = get_new_adj(feed_dict, sess, model) new_adj = new_adj - np.diag(np.diag(new_adj)) new_adj_sparse = sp.csr_matrix(new_adj) print((abs(new_adj_sparse - new_adj_sparse.T) > 1e-10).nnz == 0) # new_adj_norm, new_adj_norm_sparse = preprocess_graph(new_adj) # new_adj_norm_sparse_csr = new_adj_norm_sparse.tocsr() # modified_model = GCN.GCN(sizes, new_adj_norm_sparse_csr, features_csr, with_relu=True, name="surrogate", gpu_id=gpu_id) # modified_model.train(new_adj_norm_sparse_csr, split_train, split_val, node_labels) # modified_acc = modified_model.test(split_unlabeled, node_labels, new_adj_norm_sparse_csr) testacc_new, valid_acc_new = GCN.run(FLAGS.dataset, new_adj_sparse, features_csr, name="modified") new_adj = get_new_adj(feed_dict, sess, model) new_adj = new_adj - np.diag(np.diag(new_adj)) new_adj_sparse = sp.csr_matrix(new_adj) testacc_new2, valid_acc_new = GCN.run(FLAGS.dataset, adj_new, new_features_csr, name="modified2") new_adj = get_new_adj(feed_dict, sess, model) new_adj = new_adj - np.diag(np.diag(new_adj)) new_adj_sparse = sp.csr_matrix(new_adj) testacc_new3, valid_acc_new = GCN.run(FLAGS.dataset, new_adj_sparse, new_features_csr, name="modified3") #np.save("./data/hinton/hinton_new_adj_48_0815.npy", new_adj) #roc_score, ap_score = get_roc_score(test_edges, test_edges_false,feed_dict, sess, model) ##### The final results #### print("*" * 30) print("the final results:\n") print("*" * 30) print("The clean acc is: ") print(testacc_clean) print("*#" * 15) print("The original acc is: ") print(testacc) print("*#" * 15) print("The only modify adj acc is : ") print(testacc_new) print("*#" * 15) print("The only modify feature acc is : ") print(testacc_new2) print("*#" * 15) print("The modify both adj and feature and acc is : ") print(testacc_new3) return new_adj, testacc_clean, testacc, testacc_new, testacc_new2, testacc_new3
showed_target_idx = 0  # the index of the group of targets you want to show
run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
###################################
### read and process the graph
model_str = FLAGS.model
dataset_str = FLAGS.dataset
# Load data
# _A_obs, _X_obs, _z_obs = utils.load_npz('data/citeseer.npz')
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    "citeseer")
# _A_obs = _A_obs + _A_obs.T  # turn GCN_ori into GCN
# _A_obs[_A_obs > 1] = 1
# adj = _A_obs
adj_norm, adj_norm_sparse = preprocess_graph(adj)
# _K = _z_obs.max() + 1  # number of classes
_K = y_train.shape[1]
features_normalized = normalize(features.tocsr(), axis=0, norm='max')
features = sp.csr_matrix(features_normalized)
# adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
# adj = adj_train
if FLAGS.features == 0:
    features = sp.identity(features.shape[0])  # featureless

# Some preprocessing
placeholders = {
    'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
    'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
def train(unused): if_drop_edge = True if_save_model = not FLAGS.test if_train_dis = False # if train the community detection while training the generator part restore_trained_our = FLAGS.test showed_target_idx = 0 # the target index group of targets you want to show ################################## ### read and process the graph model_str = FLAGS.model dataset_str = FLAGS.dataset # Load data if FLAGS.dataset == "dblp": adj = sp.load_npz("data/dblp/dblp_medium_adj.npz") features = np.load("data/dblp/dblp_medium_features.npy") features_normlize = normalize(features, axis=0, norm='max') features = sp.csr_matrix(features_normlize) target_list = np.load("data/dblp/dblp_medium_label.npy") elif FLAGS.dataset == "finance": adj = sp.load_npz('./data/finance/Finance_large_adj.npz') features = np.load("data/finance/Finance_large_features.npy") features_normlize = normalize(features, axis=0, norm='max') features = sp.csr_matrix(features_normlize) target_list = np.load("data/finance/Finance_large_label.npy") # Store original adjacency matrix (without diagonal entries) for later a = 1 adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adj_norm, adj_norm_sparse = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] cost_val = [] acc_val = [] cost_val = [] acc_val = [] val_roc_score = [] adj_label = adj_orig + sp.eye(adj.shape[0]) adj_label_sparse = adj_label adj_label = sparse_to_tuple(adj_label) if_drop_edge = True ## set the checkpoint path checkpoints_dir_base = "./checkpoints" current_time = datetime.datetime.now().strftime("%y%m%d%H%M%S") checkpoints_dir = os.path.join(checkpoints_dir_base, current_time, current_time) tf.reset_default_graph() global_steps = tf.get_variable('global_step', trainable=False, initializer=0) new_learning_rate = tf.train.exponential_decay(FLAGS.learn_rate_init, global_step=global_steps, decay_steps=10000, decay_rate=0.98) new_learn_rate_value = FLAGS.learn_rate_init ## set the placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float32, name="ph_features"), 'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"), 'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"), 'dropout': tf.placeholder_with_default(0., shape=(), name="ph_dropout"), } # build models model = None if model_str == "cdattack": model = cdattack(placeholders, num_features, num_nodes, features_nonzero, new_learning_rate, target_list, FLAGS.alpha, FLAGS.comm_name) model.build_model() pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) opt = 0 # Optimizer with tf.name_scope('optimizer'): if model_str == 'cdattack': opt = Optimizercdattack(preds=tf.reshape(model.x_tilde, [-1]), labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm, target_list=target_list, global_step=global_steps, new_learning_rate=new_learning_rate) # init the sess sess = tf.Session() sess.run(tf.global_variables_initializer()) saver = "" var_list = tf.global_variables() saver = tf.train.Saver(var_list, max_to_keep=10) if if_save_model: os.mkdir(os.path.join(checkpoints_dir_base, current_time)) 
saver.save(sess, checkpoints_dir) # save the graph if restore_trained_our: checkpoints_dir_our = "./checkpoints" checkpoints_dir_our = os.path.join(checkpoints_dir_our, FLAGS.trained_our_path) saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir_our)) print("model_load_successfully") feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict) modified_adj = get_new_adj(feed_dict, sess, model) modified_adj = sp.csr_matrix(modified_adj) ##################################################### G_loss_min = 1000 if FLAGS.test == False: for epoch in range(FLAGS.epochs): t = time.time() if restore_trained_our: sess.run(opt.G_min_op, feed_dict=feed_dict) else: # it is the new model if epoch >= int(FLAGS.epochs / 2): sess.run(opt.G_min_op, feed_dict=feed_dict) if if_train_dis == True: sess.run(opt.D_min_op, feed_dict=feed_dict) # run D optimizer if epoch < int(FLAGS.epochs / 2): sess.run(opt.D_min_op_clean, feed_dict=feed_dict) if epoch % 50 == 0: print("Epoch:", '%04d' % (epoch + 1), "time=", "{:.5f}".format(time.time() - t)) comm_loss_clean, comm_loss, G_loss, new_learn_rate_value = sess.run( [ opt.D_mincut_loss_clean, opt.D_mincut_loss, opt.G_comm_loss, new_learning_rate ], feed_dict=feed_dict) new_adj = model.new_adj_output.eval(session=sess, feed_dict=feed_dict) temp_pred = new_adj.reshape(-1) temp_ori = adj_label_sparse.todense().A.reshape(-1) print( "Step %d:Loss Lu_clean = %.7f , Loss Lu = %.7f Loss Lg: loss=%.7f , LR=%.7f" % (epoch, comm_loss_clean, comm_loss, G_loss, new_learn_rate_value)) ## check the D_loss_min if (G_loss < G_loss_min) and ( epoch > int(FLAGS.epochs / 2) + 1) and (if_save_model): saver.save(sess, checkpoints_dir, global_step=epoch, write_meta_graph=False) print("min G_loss new") if G_loss < G_loss_min: G_loss_min = G_loss if (epoch % 200 == 0) and if_save_model: saver.save(sess, checkpoints_dir, global_step=epoch, write_meta_graph=False) print("Save the model at epoch:", '%04d' % (epoch + 1)) if if_save_model: saver.save(sess, checkpoints_dir, global_step=FLAGS.epochs, write_meta_graph=False) print("Optimization Finished!") new_adj = get_new_adj(feed_dict, sess, model) ##### The final results ###### feed_dict.update({placeholders['dropout']: 0}) pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict) print("*" * 15) print("The modified matrics") print_M1(target_list, pred_dis_res, FLAGS.n_clusters) print("*" * 15) print_M2(target_list, pred_dis_res, FLAGS.n_clusters) print("*" * 15) new_adj = get_new_adj(feed_dict, sess, model) x_tilde_out = model.new_adj_output.eval(session=sess, feed_dict=feed_dict) temp_pred = new_adj.reshape(-1) temp_ori = adj_norm_sparse.todense().A.reshape(-1) return
# pre process data # print("pre processing data...") posts_data = preprocessing.preprocess_text(posts_data) users_data = preprocessing.preprocess_text(users_data) # create network with topics # print("create network") topics = graph.get_topics(users_data, 0.1, 5) network_file_name = SOURCE / 'outputs/bullies_network.csv' graph.create_csv_network_from_topics(network_file_name, topics) network_graph = graph.create_graph(network_file_name) # # pre process network # print("pre processing network...") network_graph = preprocessing.preprocess_graph(network_graph, 0.1) #todo change back to 0.1 graph.graph_attributes(network_graph) # extract nlp features # print("extract nlp features...") feature_list = [ 'post_length', 'tfidf', 'topics', 'screamer', 'words', 'off_dis', 'not_off_dis' ] X_nlp = nlp_feature_extractions.extract_features(users_data, feature_list) y_nlp = (users_data['cb_level'] == 3).astype(int) X_users = nlp_feature_extractions.extract_number_of_posts(posts_data) X_nlp = X_nlp.merge(X_users, on='writer') # extract network features # print("extract network features...")
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj)
adj_Rs = get_adj_01(adj_train.toarray())
adj = adj_train
adj_R = [sp.csr_matrix(adj_one) for adj_one in adj_Rs]

# If the sampled features are not used, fall back to the identity matrix as the feature matrix
if FLAGS.features == 0:
    features = sp.identity(features.shape[0])

# Preprocessing; the main logic lives in the preprocess module
adj_R_norm = [preprocess_graph(one_adj) for one_adj in adj_R]

# placeholders
placeholders = {
    'features': tf.sparse_placeholder(tf.float32),
    # 20 adjacency placeholders
    'adj': [tf.sparse_placeholder(tf.float32) for _ in range(20)],
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'dropout': tf.placeholder_with_default(0., shape=())
}

num_nodes = adj.shape[0]
features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]
num_nodes = adj.shape[0]  # number of nodes in adjacency matrix
num_features = features_shape[1]  # number of features (columns of features matrix)
features_nonzero = features_tuple[1].shape[0]  # number of non-zero entries in features matrix (or length of values list)

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

np.random.seed(0)  # IMPORTANT: guarantees consistent train/test splits
adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj, test_frac=.3, val_frac=.1)

# Normalize adjacency matrix
adj_norm = preprocess_graph(adj_train)

# Add in diagonals
adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)

# Inspect train/test split
print("Total nodes:", adj.shape[0])
print("Total edges:", int(adj.nnz / 2))  # adj is symmetric, so nnz (num non-zero) = 2*num_edges
print("Training edges (positive):", len(train_edges))
print("Training edges (negative):", len(train_edges_false))
print("Validation edges (positive):", len(val_edges))
print("Validation edges (negative):", len(val_edges_false))
print("Test edges (positive):", len(test_edges))
print("Test edges (negative):", len(test_edges_false))

# Define hyperparameters
def main(args):
    """ Train GAE """
    # Compute the device upon which to run
    device = torch.device("cuda" if args.use_cuda else "cpu")

    print("Using {} dataset".format(args.dataset_str))
    # Load data
    np.random.seed(1)
    adj, features = load_data(args.dataset_str)
    N, D = features.shape

    # Store original adjacency matrix (without diagonal entries)
    adj_orig = adj

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)

    # Some preprocessing
    adj_train_norm = preprocess_graph(adj_train)
    adj_train_norm = make_sparse(adj_train_norm)
    adj_train_labels = torch.FloatTensor(adj_train +
                                         sp.eye(adj_train.shape[0]).todense())
    features = make_sparse(features)

    n_edges = adj_train_labels.sum()

    data = {
        'adj_norm': adj_train_norm,
        'adj_labels': adj_train_labels,
        'features': features,
    }

    gae = GAE(data, n_hidden=32, n_latent=16, dropout=args.dropout)

    # Send the model and data to the available device
    gae.to(device)
    data['adj_norm'] = data['adj_norm'].to(device)
    data['adj_labels'] = data['adj_labels'].to(device)
    data['features'] = data['features'].to(device)

    optimizer = optim.Adam(gae.parameters(),
                           lr=args.lr,
                           betas=(0.95, 0.999),
                           weight_decay=args.weight_decay)

    # Results
    results = defaultdict(list)

    # Full batch training loop
    for epoch in range(args.num_epochs):

        t = time.time()
        gae.train()
        optimizer.zero_grad()

        # Forward pass
        output = gae(data['features'], data['adj_norm'])

        # Compute the loss
        logits = output
        targets = data['adj_labels']
        loss = gae.norm * F.binary_cross_entropy_with_logits(
            logits, targets, pos_weight=gae.pos_weight)

        loss.backward()
        optimizer.step()

        results['train_elbo'].append(loss.item())

        gae.eval()
        emb = gae.get_embeddings(data['features'], data['adj_norm'])
        accuracy, roc_curr, ap_curr = eval_gae(val_edges, val_edges_false, emb,
                                               adj_orig)
        results['accuracy_train'].append(accuracy)
        results['roc_train'].append(roc_curr)
        results['ap_train'].append(ap_curr)

        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(loss.item()),
              "train_acc=", "{:.5f}".format(accuracy),
              "val_roc=", "{:.5f}".format(roc_curr),
              "val_ap=", "{:.5f}".format(ap_curr))

        # Test loss
        if epoch % args.test_freq == 0:
            with torch.no_grad():
                gae.eval()
                emb = gae.get_embeddings(data['features'], data['adj_norm'])
                accuracy, roc_score, ap_score = eval_gae(
                    test_edges, test_edges_false, emb, adj_orig)
                results['accuracy_test'].append(accuracy)
                results['roc_test'].append(roc_score)
                results['ap_test'].append(ap_score)
            gae.train()

    print("Optimization Finished!")

    with torch.no_grad():
        # Test loss
        gae.eval()
        emb = gae.get_embeddings(data['features'], data['adj_norm'])
        accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false,
                                                 emb, adj_orig)
        print('Test Accuracy: ' + str(accuracy))
        print('Test ROC score: ' + str(roc_score))
        print('Test AP score: ' + str(ap_score))

    # Plot
    plot_results(results,
                 args.test_freq,
                 path=args.dataset_str + "_GAE_results.png")
def format_data_ui(data_name, has_features=1): # Load data fpath_dir = '../data/useritem/%s/' % data_name fpath_input = '%sinput.pkl' % fpath_dir with open(fpath_input, 'rb') as f: (n_users, n_items, item_features, train, valid, test) = pkl.load( f) # here features is not the returned features ui_graph = defaultdict(list) ii_graph = defaultdict(set) ii_graph_list = defaultdict(list) # dict() for edge, value in train.items(): u, i = edge ui_graph[u].append(i) # edge_dict = defaultdict(int) tmp_u_number = len(ui_graph) for index, (u, ilist) in enumerate(ui_graph.items()): if index % 500 == 0: print('user number: %d/%d' % (index, tmp_u_number)) for i in ilist: for j in ilist: # ii_graph[i].add(j) if i != j: edge_dict[(i, j)] += 1 if len(edge_dict) % 5000 == 0: print('len(edge_dict):%d' % len(edge_dict)) print('len(edge_dict):%d' % len(edge_dict)) edge_visit_thresh = 2 for edge, visit_num in edge_dict.items(): i1, i2 = edge if visit_num >= edge_visit_thresh: ii_graph_list[i1].append(i2) # = list(iset) print('%s:get ii mat' % (datetime.datetime.now().isoformat())) adj = nx.adjacency_matrix(nx.from_dict_of_lists(ii_graph_list)) print('adj shape:', adj.get_shape()) # features: lil_matrix features = item_features.tolil() # true_labels: the neighbor truth : not used for me and arga... true_labels = None # --transform over, now follows the original procedure # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj) adj = adj_train print('%s:mask test edges over' % (datetime.datetime.now().isoformat())) if has_features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) items = [adj, num_features, num_nodes, features_nonzero, pos_weight, norm, adj_norm, adj_label, features, true_labels, train_edges, val_edges, val_edges_false, test_edges, test_edges_false, adj_orig] feas = {} for item in items: feas[retrieve_name(item)] = item return feas
def main(args): dataset = args.dataset emb_output_dir = args.output epochs = args.epochs agg = args.agg p = args.p tr = args.tr lam = args.lam lose_func = args.loss # Preprocess dataset adj, views_features = load_data(dataset, num_views=3) adj_orig = adj adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() # Calculate pairwise simlarity. views_sim_matrix = {} views_feature_matrix = {} for view in list(views_features.keys()): feature_matrix = csc_matrix.todense(views_features[view]) views_feature_matrix.update({view:feature_matrix}) kernal = "rbf" if lose_func == 'all': attr_sim = cal_attr_sim(views_feature_matrix, dataset) else: attr_sim = 0 # split nodes to train, valid and test datasets, # remove test edges from train adjacent matrix. adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(dataset, adj) print("Masking edges Done!") adj = adj_train nx_G = nx.from_numpy_array(adj.toarray()) num_nodes = adj.shape[0] adj_norm = preprocess_graph(adj) views_features_num = {} views_features_nonzero = {} for view in list(views_features.keys()): views_features[view] = sparse_to_tuple(views_features[view].tocoo()) views_features_num.update({view:views_features[view][2][1]}) views_features_nonzero.update({view:views_features[view][1].shape[0]}) # Build model MagCAE = {} for view in list(views_features.keys()): x,y = views_features[view][2][0], views_features[view][2][1] model = GAE(y, views_features_nonzero[view], adj_norm, math.ceil(2*p*y), math.ceil(p*y)) MagCAE.update({view:model}) # Loss function and optimizer. # loss weight taken by each nodes to the total loss. pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) /adj.sum() norm = adj.shape[0] * adj.shape[0] / float(adj.shape[0] * adj.shape[0] - adj.sum())*2 optimizer = tf.keras.optimizers.Adam() adj_targ = adj_train + sp.eye(adj_train.shape[0]) adj_targ = sparse_to_tuple(adj_targ) indices= np.array(adj_targ[0]) values = np.array(adj_targ[1]) dense_shape = np.array(adj_targ[2]) sparse_targ = tf.SparseTensor(indices = indices, values = values, dense_shape = dense_shape) sparse_targ = tf.cast(sparse_targ, dtype=tf.float32) adj_targ = tf.sparse.to_dense(sparse_targ) adj_targ = tf.reshape(adj_targ,[-1]) # Train and Evaluate Model # Training Loop: # In each epoch: views - > view_embedding -> aggregate embedding -> total loss -> update gradients decoder = Decoder(100) for epoch in range(epochs): loss = 0 start = time.time() with tf.GradientTape() as tape: ag_embedding ={} for VAE in list(MagCAE.keys()): v_embedding, a_hat = MagCAE[VAE](views_features[VAE]) ag_embedding.update({VAE:v_embedding}) # aggregate embeddings embedding, aggregator = aggregate_embeddings(ag_embedding, agg) # reconstruct a_hat a_hat = decoder(embedding) loss += loss_function(a_hat, adj_targ, pos_weight, norm, attr_sim, embedding, num_nodes, lam, lose_func) if agg == "weighted_concat": variables = MagCAE['view1'].trainable_variables + MagCAE['view2'].trainable_variables + MagCAE['view3'].trainable_variables + aggregator.trainable_variables gradients = tape.gradient(loss, variables) optimizer.apply_gradients(zip(gradients, variables)) # Evaluate on validate set embedding = np.array(embedding) roc_cur, ap_cur, _, _ = evaluate(val_edges, val_edges_false, adj_orig, embedding) print("Epoch {}: Val_Roc {:.4f}, Val_AP {:.4f}, Time Consumed {:.2f} sec\n".format(epoch+1, roc_cur, ap_cur, time.time()-start)) print("Training Finished!") # Evaluation Result on test 
    test_embedding = {}
    for VAE in list(MagCAE.keys()):
        v_embedding, a_hat = MagCAE[VAE](views_features[VAE])
        test_embedding.update({VAE: v_embedding})

    # aggregate embeddings
    embedding, aggregator = aggregate_embeddings(test_embedding, agg)
    embedding = np.array(embedding)  # embedding is a tensor, convert to np array
    test_roc, test_ap, fpr, tpr = evaluate(test_edges, test_edges_false, adj_orig, embedding)

    print("MagCAE test result on {}".format(dataset))
    print("Test Roc: {}, Test AP: {}, P: {}, Training Ratio: {}, Lambda: {}.".format(test_roc, test_ap, p, tr, lam))
def format_data(data_name, seq_len, time_decay): # Load data adjs, features = load_data(data_name, time_decay) # Store original adjacency matrix (without diagonal entries) for later adj_origs = [] pos_weights = [] norms = [] adj_norms = [] features_sp = [] features_nonzeros = [] num_node = np.array(adjs[0]).shape[1] feature_dim = np.array(features[0]).shape[1] for adj, feature in zip(adjs, features): adj_orig = sparse_to_tuple(adj) pos_weight = float(num_node * num_node - adj_orig[1].sum()) / adj_orig[1].sum() norm = num_node * num_node / float( (num_node * num_node - adj_orig[1].sum()) * 2) feature = sparse_to_tuple(feature) features_nonzero = feature[1].shape[0] adj_norm = preprocess_graph(adj) adj_origs.append(adj_orig) pos_weights.append(pos_weight) norms.append(norm) features_sp.append(feature) features_nonzeros.append(features_nonzero) adj_norms.append(adj_norm) batch_size = len(adj_origs) - seq_len temporal_adj_origs = [] temporal_pos_weights = [] temporal_norms = [] struct_adj_origs = [] struct_pos_weights = [] struct_norms = [] struct_adj_norms = [] struct_features = [] struct_features_nonzeros = [] for i in range(batch_size): temporal_adj_origs.append(adj_origs[i + 1:i + 1 + seq_len]) temporal_pos_weights.append(pos_weights[i + 1:i + 1 + seq_len]) temporal_norms.append(norms[i + 1:i + 1 + seq_len]) struct_adj_origs.append(adj_origs[i:i + seq_len]) struct_pos_weights.append(pos_weights[i:i + seq_len]) struct_norms.append(norms[i:i + seq_len]) struct_adj_norms.append(adj_norms[i:i + seq_len]) struct_features.append(features_sp[i:i + seq_len]) struct_features_nonzeros.append(features_nonzeros[i:i + seq_len]) # temporal_adj_origs = adj_origs[1: 1+seq_len] # temporal_pos_weights = pos_weights[1: 1+seq_len] # temporal_norms = norms[1: 1+seq_len] # # struct_adj_origs = adj_origs[0: 0+seq_len] # struct_pos_weights = pos_weights[0: 0+seq_len] # struct_norms = norms[0: 0+seq_len] # struct_adj_norms = adj_norms[0: 0+seq_len] # struct_features = features_sp[0: 0+seq_len] # struct_features_nonzeros = features_nonzeros[0: 0+seq_len] feas = { 'temporal_adj_origs': temporal_adj_origs, 'temporal_pos_weights': temporal_pos_weights, 'temporal_norms': temporal_norms, 'num_node': num_node, 'feature_dim': feature_dim, 'batch_size': batch_size, 'struct_adj_origs': struct_adj_origs, 'struct_features': struct_features, 'struct_features_nonzeros': struct_features_nonzeros, 'struct_adj_norms': struct_adj_norms, 'struct_pos_weights': struct_pos_weights, 'struct_norms': struct_norms, 'adj_norms': adj_norms, 'features': features_sp } return feas
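# Quick illustration of the sliding-window split built in format_data() above
# (illustration only, not part of the original code): with T snapshots and a
# window of seq_len, batch i uses snapshots [i, i+seq_len) on the structural
# side and the shifted window [i+1, i+1+seq_len) on the temporal side.
T, seq_len = 6, 3
snapshots = list(range(T))
for i in range(T - seq_len):
    struct_window = snapshots[i:i + seq_len]
    temporal_window = snapshots[i + 1:i + 1 + seq_len]
    print(struct_window, '->', temporal_window)
# [0, 1, 2] -> [1, 2, 3]
# [1, 2, 3] -> [2, 3, 4]
# [2, 3, 4] -> [3, 4, 5]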
# temp_adj = adj.todense()
# temp_feature = features.todense()

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

adj_train = adj

if FLAGS.features == 0:
    features = sp.identity(features.shape[0])  # featureless

logging.info('preprocessing data')
# Some preprocessing
adj_norm = preprocess_graph(adj)
logging.info('done preprocessing data')

# Define placeholders
placeholders = {
    'features': tf.sparse_placeholder(tf.float32),
    'adj': tf.sparse_placeholder(tf.float32),
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'dropout': tf.placeholder_with_default(0., shape=())
}

num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

logging.info('create model')
Fa_train = sparse_to_tuple(fea_train.tocoo())
Fa_train = tf.SparseTensorValue(Fa_train[0], Fa_train[1], Fa_train[2])

# Define placeholders
placeholders = {
    'Fn': tf.sparse_placeholder(tf.float32, (num_nodes, num_nodes + num_features)),
    'Fa': tf.sparse_placeholder(tf.float32, (num_nodes, num_features)),
    'adj_orig': tf.sparse_placeholder(tf.float32, (num_nodes, num_nodes)),
    'features_orig': tf.sparse_placeholder(tf.float32, (num_nodes, num_features)),
    'dropout': tf.placeholder_with_default(0., shape=())
}

# Create model
adj_train_mat = preprocess_graph(adj_train)
adj_train_mat = tf.cast(
    tf.SparseTensor(adj_train_mat[0], adj_train_mat[1], adj_train_mat[2]), tf.float32)
y_train = tf.cast(y_train, tf.float32)
model = SCVA(placeholders, adj_train_mat, num_features, num_nodes, features_nonzero,
             num_labels, labels_pos, y_train, one_gcn)

# Re-weighting and normalisation constants for the structure (adjacency) reconstruction loss ...
pos_weight_u = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm_u = adj.shape[0] * adj.shape[0] / float(
    (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

# ... and for the attribute (feature) reconstruction loss.
pos_weight_a = float(features[2][0] * features[2][1] - len(features[1])) / len(features[1])
norm_a = features[2][0] * features[2][1] / float(
    (features[2][0] * features[2][1] - len(features[1])) * 2)
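# Worked example (not part of the original source) of the re-weighting constants computed
# above: for an N-node graph with E non-zero adjacency entries, pos_weight = (N*N - E) / E
# up-weights the rare positive entries, and norm = N*N / (2 * (N*N - E)) rescales the mean
# cross-entropy. The toy matrix below is illustrative only.
import numpy as np
import scipy.sparse as sp

adj_toy = sp.csr_matrix(np.array([[0, 1, 0],
                                  [1, 0, 1],
                                  [0, 1, 0]], dtype=np.float32))
N = adj_toy.shape[0]                   # 3 nodes -> 9 possible entries
E = adj_toy.sum()                      # 4 non-zero entries
pos_weight = float(N * N - E) / E      # (9 - 4) / 4 = 1.25
norm = N * N / float((N * N - E) * 2)  # 9 / (5 * 2) = 0.9
print(pos_weight, norm)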
def main(args):
    """ Train GAE """
    print("Using {} dataset".format(args.dataset_str))

    # Load data
    np.random.seed(1)
    adj, features = load_data(args.dataset_str)
    N, D = features.shape

    # Store original adjacency matrix (without diagonal entries)
    adj_orig = adj

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)

    # Some preprocessing
    adj_train_norm = preprocess_graph(adj_train)
    adj_train_norm = Variable(make_sparse(adj_train_norm))
    adj_train_labels = Variable(
        torch.FloatTensor(adj_train + sp.eye(adj_train.shape[0]).todense()))
    features = Variable(make_sparse(features))

    n_edges = adj_train_labels.sum()

    data = {
        'adj_norm': adj_train_norm,
        'adj_labels': adj_train_labels,
        'features': features,
    }

    gae = GAE(data,
              n_hidden=32,
              n_latent=16,
              dropout=args.dropout,
              subsampling=args.subsampling)

    optimizer = Adam({"lr": args.lr, "betas": (0.95, 0.999)})

    svi = SVI(gae.model, gae.guide, optimizer, loss="ELBO")

    # Results
    results = defaultdict(list)

    # Full batch training loop
    for epoch in range(args.num_epochs):
        # Initialize loss accumulator
        epoch_loss = 0.
        # Do ELBO gradient step and accumulate loss
        epoch_loss += svi.step()

        # Report training diagnostics
        if args.subsampling:
            normalized_loss = epoch_loss / float(2 * n_edges)
        else:
            normalized_loss = epoch_loss / (2 * N * N)

        results['train_elbo'].append(normalized_loss)

        # Training loss
        emb = gae.get_embeddings()
        accuracy, roc_curr, ap_curr = eval_gae(val_edges, val_edges_false, emb, adj_orig)
        results['accuracy_train'].append(accuracy)
        results['roc_train'].append(roc_curr)
        results['ap_train'].append(ap_curr)

        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(normalized_loss),
              "train_acc=", "{:.5f}".format(accuracy),
              "val_roc=", "{:.5f}".format(roc_curr),
              "val_ap=", "{:.5f}".format(ap_curr))

        # Test loss
        if epoch % args.test_freq == 0:
            emb = gae.get_embeddings()
            accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false, emb, adj_orig)
            results['accuracy_test'].append(accuracy)
            results['roc_test'].append(roc_score)  # was roc_curr: record the test score, not the validation one
            results['ap_test'].append(ap_score)    # was ap_curr

    print("Optimization Finished!")

    # Test loss
    emb = gae.get_embeddings()
    accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false, emb, adj_orig)
    print('Test Accuracy: ' + str(accuracy))
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))

    # Plot
    plot_results(results, args.test_freq, path=args.dataset_str + "_results.png")
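# Sketch (not the repository's actual code): make_sparse() used above is assumed to turn a
# scipy sparse matrix into a torch sparse tensor; a typical conversion, shown here under
# that assumption, looks like the following.
import numpy as np
import scipy.sparse as sp
import torch

def make_sparse_sketch(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor (illustrative only)."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape)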
def train(placeholders, model, opt, adj_train, train_edges, val_edges, val_edges_false,
          test_edges, test_edges_false, features, sess, name="single_fold"):

    adj = adj_train
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()  # N/P
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)  # (N+P) x (N+P) / (2N)

    print(adj_train.shape)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2)
    adj_norm = preprocess_graph(adj)

    # Session initialize
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    val_roc_score = []
    best_validation = 0.0

    num_nodes = adj.shape[0]

    edges_for_loss = np.ones((num_nodes * num_nodes), dtype=np.float32)
    ignore_edges = []
    edges_to_ignore = np.concatenate((val_edges, val_edges_false, test_edges, test_edges_false), axis=0)
    for e in edges_to_ignore:
        ignore_edges.append(e[0] * num_nodes + e[1])
    edges_for_loss[ignore_edges] = 0
    num_train = num_nodes * num_nodes - len(ignore_edges)

    last_best_epoch = 0

    # Train model
    for epoch in range(FLAGS.epochs):
        t = time.time()

        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        feed_dict.update({placeholders['is_training']: True})
        feed_dict.update({placeholders['norm']: norm})
        feed_dict.update({placeholders['pos_weight']: pos_weight})
        feed_dict.update({placeholders['edges_for_loss']: edges_for_loss})
        feed_dict.update({placeholders['num_train']: num_train})

        avg_x_cost = 0
        if model_str == 'dglfrm':
            outs = sess.run([opt.opt_op, opt.cost, opt.accuracy, opt.x_loss, model.a, model.b,
                             model.z_real, model.z_discrete], feed_dict=feed_dict)
            # a, b are global parameters
            a, b = np.log(1 + np.exp(outs[4])), np.log(1 + np.exp(outs[5]))
            a = np.mean(a)
            b = np.mean(b)
            # regularization = round(outs[3], 2)
            regularization = 0
            z_discrete = outs[7]
            z_real = outs[6]
            avg_x_cost = outs[3]
            W = None
        elif model_str == 'dglfrm_b':
            outs = sess.run([opt.opt_op, opt.cost, opt.accuracy, opt.x_loss, model.a, model.b, model.z],
                            feed_dict=feed_dict)
            regularization = 0
            z_discrete = outs[6]
            z_real = None
            avg_x_cost = outs[3]
            W = None

        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]

        adj_rec, z_activated = get_score_matrix(sess, placeholders, feed_dict, model, S=1)
        roc_curr, ap_curr, _ = get_roc_score(adj_rec, val_edges, val_edges_false)

        print("Epoch:", '%03d' % (epoch + 1),
              "cost=", "{:.3f}".format(avg_cost),
              "x_recon_loss=", "{:.2f}".format(avg_x_cost),
              "val_roc=", "{:.3f}".format(roc_curr),
              "val_ap=", "{:.3f}".format(ap_curr),
              'activated_z=', "{:.1f}".format(z_activated),
              "time=", "{:.2f}".format(time.time() - t))

        roc_curr = round(roc_curr, 3)
        val_roc_score.append(roc_curr)

        # Look-ahead epochs: we may need to train for some more epochs due to the
        # nested stochastic nature of the framework.
        if FLAGS.early_stopping != 0 and roc_curr > best_validation:
            # Save model
            print('Saving model')
            saver.save(sess=sess, save_path=save_dir + name)
            best_validation = roc_curr
            last_best_epoch = 0

        if FLAGS.early_stopping != 0 and last_best_epoch > FLAGS.early_stopping:
            break
        else:
            last_best_epoch += 1

    print("Optimization Finished!")

    val_max_index = np.argmax(val_roc_score)
    print('---------------------------------')
    print('Validation ROC Max: {:.3f} at Epoch: {:04d}'.format(val_roc_score[val_max_index], val_max_index))

    qual_file = 'data/qual_' + dataset_str + '_' + model_str
    if model_str == 'dglfrm':
        np.savez(qual_file,
                 z_discrete=np.asarray(z_discrete),
                 z_real=np.asarray(z_real),
                 z_out=np.asarray(np.multiply(np.round(z_discrete), z_real)),
                 adj_rec=adj_rec)
    elif model_str == 'dglfrm_b':
        np.savez(qual_file, z_discrete=np.asarray(z_discrete), adj_rec=adj_rec)

    if FLAGS.early_stopping != 0:
        saver.restore(sess=sess, save_path=(save_dir + name))

    adj_score, z_activated = get_score_matrix(sess, placeholders, feed_dict, model)

    return adj_score, z_activated
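# Illustration (not part of the original source): edges_for_loss in train() masks held-out
# edges out of the reconstruction loss by flattening each (row, col) pair into a row-major
# index over the N*N adjacency, i.e. index = row * num_nodes + col.
num_nodes_toy = 4
edge = (2, 3)                                    # an edge held out for validation/testing
flat_index = edge[0] * num_nodes_toy + edge[1]   # 2 * 4 + 3 = 11
print(flat_index)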