def get_model(model_str, placeholders, num_features, num_nodes,
              features_nonzero):
    """Build the graph auto-encoder selected by ``model_str``.

    Args:
        model_str: ``'gcn_ae'`` selects ``GCNModelAE``; ``'gcn_vae'`` selects
            ``GCNModelVAE``.
        placeholders: Passed through to the model constructor.
        num_features: Passed through to the model constructor.
        num_nodes: Only forwarded to ``GCNModelVAE``.
        features_nonzero: Passed through to the model constructor.

    Returns:
        The constructed model, or ``None`` when ``model_str`` matches neither
        supported name.
    """
    if model_str == 'gcn_ae':
        return GCNModelAE(placeholders, num_features, features_nonzero)
    if model_str == 'gcn_vae':
        return GCNModelVAE(placeholders, num_features, num_nodes,
                           features_nonzero)
    return None
num_nodes = adj.shape[0]
features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

# Create model
# pos_weight is the ratio of zero to non-zero adjacency entries; norm rescales
# the loss by the total number of entries over twice the number of zeros.
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float(
    (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
gae_model = GCNModelAE(placeholders, num_features, features_nonzero, False,
                       FLAGS.bilinear)

# Optimizer
with tf.name_scope('optimizer'):
    opt = OptimizerAE(
        preds=gae_model.reconstructions,
        labels=tf.reshape(
            tf.sparse_tensor_to_dense(
                placeholders['adj'],  # adj_orig in the original implementation
                validate_indices=False),
            [-1]),
        pos_weight=pos_weight,
        norm=norm)

# A single session is enough: the earlier code opened one before building the
# model and then overwrote the handle here without closing it, leaking it.
sess = tf.Session()
# Initialize session
'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] logging.info('create model') # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) logging.info('optimizer') # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'],
def train_gcn(features, adj_train, train_edges, train_edges_false, test_edges,
              test_edges_false):
    """Train a GCN (variational) auto-encoder and return the node embeddings.

    Args:
        features: Sparse feature matrix; replaced by an identity matrix when
            the ``features`` flag is 0.
        adj_train: Sparse training adjacency matrix.
        train_edges: Positive training edges, forwarded to
            ``construct_optimizer_list``.
        train_edges_false: Negative training edges, forwarded to
            ``construct_optimizer_list``.
        test_edges: Unused by this function.
        test_edges_false: Unused by this function.

    Returns:
        The value of ``model.z_mean`` evaluated after the last epoch.

    Raises:
        ValueError: If the ``model`` flag is neither ``'gcn_ae'`` nor
            ``'gcn_vae'``.
    """
    # Settings
    # NOTE(review): the flags are registered on every call; a second call in
    # the same process presumably fails with a duplicate-flag error -- confirm
    # and hoist these definitions to module level if so.
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.')
    flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
    flags.DEFINE_integer('hidden1', 96, 'Number of units in hidden layer 1.')
    flags.DEFINE_integer('hidden2', 48, 'Number of units in hidden layer 2.')
    flags.DEFINE_float('weight_decay', 0.,
                       'Weight for L2 loss on embedding matrix.')
    flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
    flags.DEFINE_string('model', 'gcn_vae', 'Model string.')
    flags.DEFINE_integer('features', 1,
                         'Whether to use features (1) or not (0).')

    model_str = FLAGS.model
    # Fail early with a clear message instead of crashing later on a missing
    # model/optimizer.
    if model_str not in ('gcn_ae', 'gcn_vae'):
        raise ValueError(
            "Unsupported model {!r}; expected 'gcn_ae' or 'gcn_vae'.".format(
                model_str))

    # 1-dim index array, used in cost function to only focus on those
    # interactions with high confidence
    mask_index = construct_optimizer_list(features.shape[0], train_edges,
                                          train_edges_false)

    adj = adj_train

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float64),
        'adj': tf.sparse_placeholder(tf.float64),
        'adj_orig': tf.sparse_placeholder(tf.float64),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Create model
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, features_nonzero)
    else:
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)

    # Class re-weighting is disabled; the masked loss uses unit weights.
    pos_weight = 1
    norm = 1

    # Optimizer
    with tf.name_scope('optimizer'):
        labels = tf.reshape(
            tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                      validate_indices=False), [-1])
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=labels,
                              pos_weight=pos_weight,
                              norm=norm,
                              mask=mask_index)
        else:
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=labels,
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm,
                               mask=mask_index)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # The context manager closes the session (and frees its resources) even if
    # training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(FLAGS.epochs):
            # Construct feed dictionary
            feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                            placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            # Run single weight update
            outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)

            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(outs[1]))

        print("Optimization Finished!")

        # Return embedding for each protein. The feed dict is rebuilt so this
        # also works with zero epochs, and dropout is switched off so the
        # embedding is deterministic rather than computed with training noise.
        feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                        placeholders)
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

    return emb
'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero, FLAGS.hidden1, FLAGS.hidden2, FLAGS.hidden3) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero, FLAGS.hidden1, FLAGS.hidden2) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'],
num_features = adj.shape[1] # Define placeholders placeholders = { 'features': tf.placeholder(tf.float32, [args.batch_size, num_nodes, num_features]), 'adj_norm': tf.placeholder(tf.float32, [args.batch_size, num_nodes, num_nodes]), 'adj_orig': tf.placeholder(tf.float32, [args.batch_size, num_nodes, num_nodes]), 'dropout': tf.placeholder_with_default(0., shape=()) } # Create model model = GCNModelAE(placeholders, num_features, num_nodes, args) # Initialize session sess = tf.Session() # Train model saver = tf.train.Saver() # model_name = "./models/brain_vgae_100_50_autoencoder=False_kl_coefficient=0.001_act=tanh.ckpt" model_name = "./models/brain_vgae_100_50_autoencoder=True.ckpt" print("Analyzing " + model_name) with tf.Session() as sess: saver.restore(sess, model_name) features_batch = np.zeros([args.batch_size, num_nodes, num_features],
def _window_pairs(walk, window_size):
    """Yield ``(center, contexts)`` skip-gram pairs for each position of ``walk``.

    ``contexts`` lists the nodes at most ``window_size`` steps away from the
    center position, excluding the center itself. Positions without any
    context node are skipped. Node ids are converted with ``int``.
    """
    walk_len = len(walk)
    for center_pos in range(walk_len):
        first = max(0, center_pos - window_size)
        last = min(walk_len, center_pos + window_size + 1)
        contexts = [
            int(walk[pos]) for pos in range(first, last) if pos != center_pos
        ]
        if contexts:
            yield int(walk[center_pos]), contexts


def gae_for(args):
    """Train a GCN (variational) auto-encoder, optionally with deepWalk updates.

    Loads the dataset named by ``args.dataset_str``, holds out validation and
    test edges, and trains for ``args.epochs`` epochs. When ``args.dw == 1`` a
    ``SkipGram`` model is additionally trained on random walks each epoch.
    Every 10 epochs the current embedding is clustered with KMeans, scored on
    the test edges and saved under ``logs/``. Final test ROC/AP and clustering
    metrics are written through ``tqdm.write``.

    Args:
        args: Parsed options. Reads ``dataset_str``, ``dw``, ``number_walks``,
            ``full_number_walks``, ``walk_length``, ``window_size``,
            ``context``, ``ns``, ``model``, ``hidden1``, ``hidden2``,
            ``dropout``, ``lr``, ``lr_dw``, ``epochs``, ``n_clusters`` and
            ``plot``.
    """
    import os

    print("Using {} dataset".format(args.dataset_str))
    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(
        args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = mask_test_edges(adj)
    adj = adj_train

    # Before proceeding further, make the structure for doing deepWalk
    if args.dw == 1:
        print('Using deepWalk regularization...')
        G = load_edgelist_from_csr_matrix(adj_orig, undirected=True)
        print("Number of nodes: {}".format(len(G.nodes())))
        num_walks = len(G.nodes()) * args.number_walks
        print("Number of walks: {}".format(num_walks))
        data_size = num_walks * args.walk_length
        print("Data size (walks*length): {}".format(data_size))

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    if args.model == 'gcn_vae':
        model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2,
                            args.dropout)
    else:
        model = GCNModelAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.dw == 1:
        sg = SkipGram(args.hidden2, adj.shape[0])
        optimizer_dw = optim.Adam(sg.parameters(), lr=args.lr_dw)

        # Build the node order used for chunked random walks once, up front.
        # NOTE(review): the shuffle uses an unseeded random.Random(), so the
        # order differs between runs -- confirm this is intended.
        nodes_in_G = list(G.nodes())
        # At least one chunk, otherwise ``epoch % chunks`` divides by zero
        # when there are fewer nodes than walks per node.
        chunks = max(1, len(nodes_in_G) // args.number_walks)
        random.Random().shuffle(nodes_in_G)

    hidden_emb = None
    # Reported as nan until the first deepWalk update has actually run.
    cur_dw_loss = float('nan')
    for epoch in tqdm(range(args.epochs)):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        z, mu, logvar = model(features, adj_norm)

        # deepWalk updates run before the GAE loss is back-propagated below.
        if args.dw == 1:
            sg.train()
            if args.full_number_walks > 0:
                walks = build_deepwalk_corpus(
                    G,
                    num_paths=args.full_number_walks,
                    path_length=args.walk_length,
                    alpha=0,
                    rand=random.Random(SEED))
            else:
                walks = build_deepwalk_corpus_iter(
                    G,
                    num_paths=args.number_walks,
                    path_length=args.walk_length,
                    alpha=0,
                    rand=random.Random(SEED),
                    chunk=epoch % chunks,
                    nodes=nodes_in_G)
            for walk in walks:
                if args.context == 1:
                    # One (center, window contexts) pair per walk position.
                    # Previously the center index was read before the loop
                    # that defines it, which raised on the first walk.
                    pairs = _window_pairs(walk, args.window_size)
                else:
                    # first item in the walk is the starting node
                    pairs = [(int(walk[0]), [
                        int(context_node_idx)
                        for context_node_idx in walk[1:]
                    ])]

                if args.ns == 1:
                    neg_nodes = []
                    # Compare as ints: walk entries may not be ints, while the
                    # sampled candidates always are.
                    pos_nodes = {int(node) for node in walk}
                    while len(neg_nodes) < args.walk_length - 1:
                        rand_node = random.randint(0, n_nodes - 1)
                        if rand_node not in pos_nodes:
                            neg_nodes.append(rand_node)
                    neg_nodes = torch.from_numpy(np.array(neg_nodes)).long()

                # Do actual prediction
                for center_idx, context_idxs in pairs:
                    src_node = torch.from_numpy(np.array([center_idx])).long()
                    tgt_nodes = torch.from_numpy(
                        np.array(context_idxs)).long()
                    optimizer_dw.zero_grad()
                    log_pos = sg(src_node, tgt_nodes, neg_sample=False)
                    if args.ns == 1:
                        loss_neg = sg(src_node, neg_nodes, neg_sample=True)
                        loss_dw = log_pos + loss_neg
                    else:
                        loss_dw = log_pos
                    loss_dw.backward(retain_graph=True)
                    cur_dw_loss = loss_dw.item()
                    optimizer_dw.step()

        loss = loss_function(preds=model.dc(z),
                             labels=adj_label,
                             mu=mu,
                             logvar=logvar,
                             n_nodes=n_nodes,
                             norm=norm,
                             pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.detach().numpy()
        roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges,
                                          val_edges_false)

        if args.dw == 1:
            tqdm.write(
                "Epoch: {}, train_loss_gae={:.5f}, train_loss_dw={:.5f}, val_ap={:.5f}, time={:.5f}"
                .format(epoch + 1, cur_loss, cur_dw_loss, ap_curr,
                        time.time() - t))
        else:
            tqdm.write(
                "Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".
                format(epoch + 1, cur_loss, ap_curr, time.time() - t))

        if (epoch + 1) % 10 == 0:
            tqdm.write("Evaluating intermediate results...")
            kmeans = KMeans(n_clusters=args.n_clusters,
                            random_state=0).fit(hidden_emb)
            predict_labels = kmeans.predict(hidden_emb)
            cm = clustering_metrics(true_labels, predict_labels)
            cm.evaluationClusterModelFromLabel(tqdm)
            roc_score, ap_score = get_roc_score(hidden_emb, adj_orig,
                                                test_edges, test_edges_false)
            tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
            # Make sure the output directory exists before saving.
            os.makedirs('logs', exist_ok=True)
            np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)

    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                        test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    kmeans = KMeans(n_clusters=args.n_clusters,
                    random_state=0).fit(hidden_emb)
    predict_labels = kmeans.predict(hidden_emb)
    cm = clustering_metrics(true_labels, predict_labels)
    cm.evaluationClusterModelFromLabel(tqdm)

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)
def runner(self):
    """Train the two-graph auto-encoder against a discriminator and print top-k hits.

    Builds placeholders for two graphs, creates the discriminator variables on
    ``self``, loads the ``Douban_offline`` and ``Douban_online`` datasets and
    alternates generator (auto-encoder) and discriminator updates. After each
    epoch the cosine similarity between the two embedding sets is stored in
    ``self.similar_matrix`` and top-k hit counts against
    ``data/douban_truth.emb`` are printed.

    Side effects: sets ``self.D_W1``, ``self.D_b1``, ``self.D_W2``,
    ``self.D_b2``, ``self.tmp_count`` and ``self.similar_matrix``.
    """
    model_str = FLAGS.model

    placeholders = [{
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features': tf.placeholder(tf.float32),
        'features_nonzero': tf.placeholder(tf.float32),
        'pos_weight': tf.placeholder(tf.float32),
        'norm': tf.placeholder(tf.float32),
        'reward': tf.placeholder(tf.float32),
        'D_W1': tf.placeholder_with_default(
            tf.zeros([FLAGS.g_hidden2, FLAGS.d_hidden1]),
            shape=[FLAGS.g_hidden2, FLAGS.d_hidden1]),
        'D_W2': tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1, 1]),
                                            shape=[FLAGS.d_hidden1, 1]),
        'D_b1': tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1]),
                                            shape=[FLAGS.d_hidden1]),
        'D_b2': tf.placeholder_with_default(tf.zeros([1]), shape=[1]),
    }, {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        # NOTE(review): a sparse placeholder here, but a dense one for the
        # first graph -- confirm which is intended.
        'num_features': tf.sparse_placeholder(tf.float32),
        'features_nonzero': tf.placeholder(tf.float32),
        'pos_weight': tf.placeholder(tf.float32),
        'norm': tf.placeholder(tf.float32),
        'reward': tf.placeholder(tf.float32)
    }]

    sess = tf.Session()

    # Discriminator parameters; their current values are fed to the generator
    # through the D_* placeholders at every generator step.
    self.D_W1 = tf.Variable(xavier_init([FLAGS.g_hidden2, FLAGS.d_hidden1]))
    self.D_b1 = tf.Variable(xavier_init([FLAGS.d_hidden1]))
    self.D_W2 = tf.Variable(xavier_init([FLAGS.d_hidden1, 1]))
    self.D_b2 = tf.Variable(xavier_init([1]))
    d_vars = [self.D_W1, self.D_b1, self.D_W2, self.D_b2]

    print('train for the network embedding...')
    # Load data
    dataset_str1 = 'Douban_offline'  # 1118 nodes
    dataset_str2 = 'Douban_online'  # 3906 nodes
    adj1, features1, fea_num1 = load_data(dataset_str1)
    adj2, features2, fea_num2 = load_data(dataset_str2)
    num_features = [features1.shape[1], features2.shape[1]]

    model = None
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, sess)
    elif model_str == 'gcn_vae':
        # NOTE(review): num_nodes and features_nonzero are not defined in this
        # method; unless they are module-level names this branch raises
        # NameError -- confirm.
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)

    # Optimizer
    with tf.name_scope('optimizer'):
        opt = OptimizerAE(
            preds=[model.reconstructions1, model.reconstructions2],
            labels=[
                tf.reshape(
                    tf.sparse_tensor_to_dense(placeholders[0]['adj_orig'],
                                              validate_indices=False), [-1]),
                tf.reshape(
                    tf.sparse_tensor_to_dense(placeholders[1]['adj_orig'],
                                              validate_indices=False), [-1])
            ],
            # NOTE(review): both entries use attribute_reconstructions1 while
            # the labels come from graph 0 and graph 1; this looks like a
            # copy-paste slip -- confirm whether the second entry should be
            # the second graph's reconstruction.
            preds_attribute=[
                model.attribute_reconstructions1,
                model.attribute_reconstructions1
            ],
            labels_attribute=[
                tf.sparse_tensor_to_dense(placeholders[0]['features']),
                tf.sparse_tensor_to_dense(placeholders[1]['features'])
            ],
            pos_weight=[
                placeholders[0]['pos_weight'], placeholders[1]['pos_weight']
            ],
            norm=[placeholders[0]['norm'], placeholders[1]['norm']],
            fake_logits=model.fake_logits,
            alpha=FLAGS.AX_alpha)

    # Discriminator inputs (created once; an earlier, unused pair of identical
    # placeholders was dropped).
    real_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
    fake_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])

    real_logits, fake_logits = self.discriminator(real_X, fake_X)
    real_prob = tf.reduce_mean(real_logits)
    fake_prob = tf.reduce_mean(fake_logits)
    D_loss = -real_prob + fake_prob

    dis_optimizer = tf.train.AdamOptimizer(
        learning_rate=FLAGS.learning_rate_dis)  # Adam Optimizer
    opt_dis = dis_optimizer.minimize(D_loss, var_list=d_vars)

    sess.run(tf.global_variables_initializer())

    # Per-graph inputs: normalised adjacency, feature tuple, loss weights and
    # adjacency-with-self-loops labels.
    adj_norm_1 = preprocess_graph(adj1)
    local_X_1 = sparse_to_tuple(features1.tocoo())
    pos_weight_1 = float(adj1.shape[0] * adj1.shape[0] -
                         adj1.sum()) / adj1.sum()
    adj_label_1 = sparse_to_tuple(adj1 + sp.eye(adj1.shape[0]))
    norm_1 = adj1.shape[0] * adj1.shape[0] / float(
        (adj1.shape[0] * adj1.shape[0] - adj1.sum()) * 2)

    adj_norm_2 = preprocess_graph(adj2)
    local_X_2 = sparse_to_tuple(features2.tocoo())
    pos_weight_2 = float(adj2.shape[0] * adj2.shape[0] -
                         adj2.sum()) / adj2.sum()
    adj_label_2 = sparse_to_tuple(adj2 + sp.eye(adj2.shape[0]))
    norm_2 = adj2.shape[0] * adj2.shape[0] / float(
        (adj2.shape[0] * adj2.shape[0] - adj2.sum()) * 2)

    # The ground truth does not change during training, so read it once
    # instead of once per epoch.
    gnd = np.loadtxt("data/douban_truth.emb")
    topk = [1, 5, 10, 20, 30, 50]
    max_k = max(topk)

    self.tmp_count = {}
    for epoch in range(FLAGS.epoch):
        for circle_epoch in range(FLAGS.circle_epoch):
            for G_epoch in range(FLAGS.g_epoch):
                # Generator step. Graph 2's inputs are passed first.
                # NOTE(review): confirm this ordering matches what
                # construct_feed_dict expects for placeholders[0]/[1].
                feed_dict = construct_feed_dict(
                    [adj_norm_2, adj_norm_1], [adj_label_2, adj_label_1],
                    [local_X_2, local_X_1], [pos_weight_2, pos_weight_1],
                    [norm_2, norm_1], placeholders)
                # Snapshot the discriminator parameters in a single run call.
                d_w1, d_w2, d_b1, d_b2 = sess.run(
                    [self.D_W1, self.D_W2, self.D_b1, self.D_b2])
                feed_dict.update({
                    placeholders[0]['D_W1']: d_w1,
                    placeholders[0]['D_W2']: d_w2,
                    placeholders[0]['D_b1']: d_b1,
                    placeholders[0]['D_b2']: d_b2,
                })
                sess.run([
                    opt.opt_op, model.embeddings1, model.embeddings2_,
                    opt.cost, model.fake_prob, opt.attribute_cost
                ],
                         feed_dict=feed_dict)

            for D_epoch in range(FLAGS.d_epoch):
                # Discriminator step on the current embeddings.
                feed_dict.update({placeholders[0]['dropout']: FLAGS.dropout})
                emb1, emb2 = sess.run(
                    [model.embeddings1, model.embeddings2_],
                    feed_dict=feed_dict)
                sess.run([opt_dis, real_prob, fake_prob],
                         feed_dict={
                             real_X: emb1,
                             fake_X: emb2
                         })

        # Evaluate after every epoch.
        # NOTE(review): feed_dict may still carry the training dropout rate
        # set in the discriminator loop -- confirm evaluation is meant to run
        # with dropout enabled.
        emb1, emb2 = sess.run([model.embeddings1, model.embeddings2_],
                              feed_dict=feed_dict)
        final_emb1 = np.array(emb1)
        final_emb2 = np.array(emb2)

        similar_matrix = cosine_similarity(final_emb1, final_emb2)
        self.similar_matrix = similar_matrix

        # Rank the columns of every row once for the largest k; the top-k list
        # for a smaller k is a prefix of it (ids are 1-based).
        ranked = {}
        for index in range(similar_matrix.shape[0]):
            row = similar_matrix[index]
            best = heapq.nlargest(max_k, range(len(row)), row.take)
            ranked[index + 1] = [col + 1 for col in best]

        count = {}
        for top in topk:
            self.tmp_count[top] = 0
            hits = 0
            for truth in gnd:
                # Direct lookup of the source id replaces a scan over all rows.
                candidates = ranked.get(truth[0])
                if candidates is not None and truth[1] in candidates[:top]:
                    hits += 1
            count[top] = hits

        print(
            f'-----------------------epoch {epoch}------------------------'
        )
        for top in topk:
            print("top", '%02d' % (top), "count=", '%d' % (count[top]),
                  "precision=", "{:.5f}".format(count[top] / len(gnd)))
        print(
            f'-----------------------epoch {epoch}------------------------'
        )
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train adj_norm = preprocess_graph(adj) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = torch.FloatTensor(adj_label.toarray()) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # Model and optimizer model = GCNModelAE(nfeat=features.shape[1], nhid=args.hidden, nclass=args.nclass, dropout=args.dropout) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) indices = [] losses = [] def train(epoch): with torch.autograd.set_detect_anomaly(True): t = time.time() model.train() optimizer.zero_grad()
features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] sess = tf.Session() # Create model model = None if FLAGS.multihead_attn: model = MultiHeadedGAE(placeholders, num_features, features_nonzero) else: if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero, FLAGS.attention, FLAGS.bilinear) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero, FLAGS.attention) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE( preds=model.reconstructions, labels=tf.reshape(
def gae_for(args):
    """Train a GCN (variational) auto-encoder for link prediction and clustering.

    Loads the dataset named by ``args.dataset_str``, holds out validation and
    test edges, and trains for ``args.epochs`` epochs. Every 10 epochs the
    current embedding is clustered with KMeans, scored on the test edges and
    saved under ``logs/``. Final test ROC/AP and clustering metrics are written
    through ``tqdm.write``.

    Args:
        args: Parsed options. Reads ``dataset_str``, ``model``, ``hidden1``,
            ``hidden2``, ``dropout``, ``lr``, ``epochs``, ``n_clusters`` and
            ``plot``.
    """
    import os

    print("Using {} dataset".format(args.dataset_str))
    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(
        args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = mask_test_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    if args.model == 'gcn_vae':
        model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2,
                            args.dropout)
    else:
        model = GCNModelAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in tqdm(range(args.epochs)):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        z, mu, logvar = model(features, adj_norm)

        loss = loss_function(preds=model.dc(z),
                             labels=adj_label,
                             mu=mu,
                             logvar=logvar,
                             n_nodes=n_nodes,
                             norm=norm,
                             pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.detach().numpy()
        roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges,
                                          val_edges_false)

        tqdm.write(
            "Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".
            format(epoch + 1, cur_loss, ap_curr, time.time() - t))

        if (epoch + 1) % 10 == 0:
            tqdm.write("Evaluating intermediate results...")
            kmeans = KMeans(n_clusters=args.n_clusters,
                            random_state=0).fit(hidden_emb)
            predict_labels = kmeans.predict(hidden_emb)
            cm = clustering_metrics(true_labels, predict_labels)
            cm.evaluationClusterModelFromLabel(tqdm)
            roc_score, ap_score = get_roc_score(hidden_emb, adj_orig,
                                                test_edges, test_edges_false)
            tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
            # Make sure the output directory exists before saving; otherwise
            # np.save fails after the first ten epochs of training.
            os.makedirs('logs', exist_ok=True)
            np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)

    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                        test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    kmeans = KMeans(n_clusters=args.n_clusters,
                    random_state=0).fit(hidden_emb)
    predict_labels = kmeans.predict(hidden_emb)
    cm = clustering_metrics(true_labels, predict_labels)
    cm.evaluationClusterModelFromLabel(tqdm)

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)
# Seed every random number generator in use for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# NOTE(review): pickle.load can execute arbitrary code; only load
# 'test1.pkl' from a trusted source.
with open('test1.pkl', 'rb') as f:
    x = pickle.load(f)
print(x.shape)

adj, features = load_data(x, False)
G = nx.from_numpy_matrix(adj.toarray())
adj_train = preprocess_graph(adj)

# Restore the trained auto-encoder and switch it to evaluation mode.
model = GCNModelAE(nfeat=features.shape[1],
                   nhid=args.hidden,
                   nclass=args.ndim,
                   dropout=args.dropout)
model.load_state_dict(torch.load(args.saved_model))
model.eval()

# Inference only: skip building the autograd graph for the forward pass.
with torch.no_grad():
    output = model(features, adj_train)
output = try_data(output)
output = output.detach().numpy()

# Normalization is currently disabled; the raw model output is used as-is.
data = output
# scaler = StandardScaler().fit(output)
# data = scaler.transform(output)

# Convert to pandas
meta_df = {}
# Load data
adj, features = load_data(dataset_str)

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

(adj_train, train_edges, val_edges, val_edges_false, test_edges,
 test_edges_false) = mask_test_edges(adj)
adj = adj_train

if args.features == 0:
    # Featureless mode: one-hot identity features. (The call was misspelled
    # as ``sp.identiy`` and raised AttributeError.)
    features = sp.identity(features.shape[0])  # featureless

adj_norm = preprocess_graph(adj)

num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

# Create Model
model = None
if model_str == 'gcn_ae':
    model = GCNModelAE(num_features, features_nonzero)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(num_features, num_nodes, features_nonzero)
}, { # 'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'features': tf.sparse_placeholder(tf.float32,shape=tf.constant(A2[2], dtype=tf.int64)), }] dropout = tf.placeholder(tf.float32) num_nodes = [S1_ori[2][0],S2_ori[2][0]] num_features = [A1[2][1],A2[2][1]] features_nonzero = [A1[1].shape[0],A2[1].shape[0]] # Create model #map_model = Discriminator() # model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features = num_features, features_nonzero = features_nonzero,dropout=dropout,flag=True) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features = num_features,num_nodes=num_nodes, features_nonzero = features_nonzero,dropout=dropout) pos_weight1 = float(S1.shape[0] * S1.shape[0] - S1.sum()) / S1.sum() norm1 = S1.shape[0] * S1.shape[0] / float((S1.shape[0] * S1.shape[0] - S1.sum()) * 2) pos_weight2 = float(S2.shape[0] * S2.shape[0] - S2.sum()) / S2.sum() norm2 = S2.shape[0] * S2.shape[0] / float((S2.shape[0] * S2.shape[0] - S2.sum()) * 2) pos_weight = [pos_weight1,pos_weight2] norm = [norm1,norm2] # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae':
'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()) } # tf.placeholder num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE(preds=model.reconstructions, labels=tf.sparse_tensor_to_dense( placeholders['adj_orig'], pos_weight=pos_weight,