def train_step(encoder, decoder, inp, targ, targ_lang, enc_hidden, BATCH_SIZE):
    loss = 0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)
        # Iterate over every position in the target sequence
        for t in range(1, targ.shape[1]):
            # Pass the encoder output (enc_output) to the decoder; the input is one batch
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += optimizer.loss_function(targ[:, t], predictions)
            # Teacher forcing: feed the target token as the next input for the whole batch
            dec_input = tf.expand_dims(targ[:, t], 1)
    batch_loss = (loss / int(targ.shape[1]))
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss
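A minimal sketch of how train_step might be driven across epochs, assuming a batched tf.data dataset of (inp, targ) pairs; dataset, steps_per_epoch, EPOCHS, and encoder.initialize_hidden_state() are illustrative assumptions, not taken from the snippet above.

# Hypothetical outer loop around train_step; `dataset`, `steps_per_epoch`,
# `EPOCHS`, and `encoder.initialize_hidden_state()` are assumptions.
EPOCHS = 10

for epoch in range(EPOCHS):
    enc_hidden = encoder.initialize_hidden_state()  # fresh hidden state each epoch
    total_loss = 0.0
    for batch, (inp, targ) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(encoder, decoder, inp, targ,
                                targ_lang, enc_hidden, BATCH_SIZE)
        total_loss += batch_loss
    print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / steps_per_epoch))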
def gae_for(args): print("Using {} dataset".format(args.dataset_str)) adj, features = load_data(args.dataset_str) # print(features) # exit(0) n_nodes, feat_dim = features.shape print("#nodes={}".format(n_nodes)) # Store original adjacency matrix (without diagonal entries) for later print( "Store original adjacency matrix (without diagonal entries) for later..." ) adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() print("Sample the edges for training and testing...") adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train # Some preprocessing print("Some preprocessing...") adj_norm = preprocess_graph(adj) adj_label = adj_train + sp.eye(adj_train.shape[0]) # adj_label = sparse_to_tuple(adj_label) adj_label = torch.FloatTensor(adj_label.toarray()) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout) optimizer = optim.Adam(model.parameters(), lr=args.lr) hidden_emb = None for epoch in range(args.epochs): t = time.time() model.train() optimizer.zero_grad() recovered, mu, logvar = model(features, adj_norm) pos_weight = torch.Tensor([pos_weight]) loss = loss_function(preds=recovered, labels=adj_label, mu=mu, logvar=logvar, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) loss.backward() cur_loss = loss.item() optimizer.step() hidden_emb = mu.data.numpy() roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(cur_loss), "val_ap=", "{:.5f}".format(ap_curr), "time=", "{:.5f}".format(time.time() - t)) print("Optimization Finished!") roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) print('Test ROC score: ' + str(roc_score)) print('Test AP score: ' + str(ap_score))
def gae_for(args): print("Using {} dataset".format(args.dataset_str)) adj, features = load_data(args.dataset_str) features = features.to(args.device) n_nodes, feat_dim = features.shape # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train # Some preprocessing adj_norm = preprocess_graph(adj) adj_norm = adj_norm.to(args.device) adj_label = adj_train + sp.eye(adj_train.shape[0]) # adj_label = sparse_to_tuple(adj_label) adj_label = torch.FloatTensor(adj_label.toarray()) adj_orig_tile = adj_label.unsqueeze(2).repeat(1, 1, args.K) adj_orig_tile = adj_orig_tile.to(args.device) pos_weight = torch.tensor( float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()).float().to(device=args.device) norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) psi_input_dim = args.noise_dim + feat_dim logv_input_dim = feat_dim model = GCNModelVAE(psi_input_dim, logv_input_dim, args.hidden1, args.hidden2, args.dropout, args.K, args.J, args.noise_dim, args.device).to(args.device) optimizer = optim.Adam(model.parameters(), lr=args.lr) for epoch in range(args.epochs): warm_up = torch.min(torch.FloatTensor([epoch / 300, 1])).to(args.device) t = time.time() model.train() optimizer.zero_grad() reconstruct_iw, log_prior_iw, log_H_iw, psi_iw_vec = model( features, adj_norm) hidden_emb = psi_iw_vec.data.contiguous().cpu().numpy() loss = loss_function(reconstructed_iw=reconstruct_iw, log_prior_iw=log_prior_iw, log_H_iw=log_H_iw, adj_orig_tile=adj_orig_tile, nodes=n_nodes, K=args.K, pos_weight=pos_weight, norm=norm, warm_up=warm_up, device=args.device) loss.backward() cur_loss = loss.item() optimizer.step() roc_score_val, ap_score_val = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false) roc_score_test, ap_score_test = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) print('Epoch:', '%04d ---> ' % (epoch + 1), 'training_loss = {:.5f} '.format(cur_loss), 'val_AP = {:.5f} '.format(ap_score_val), 'val_ROC = {:.5f} '.format(roc_score_val), 'test_AP = {:.5f} '.format(ap_score_test), 'test_ROC = {:.5f} '.format(roc_score_test), 'time = {:.5f} '.format(time.time() - t)) writer.add_scalar('Loss/train_loss', cur_loss, epoch) writer.add_scalar('Average Precision/test', ap_score_test, epoch) writer.add_scalar('Average Precision/val', ap_score_val, epoch) writer.add_scalar('Area under Roc(AUC)/test', roc_score_test, epoch) writer.add_scalar('Area under Roc(AUC)/val', roc_score_val, epoch) print("Optimization Finished!")
def gae_for(args): print("Using {} dataset".format(args.dataset)) if args.dataset == 'cora': n_clusters = 7 Cluster = SpectralClustering(n_clusters=n_clusters, affinity='precomputed', random_state=0) elif args.dataset == 'citeseer': n_clusters = 6 Cluster = SpectralClustering(n_clusters=n_clusters, affinity='precomputed', random_state=0) elif args.dataset == 'pubmed': n_clusters = 3 Cluster = SpectralClustering(n_clusters=n_clusters, affinity='precomputed', random_state=0) elif args.dataset == 'wiki': n_clusters = 17 Cluster = SpectralClustering(n_clusters=n_clusters, affinity='precomputed', random_state=0) adj, features, true_labels, idx_train, idx_val, idx_test = load_data( args.dataset) n_nodes, feat_dim = features.shape dims = [feat_dim] + args.dims layers = args.linlayers # Store original adjacency matrix (without diagonal entries) for later adj = adj - sp.dia_matrix( (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape) adj.eliminate_zeros() #adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj) n = adj.shape[0] adj_norm_s = preprocess_graph(adj, args.gnnlayers, norm='sym', renorm=True) sm_fea_s = sp.csr_matrix(features).toarray() print('Laplacian Smoothing...') for a in adj_norm_s: sm_fea_s = a.dot(sm_fea_s) adj_1st = (adj + sp.eye(n)).toarray() db, best_acc, best_nmi, best_adj = clustering(Cluster, sm_fea_s, true_labels) best_cl = db adj_label = torch.FloatTensor(adj_1st) model = LinTrans(layers, dims) optimizer = optim.Adam(model.parameters(), lr=args.lr) sm_fea_s = torch.FloatTensor(sm_fea_s) adj_label = adj_label.reshape([ -1, ]) if args.cuda: model.cuda() inx = sm_fea_s.cuda() adj_label = adj_label.cuda() pos_num = len(adj.indices) neg_num = n_nodes * n_nodes - pos_num up_eta = (args.upth_ed - args.upth_st) / (args.epochs / args.upd) low_eta = (args.lowth_ed - args.lowth_st) / (args.epochs / args.upd) pos_inds, neg_inds = update_similarity(normalize(sm_fea_s.numpy()), args.upth_st, args.lowth_st, pos_num, neg_num) upth, lowth = update_threshold(args.upth_st, args.lowth_st, up_eta, low_eta) bs = min(args.bs, len(pos_inds)) length = len(pos_inds) pos_inds_cuda = torch.LongTensor(pos_inds).cuda() print('Start Training...') for epoch in tqdm(range(args.epochs)): st, ed = 0, bs batch_num = 0 model.train() length = len(pos_inds) while (ed <= length): sampled_neg = torch.LongTensor( np.random.choice(neg_inds, size=ed - st)).cuda() sampled_inds = torch.cat((pos_inds_cuda[st:ed], sampled_neg), 0) t = time.time() optimizer.zero_grad() xind = sampled_inds // n_nodes yind = sampled_inds % n_nodes x = torch.index_select(inx, 0, xind) y = torch.index_select(inx, 0, yind) zx = model(x) zy = model(y) batch_label = torch.cat( (torch.ones(ed - st), torch.zeros(ed - st))).cuda() batch_pred = model.dcs(zx, zy) loss = loss_function(adj_preds=batch_pred, adj_labels=batch_label, n_nodes=ed - st) loss.backward() cur_loss = loss.item() optimizer.step() st = ed batch_num += 1 if ed < length and ed + bs >= length: ed += length - ed else: ed += bs if (epoch + 1) % args.upd == 0: model.eval() mu = model(inx) hidden_emb = mu.cpu().data.numpy() upth, lowth = update_threshold(upth, lowth, up_eta, low_eta) pos_inds, neg_inds = update_similarity(hidden_emb, upth, lowth, pos_num, neg_num) bs = min(args.bs, len(pos_inds)) pos_inds_cuda = torch.LongTensor(pos_inds).cuda() tqdm.write("Epoch: {}, train_loss_gae={:.5f}, time={:.5f}".format( epoch + 1, cur_loss, time.time() - t)) db, acc, nmi, adjscore = clustering(Cluster, hidden_emb, true_labels) if db >= best_cl: 
best_cl = db best_acc = acc best_nmi = nmi best_adj = adjscore tqdm.write("Optimization Finished!") tqdm.write('best_acc: {}, best_nmi: {}, best_adj: {}'.format( best_acc, best_nmi, best_adj))
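The mini-batching in the variant above draws flattened node-pair indices and decodes them into row/column node ids with integer division and modulo; a tiny sketch with an assumed 4-node graph:

# Decoding flattened pair indices into (row, col) node ids, as done with
# xind = sampled_inds // n_nodes and yind = sampled_inds % n_nodes above.
# The 4-node size and the sampled indices are assumptions for illustration.
import torch

n_nodes = 4
sampled_inds = torch.tensor([0, 5, 14])   # positions in the flattened n_nodes x n_nodes grid
xind = sampled_inds // n_nodes            # tensor([0, 1, 3]) -> row (source) nodes
yind = sampled_inds % n_nodes             # tensor([0, 1, 2]) -> column (target) nodes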
hidden_emb = None
t = time.time()
model.train()
optimizer.zero_grad()

features_bs = features_bs.cuda()
adj_norm = adj_norm.cuda()
# print('features_bs :')
# print(features_bs)
# print('adj_norm :')
# print(adj_norm)

recovered, mu, logvar = model(features_bs, adj_norm)
loss = loss_function(preds=recovered,
                     labels=adj_label,
                     mu=mu,
                     logvar=logvar,
                     n_nodes=n_nodes,
                     norm=norm,
                     pos_weight=pos_weight)
cpu_loss = loss.cpu()
cur_loss_list.append(cpu_loss.data.numpy().tolist())
loss.backward()
# cur_loss = loss.item()
optimizer.step()
hidden_emb = mu.cpu().data.numpy()

print(cur_loss_list)
print(len(cur_loss_list))
# one mean loss per epoch
def gae_for(args): print("Using {} dataset".format(args.dataset_str)) adj, features, y_test, tx, ty, test_maks, true_labels = load_data( args.dataset_str) n_nodes, feat_dim = features.shape # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train # Before proceeding further, make the structure for doing deepWalk if args.dw == 1: print('Using deepWalk regularization...') G = load_edgelist_from_csr_matrix(adj_orig, undirected=True) print("Number of nodes: {}".format(len(G.nodes()))) num_walks = len(G.nodes()) * args.number_walks print("Number of walks: {}".format(num_walks)) data_size = num_walks * args.walk_length print("Data size (walks*length): {}".format(data_size)) # Some preprocessing adj_norm = preprocess_graph(adj) adj_label = adj_train + sp.eye(adj_train.shape[0]) # adj_label = sparse_to_tuple(adj_label) # adj_label = torch.DoubleTensor(adj_label.toarray()) adj_label = torch.FloatTensor(adj_label.toarray()) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) if args.model == 'gcn_vae': model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout) else: model = GCNModelAE(feat_dim, args.hidden1, args.hidden2, args.dropout) optimizer = optim.Adam(model.parameters(), lr=args.lr) if args.dw == 1: sg = SkipGram(args.hidden2, adj.shape[0]) optimizer_dw = optim.Adam(sg.parameters(), lr=args.lr_dw) # Construct the nodes for doing random walk. Doing it before since the seed is fixed nodes_in_G = list(G.nodes()) chunks = len(nodes_in_G) // args.number_walks random.Random().shuffle(nodes_in_G) hidden_emb = None for epoch in tqdm(range(args.epochs)): t = time.time() model.train() optimizer.zero_grad() z, mu, logvar = model(features, adj_norm) # After back-propagating gae loss, now do the deepWalk regularization if args.dw == 1: sg.train() if args.full_number_walks > 0: walks = build_deepwalk_corpus(G, num_paths=args.full_number_walks, path_length=args.walk_length, alpha=0, rand=random.Random(SEED)) else: walks = build_deepwalk_corpus_iter( G, num_paths=args.number_walks, path_length=args.walk_length, alpha=0, rand=random.Random(SEED), chunk=epoch % chunks, nodes=nodes_in_G) for walk in walks: if args.context == 1: # Construct the pairs for predicting context node # for each node, treated as center word curr_pair = (int(walk[center_node_pos]), []) for center_node_pos in range(len(walk)): # for each window position for w in range(-args.window_size, args.window_size + 1): context_node_pos = center_node_pos + w # make soure not jump out sentence if context_node_pos < 0 or context_node_pos >= len( walk ) or center_node_pos == context_node_pos: continue context_node_idx = walk[context_node_pos] curr_pair[1].append(int(context_node_idx)) else: # first item in the walk is the starting node curr_pair = (int(walk[0]), [ int(context_node_idx) for context_node_idx in walk[1:] ]) if args.ns == 1: neg_nodes = [] pos_nodes = set(walk) while len(neg_nodes) < args.walk_length - 1: rand_node = random.randint(0, n_nodes - 1) if rand_node not in pos_nodes: neg_nodes.append(rand_node) neg_nodes = torch.from_numpy(np.array(neg_nodes)).long() # Do actual prediction src_node = torch.from_numpy(np.array([curr_pair[0]])).long() 
tgt_nodes = torch.from_numpy(np.array(curr_pair[1])).long() optimizer_dw.zero_grad() log_pos = sg(src_node, tgt_nodes, neg_sample=False) if args.ns == 1: loss_neg = sg(src_node, neg_nodes, neg_sample=True) loss_dw = log_pos + loss_neg else: loss_dw = log_pos loss_dw.backward(retain_graph=True) cur_dw_loss = loss_dw.item() optimizer_dw.step() loss = loss_function(preds=model.dc(z), labels=adj_label, mu=mu, logvar=logvar, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) loss.backward() cur_loss = loss.item() optimizer.step() hidden_emb = mu.data.numpy() roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false) if args.dw == 1: tqdm.write( "Epoch: {}, train_loss_gae={:.5f}, train_loss_dw={:.5f}, val_ap={:.5f}, time={:.5f}" .format(epoch + 1, cur_loss, cur_dw_loss, ap_curr, time.time() - t)) else: tqdm.write( "Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}". format(epoch + 1, cur_loss, ap_curr, time.time() - t)) if (epoch + 1) % 10 == 0: tqdm.write("Evaluating intermediate results...") kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb) predict_labels = kmeans.predict(hidden_emb) cm = clustering_metrics(true_labels, predict_labels) cm.evaluationClusterModelFromLabel(tqdm) roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score)) np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb) tqdm.write("Optimization Finished!") roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) tqdm.write('Test ROC score: ' + str(roc_score)) tqdm.write('Test AP score: ' + str(ap_score)) kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb) predict_labels = kmeans.predict(hidden_emb) cm = clustering_metrics(true_labels, predict_labels) cm.evaluationClusterModelFromLabel(tqdm) if args.plot == 1: cm.plotClusters(tqdm, hidden_emb, true_labels)
def gae_for(args): print("Using {} dataset".format(args.dataset_str)) adj, features, y_test, tx, ty, test_maks, true_labels = load_data(args.dataset_str) n_nodes, feat_dim = features.shape # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj) adj = adj_train # Some preprocessing adj_norm = preprocess_graph(adj) adj_label = adj_train + sp.eye(adj_train.shape[0]) # adj_label = sparse_to_tuple(adj_label) # adj_label = torch.DoubleTensor(adj_label.toarray()) adj_label = torch.FloatTensor(adj_label.toarray()) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2) if args.model == 'gcn_vae': model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout) else: model = GCNModelAE(feat_dim, args.hidden1, args.hidden2, args.dropout) optimizer = optim.Adam(model.parameters(), lr=args.lr) hidden_emb = None for epoch in tqdm(range(args.epochs)): t = time.time() model.train() optimizer.zero_grad() z, mu, logvar = model(features, adj_norm) loss = loss_function(preds=model.dc(z), labels=adj_label, mu=mu, logvar=logvar, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) loss.backward() cur_loss = loss.item() optimizer.step() hidden_emb = mu.data.numpy() roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false) tqdm.write("Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".format( epoch + 1, cur_loss, ap_curr, time.time() - t)) if (epoch + 1) % 10 == 0: tqdm.write("Evaluating intermediate results...") kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb) predict_labels = kmeans.predict(hidden_emb) cm = clustering_metrics(true_labels, predict_labels) cm.evaluationClusterModelFromLabel(tqdm) roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score)) np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb) tqdm.write("Optimization Finished!") roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) tqdm.write('Test ROC score: ' + str(roc_score)) tqdm.write('Test AP score: ' + str(ap_score)) kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb) predict_labels = kmeans.predict(hidden_emb) cm = clustering_metrics(true_labels, predict_labels) cm.evaluationClusterModelFromLabel(tqdm) if args.plot == 1: cm.plotClusters(tqdm, hidden_emb, true_labels)
def gae_for(args): print("Using {} dataset".format(args.dataset_str)) # adj, features, y_test, tx, ty, test_maks, true_labels = load_data('cora') # print(true_labels) # adj, features, y_test, test_maks, true_labels=load_npz('amazon_electronics_photo') # print(true_labels) # adj=preprocess_high_order_adj( adj, 2, 0.01 ) # print(adj) # if args.dataset_split == 'jknet': g, features, true_labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data( args.dataset_str, None, 0.6, 0.2) adj = g.adj(scipy_fmt='coo') true_labels = true_labels.detach().numpy() # print(true_labels) # else: # g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data( # args.dataset_str, args.dataset_split) args.n_clusters = true_labels.max() + 1 print(args.n_clusters, "ssssssss") # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) # adj = adj_train # Some preprocessing adj_norm = preprocess_graph(adj) # adj_norm = torch.sparse.FloatTensor(sp.coo_matrix(adj)) # adj_norm=torch.tensor(adj.todense(),dtype=torch.float) # print(adj_norm) adj_label = adj_train + sp.eye(adj_train.shape[0]) # adj_label = sparse_to_tuple(adj_label) adj_label = torch.FloatTensor(adj_label.toarray()) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) z_x = torch.zeros(features.shape[0], args.hidden1) z_w = torch.zeros(features.shape[0], args.hidden2) z_shuffle = torch.cat((features, z_x, z_w), axis=1) n_nodes, feat_dim = z_shuffle.shape model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout, args.n_clusters) optimizer = optim.Adam(model.parameters(), lr=args.lr) z_x, mu_x, _, z_w, mu_w, _, _, logvar_px, qz = model(z_shuffle, adj_norm) z_shuffle = torch.cat((features, z_x.detach_(), z_w.detach_()), axis=1) hidden_emb = None for epoch in tqdm(range(args.epochs)): t = time.time() model.train() optimizer.zero_grad() # z_shuffle=torch.cat((features,z_x.detach_(),z_w.detach_()),axis=1) z_x, mu_x, logvar_x, z_w, mu_w, logvar_w, mu_px, logvar_px, qz = model( z_shuffle, adj_norm) # print(z_x.shape,"z_x.shape") # After back-propagating gae loss, now do the deepWalk regularization # mu_x = mu_x.unsqueeze(-1) # mu_x = mu_x.expand(-1, args.hidden2) logvar_x1 = logvar_x.unsqueeze(-1) logvar_x1 = logvar_x1.expand(-1, args.hidden2, args.n_clusters) mu_x1 = mu_x.unsqueeze(-1) mu_x1 = mu_x1.expand(-1, args.hidden2, args.n_clusters) if torch.cuda.is_available(): mu_x1 = mu_x1.cuda() logvar_x1 = logvar_x1.cuda() # KLD_W = -0.5 / n_nodes* torch.sum(1 + logvar_w - mu_w.pow(2) - logvar_w.exp()) # KLD_Z = -torch.sum(qz * torch.log(qz + 1e-10))/n_nodes KLD_Z = -0.5 / n_nodes * torch.mean( torch.sum(1 + qz * torch.log(qz + 1e-10), 1)) # print(KLD_Z,"klz") # qz = qz.unsqueeze(-1) # qz = qz.expand(-1, 1) # print(logvar_px.shape,logvar_x1.shape,"hhhhi") # KLD_QX_PX = 0.5 / n_nodes* (((logvar_px - logvar_x) + ((logvar_x.exp() + (mu_x - mu_px).pow(2))/logvar_px.exp())) - 1) # # print(KLD_QX_PX.shape,qz.shape,"hhhhi") # KLD_QX_PX = KLD_QX_PX.unsqueeze(1) # qz = qz.unsqueeze(-1) # print(KLD_QX_PX.shape,qz.shape,"hhhhi") # KLD_QX_PX = KLD_QX_PX.expand(2708, 1, args.hidden2) KLD_QX_PX = loss_function(preds=model.dc(z_x), 
labels=adj_label, mu=(mu_x1 - mu_px), logvar=(logvar_px - logvar_x1), n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) KLD_QX_PX = KLD_QX_PX = KLD_QX_PX.expand(n_nodes, 1, args.hidden2) E_KLD_QX_PX = torch.sum( torch.bmm(KLD_QX_PX, qz.unsqueeze(-1) / n_nodes)) # print(E_KLD_QX_PX) # print(model.dc(z_x).shape,adj_label.shape,"hdhhhhhhd") model.train() optimizer.zero_grad() lbl_1 = torch.ones(n_nodes) lbl_2 = torch.zeros(n_nodes) lbl = torch.cat((lbl_1, lbl_2)) idx = np.random.permutation(n_nodes) # print(features.shape,z_x.shape,adj_norm.shape) shuf_fts = z_shuffle[idx, :] # FeatHL=torch.cat((features,shuf_fts),axis=1) # _, featHL_dim = FeatHL.shape # modelHL = GCNModelVAE(featHL_dim, args.hidden1, args.hidden2, args.dropout,2) n_nodes1, feat_dim1 = z_shuffle.shape # model1 = GCNModelVAE(feat_dim1, args.hidden1, args.hidden2, args.dropout,args.n_clusters) # z_xL1, mu_xL1, logvar_xL1,z_wL1, mu_wL1, logvar_wL1,mu_pxL1, logvar_pxL1,_ = model1(z_shuffle, adj_norm) z_xL2, mu_xL2, logvar_xL2, z_wL2, mu_wL2, logvar_wL2, mu_pxL2, logvar_pxL2, qz2 = model( shuf_fts, adj_norm) KLD_Z2 = 0.5 / n_nodes * torch.mean( torch.sum(1 + qz2 * torch.log(qz2 + 1e-10), 1)) KLD_QX_PX2 = loss_function(preds=model.dc(z_wL2), labels=adj_label, mu=mu_wL2, logvar=logvar_wL2, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) KLD_QX_PX2 = KLD_QX_PX2.expand(n_nodes, 1, args.hidden2) E_KLD_QX_PX2 = torch.sum( torch.bmm(KLD_QX_PX2, qz2.unsqueeze(-1) / n_nodes)) lossF = (1.0/loss_function(preds=model.dc(z_xL2), labels=adj_label, mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight))+\ (1.0/E_KLD_QX_PX2)+KLD_Z2 # z_xL2, mu_xL2, logvar_xL2,z_wL2, mu_wL2, logvar_wL2,mu_pxL2, logvar_pxL2,qz2 = model(shuf_fts, adj_norm) # lossF = (1.0/loss_function(preds=model.dc(z_xL2), labels=adj_label, # mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes, # norm=norm, pos_weight=pos_weight))+\ # (1.0/E_KLD_QX_PX2)+ KLD_Z2+lossF # lossF = loss_functionShuffle(preds=model.dc(z_xL2), labels=adj_label, # mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes, # norm=norm, pos_weight=pos_weight)+\ # loss_functionShuffle(preds=model.dc(z_wL2), labels=adj_label, # mu=mu_wL2, logvar=logvar_wL2, n_nodes=n_nodes, # norm=norm, pos_weight=pos_weight) # LossF=Variable(torch.tensor(lossF).type(torch.FloatTensor),requires_grad=True) # lossF.backward() loss = loss_function( preds=model.dc(z_x), labels=adj_label, mu=mu_x, logvar=logvar_x, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) + loss_function( preds=model.dc(z_w), labels=adj_label, mu=mu_w, logvar=logvar_w, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) + lossF + KLD_Z + E_KLD_QX_PX # if lossF<0.02: # break # lossF.backward() # HL=np.concatenate((mu_xL1.data.numpy(),mu_wL1.data.numpy()),axis=1) # HL2=np.concatenate((mu_xL2.data.numpy(),mu_wL2.data.numpy()),axis=1) # kmeans = KMeans(n_clusters=2, random_state=0).fit(HL) # kmeans2 = KMeans(n_clusters=2, random_state=0).fit(HL2) # predict_labels = kmeans.predict(HL) # predict_labels2 = kmeans.predict(HL2) # pr=np.amax(kmeans.fit_transform(HL), axis=1) # pr2=np.amax(kmeans.fit_transform(HL2), axis=1) # pr=torch.cat((torch.tensor(pr), torch.tensor(pr2))) # b_xent = nn.BCEWithLogitsLoss() # lossF = b_xent(torch.FloatTensor(pr),torch.FloatTensor(lbl)) # print(lossF) # print(loss, lossF) loss.backward(retain_graph=True) cur_loss = loss.item() optimizer.step() hidden_emb = np.concatenate((mu_x.data.numpy(), mu_w.data.numpy()), axis=1) # hidden_emb=mu_x.data.numpy() # print(hidden_emb.shape) # roc_curr, ap_curr = get_roc_score(hidden_emb, 
adj_orig, val_edges, val_edges_false) # if args.dw == 1: # tqdm.write("Epoch: {}, train_loss_gae={:.5f}, train_loss_dw={:.5f}, val_ap={:.5f}, time={:.5f}".format( # epoch + 1, cur_loss, cur_dw_loss, # ap_curr, time.time() - t)) # else: # tqdm.write("Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".format( # epoch + 1, cur_loss, # ap_curr, time.time() - t)) roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) # # tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score)) wandb.log({"roc_score1": roc_score}) wandb.log({"ap_score1": ap_score}) if (epoch + 1) % 10 == 0: tqdm.write("Evaluating intermediate results...") kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb) predict_labels = kmeans.predict(hidden_emb) # print(np.argmax(kmeans.fit_transform(hidden_emb), axis=1).shape) pr = np.amax(kmeans.fit_transform(hidden_emb), axis=1) b_xent = nn.BCEWithLogitsLoss() print(loss, lossF) # lossF = b_xent(torch.FloatTensor(pr),torch.FloatTensor(true_labels)) cm = clustering_metrics(true_labels, predict_labels) cm.evaluationClusterModelFromLabel(tqdm) roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score)) # np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb) print(loss, lossF) print("Kmeans ACC", purity_score(true_labels, predict_labels)) # roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) # tqdm.write('Test ROC score: ' + str(roc_score)) # tqdm.write('Test AP score: ' + str(ap_score)) tqdm.write("Optimization Finished!") roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false) tqdm.write('Test ROC score: ' + str(roc_score)) tqdm.write('Test AP score: ' + str(ap_score)) kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb) predict_labels = kmeans.predict(hidden_emb) cm = clustering_metrics(true_labels, predict_labels) cm.evaluationClusterModelFromLabel(tqdm) print("Kmeans ACC", purity_score(true_labels, predict_labels)) if args.plot == 1: cm.plotClusters(tqdm, hidden_emb, true_labels)
def gae_for(args): print("Using {} dataset".format(args.dataset_str)) # Set tensor dtype to float16 # torch.set_default_tensor_type(torch.HalfTensor) adj, features = load_data(args.dataset_str) _, n_nodes, feat_dim = features.shape # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train # Some preprocessing adj_norm = preprocess_graph(adj) adj_label = adj_train + sp.eye(adj_train.shape[0]) # adj_label = sparse_to_tuple(adj_label) adj_label = torch.FloatTensor(adj_label.toarray()) pos_weight = torch.tensor( [float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()]) norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) model = GCNModelSIGVAE(args.edim, feat_dim, args.hidden1, args.hidden2, args.dropout, encsto=args.encsto, gdc=args.gdc, ndist=args.noise_dist, copyK=args.K, copyJ=args.J, device=args.device) optimizer = optim.Adam(model.parameters(), lr=args.lr) hidden_emb = None model.to(args.device) features = features.to(args.device) adj_norm = adj_norm.to(args.device) adj_label = adj_label.to(args.device) pos_weight = pos_weight.to(args.device) for epoch in range(args.epochs): t = time.time() model.train() optimizer.zero_grad() recovered, mu, logvar, z, z_scaled, eps, rk, snr = model( features, adj_norm) loss_rec, loss_prior, loss_post = loss_function(preds=recovered, labels=adj_label, mu=mu, logvar=logvar, emb=z, eps=eps, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight) WU = np.min([epoch / 300., 1.]) reg = (loss_post - loss_prior) * WU / (n_nodes**2) loss_train = loss_rec + WU * reg # loss_train = loss_rec loss_train.backward() cur_loss = loss_train.item() cur_rec = loss_rec.item() # cur_rec_bce = loss_rec1.item() optimizer.step() hidden_emb = z_scaled.detach().cpu().numpy() roc_curr, ap_curr = get_roc_score(hidden_emb, val_edges, val_edges_false, args.gdc) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(cur_loss), "rec_loss=", "{:.5f}".format(cur_rec), "val_ap=", "{:.5f}".format(ap_curr), "time=", "{:.5f}".format(time.time() - t)) # print(rk.detach().cpu().numpy()) cur_snr = snr.detach().cpu().numpy() print("SNR: ", cur_snr) if ((epoch + 1) % args.monit == 0): model.eval() recovered, mu, logvar, z, z_scaled, eps, rk, _ = model( features, adj_norm) hidden_emb = z_scaled.detach().cpu().numpy() roc_score, ap_score = get_roc_score(hidden_emb, test_edges, test_edges_false, args.gdc) rslt = "Test ROC score: {:.4f}, Test AP score: {:.4f}\n".format( roc_score, ap_score) print("\n", rslt, "\n") with open("results.txt", "a+") as f: f.write(rslt) print("Optimization Finished!")