def erun(self):
    """Train the model and log link-prediction scores for every epoch.

    Builds placeholders, model and optimizer for ``self.model`` on the
    dataset named ``self.data_name`` and runs ``self.iteration`` epochs.
    After each epoch the validation ROC/AP are printed and appended to
    ``recoder_link_prediction.txt``; on every 10th epoch the test ROC/AP
    are printed and appended as well.

    Returns:
        None. Results are only printed and written to the log file.
    """
    model_str = self.model

    # Formatted data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'])

    # Construct model
    d_real, discriminator, ae_model = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'], cat=self.cat)

    # Optimizer
    opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                        feas['pos_weight'], feas['norm'], d_real,
                        feas['num_nodes'])

    val_roc_score = []

    # The context manager guarantees the session is released even when an
    # epoch raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'])

            lm_train = linkpred_metrics(feas['val_edges'],
                                        feas['val_edges_false'])
            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
            val_roc_score.append(roc_curr)

            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss=", "{:.5f}".format(avg_cost),
                  "val_roc=", "{:.5f}".format(val_roc_score[-1]),
                  "val_ap=", "{:.5f}".format(ap_curr))

            # Append mode: the log is re-opened each epoch so that every
            # record is on disk as soon as the epoch finishes. `with`
            # closes (and therefore flushes) the handle on all paths.
            with open('recoder_link_prediction.txt', 'a') as fh:
                fh.write('Epoch=%04d, train_loss=%.5f, val_roc=%.5f, val_ap=%.5f'
                         % (epoch + 1, avg_cost, val_roc_score[-1], ap_curr))
                fh.write('\r\n')

                if (epoch + 1) % 10 == 0:
                    lm_test = linkpred_metrics(feas['test_edges'],
                                               feas['test_edges_false'])
                    roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
                    print('Test ROC score: ' + str(roc_score))
                    print('Test AP score: ' + str(ap_score))
                    fh.write('Test ROC score=%f, Test AP score=%f'
                             % (roc_score, ap_score))
                    fh.write('\r\n')
def erun(self):
    """Train the model and evaluate the embedding by node classification.

    Runs ``self.iteration`` epochs. After every epoch the validation
    link-prediction ROC/AP are printed, a ``LogisticRegression`` is fitted
    on the embedding rows selected by ``feas['train_mask']`` (targets are
    the argmax of ``feas['y_train']``), and its accuracy on the rows
    selected by ``feas['test_mask']`` is printed together with the best
    accuracy seen so far.

    Returns:
        None. Results are only printed.
    """
    model_str = self.model

    # Formatted data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'])

    # Construct model
    d_real, discriminator, ae_model = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                        feas['pos_weight'], feas['norm'], d_real,
                        feas['num_nodes'])

    # Class indices do not change between epochs, so compute them once.
    train_targets = np.argmax(feas['y_train'], axis=1)
    test_targets = np.argmax(feas['y_test'], axis=1)

    max_acc = 0

    # The context manager releases the session even if an epoch raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'])

            lm_train = linkpred_metrics(feas['val_edges'],
                                        feas['val_edges_false'])
            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)

            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss=", "{:.5f}".format(avg_cost),
                  "val_roc=", "{:.5f}".format(roc_curr),
                  "val_ap=", "{:.5f}".format(ap_curr))

            # Evaluated every epoch (the original guard `% 1 == 0` was
            # always true).
            lin_model = LogisticRegression().fit(emb[feas['train_mask']],
                                                 train_targets)
            ac_score = accuracy_score(
                test_targets, lin_model.predict(emb[feas['test_mask']]))
            if ac_score > max_acc:
                max_acc = ac_score

            print('Accuracy: ' + str(ac_score))
            print('Max Accuracy: ' + str(max_acc))
def erun(self):
    """Train the temporal model and write the final node embeddings to disk.

    Runs ``self.iteration`` epochs (only batch 0 is used in each epoch),
    then calls ``predict`` once, keeps the last step along axis 1 of the
    result, and saves it with ``np.savetxt`` under a directory derived from
    ``FLAGS.hidden3``, ``FLAGS.time_decay``, ``FLAGS.alpha`` and
    ``self.train_length``.

    NOTE(review): the source was whitespace-mangled; the prediction/export
    step is assumed to run once after training rather than every epoch.
    NOTE(review): the output root is a hard-coded absolute path.

    Returns:
        None. The embeddings are printed and written to a text file.
    """
    # Formatted data
    feas = format_data(self.data_name, self.train_length, FLAGS.time_decay)

    # Define placeholders
    placeholders = get_placeholder(feas['struct_adj_norms'][0])

    # Construct model
    ae_model = get_model(placeholders, feas['feature_dim'],
                         feas['struct_features_nonzeros'][0],
                         feas['num_node'], self.train_length)

    # Optimizer
    opt = get_optimizer(ae_model, placeholders, self.train_length)

    # Initialize session
    gpu_options = tf.GPUOptions(allow_growth=True)
    session_config = tf.ConfigProto(gpu_options=gpu_options,
                                    log_device_placement=True)

    # The context manager releases the session even if training raises.
    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            for i in range(1):  # deliberately only the first batch
                avg_cost, _, struct_loss, temporal_loss = update(
                    opt, sess, feas, i, placeholders)
                print('dataname ', self.data_name, ' epoch ', epoch,
                      'batch ', i, 'total ', avg_cost,
                      'struct ', struct_loss, 'temporal ', temporal_loss)

        embeddings = predict(ae_model, sess, feas, placeholders)

    # Keep only the last step along axis 1.
    embeddings = np.reshape(
        np.array(embeddings)[:, -1, :],
        [feas['num_node'], FLAGS.hidden3[-1]])
    print(embeddings)

    hidden_tag = '-'.join(map(str, FLAGS.hidden3))
    subdir = hidden_tag + '-' + str(FLAGS.time_decay) + '-' + str(FLAGS.alpha)
    write_path = '/home/huawei/risehuang/paper2/gcn_tcn/embedding/{}/{}/'.format(
        subdir, self.train_length)
    # makedirs creates every missing level at once; the previous pair of
    # os.mkdir calls failed when the parent directory did not exist.
    if not os.path.exists(write_path):
        os.makedirs(write_path)

    np.savetxt(write_path + '{}.txt'.format(self.data_name), embeddings)
def erun(self):
    """Run training and report link-prediction quality.

    Validation ROC/AP are printed after each of the ``self.iteration``
    epochs; test ROC/AP are printed on every 10th epoch. The TensorFlow
    session is closed once training is finished.
    """
    model_name = self.model

    # Dataset, placeholders, model and optimizer.
    data = format_data(self.data_name)
    placeholders = get_placeholder(data['adj'])
    d_real, discriminator, ae_model = get_model(
        model_name, placeholders, data['num_features'], data['num_nodes'],
        data['features_nonzero'])
    opt = get_optimizer(
        model_name, ae_model, discriminator, placeholders,
        data['pos_weight'], data['norm'], d_real, data['num_nodes'])

    # Session with on-demand GPU memory allocation.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf.Session(config=session_config)
    sess.run(tf.global_variables_initializer())

    for step in range(1, self.iteration + 1):
        emb, avg_cost = update(
            ae_model, opt, sess, data['adj_norm'], data['adj_label'],
            data['features'], placeholders, data['adj'])

        val_metrics = linkpred_metrics(data['val_edges'],
                                       data['val_edges_false'])
        val_roc, val_ap, _ = val_metrics.get_roc_score(emb, data)

        print("Epoch:", '%04d' % step,
              "train_loss=", "{:.5f}".format(avg_cost),
              "val_roc=", "{:.5f}".format(val_roc),
              "val_ap=", "{:.5f}".format(val_ap))

        if step % 10 != 0:
            continue

        test_metrics = linkpred_metrics(data['test_edges'],
                                        data['test_edges_false'])
        roc_score, ap_score, _ = test_metrics.get_roc_score(emb, data)
        print('Test ROC score: ' + str(roc_score))
        print('Test AP score: ' + str(ap_score))

    sess.close()
def erun(self):
    """Run training, report clustering metrics, and save the final result.

    On every second epoch the current embedding is clustered with KMeans
    (``self.n_clusters`` clusters, ``random_state=0``) and evaluated against
    ``feas['true_labels']``. After training the last embedding is clustered
    once more and both the embedding and the predicted labels are saved as
    ``.npy`` files under ``results/``. The session is closed at the end.
    """
    import numpy as np

    model_name = self.model

    # Dataset, placeholders, model and optimizer.
    data = format_data(self.data_name)
    placeholders = get_placeholder(data['adj'])
    d_real, discriminator, ae_model = get_model(
        model_name, placeholders, data['num_features'], data['num_nodes'],
        data['features_nonzero'])
    opt = get_optimizer(
        model_name, ae_model, discriminator, placeholders,
        data['pos_weight'], data['norm'], d_real, data['num_nodes'])

    # Session with on-demand GPU memory allocation.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf.Session(config=session_config)
    sess.run(tf.global_variables_initializer())

    for step in range(1, self.iteration + 1):
        emb, _ = update(
            ae_model, opt, sess, data['adj_norm'], data['adj_label'],
            data['features'], placeholders, data['adj'])

        if step % 2 != 0:
            continue

        clusterer = KMeans(n_clusters=self.n_clusters, random_state=0).fit(emb)
        print("Epoch:", '%04d' % step)
        step_labels = clusterer.predict(emb)
        clustering_metrics(
            data['true_labels'], step_labels).evaluationClusterModelFromLabel()

    # Final clustering of the last embedding, then persist both arrays.
    final_clusterer = KMeans(n_clusters=self.n_clusters,
                             random_state=0).fit(emb)
    final_labels = final_clusterer.predict(emb)
    np.save('results/emb.npy', emb, allow_pickle=False)
    np.save('results/labels.npy', final_labels, allow_pickle=False)

    sess.close()
def erun(self):
    """Run training and export the final embedding as CSV.

    Prints the node count up front and the training loss on every 10th
    epoch. After ``self.iteration`` epochs the last embedding is written to
    ``output/<data_name>-embedding.csv`` (row index included).

    NOTE(review): the source was whitespace-mangled; the CSV export is
    assumed to run once after the training loop.
    """
    model_name = self.model

    data = format_data(self.data_name)
    print("node num:" + str(data['num_nodes']))

    # Placeholders, model and optimizer.
    placeholders = get_placeholder(data['adj'])
    d_real, discriminator, ae_model = get_model(
        model_name, placeholders, data['num_features'], data['num_nodes'],
        data['features_nonzero'])
    opt = get_optimizer(
        model_name, ae_model, discriminator, placeholders,
        data['pos_weight'], data['norm'], d_real, data['num_nodes'])

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    for step in range(1, self.iteration + 1):
        emb, avg_cost = update(
            ae_model, opt, sess, data['adj_norm'], data['adj_label'],
            data['features'], placeholders, data['adj'])
        if step % 10 == 0:
            print("Epoch:", '%04d' % step,
                  "train_loss=", "{:.5f}".format(avg_cost))

    target = 'output/{}-embedding.csv'.format(self.data_name)
    pd.DataFrame(emb).to_csv(target, index=True, sep=',')
def erun(self):
    """Train the model and export an anomaly ranking and score table.

    Runs ``self.iteration`` epochs; the training loss is printed every 10th
    epoch and the ROC-AUC of the per-node reconstruction errors against
    ``feas['labels']`` every 100th epoch. After training, the first element
    of each label entry is written to ``output/<data_name>-ranking.txt`` in
    order of decreasing reconstruction error, and the raw scores are written
    to ``output/<data_name>-scores.csv`` under the column ``AD-GCA``.

    Returns:
        None. Results are printed and written to the two output files.
    """
    model_str = self.model

    # Load data
    feas = format_data(self.data_name)
    print("feature number: {}".format(feas['num_features']))

    # Define placeholders
    placeholders = get_placeholder()

    # Construct model
    gcn_model = get_model(model_str, placeholders, feas['num_features'],
                          feas['num_nodes'], feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, gcn_model, placeholders,
                        feas['num_nodes'], FLAGS.alpha)

    # Ground truth is loop-invariant, so build it once.
    y_true = [label[0] for label in feas['labels']]

    # The context manager releases the session even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(1, self.iteration + 1):
            reconstruction_errors, reconstruction_loss = update(
                gcn_model, opt, sess, feas['adj_norm'], feas['adj_label'],
                feas['features'], placeholders, feas['adj'])

            if epoch % 10 == 0:
                print("Epoch:", '%04d' % (epoch),
                      "train_loss=", "{:.5f}".format(reconstruction_loss))
            if epoch % 100 == 0:
                auc = roc_auc_score(y_true, reconstruction_errors)
                print(auc)

    # Indices in order of decreasing reconstruction error.
    sorted_errors = np.argsort(-reconstruction_errors, axis=0)
    with open('output/{}-ranking.txt'.format(self.data_name), 'w') as f:
        f.writelines("%s\n" % feas['labels'][index][0]
                     for index in sorted_errors)

    df = pd.DataFrame({'AD-GCA': reconstruction_errors})
    df.to_csv('output/{}-scores.csv'.format(self.data_name),
              index=False, sep=',')
def erun(self):
    """Train the model and return the final anomaly-detection AUC.

    Runs ``self.iteration`` epochs silently, then scores the reconstruction
    errors of the last epoch against the first element of each entry in
    ``feas['labels']`` with ``roc_auc_score``.

    Returns:
        list: ``[self.data_name, self.alpha, auc]``.
    """
    model_str = self.model

    # Load data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder()

    # Construct model
    gcn_model = get_model(model_str, placeholders, feas['num_features'],
                          feas['num_nodes'], feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, gcn_model, placeholders,
                        feas['num_nodes'], self.alpha)

    # The context manager releases the session even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model; only the errors of the last epoch are used below.
        for _ in range(self.iteration):
            reconstruction_errors, _loss = update(
                gcn_model, opt, sess, feas['adj_norm'], feas['adj_label'],
                feas['features'], placeholders, feas['adj'])

    y_true = [label[0] for label in feas['labels']]
    auc = roc_auc_score(y_true, reconstruction_errors)

    return [self.data_name, self.alpha, auc]
def erun(self):
    """Train the model, report clustering metrics and visualise the result.

    ``self.model`` selects the training step: ``update_with_gan`` for
    'arga', 'arvga' and 'vgcg'; ``update`` for 'gae', 'vgae' and 'vgc'.
    After every epoch the embedding is clustered with KMeans
    (``random_state=10``) and nine clustering metrics are printed. After
    training, ``self._vis`` is called twice on the final embedding: once
    with the true labels and once with the predicted labels.

    NOTE(review): the source was whitespace-mangled; the two ``_vis`` calls
    are assumed to run once after the training loop.

    Raises:
        ValueError: if ``self.model`` is not one of the supported names.
            (Previously this only printed a message and then failed with a
            NameError on the undefined embedding.)
    """
    model_str = self.model

    # Pick the training step once, and fail fast on an unknown model.
    if model_str in ('arga', 'arvga', 'vgcg'):
        train_step = update_with_gan
    elif model_str in ('gae', 'vgae', 'vgc'):
        train_step = update
    else:
        raise ValueError(
            'ERROR: model has not be included! (got %r)' % (model_str,))

    # Formatted data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'])

    # Construct model
    d_real, discriminator, ae_model = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'], num_classes=self.n_clusters, cat=self.cat)

    # Optimizer
    opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                        feas['pos_weight'], feas['norm'], d_real,
                        feas['num_nodes'])

    # The context manager releases the session even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            emb, avg_loss = train_step(ae_model, opt, sess, feas['adj_norm'],
                                       feas['adj_label'], feas['features'],
                                       placeholders, feas['adj'])

            # Evaluated every epoch (the original guard `% 1 == 0` was
            # always true).
            kmeans = KMeans(n_clusters=self.n_clusters,
                            random_state=10).fit(emb)
            print("Epoch: {:04d} Loss: {:.4f}".format(epoch + 1, avg_loss))
            predict_labels = kmeans.predict(emb)

            cm = clustering_metrics(feas['true_labels'], predict_labels)
            (acc, f1_macro, precision_macro, recall_macro, f1_micro,
             precision_micro, recall_micro, nmi,
             adjscore) = cm.evaluationClusterModelFromLabel()
            print('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f'
                  % (acc, f1_macro, precision_macro, recall_macro, f1_micro,
                     precision_micro, recall_micro, nmi, adjscore))

    self._vis(emb, feas['true_labels'], self.data_name, self.n_clusters,
              label_flag=True)
    self._vis(emb, predict_labels, self.data_name, self.n_clusters,
              label_flag=False)
def erun(self, adj, features, max_epochs=5):
    """Train briefly on the given graph and return the resulting embedding.

    Resets the default TensorFlow graph, formats the inputs with
    ``format_data_new(adj, coo_matrix(features))``, builds the model, and
    trains for at most ``max_epochs`` epochs (never more than
    ``self.iteration``).

    Args:
        adj: adjacency structure, passed to ``format_data_new``.
        features: node features; converted with ``coo_matrix`` for
            ``format_data_new`` and also passed unchanged to ``update``.
        max_epochs: upper bound on the number of epochs. The default of 5
            reproduces the previous hard-coded early stop; pass ``None`` to
            run all ``self.iteration`` epochs.

    Returns:
        The embedding returned by the last ``update`` call, or ``None`` if
        no epoch was run.
    """
    tf.reset_default_graph()
    model_str = self.model

    # Formatted data
    feas = format_data_new(adj, coo_matrix(features))

    # Define placeholders
    placeholders = get_placeholder(feas['adj'])

    # Construct model
    d_real, discriminator, ae_model = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                        feas['pos_weight'], feas['norm'], d_real,
                        feas['num_nodes'])

    if max_epochs is None:
        n_epochs = self.iteration
    else:
        n_epochs = min(self.iteration, max_epochs)

    emb = None

    # The context manager releases the session even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(n_epochs):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], features)
            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss=", "{:.5f}".format(avg_cost))

    return emb
# Script fragment (notebook cells): derives model inputs from the feature and
# adjacency matrices, builds the model and optimizer, and starts training.
# NOTE(review): `adj`, `adj_train`, `adj_norm`, `features_lil`,
# `features_tuple`, `num_nodes`, `attr_labels_list` and `dim_attr` are defined
# outside this fragment.

# presumably sparse_to_tuple returns (coords, values, shape), judging by the
# indexing below — confirm against its definition
features_sp = sparse_to_tuple(features_lil.tocoo())
num_features = features_sp[2][1]
features_nonzero = features_sp[1].shape[0]

# Ratio of (total entries - sum of adj) to sum of adj.
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = 1

# Training adjacency plus the identity matrix, in tuple form.
adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)

# In[2]:

# Define placeholders
placeholders = get_placeholder(adj)

d_real, discriminator, ae_model = get_model(placeholders, num_features,
                                            num_nodes, features_nonzero,
                                            attr_labels_list[-1], dim_attr)
opt = get_optimizer(ae_model, discriminator, placeholders, pos_weight, norm,
                    d_real, num_nodes, attr_labels_list)

# In[3]:

# Train model
preds_all = None
labels_all = None

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# NOTE(review): `update` receives `features_tuple`, not the `features_sp`
# computed above — confirm that this is intended.
for epoch in range(FLAGS.epochs):
    emb, emb_long, avg_cost, attr_loss, pri_loss, link_loss = update(
        ae_model, opt, sess, adj_norm, adj_label, features_tuple,
        placeholders, adj)
def erun(self):
    """Train with a KDE prior and keep the embedding with the best test ROC.

    A set of node indices is chosen per dataset (sampled from a k-DPP over
    the adjacency matrix for 'cora', fixed lists for 'citeseer' and
    'pubmed'). The PCA-compressed features of those nodes are used to fit a
    ``KernelDensity`` estimator that is passed to ``update``. Validation
    ROC/AP are printed each epoch; every 10th epoch the test ROC/AP are
    printed and recorded together with the embedding. Finally the recorded
    embedding with the highest test ROC is saved to
    ``result/<data_name>_link_64_64_new.mat``.

    Raises:
        ValueError: if ``self.data_name`` has no index set defined.
            (Previously this surfaced later as a NameError.)

    Returns:
        None. Nothing is saved if no test evaluation was recorded, i.e.
        when ``self.iteration`` is below 10.
    """
    model_str = self.model

    # Formatted data
    feas = format_data(self.data_name)

    # Define placeholders; besides the adjacency matrix the number of
    # features is passed as well.
    placeholders = get_placeholder(feas['adj'], feas['num_features'])

    # Gaussian-mixture prior estimated from DPP-selected nodes + KDE.
    pca = PCA(n_components=FLAGS.hidden2)
    if self.data_name == 'cora':
        # The dense adjacency kernel is only built where the DPP is
        # actually sampled.
        DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
        DPP.sample_exact_k_dpp(size=21)
        index = DPP.list_of_samples[0]
    elif self.data_name == 'citeseer':
        index = np.array([
            1782, 741, 3258, 3189, 3112, 2524, 2895, 1780, 1100, 2735, 1318,
            2944, 1825, 18, 987, 2564, 463, 6, 3173, 701, 1901, 2349, 2786,
            2412, 646, 2626, 2648, 1793, 432, 538, 1729, 1217, 1397, 1932,
            2850, 458, 2129, 702, 2934, 2030, 2882, 1393, 308, 1271, 1106,
            2688, 629, 1145, 3251, 1903, 1004, 1149, 1385, 285, 858, 2977,
            844, 335, 532, 404, 3174, 528
        ])
    elif self.data_name == 'pubmed':
        index = np.array(
            [842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466, 2230])
    else:
        raise ValueError(
            'no DPP index set defined for dataset %r' % (self.data_name,))

    feature_sample = pca.fit_transform(feas['features_dense'])
    featuresCompress = np.array([feature_sample[i] for i in index])
    kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

    # Construct model
    d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                        discriminator, placeholders, feas['pos_weight'],
                        feas['norm'], d_real, feas['num_nodes'], GD_real)

    record = []
    record_emb = []

    # The context manager releases the session even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], kde,
                                   feas['features_dense'])

            lm_train = linkpred_metrics(feas['val_edges'],
                                        feas['val_edges_false'])
            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)

            print(
                "Epoch:", '%04d' % (epoch + 1),
                "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}"
                .format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3],
                        avg_cost[4]),
                "val_roc=", "{:.5f}".format(roc_curr),
                "val_ap=", "{:.5f}".format(ap_curr))

            if (epoch + 1) % 10 == 0:
                lm_test = linkpred_metrics(feas['test_edges'],
                                           feas['test_edges_false'])
                roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
                print('Test ROC score: ' + str(roc_score))
                print('Test AP score: ' + str(ap_score))
                record.append([roc_score, ap_score])
                record_emb.append(emb)

    if not record:
        # Fewer than 10 epochs: there is no test evaluation to pick from.
        print('No test evaluation recorded; nothing saved.')
        return

    # First record with the highest test ROC.
    best = max(range(len(record)), key=lambda i: record[i][0])
    emb = record_emb[best]
    ana = record[best]
    scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {
        'embedded': emb,
        'labels': feas['true_labels']
    })
    # NOTE(review): the message says val_roc, but the values are the
    # recorded test scores.
    print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1]))
def erun(self):
    """Pre-train by reconstruction, then refine clusters with a KL objective.

    Phase 1 runs ``self.iterations`` epochs of ``update`` and evaluates a
    KMeans clustering of the test embedding every 10th epoch. The cluster
    centres of a final KMeans fit initialise the model's cluster layer.
    Phase 2 runs up to ``self.kl_iterations`` epochs of ``update_kl``; the
    target distribution is refreshed every 5th epoch and training stops
    early once the fraction of changed assignments drops below ``self.tol``.

    NOTE(review): the source was whitespace-mangled; block nesting below is
    reconstructed from the data flow and should be confirmed.
    NOTE(review): ``NMIs`` and ``loss`` are not defined in this method —
    presumably module-level lists.

    Returns:
        tuple: ``(acc, f1_macro, precision_macro, nmi, adjscore)`` from the
        most recent clustering evaluation.
    """
    tf.reset_default_graph()
    model_str = self.model
    # formatted data
    feas = format_data(self.data_name)
    placeholders = get_placeholder(feas['adjs'], feas['numView'])
    # construct model
    ae_model = get_model(model_str, placeholders, feas['numView'],
                         feas['num_features'], feas['num_nodes'],
                         self.n_clusters)
    # Optimizer
    opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                        feas['num_nodes'])
    # Initialize session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    # Train model
    pos_weights = feas['pos_weights']
    fea_pos_weights = feas['fea_pos_weights']
    # Phase 1: reconstruction training.
    for epoch in range(self.iterations):
        reconstruct_loss = update(ae_model,
                                  opt,
                                  sess,
                                  feas['adjs'],
                                  feas['adjs_label'],
                                  feas['features'],
                                  placeholders,
                                  pos_weights,
                                  fea_pos_weights,
                                  feas['norms'],
                                  attn_drop=0.,
                                  ffd_drop=0.)
        print('reconstruct_loss', reconstruct_loss)
        if (epoch + 1) % 10 == 0:
            emb_ind = update_test(ae_model,
                                  opt,
                                  sess,
                                  feas['adjs'],
                                  feas['adjs_label'],
                                  feas['features'],
                                  placeholders,
                                  pos_weights=pos_weights,
                                  fea_pos_weights=fea_pos_weights,
                                  norm=feas['norms'],
                                  attn_drop=0,
                                  ffd_drop=0)
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
            print("PAP Epoch:", '%04d' % (epoch + 1))
            predict_labels = kmeans.predict(emb_ind)
            #print('emb1', emb_ind[1])
            label_num = count_num(predict_labels)
            print('view1 label_num:', label_num)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            NMIs.append(nmi)
            loss.append(reconstruct_loss)
    # Initialise the cluster layer from KMeans centres on the last test
    # embedding (emb_ind only exists once phase 1 reached its 10th epoch).
    kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
    y_pred_last = kmeans.labels_
    cm = clustering_metrics(label_mask(feas['true_labels']), y_pred_last)
    # idx is forwarded to update_kl below.
    acc, f1_macro, precision_macro, nmi, adjscore, idx = cm.evaluationClusterModelFromLabel(
    )
    init_cluster = tf.constant(kmeans.cluster_centers_)
    sess.run(
        tf.assign(ae_model.cluster_layer.vars['clusters'], init_cluster))
    q = compute_q(ae_model,
                  opt,
                  sess,
                  feas['adjs'],
                  feas['adjs_label'],
                  feas['features'],
                  placeholders,
                  pos_weights,
                  fea_pos_weights,
                  feas['norms'],
                  attn_drop=0.,
                  ffd_drop=0.)
    p = target_distribution(q)
    # Phase 2: KL-driven cluster refinement.
    for epoch in range(self.kl_iterations):
        emb, kl_loss = update_kl(ae_model,
                                 opt,
                                 sess,
                                 feas['adjs'],
                                 feas['adjs_label'],
                                 feas['features'],
                                 p,
                                 placeholders,
                                 pos_weights,
                                 fea_pos_weights,
                                 feas['norms'],
                                 attn_drop=0.,
                                 ffd_drop=0.,
                                 idx=idx,
                                 label=label_mask(feas['true_labels']))
        if epoch % 10 == 0:
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
            predict_labels = kmeans.predict(emb)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            NMIs.append(nmi)
            loss.append(kl_loss)
        if epoch % 5 == 0:
            # Refresh the target distribution and measure how many hard
            # assignments changed since the previous refresh.
            q = compute_q(ae_model,
                          opt,
                          sess,
                          feas['adjs'],
                          feas['adjs_label'],
                          feas['features'],
                          placeholders,
                          pos_weights,
                          fea_pos_weights,
                          feas['norms'],
                          attn_drop=0.,
                          ffd_drop=0.)
            p = target_distribution(q)
            y_pred = q.argmax(1)
            delta_label = np.sum(y_pred != y_pred_last).astype(
                np.float32) / y_pred.shape[0]
            y_pred_last = y_pred
            print('delta_label', delta_label)
            print("Epoch:", '%04d' % (epoch + 1))
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
            predict_labels = kmeans.predict(emb)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            # Never stop on the very first epoch.
            if epoch > 0 and delta_label < self.tol:
                print("early_stop")
                break
    print('NMI', NMIs)
    print('loss', loss)
    return acc, f1_macro, precision_macro, nmi, adjscore
def erun(self):
    """Train with a KDE prior and keep the embedding with the best accuracy.

    A set of node indices is chosen per dataset (sampled from a k-DPP over
    the adjacency matrix for 'cora', fixed lists for 'citeseer' and
    'pubmed'). The PCA-compressed features of those nodes are used to fit a
    ``KernelDensity`` estimator that is passed to ``update``. Every second
    epoch the embedding is clustered with KMeans (``random_state=0``) and
    the three values returned by ``evaluationClusterModelFromLabel`` are
    recorded. Finally the recorded embedding whose first metric is highest
    is saved to ``result/<data_name>.mat``.

    Raises:
        ValueError: if ``self.data_name`` has no index set defined.
            (Previously this surfaced later as a NameError.)

    Returns:
        None. Nothing is saved if no evaluation was recorded, i.e. when
        ``self.iteration`` is below 2.
    """
    model_str = self.model

    # Formatted data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'], feas['num_features'])

    # Gaussian-mixture prior estimated from DPP-selected nodes + KDE.
    pca = PCA(n_components=FLAGS.hidden2)
    if self.data_name == 'cora':
        # The dense adjacency kernel is only built where the DPP is
        # actually sampled.
        DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
        DPP.sample_exact_k_dpp(size=24)
        index = DPP.list_of_samples[0]
    elif self.data_name == 'citeseer':
        # Earlier candidates, overwritten before use in the original code:
        #   [481, 1763, 1701, 171, 1425, 842]     (peaked at epoch 36, 0.571)
        #   [3165, 589, 1283, 1756, 2221, 2409]   (reached 0.545 at epoch 50)
        # The set below is the one that was actually in effect.
        index = np.array([2300, 2725, 3313, 1216, 2821, 2432])
    elif self.data_name == 'pubmed':
        index = np.array([
            842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466, 2230,
            14052
        ])
    else:
        raise ValueError(
            'no DPP index set defined for dataset %r' % (self.data_name,))

    feature_sample = pca.fit_transform(feas['features_dense'])
    featuresCompress = np.array([feature_sample[i] for i in index])
    kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

    # Construct model
    d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                        discriminator, placeholders, feas['pos_weight'],
                        feas['norm'], d_real, feas['num_nodes'], GD_real)

    # Initialize session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Metrics and embeddings recorded at each evaluation.
    record = []
    record_emb = []

    # The context manager releases the session even if training raises.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], kde,
                                   feas['features_dense'])

            if (epoch + 1) % 2 == 0:
                record_emb.append(emb)
                kmeans = KMeans(n_clusters=self.n_clusters,
                                random_state=0).fit(emb)
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(feas['true_labels'], predict_labels)
                a, b, c = cm.evaluationClusterModelFromLabel()
                record.append([a, b, c])

    if not record:
        # Fewer than 2 epochs: there is no evaluation to pick from.
        print('No clustering evaluation recorded; nothing saved.')
        return

    # First record with the highest first metric (printed as ACC below).
    best = max(range(len(record)), key=lambda i: record[i][0])
    ana = record[best]
    print('------------------------------------', best)
    emb = record_emb[best]
    scio.savemat('result/{}.mat'.format(self.data_name), {
        'embedded': emb,
        'labels': feas['true_labels']
    })
    print('The peak ACC=%f, NMI=%f, ADJ_RAND_SCORE=%f'
          % (ana[0], ana[1], ana[2]))
def erun(self):
    """Warm up, pre-train by reconstruction, then refine clusters with KL.

    Warm-up runs ``self.warm_iteration`` epochs of ``warm_update`` and, at
    epoch 50 only, evaluates a KMeans clustering of each of the two
    embeddings returned by ``warm_update_test``. Phase 1 runs
    ``self.iterations`` epochs of ``update`` and evaluates the view selected
    by ``FLAGS.input_view`` every 10th epoch; a final KMeans fit initialises
    the model's cluster layer. Phase 2 runs up to ``self.kl_iterations``
    epochs of ``update_kl``, refreshing the target distribution every 5th
    epoch and stopping early once the fraction of changed assignments drops
    below ``self.tol``. The final embedding of the selected view is passed
    to ``save_embed`` with the name ``emb_10.txt``.

    NOTE(review): the source was whitespace-mangled; block nesting below is
    reconstructed from the data flow and should be confirmed — in
    particular whether ``print('NMIs', NMIs)`` belongs inside the epoch-50
    branch.
    NOTE(review): ``NMIs`` and ``loss`` are not defined in this method —
    presumably module-level lists.

    Returns:
        tuple: ``(acc, f1_macro, precision_macro, nmi, adjscore)`` from the
        most recent clustering evaluation.
    """
    tf.reset_default_graph()
    model_str = self.model
    # formatted data
    feas = format_data(self.data_name)
    placeholders = get_placeholder(feas['adjs'], feas['numView'])
    # construct model
    ae_model = get_model(model_str, placeholders, feas['numView'],
                         feas['num_features'], feas['num_nodes'],
                         self.n_clusters)
    # Optimizer
    opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                        feas['num_nodes'])
    # Initialize session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    # Train model
    pos_weights = feas['pos_weights']
    fea_pos_weights = feas['fea_pos_weights']
    # Warm-up phase.
    for epoch in range(self.warm_iteration):
        reconstruct_loss = warm_update(ae_model,
                                       opt,
                                       sess,
                                       feas['numView'],
                                       feas['adjs'],
                                       feas['adjs_label'],
                                       feas['features'],
                                       placeholders,
                                       pos_weights,
                                       fea_pos_weights,
                                       feas['norms'],
                                       attn_drop=0.,
                                       ffd_drop=0.)
        print('reconstruct_loss', reconstruct_loss)
        # One-off evaluation of both embeddings at epoch 50.
        if (epoch + 1) == 50:
            emb = warm_update_test(ae_model,
                                   opt,
                                   sess,
                                   feas['adjs'],
                                   feas['adjs_label'],
                                   feas['features'],
                                   placeholders,
                                   pos_weights=feas['pos_weights'],
                                   fea_pos_weights=fea_pos_weights,
                                   norm=feas['norms'],
                                   attn_drop=0,
                                   ffd_drop=0)
            # NOTE(review): avg_emb is not used afterwards in this method.
            avg_emb = (emb[0] + emb[1]) / 2
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb[0])
            print("Epoch:", '%04d' % (epoch + 1))
            predict_labels0 = kmeans.predict(emb[0])
            label_num = count_num(predict_labels0)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels0)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            #enc = preprocessing.OneHotEncoder()
            #onehot_predict0 = enc.fit_transform(predict_labels0.reshape(-1, 1))
            #Q = eng.modul(adjs0, onehot_predict)
            #print('view0 Q', Q)
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb[1])
            print("Epoch:", '%04d' % (epoch + 1))
            predict_labels1 = kmeans.predict(emb[1])
            label_num = count_num(predict_labels1)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels1)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            #onehot_predict1 = enc.fit_transform(predict_labels1.reshape(-1, 1))
            #Q = eng.modul(adjs1, onehot_predict)
            #print('view1 Q', Q)
            #scio.savemat('acm_modurity.mat', {'adj0':feas['adjs_label'][0],'onehot_predict0':onehot_predict0, 'adj1':feas['adjs_label'][1],'onehot_predict1':onehot_predict1})
            NMIs.append(nmi)
            print('NMIs', NMIs)
    print('warm up done!')
    # Phase 1: reconstruction training.
    for epoch in range(self.iterations):
        reconstruct_loss = update(ae_model,
                                  opt,
                                  sess,
                                  feas['adjs'],
                                  feas['adjs_label'],
                                  feas['features'],
                                  placeholders,
                                  pos_weights,
                                  fea_pos_weights,
                                  feas['norms'],
                                  attn_drop=0.,
                                  ffd_drop=0.)
        print('reconstruct_loss', reconstruct_loss)
        if (epoch + 1) % 10 == 0:
            emb_ind = update_test(ae_model,
                                  opt,
                                  sess,
                                  feas['adjs'],
                                  feas['adjs_label'],
                                  feas['features'],
                                  placeholders,
                                  pos_weights=pos_weights,
                                  fea_pos_weights=fea_pos_weights,
                                  norm=feas['norms'],
                                  attn_drop=0,
                                  ffd_drop=0)
            kmeans = KMeans(n_clusters=self.n_clusters).fit(
                emb_ind[FLAGS.input_view])
            print("PAP Epoch:", '%04d' % (epoch + 1))
            predict_labels = kmeans.predict(emb_ind[FLAGS.input_view])
            #print('emb1', emb_ind[1])
            label_num = count_num(predict_labels)
            print('view1 label_num:', label_num)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            NMIs.append(nmi)
            loss.append(reconstruct_loss)
    # Initialise the cluster layer from KMeans centres on the last test
    # embedding (emb_ind only exists once phase 1 reached its 10th epoch).
    kmeans = KMeans(n_clusters=self.n_clusters).fit(
        emb_ind[FLAGS.input_view])
    y_pred_last = kmeans.labels_
    cm = clustering_metrics(label_mask(feas['true_labels']), y_pred_last)
    # idx is forwarded to update_kl below.
    acc, f1_macro, precision_macro, nmi, adjscore, idx = cm.evaluationClusterModelFromLabel(
    )
    init_cluster = tf.constant(kmeans.cluster_centers_)
    sess.run(
        tf.assign(ae_model.cluster_layer.vars['clusters'], init_cluster))
    q = compute_q(ae_model,
                  opt,
                  sess,
                  feas['adjs'],
                  feas['adjs_label'],
                  feas['features'],
                  placeholders,
                  pos_weights,
                  fea_pos_weights,
                  feas['norms'],
                  attn_drop=0.,
                  ffd_drop=0.)
    p = target_distribution(q)
    # Phase 2: KL-driven cluster refinement.
    for epoch in range(self.kl_iterations):
        emb, kl_loss = update_kl(ae_model,
                                 opt,
                                 sess,
                                 feas['adjs'],
                                 feas['adjs_label'],
                                 feas['features'],
                                 p,
                                 placeholders,
                                 pos_weights,
                                 fea_pos_weights,
                                 feas['norms'],
                                 attn_drop=0.,
                                 ffd_drop=0.,
                                 idx=idx,
                                 label=label_mask(feas['true_labels']))
        if epoch % 10 == 0:
            kmeans = KMeans(n_clusters=self.n_clusters).fit(
                emb[FLAGS.input_view])
            predict_labels = kmeans.predict(emb[FLAGS.input_view])
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            NMIs.append(nmi)
            loss.append(kl_loss)
        if epoch % 5 == 0:
            # Refresh the target distribution and measure how many hard
            # assignments changed since the previous refresh.
            q = compute_q(ae_model,
                          opt,
                          sess,
                          feas['adjs'],
                          feas['adjs_label'],
                          feas['features'],
                          placeholders,
                          pos_weights,
                          fea_pos_weights,
                          feas['norms'],
                          attn_drop=0.,
                          ffd_drop=0.)
            p = target_distribution(q)
            y_pred = q.argmax(1)
            delta_label = np.sum(y_pred != y_pred_last).astype(
                np.float32) / y_pred.shape[0]
            y_pred_last = y_pred
            print('delta_label', delta_label)
            print("Epoch:", '%04d' % (epoch + 1))
            kmeans = KMeans(n_clusters=self.n_clusters).fit(
                emb[FLAGS.input_view])
            predict_labels = kmeans.predict(emb[FLAGS.input_view])
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
            )
            # Never stop on the very first epoch.
            if epoch > 0 and delta_label < self.tol:
                print("early_stop")
                break
    print('NMI', NMIs)
    print('loss', loss)
    save_embed(emb[FLAGS.input_view], 'emb_10.txt')
    return acc, f1_macro, precision_macro, nmi, adjscore