def train(self):
    for i in range(self.epochs):
        print(f"epoch:{i}")
        pred_top, pred_att = self.train_loop()

        # measure accuracy on the train edges
        top_acc_function = tf.keras.metrics.BinaryAccuracy()
        top_acc_function.update_state(self.y_actual[0], pred_top)
        top_train_accuracy = top_acc_function.result().numpy()

        # measure the accuracy on the attributes
        att_acc_function = tf.keras.metrics.BinaryAccuracy()
        att_acc_function.update_state(self.y_actual[1].flatten(),
                                      tf.reshape(pred_att, [-1]))
        att_train_accuracy = att_acc_function.result().numpy()

        print(f"train top acc: {top_train_accuracy}")
        print(f"train att acc: {att_train_accuracy}")

        # get the labels from the embedding layer
        pred_labels_z = self.Z_np.argmax(1)
        pred_labels_x = self.X2_np.argmax(1)

        # get the accuracy of the predicted labels
        cm = clustering_metrics(self.clustering_labels, pred_labels_z)
        res = cm.clusteringAcc()
        print("acc_z:{}, f1_z:{}".format(res[0], res[1]))

        cm = clustering_metrics(self.clustering_labels, pred_labels_x)
        res = cm.clusteringAcc()
        print("acc_x:{}, f1_x:{}".format(res[0], res[1]))
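# The snippets in this file call an external `clustering_metrics` helper
# that is never defined here. Below is a minimal sketch of the variant used
# above (constructor taking true/predicted labels, plus a `clusteringAcc`
# method), assuming the standard Hungarian-matching definition of clustering
# accuracy and labels encoded as integers 0..k-1. Note one later call site
# reads F1 at index 2, so the original likely returns more fields (perhaps
# acc, nmi, f1); this sketch returns (acc, f1) and is an assumption, not the
# original implementation.
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn import metrics


class clustering_metrics:
    def __init__(self, true_label, predict_label):
        self.true_label = np.asarray(true_label)
        self.pred_label = np.asarray(predict_label)

    def clusteringAcc(self):
        k = max(self.true_label.max(), self.pred_label.max()) + 1
        # w[i, j] counts samples predicted as cluster i with true class j
        w = np.zeros((k, k), dtype=np.int64)
        for t, p in zip(self.true_label, self.pred_label):
            w[p, t] += 1
        # Hungarian algorithm: best one-to-one cluster-to-class matching
        row, col = linear_sum_assignment(-w)
        acc = w[row, col].sum() / len(self.true_label)
        # relabel predictions by the matching before computing macro F1
        match = dict(zip(row, col))
        remapped = np.array([match[p] for p in self.pred_label])
        f1 = metrics.f1_score(self.true_label, remapped, average='macro')
        return acc, f1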
def clustering_metrics(self, n_runs=10, compare_node_types=True):
    loader = self.trainvalidtest_dataloader()
    X_all, y_all, _ = next(iter(loader))
    self.cpu().forward(preprocess_input(X_all, device="cpu"))

    if not isinstance(self._embeddings, dict):
        self._embeddings = {list(self._node_ids.keys())[0]: self._embeddings}

    embeddings_all, types_all, y_true = self.dataset.get_embeddings_labels(
        self._embeddings, self._node_ids)

    # Record metrics for each run in a list of dicts.
    # Note: `[{}] * n_runs` would create n_runs references to the *same*
    # dict, so every run would overwrite the previous results.
    res = [{} for _ in range(n_runs)]
    for i in range(n_runs):
        y_pred = self.dataset.predict_cluster(
            n_clusters=len(y_true.unique()), seed=i)

        if compare_node_types and len(self.dataset.node_types) > 1:
            res[i].update(
                clustering_metrics(
                    y_true=types_all,
                    # match y_pred to types_all's index
                    y_pred=types_all.index.map(
                        lambda idx: y_pred.get(idx, "")),
                    metrics=["homogeneity_ntype", "completeness_ntype",
                             "nmi_ntype"]))

        if y_pred.shape[0] != y_true.shape[0]:
            y_pred = y_pred.loc[y_true.index]

        res[i].update(
            clustering_metrics(
                y_true, y_pred,
                metrics=["homogeneity", "completeness", "nmi"]))

    res_df = pd.DataFrame(res)
    metrics = res_df.mean(0).to_dict()
    return metrics
def evaluate(emb, labels, K=2):
    # cluster the embeddings and score them against the ground-truth labels
    kmeans = KMeans(K, random_state=0).fit(emb)
    predict_labels = kmeans.predict(emb)
    cm = clustering_metrics(labels, predict_labels)
    cm.evaluationClusterModelFromLabel()

    # persist the predicted cluster labels for later use
    with open('/home/zmm/advGraph/nettack-master/ourDefense/clusterLabel/'
              'dw_labels_polblogs', 'wb') as f:
        pkl.dump(predict_labels, f)
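# `evaluationClusterModelFromLabel` is project-specific and its return
# signature differs between the snippets in this file (no return value
# here, 3- to 9-tuples elsewhere), so the real class clearly has several
# variants. The standalone function below is a hypothetical stand-in
# computing the three scores the variants share; it is not the original
# method.
from sklearn import metrics


def evaluation_from_labels(true_label, pred_label):
    # Hungarian-matched accuracy from the sketch above, plus NMI and ARI
    acc, f1 = clustering_metrics(true_label, pred_label).clusteringAcc()
    nmi = metrics.normalized_mutual_info_score(true_label, pred_label)
    adjscore = metrics.adjusted_rand_score(true_label, pred_label)
    print('ACC=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, nmi, adjscore))
    return acc, nmi, adjscore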
def erun(self):
    import numpy as np  # hoisted from the loop body; normally module-level

    model_str = self.model

    # formatted data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'])

    # construct model
    d_real, discriminator, ae_model = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                        feas['pos_weight'], feas['norm'], d_real,
                        feas['num_nodes'])

    # Initialize session with on-demand GPU memory growth
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    sess.run(tf.global_variables_initializer())

    # Train model
    for epoch in range(self.iteration):
        emb, _ = update(ae_model, opt, sess, feas['adj_norm'],
                        feas['adj_label'], feas['features'], placeholders,
                        feas['adj'])

        # evaluate the embeddings with k-means every second epoch
        if (epoch + 1) % 2 == 0:
            kmeans = KMeans(n_clusters=self.n_clusters,
                            random_state=0).fit(emb)
            print("Epoch:", '%04d' % (epoch + 1))
            predict_labels = kmeans.predict(emb)
            cm = clustering_metrics(feas['true_labels'], predict_labels)
            cm.evaluationClusterModelFromLabel()

    # final clustering on the last embeddings, saved for later analysis
    kmeans = KMeans(n_clusters=self.n_clusters, random_state=0).fit(emb)
    predict_labels = kmeans.predict(emb)
    np.save('results/emb.npy', emb, allow_pickle=False)
    np.save('results/labels.npy', predict_labels, allow_pickle=False)
    sess.close()
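# The `update` helper is not shown in this file. In GAE/ARGA-style TF1
# training code it typically builds a feed dict and runs one optimizer
# step, roughly as sketched below; `construct_feed_dict` and the
# `embeddings` output name are assumptions, not the original helper.
def update(model, opt, sess, adj_norm, adj_label, features, placeholders,
           adj):
    # one gradient step; returns the node embeddings and the average cost
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)
    _, avg_cost, emb = sess.run([opt.opt_op, opt.cost, model.embeddings],
                                feed_dict=feed_dict)
    return emb, avg_cost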
def erun(self):
    model_str = self.model

    # formatted data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'])

    # construct model
    d_real, discriminator, ae_model = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'], num_classes=self.n_clusters, cat=self.cat)

    # Optimizer
    opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                        feas['pos_weight'], feas['norm'], d_real,
                        feas['num_nodes'])

    # Initialize session
    sess = tf.Session()
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    sess.run(tf.global_variables_initializer())

    # Train model
    for epoch in range(self.iteration):
        # adversarial models take an extra GAN update step
        if model_str in ['arga', 'arvga', 'vgcg']:
            emb, avg_loss = update_with_gan(ae_model, opt, sess,
                                            feas['adj_norm'],
                                            feas['adj_label'],
                                            feas['features'], placeholders,
                                            feas['adj'])
        elif model_str in ['gae', 'vgae', 'vgc']:
            emb, avg_loss = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'])
        else:
            print('ERROR: model has not been included!')

        # evaluate the embeddings with k-means every epoch
        if (epoch + 1) % 1 == 0:
            kmeans = KMeans(n_clusters=self.n_clusters,
                            random_state=10).fit(emb)
            print("Epoch: {:04d} Loss: {:.4f}".format(epoch + 1, avg_loss))
            predict_labels = kmeans.predict(emb)
            # predict_labels = classes
            cm = clustering_metrics(feas['true_labels'], predict_labels)
            (acc, f1_macro, precision_macro, recall_macro, f1_micro,
             precision_micro, recall_micro, nmi,
             adjscore) = cm.evaluationClusterModelFromLabel()
            print('ACC=%f, f1_macro=%f, precision_macro=%f, '
                  'recall_macro=%f, f1_micro=%f, precision_micro=%f, '
                  'recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' %
                  (acc, f1_macro, precision_macro, recall_macro, f1_micro,
                   precision_micro, recall_micro, nmi, adjscore))

    # visualize embeddings with true and predicted labels
    self._vis(emb, feas['true_labels'], self.data_name, self.n_clusters,
              label_flag=True)
    self._vis(emb, predict_labels, self.data_name, self.n_clusters,
              label_flag=False)
def main(X, av, gnd, m, a, k, ind):
    N = X.shape[0]
    begin_time_filter = time()
    types = len(np.unique(gnd))

    # coarsened similarity matrix S from the graph filter
    S, begin_time = FGC_cora_modified(X, av, gnd, a, k, ind)

    # symmetric degree normalization: S_hat = D^{-1/2} S
    D = np.sum(S, axis=1)
    D = np.power(D, -0.5)
    D[np.isinf(D)] = 0
    D[np.isnan(D)] = 0
    D = np.diagflat(D)              # (m, m)
    S_hat = D.dot(S)                # (m, n)
    S_hat_tmp = S_hat.dot(S_hat.T)  # (m, m)
    S_hat_tmp[np.isinf(S_hat_tmp)] = 0
    S_hat_tmp[np.isnan(S_hat_tmp)] = 0

    # leading singular vectors instead of a full eigendecomposition
    # sigma, E = scipy.linalg.eig(S_hat_tmp)
    E, sigma, v = sp.linalg.svds(S_hat_tmp, k=types, which='LM')
    sigma = sigma.T  # no-op for a 1-D array; kept from the original
    sigma = np.power(sigma, -0.5)
    sigma[np.isinf(sigma)] = 0
    sigma[np.isnan(sigma)] = 0
    sigma = np.diagflat(sigma)

    # spectral embedding: C_hat = Sigma^{-1/2} E^T S_hat
    C_hat = (sigma.dot(E.T)).dot(S_hat)
    C_hat[np.isinf(C_hat)] = 0
    C_hat[np.isnan(C_hat)] = 0
    C_hat = C_hat.astype(float)

    # cluster the columns of the embedding (one column per node)
    kmeans = KMeans(n_clusters=types, random_state=37).fit(C_hat.T)
    predict_labels = kmeans.predict(C_hat.T)
    cm = clustering_metrics(gnd, predict_labels)
    ac, nm, f1, adj = cm.evaluationClusterModelFromLabel(m, a, k)

    end_time = time()
    tot_time = end_time - begin_time
    tot_time_filter = end_time - begin_time_filter
    return ac, nm, f1, adj, tot_time, tot_time_filter
# v -> right singular vectors
u, s, v = sp.linalg.svds(feature, k=k, which='LM')
predict_labels = None
for i in range(rep):
    # run kmeans on the current features
    kmeans = KMeans(n_clusters=k).fit(u)
    # get the predicted labels
    predict_labels = kmeans.predict(u)
    # measure the intra-cluster distance
    intraD[i] = square_dist(predict_labels, feature)
    # intraD[i] = dist(predict_labels, feature)
    # measure the accuracy, NMI and F1-score
    cm = clustering_metrics(gnd, predict_labels)
    ac[i], nm[i], f1[i] = cm.evaluationClusterModelFromLabel()

# save the scores
intra_list.append(np.mean(intraD))
acc_list.append(np.mean(ac))
stdacc_list.append(np.std(ac))
nmi_list.append(np.mean(nm))
stdnmi_list.append(np.std(nm))
f1_list.append(np.mean(f1))
stdf1_list.append(np.std(f1))
print(f'power: {power}',
      f'intra_dist: {intra_list[-1]}',
      f'acc_mean: {acc_list[-1]}',
      f'acc_std: {stdacc_list[-1]}',
      f'nmi_mean: {nmi_list[-1]}',
      f'nmi_std: {stdnmi_list[-1]}',
      f'f1_mean: {f1_list[-1]}',
      f'f1_std: {stdf1_list[-1]}')
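# `square_dist` is not defined in these snippets. A plausible sketch is
# given below, under the assumption that it measures how tight the
# predicted clusters are (lower is better); the name and signature follow
# the call above, but the body is hypothetical.
import numpy as np


def square_dist(pred_labels, feature):
    # assumed implementation: mean squared distance to each cluster's
    # centroid, averaged over clusters (a common "intra distance" proxy,
    # proportional to the mean pairwise squared distance within a cluster)
    feature = np.asarray(feature)
    clusters = np.unique(pred_labels)
    total = 0.0
    for c in clusters:
        pts = feature[pred_labels == c]
        centroid = pts.mean(axis=0)
        total += ((pts - centroid) ** 2).sum(axis=1).mean()
    return total / len(clusters)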
def erun(self):
    tf.reset_default_graph()
    model_str = self.model

    # formatted data
    feas = format_data(self.data_name)
    placeholders = get_placeholder(feas['adjs'], feas['numView'])

    # construct model
    ae_model = get_model(model_str, placeholders, feas['numView'],
                         feas['num_features'], feas['num_nodes'],
                         self.n_clusters)

    # Optimizer
    opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                        feas['num_nodes'])

    # Initialize session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Train model (reconstruction phase).
    # NMIs and loss are assumed to be module-level lists defined elsewhere.
    pos_weights = feas['pos_weights']
    fea_pos_weights = feas['fea_pos_weights']
    for epoch in range(self.iterations):
        reconstruct_loss = update(ae_model, opt, sess, feas['adjs'],
                                  feas['adjs_label'], feas['features'],
                                  placeholders, pos_weights,
                                  fea_pos_weights, feas['norms'],
                                  attn_drop=0., ffd_drop=0.)
        print('reconstruct_loss', reconstruct_loss)

        if (epoch + 1) % 10 == 0:
            emb_ind = update_test(ae_model, opt, sess, feas['adjs'],
                                  feas['adjs_label'], feas['features'],
                                  placeholders, pos_weights=pos_weights,
                                  fea_pos_weights=fea_pos_weights,
                                  norm=feas['norms'], attn_drop=0,
                                  ffd_drop=0)
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
            print("PAP Epoch:", '%04d' % (epoch + 1))
            predict_labels = kmeans.predict(emb_ind)
            # print('emb1', emb_ind[1])
            label_num = count_num(predict_labels)
            print('view1 label_num:', label_num)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            NMIs.append(nmi)
            loss.append(reconstruct_loss)

    # initialize the clustering layer from k-means centroids
    kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
    y_pred_last = kmeans.labels_
    cm = clustering_metrics(label_mask(feas['true_labels']), y_pred_last)
    acc, f1_macro, precision_macro, nmi, adjscore, idx = \
        cm.evaluationClusterModelFromLabel()
    init_cluster = tf.constant(kmeans.cluster_centers_)
    sess.run(tf.assign(ae_model.cluster_layer.vars['clusters'],
                       init_cluster))

    # self-training phase: KL divergence to the target distribution
    q = compute_q(ae_model, opt, sess, feas['adjs'], feas['adjs_label'],
                  feas['features'], placeholders, pos_weights,
                  fea_pos_weights, feas['norms'], attn_drop=0., ffd_drop=0.)
    p = target_distribution(q)
    for epoch in range(self.kl_iterations):
        emb, kl_loss = update_kl(ae_model, opt, sess, feas['adjs'],
                                 feas['adjs_label'], feas['features'], p,
                                 placeholders, pos_weights,
                                 fea_pos_weights, feas['norms'],
                                 attn_drop=0., ffd_drop=0., idx=idx,
                                 label=label_mask(feas['true_labels']))
        if epoch % 10 == 0:
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
            predict_labels = kmeans.predict(emb)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            NMIs.append(nmi)
            loss.append(kl_loss)
        if epoch % 5 == 0:
            # refresh the target distribution and check label stability
            q = compute_q(ae_model, opt, sess, feas['adjs'],
                          feas['adjs_label'], feas['features'],
                          placeholders, pos_weights, fea_pos_weights,
                          feas['norms'], attn_drop=0., ffd_drop=0.)
            p = target_distribution(q)
            y_pred = q.argmax(1)
            delta_label = np.sum(y_pred != y_pred_last).astype(
                np.float32) / y_pred.shape[0]
            y_pred_last = y_pred
            print('delta_label', delta_label)
            print("Epoch:", '%04d' % (epoch + 1))
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
            predict_labels = kmeans.predict(emb)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            if epoch > 0 and delta_label < self.tol:
                print("early_stop")
                break

    print('NMI', NMIs)
    print('loss', loss)
    return acc, f1_macro, precision_macro, nmi, adjscore
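# `target_distribution` is referenced but not defined in this file. The
# self-training loops above follow the DEC recipe (Xie et al., 2016), so
# the standard DEC target is sketched below as an assumption: each soft
# assignment is squared and normalized by the soft cluster frequency,
# which sharpens confident assignments.
import numpy as np


def target_distribution(q):
    # p_ij = (q_ij^2 / f_j) / sum_j' (q_ij'^2 / f_j'), with f_j = sum_i q_ij
    weight = q ** 2 / q.sum(0)
    return (weight.T / weight.sum(1)).T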
def erun(self):
    model_str = self.model

    # formatted data
    feas = format_data(self.data_name)

    # Define placeholders
    placeholders = get_placeholder(feas['adj'], feas['num_features'])

    # Gaussian mixture estimated from a DPP sample plus kernel density
    # estimation
    DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
    # DPP.sample_exact_k_dpp(size=4)
    pca = PCA(n_components=FLAGS.hidden2)
    # index = DPP.list_of_samples[0]
    if self.data_name == 'cora':
        DPP.sample_exact_k_dpp(size=24)
        index = DPP.list_of_samples[0]
    elif self.data_name == 'citeseer':
        # Hand-picked anchor sets. The original assigned `index` several
        # times in a row, so only the last assignment took effect; the
        # overridden alternatives are kept as comments.
        # index = np.array([481, 1763, 1701, 171, 1425, 842])
        #     # best 0.571 at epoch 36
        # index = np.array([3165, 589, 1283, 1756, 2221, 2409])
        #     # reaches 0.545 at epoch 50
        index = np.array([2300, 2725, 3313, 1216, 2821, 2432])  # epoch 50
        # index = np.array([1718, 3241, 787, 2727, 624, 3110, 1503, 1867,
        #                   2410, 1594, 1203, 2711, 171, 1790, 1778, 294,
        #                   685, 39, 1700, 2650, 2028, 2573, 375, 2744,
        #                   2302, 1876, 784, 2233, 2546, 1793, 1677, 3278,
        #                   2587, 2623, 1018, 1160, 3166, 668, 1663, 3007,
        #                   864, 2893, 743, 3129, 3104, 3277, 1643, 3047,
        #                   322, 298, 2894, 35, 2578, 2031, 3316, 1815,
        #                   361, 1868, 1546, 1895, 1514, 636])
        #     # this set gives the best performance
    elif self.data_name == 'pubmed':
        index = np.array([842, 3338, 5712, 17511, 10801, 2714, 6970,
                          13296, 5466, 2230, 14052])

    # compress the features with PCA and fit a KDE on the sampled anchors
    feature_sample = feas['features_dense']
    feature_sample = pca.fit_transform(feature_sample)
    featuresCompress = np.array([feature_sample[i] for i in index])
    kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

    # construct model
    d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
        model_str, placeholders, feas['num_features'], feas['num_nodes'],
        feas['features_nonzero'])

    # Optimizer
    opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                        discriminator, placeholders, feas['pos_weight'],
                        feas['norm'], d_real, feas['num_nodes'], GD_real)

    # Initialize session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # record lists
    record = []
    record_emb = []

    # Train model
    for epoch in range(self.iteration):
        emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                               feas['adj_label'], feas['features'],
                               placeholders, feas['adj'], kde,
                               feas['features_dense'])
        if (epoch + 1) % 2 == 0:
            record_emb.append(emb)
            kmeans = KMeans(n_clusters=self.n_clusters,
                            random_state=0).fit(emb)
            print("Epoch:", '%04d' % (epoch + 1))
            predict_labels = kmeans.predict(emb)
            cm = clustering_metrics(feas['true_labels'], predict_labels)
            [a, b, c] = cm.evaluationClusterModelFromLabel()
            record.append([a, b, c])

    # report the epoch with peak accuracy and save its embeddings
    rec = np.array(record)
    index = rec[:, 0].tolist().index(max(rec[:, 0].tolist()))
    ana = record[index]
    print('------------------------------------', index)
    emb = record_emb[index]
    scio.savemat('result/{}.mat'.format(self.data_name),
                 {'embedded': emb, 'labels': feas['true_labels']})
    print('The peak ACC=%f, NMI=%f, ADJ_RAND_SCORE=%f' %
          (ana[0], ana[1], ana[2]))
def erun(self):
    tf.reset_default_graph()
    model_str = self.model

    # formatted data
    feas = format_data(self.data_name)
    placeholders = get_placeholder(feas['adjs'], feas['numView'])

    # construct model
    ae_model = get_model(model_str, placeholders, feas['numView'],
                         feas['num_features'], feas['num_nodes'],
                         self.n_clusters)

    # Optimizer
    opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                        feas['num_nodes'])

    # Initialize session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # Warm-up: train the reconstruction objective only.
    # NMIs and loss are assumed to be module-level lists defined elsewhere.
    pos_weights = feas['pos_weights']
    fea_pos_weights = feas['fea_pos_weights']
    for epoch in range(self.warm_iteration):
        reconstruct_loss = warm_update(ae_model, opt, sess, feas['numView'],
                                       feas['adjs'], feas['adjs_label'],
                                       feas['features'], placeholders,
                                       pos_weights, fea_pos_weights,
                                       feas['norms'], attn_drop=0.,
                                       ffd_drop=0.)
        print('reconstruct_loss', reconstruct_loss)

        if (epoch + 1) == 50:
            emb = warm_update_test(ae_model, opt, sess, feas['adjs'],
                                   feas['adjs_label'], feas['features'],
                                   placeholders,
                                   pos_weights=feas['pos_weights'],
                                   fea_pos_weights=fea_pos_weights,
                                   norm=feas['norms'], attn_drop=0,
                                   ffd_drop=0)
            avg_emb = (emb[0] + emb[1]) / 2  # computed but unused here

            # evaluate view 0
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb[0])
            print("Epoch:", '%04d' % (epoch + 1))
            predict_labels0 = kmeans.predict(emb[0])
            label_num = count_num(predict_labels0)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels0)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            # enc = preprocessing.OneHotEncoder()
            # onehot_predict0 = enc.fit_transform(
            #     predict_labels0.reshape(-1, 1))
            # Q = eng.modul(adjs0, onehot_predict)
            # print('view0 Q', Q)

            # evaluate view 1
            kmeans = KMeans(n_clusters=self.n_clusters).fit(emb[1])
            print("Epoch:", '%04d' % (epoch + 1))
            predict_labels1 = kmeans.predict(emb[1])
            label_num = count_num(predict_labels1)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels1)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            # onehot_predict1 = enc.fit_transform(
            #     predict_labels1.reshape(-1, 1))
            # Q = eng.modul(adjs1, onehot_predict)
            # print('view1 Q', Q)
            # scio.savemat('acm_modurity.mat',
            #              {'adj0': feas['adjs_label'][0],
            #               'onehot_predict0': onehot_predict0,
            #               'adj1': feas['adjs_label'][1],
            #               'onehot_predict1': onehot_predict1})
            NMIs.append(nmi)
            print('NMIs', NMIs)

    print('warm up done!')

    # Main training: reconstruction across all views
    for epoch in range(self.iterations):
        reconstruct_loss = update(ae_model, opt, sess, feas['adjs'],
                                  feas['adjs_label'], feas['features'],
                                  placeholders, pos_weights,
                                  fea_pos_weights, feas['norms'],
                                  attn_drop=0., ffd_drop=0.)
        print('reconstruct_loss', reconstruct_loss)
        if (epoch + 1) % 10 == 0:
            emb_ind = update_test(ae_model, opt, sess, feas['adjs'],
                                  feas['adjs_label'], feas['features'],
                                  placeholders, pos_weights=pos_weights,
                                  fea_pos_weights=fea_pos_weights,
                                  norm=feas['norms'], attn_drop=0,
                                  ffd_drop=0)
            kmeans = KMeans(n_clusters=self.n_clusters).fit(
                emb_ind[FLAGS.input_view])
            print("PAP Epoch:", '%04d' % (epoch + 1))
            predict_labels = kmeans.predict(emb_ind[FLAGS.input_view])
            # print('emb1', emb_ind[1])
            label_num = count_num(predict_labels)
            print('view1 label_num:', label_num)
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            NMIs.append(nmi)
            loss.append(reconstruct_loss)

    # initialize the clustering layer from k-means on the input view
    kmeans = KMeans(n_clusters=self.n_clusters).fit(
        emb_ind[FLAGS.input_view])
    y_pred_last = kmeans.labels_
    cm = clustering_metrics(label_mask(feas['true_labels']), y_pred_last)
    acc, f1_macro, precision_macro, nmi, adjscore, idx = \
        cm.evaluationClusterModelFromLabel()
    init_cluster = tf.constant(kmeans.cluster_centers_)
    sess.run(tf.assign(ae_model.cluster_layer.vars['clusters'],
                       init_cluster))

    # self-training phase with the KL objective
    q = compute_q(ae_model, opt, sess, feas['adjs'], feas['adjs_label'],
                  feas['features'], placeholders, pos_weights,
                  fea_pos_weights, feas['norms'], attn_drop=0., ffd_drop=0.)
    p = target_distribution(q)
    for epoch in range(self.kl_iterations):
        emb, kl_loss = update_kl(ae_model, opt, sess, feas['adjs'],
                                 feas['adjs_label'], feas['features'], p,
                                 placeholders, pos_weights,
                                 fea_pos_weights, feas['norms'],
                                 attn_drop=0., ffd_drop=0., idx=idx,
                                 label=label_mask(feas['true_labels']))
        if epoch % 10 == 0:
            kmeans = KMeans(n_clusters=self.n_clusters).fit(
                emb[FLAGS.input_view])
            predict_labels = kmeans.predict(emb[FLAGS.input_view])
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            NMIs.append(nmi)
            loss.append(kl_loss)
        if epoch % 5 == 0:
            # refresh the target distribution and check label stability
            q = compute_q(ae_model, opt, sess, feas['adjs'],
                          feas['adjs_label'], feas['features'],
                          placeholders, pos_weights, fea_pos_weights,
                          feas['norms'], attn_drop=0., ffd_drop=0.)
            p = target_distribution(q)
            y_pred = q.argmax(1)
            delta_label = np.sum(y_pred != y_pred_last).astype(
                np.float32) / y_pred.shape[0]
            y_pred_last = y_pred
            print('delta_label', delta_label)
            print("Epoch:", '%04d' % (epoch + 1))
            kmeans = KMeans(n_clusters=self.n_clusters).fit(
                emb[FLAGS.input_view])
            predict_labels = kmeans.predict(emb[FLAGS.input_view])
            cm = clustering_metrics(label_mask(feas['true_labels']),
                                    predict_labels)
            acc, f1_macro, precision_macro, nmi, adjscore, _ = \
                cm.evaluationClusterModelFromLabel()
            if epoch > 0 and delta_label < self.tol:
                print("early_stop")
                break

    print('NMI', NMIs)
    print('loss', loss)
    save_embed(emb[FLAGS.input_view], 'emb_10.txt')
    return acc, f1_macro, precision_macro, nmi, adjscore
mat = loadmat(matfile)
A = mat['network']
graph = A.A  # dense adjacency matrix, 1490 x 1490

# 1. Load embeddings and labels
model = KeyedVectors.load_word2vec_format(embeddings_file, binary=False)
vocab = [int(i) for i in list(model.wv.vocab.keys())]
vocab.sort()
emb = numpy.asarray([model[str(node)] for node in vocab])  # 1224 x 64
# nodes without an embedding (266 of them)
empty = [i for i in range(graph.shape[0]) if i not in vocab]
labels_all = mat['group'].nonzero()[1]  # (1490,)
labels = labels_all[vocab]              # (1224,)

# 2. Cluster the embedded nodes and score against the labels
kmeans = KMeans(2, random_state=0).fit(emb)
predict_labels = kmeans.predict(emb)
cm = clustering_metrics(labels, predict_labels)
cm.evaluationClusterModelFromLabel()

# 3. Scatter the predictions back to all nodes; unembedded nodes get -1
predict_labels_all = numpy.ones(graph.shape[0]) * (-1)
print(predict_labels_all)
print(predict_labels_all.shape)
print(predict_labels.shape)
predict_labels_all[vocab] = predict_labels
predict_labels_all = predict_labels_all.astype(int)
print(predict_labels_all)

with open('/home/zmm/advGraph/nettack-master/ourDefense/clusterLabel/'
          'dw_labels_polblogs', 'wb') as f:
    pkl.dump(predict_labels_all, f)

# python example_graphs/scoring.py --emb example_graphs/polblogs.embeddings
#     --network example_graphs/polblogs.mat --num-shuffle 10 --all
def train(features, adj_train, adj_train_norm, train_edges,
          train_false_edges, clustering_labels, K):
    print("training")

    # initialize the Adam optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=LR)
    max_acc = 0
    max_f1 = 0
    max_top_acc = 0
    n_nodes = adj_train.shape[0]

    # convert the normalized adj and the features to sparse tensors
    adj_train_norm_tensor = convert_sparse_matrix_to_sparse_tensor(
        adj_train_norm)
    feature_tensor = convert_sparse_matrix_to_sparse_tensor(features)

    # define the model
    model = MyModel(K, adj_train_norm_tensor)

    for i in range(epochs):
        with tf.GradientTape() as tape:
            # forward pass: pred is the flattened reconstructed adjacency
            pred = model(feature_tensor)

            # gather predictions for the positive and negative train edges
            train_edges_p_pred = [pred[x[0] * n_nodes + x[1]]
                                  for x in train_edges]
            train_edges_n_pred = [pred[x[0] * n_nodes + x[1]]
                                  for x in train_false_edges]
            train_edges_p_l = [1] * len(train_edges_p_pred)
            train_edges_n_l = [0] * len(train_edges_n_pred)

            pred = train_edges_p_pred + train_edges_n_pred
            y_actual = train_edges_p_l + train_edges_n_l
            # to train on the entire original adj, use the line below
            # y_actual = adj_train.toarray().flatten()

            # get the embeddings and the variational parameters
            embeddings_np = model.get_encode().numpy()
            mu = model.get_mu()
            logvar = model.get_logvar()

            # get loss
            loss = total_loss(y_actual, pred, mu, logvar, n_nodes)

        # get gradient from loss and optimize the weights
        grad = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grad, model.trainable_variables))

        print("#" * 30)
        print("epoch:{}, train loss: {}".format(i, loss))

        # get adj accuracy
        top_acc_function = tf.keras.metrics.BinaryAccuracy()
        top_acc_function.update_state(y_actual, pred)
        top_train_accuracy = top_acc_function.result().numpy()
        if max_top_acc < top_train_accuracy:
            max_top_acc = top_train_accuracy
        print("train top acc: {}".format(top_train_accuracy))

        # get labels accuracy (note: the original passed an undefined
        # `labels`; the parameter is named `clustering_labels`)
        pred_labels_x = embeddings_np.argmax(1)
        cm = clustering_metrics(clustering_labels, pred_labels_x)
        res = cm.clusteringAcc()
        print("acc:{}, f1:{}".format(res[0], res[2]))
        if res[0] > max_acc:
            max_acc = res[0]
        if res[2] > max_f1:
            max_f1 = res[2]

    print("max_acc:{}, max_f1:{}, max_top_acc: {}".format(
        max_acc, max_f1, max_top_acc))
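# `convert_sparse_matrix_to_sparse_tensor` is not defined in these
# snippets. A common implementation, converting a scipy sparse matrix into
# a TensorFlow SparseTensor, is sketched below as an assumption about what
# the original helper does.
import numpy as np
import tensorflow as tf


def convert_sparse_matrix_to_sparse_tensor(X):
    # convert any scipy sparse format to COO, then build a SparseTensor
    coo = X.tocoo().astype(np.float32)
    indices = np.stack([coo.row, coo.col], axis=1)
    tensor = tf.sparse.SparseTensor(indices, coo.data, coo.shape)
    # reorder to the canonical row-major ordering TF expects
    return tf.sparse.reorder(tensor)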