def checkGraph(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask,
               test_mask, y_all, all_mask, needtSNE=False, rawFeature=[]):
    prec, rec, f1 = 0.0, 0.0, 0.0
    nb_nodes = fea_list[0].shape[0]
    ft_size = fea_list[0].shape[1]
    nb_classes = y_train.shape[1]

    # adj = adj.todense()
    # features = features[np.newaxis]  # [1, nb_node, ft_size]
    fea_list = [fea[np.newaxis] for fea in fea_list]
    adj_list = [adj[np.newaxis] for adj in adj_list]
    biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

    print('build graph...')
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                          shape=(batch_size, nb_nodes, ft_size),
                                          name='ftr_in_{}'.format(i))
                           for i in range(len(fea_list))]
            bias_in_list = [tf.placeholder(dtype=tf.float32,
                                           shape=(batch_size, nb_nodes, nb_nodes),
                                           name='bias_in_{}'.format(i))
                            for i in range(len(biases_list))]
            lbl_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes, nb_classes), name='lbl_in')
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes), name='msk_in')
            attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
            is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

        # forward
        logits, final_embedding, att_val = model.inference(
            ftr_in_list, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop,
            bias_mat_list=bias_in_list, hid_units=hid_units, n_heads=n_heads,
            mp_att_size=200, residual=residual, activation=nonlinearity)

        return logits, final_embedding, att_val
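# The call to process.adj_to_bias above turns each (batched) adjacency matrix into an additive
# attention bias: node pairs reachable within `nhood` hops (self-loops included) get 0.0 and all
# other pairs a large negative value, so a masked softmax attention effectively ignores
# non-neighbours. The helper below is only a hedged sketch of that behaviour, modelled on the
# reference GAT preprocessing; the project's actual implementation lives in `process.py` and may
# differ in details. `adj_to_bias_sketch` is an illustrative name, not part of the repository.
import numpy as np


def adj_to_bias_sketch(adj, sizes, nhood=1):
    """adj: (num_graphs, n, n) dense adjacency; sizes: valid node count per graph."""
    nb_graphs = adj.shape[0]
    mt = np.empty(adj.shape)
    for g in range(nb_graphs):
        # start from the identity and multiply `nhood` times by (A + I) to mark k-hop reachability
        mt[g] = np.eye(adj.shape[1])
        for _ in range(nhood):
            mt[g] = np.matmul(mt[g], adj[g] + np.eye(adj.shape[1]))
        for i in range(sizes[g]):
            for j in range(sizes[g]):
                if mt[g][i][j] > 0.0:
                    mt[g][i][j] = 1.0
    # reachable pairs -> 0.0, unreachable pairs -> -1e9 (added before the attention softmax)
    return -1e9 * (1.0 - mt)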
def train(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask,
          test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):
    prec, rec, f1 = 0.0, 0.0, 0.0
    nb_nodes = fea_list[0].shape[0]
    ft_size = fea_list[0].shape[1]
    nb_classes = y_train.shape[1]
    # nb_classes = len(set(rawlabels))

    # adj = adj.todense()
    # features = features[np.newaxis]  # [1, nb_node, ft_size]
    fea_list = [fea[np.newaxis] for fea in fea_list]
    adj_list = [adj[np.newaxis] for adj in adj_list]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    y_all = y_all[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]
    all_mask = all_mask[np.newaxis]
    biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

    print('build graph...')
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size),
                                           name='metric_ftr_in')
            ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                          shape=(batch_size, nb_nodes, ft_size),
                                          name='ftr_in_{}'.format(i))
                           for i in range(len(fea_list))]
            bias_in_list = [tf.placeholder(dtype=tf.float32,
                                           shape=(batch_size, nb_nodes, nb_nodes),
                                           name='bias_in_{}'.format(i))
                            for i in range(len(biases_list))]
            lbl_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes, nb_classes), name='lbl_in')
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes), name='msk_in')
            attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
            is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

        # forward
        logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(
            ftr_in_list, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop,
            bias_mat_list=bias_in_list, hid_units=hid_units, n_heads=n_heads,
            features=fea_list, labels=rawlabels, residual=residual,
            activation=nonlinearity, feature_size=ft_size)
        # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)
        # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)

        # cal masked_loss
        # lab_list = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ), name='lab_list')
        # ftr_resh = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='ftr_resh')
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])

        print("final_embedding: checkout", final_embedding)
        print("logits: checkout", logits)
        print("log_resh: checkout", log_resh)
        # print("ftr_resh: ", ftr_resh)
        print("lab_resh: ", lab_resh)
        print("fea_list: ", fea_list)
        print("centers_embed: ", centers_embed)
        print("batch_size, nb_nodes, nb_classes, ft_size", batch_size, nb_nodes, nb_classes, ft_size)

        osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
        osm_loss = osm_caa_loss.forward
        # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)
        # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)
        # log_resh: checkout Tensor("Reshape:0", shape=(286, 30), dtype=float32)
        # ftr_resh: Tensor("ftr_resh:0", shape=(286, 100), dtype=float32)
        # lab_resh: Tensor("Reshape_1:0", shape=(286, 30), dtype=int32)
        osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
        # osmLoss, checkvalue = osm_loss(metric_ftr_in, rawlabels, centers_embed)
        SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)

        loss = osmLoss  # why does the loss stay constant?
        # loss = osmLoss
        # loss = SoftMaxloss
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        # optimize
        train_op = model.training(loss, lr, l2_coef)

        Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
        self.mkdir(Path)
        checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
        print('model: {}'.format(checkpt_file))

        saver = tf.train.Saver()
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            train_loss_avg = 0
            train_acc_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0
            for epoch in range(nb_epochs):
                tr_step = 0
                tr_size = fea_list[0].shape[0]
                # ================ training ============
                while tr_step * batch_size < tr_size:
                    fd1 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_train[tr_step * batch_size:(tr_step + 1) * batch_size],
                           msk_in: train_mask[tr_step * batch_size:(tr_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                           is_train: True,
                           attn_drop: 0.6,
                           ffd_drop: 0.6}
                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    _, loss_value_tr, acc_tr, att_val_train = sess.run(
                        [train_op, loss, accuracy, att_val], feed_dict=fd)
                    test_check_value = sess.run(checkvalue, feed_dict=fd)
                    print("test_check_value: ", test_check_value)
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1

                vl_step = 0
                vl_size = fea_list[0].shape[0]
                # ============= val =================
                while vl_step * batch_size < vl_size:
                    # fd1 = {ftr_in: features[vl_step * batch_size:(vl_step + 1) * batch_size]}
                    fd1 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_val[vl_step * batch_size:(vl_step + 1) * batch_size],
                           msk_in: val_mask[vl_step * batch_size:(vl_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                           is_train: False,
                           attn_drop: 0.0,
                           ffd_drop: 0.0}
                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    loss_value_vl, acc_vl = sess.run([loss, accuracy], feed_dict=fd)
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1

                # import pdb; pdb.set_trace()
                print('Epoch: {}, att_val: {}'.format(epoch, np.mean(att_val_train, axis=0)))
                print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f | vl_step: %d, tr_step: %d' %
                      (train_loss_avg / tr_step, train_acc_avg / tr_step,
                       val_loss_avg / vl_step, val_acc_avg / vl_step, vl_step, tr_step))

                if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                    if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                        vacc_early_model = val_acc_avg / vl_step
                        vlss_early_model = val_loss_avg / vl_step
                        saver.save(sess, checkpt_file)
                    vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                    vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ', vlss_early_model,
                              ', accuracy: ', vacc_early_model)
                        break

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

            # check save
            saver.save(sess, checkpt_file)
            saver.restore(sess, checkpt_file)
            print('load model from : {}'.format(checkpt_file))

            ts_size = fea_list[0].shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0
            while ts_step * batch_size < ts_size:
                fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                       for i, d in zip(ftr_in_list, fea_list)}
                fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                       for i, d in zip(bias_in_list, biases_list)}
                fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                       msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                       metric_ftr_in: rawFeature,
                       is_train: False,
                       attn_drop: 0.0,
                       ffd_drop: 0.0}
                fd = fd1
                fd.update(fd2)
                fd.update(fd3)
                loss_value_ts, acc_ts, jhy_final_embedding, test_final_embeed_check = sess.run(
                    [loss, accuracy, final_embedding, test_final_embeed], feed_dict=fd)
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1

            xx = np.expand_dims(jhy_final_embedding, axis=0)[all_mask]
            xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
            yy = y_all[all_mask]
            print("check fd")
            print('xx: {}, yy: {}, ts_size: {}, ts_step: {}, batch_size: {}'.format(
                xx.shape, yy.shape, ts_size, ts_step, batch_size))
            labels, numberofLabels = self.getLabel(yy)

            from utils import clustering, pairwise_precision_recall_f1
            clusters_pred = clustering(xx2, num_clusters=numberofLabels)
            prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
            print('prec: ', prec, ', rec: ', rec, ', f1: ', f1,
                  ', originNumberOfClusterlabels: ', numberofLabels)

            if needtSNE:
                tSNEAnanlyse(xx, labels,
                             join(settings.PIC_DIR, "HAN", "rawReature_%s_final.png" % (self.name)))
                tSNEAnanlyse(rawFeature, labels,
                             join(settings.PIC_DIR, "HAN", "rawReature_%s_features.png" % (self.name)))
                tSNEAnanlyse(xx2, labels,
                             join(settings.PIC_DIR, "HAN", "rawReature_%s_xx2.png" % (self.name)))
                tSNEAnanlyse(xx, clusters_pred,
                             join(settings.PIC_DIR, "HAN", "rawReature_%s_result_label.png" % (self.name)))

            sess.close()

    return prec, rec, f1, xx2
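# `clustering` and `pairwise_precision_recall_f1` are imported from `utils` in train() above. As a
# rough, hedged sketch of what such helpers typically do for this kind of evaluation: cluster the
# learned embeddings (e.g. with KMeans) and score the predicted clusters against the true labels
# with pairwise precision/recall/F1 over all node pairs. The *_sketch names below are illustrative
# stand-ins, not the project's actual implementation in utils.py.
import numpy as np
from sklearn.cluster import KMeans


def clustering_sketch(embeddings, num_clusters):
    # assign each embedding vector to one of `num_clusters` clusters
    return KMeans(n_clusters=num_clusters, random_state=0).fit_predict(embeddings)


def pairwise_precision_recall_f1_sketch(pred, truth):
    # count node pairs that agree/disagree between predicted and true clusterings
    tp = fp = fn = 0
    n = len(pred)
    for i in range(n):
        for j in range(i + 1, n):
            same_pred = pred[i] == pred[j]
            same_true = truth[i] == truth[j]
            if same_pred and same_true:
                tp += 1
            elif same_pred:
                fp += 1
            elif same_true:
                fn += 1
    prec = tp / (tp + fp) if (tp + fp) else 0.0
    rec = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * prec * rec / (prec + rec) if (prec + rec) else 0.0
    return prec, rec, f1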
def MetricDebug(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask,
                test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):
    prec, rec, f1 = 0.0, 0.0, 0.0
    nb_nodes = fea_list[0].shape[0]
    ft_size = fea_list[0].shape[1]
    nb_classes = y_train.shape[1]
    # nb_classes = len(set(rawlabels))

    # adj = adj.todense()
    # features = features[np.newaxis]  # [1, nb_node, ft_size]
    fea_list = [fea[np.newaxis] for fea in fea_list]
    adj_list = [adj[np.newaxis] for adj in adj_list]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    y_all = y_all[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]
    all_mask = all_mask[np.newaxis]
    biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

    print('build graph...')
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size),
                                           name='metric_ftr_in')
            ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                          shape=(batch_size, nb_nodes, ft_size),
                                          name='ftr_in_{}'.format(i))
                           for i in range(len(fea_list))]
            bias_in_list = [tf.placeholder(dtype=tf.float32,
                                           shape=(batch_size, nb_nodes, nb_nodes),
                                           name='bias_in_{}'.format(i))
                            for i in range(len(biases_list))]
            lbl_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes, nb_classes), name='lbl_in')
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes), name='msk_in')
            attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
            is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

        # forward
        logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(
            ftr_in_list, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop,
            bias_mat_list=bias_in_list, hid_units=hid_units, n_heads=n_heads,
            features=fea_list, labels=rawlabels, residual=residual,
            activation=nonlinearity, feature_size=ft_size)

        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])

        osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
        osm_loss = osm_caa_loss.forward
        osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
        SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        loss = osmLoss
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        # optimize
        train_op = model.training(loss, lr, l2_coef)

        Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
        self.mkdir(Path)
        checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
        saver = tf.train.Saver()
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        ts_size = fea_list[0].shape[0]
        ts_step = 0
        ts_loss = 0.0
        ts_acc = 0.0

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            saver.restore(sess, checkpt_file)
            while ts_step * batch_size < ts_size:
                fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                       for i, d in zip(ftr_in_list, fea_list)}
                fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                       for i, d in zip(bias_in_list, biases_list)}
                fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                       msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                       metric_ftr_in: rawFeature,
                       is_train: False,
                       attn_drop: 0.0,
                       ffd_drop: 0.0}
                fd = fd1
                fd.update(fd2)
                fd.update(fd3)
                test_final_embeed_check = sess.run(test_final_embeed, feed_dict=fd)
                ts_step += 1

            xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
            yy = y_all[all_mask]
            labels, numberofLabels = self.getLabel(yy)

            from utils import clustering, pairwise_precision_recall_f1
            clusters_pred = clustering(xx2, num_clusters=numberofLabels)
            prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
            print('prec: ', prec, ', rec: ', rec, ', f1: ', f1,
                  ', originNumberOfClusterlabels: ', numberofLabels)

            if needtSNE:
                tSNEAnanlyse(rawFeature, labels,
                             join(settings.PIC_DIR, "MetricLearning",
                                  "rawReature_%s_features.png" % (self.name)))
                tSNEAnanlyse(xx2, labels,
                             join(settings.PIC_DIR, "MetricLearning",
                                  "rawReature_%s_xx2.png" % (self.name)))
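# Both train() and MetricDebug() rely on model.masked_softmax_cross_entropy and
# model.masked_accuracy. The sketch below shows the usual GAT-style definition of these masked
# metrics: compute the per-node loss/accuracy for every node, then re-weight by the mask so that
# only nodes with mask == 1 contribute (the mask is rescaled so the mean stays unbiased). Treat
# this as an illustration under that assumption; the project's own `model` class is authoritative.
import tensorflow as tf


def masked_softmax_cross_entropy_sketch(logits, labels, mask):
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)          # rescale so the masked average is unbiased
    return tf.reduce_mean(loss * mask)


def masked_accuracy_sketch(logits, labels, mask):
    correct = tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)), tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    return tf.reduce_mean(correct * mask)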
nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] adj = adj.todense() features = features[np.newaxis] adj = adj[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] biases = process.adj_to_bias(adj, [nb_nodes], nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop, bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads,
tr_msk, vl_msk, ts_msk = process_ppi.process_p2p() # only difference are the name and that there are variables fpr train, validation and training for feature, adjacent and # mask now y_train = train_labels y_val = val_labels y_test = test_labels train_mask = tr_msk val_mask = vl_msk test_mask = ts_msk nb_nodes = train_feat.shape[1] ft_size = train_feat.shape[2] nb_classes = y_train.shape[2] train_adj = process.adj_to_bias(train_adj, [nb_nodes] * 20, nhood=1) val_adj = process.adj_to_bias(val_adj, [nb_nodes] * 2, nhood=1) test_adj = process.adj_to_bias(test_adj, [nb_nodes] * 2, nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size), name='features') bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes), name='bias') lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes), name='label') msk_in = tf.placeholder(dtype=tf.int32,
train_adj, val_adj, test_adj, train_feat, val_feat, test_feat, train_labels, val_labels, test_labels, train_nodes, val_nodes, test_nodes, train_mask, val_mask, test_mask = process_inductive.load_ppi( dataset) for i in range(train_feat.shape[0]): train_feat[i] = process.preprocess_features2(train_feat[i]) for i in range(val_feat.shape[0]): val_feat[i] = process.preprocess_features2(val_feat[i]) for i in range(test_feat.shape[0]): test_feat[i] = process.preprocess_features2(test_feat[i]) nb_nodes = train_feat.shape[1] ft_size = train_feat.shape[2] nb_classes = train_labels.shape[2] train_biases = process.adj_to_bias(train_adj, [nb_nodes] * train_adj.shape[0], nhood=1) val_biases = process.adj_to_bias(val_adj, [nb_nodes] * val_adj.shape[0], nhood=1) test_biases = process.adj_to_bias(test_adj, [nb_nodes] * test_adj.shape[0], nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=())
# adj = adj.todense() features = features[np.newaxis] # [1, nb_node, ft_size] adj_list = [adj[np.newaxis] for adj in adj_list] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] biases_list = [ process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list ] with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) bias_in_list = [ tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) for _ in range(len(biases_list)) ] lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=())
def train(sparse, epochs, lr, patience, l2_coef, hid_units, n_heads, residual, attention_drop, edge_attr_directory, node_features_path, label_path, log_path, train_ratio): # flags = tf.app.flags # FLAGS = flags.FLAGS nb_epochs = epochs # flags.DEFINE_string('summaries_dir', log_path, 'Summaries directory') if tf.gfile.Exists(log_path): tf.gfile.DeleteRecursively(log_path) tf.gfile.MakeDirs(log_path) checkpt_file = 'pre_trained/mod_test.ckpt' dataset = 'know' # training params batch_size = 1 nonlinearity = tf.nn.elu if sparse: model = SpGAT else: model = GAT nhood = 1 in_drop = attention_drop print('Dataset: ' + dataset) print('----- Opt. hyperparams -----') print('lr: ' + str(lr)) print('l2_coef: ' + str(l2_coef)) print('----- Archi. hyperparams -----') print('nb. layers: ' + str(len(hid_units))) print('nb. units per layer: ' + str(hid_units)) print('nb. attention heads: ' + str(n_heads)) print('residual: ' + str(residual)) print('nonlinearity: ' + str(nonlinearity)) print('model: ' + str(model)) adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_attr_name = process.load_data( edge_attr_directory, node_features_path, label_path, train_ratio) features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] # adj = adj[np.newaxis] # adjs = [adj[np.newaxis] for adj in adjs] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] if sparse: biases = process.preprocess_adj_bias( adjs[0], to_unweighted=True ) # sparse (indices, values, dense_shape), the graph topologies (unweighted) adjs = [ tf.SparseTensor( *process.preprocess_adj_bias(adj, to_unweighted=False)) for adj in adjs ] else: biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood) # biases = process.get_bias_mat(adjs[0], [nb_nodes], nhood=nhood) print(biases) # with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) if sparse: # bias_in = tf.sparse_placeholder(dtype=tf.float32) bias_in = tf.SparseTensor(*biases) else: bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(inputs=ftr_in, edge_adjs=adjs, nb_classes=nb_classes, nb_nodes=nb_nodes, training=is_train, attn_drop=attn_drop, ffd_drop=ffd_drop, bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads, residual=residual, activation=nonlinearity, edge_attr_name=edge_attr_name) log_resh = tf.reshape(logits, [-1, nb_classes]) lab_resh = tf.reshape(lbl_in, [-1, nb_classes]) msk_resh = tf.reshape(msk_in, [-1]) loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh) micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh) accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh) train_op = model.training(loss, lr, l2_coef) saver = tf.train.Saver() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) vlss_mn = np.inf vacc_mx = 0.0 vmf1_mx = 0.0 curr_step = 0 with tf.Session() as sess: merged = tf.summary.merge_all() 
train_summary_writer = tf.summary.FileWriter(log_path + '/train') test_summary_writer = tf.summary.FileWriter(log_path + '/test') sess.run(init_op) train_loss_avg = 0 train_acc_avg = 0 train_microf1_avg = 0 val_loss_avg = 0 val_acc_avg = 0 val_microf1_avg = 0 for epoch in range(nb_epochs): tr_step = 0 tr_size = features.shape[0] while tr_step * batch_size < tr_size: if sparse: bbias = biases else: bbias = biases[tr_step * batch_size:(tr_step + 1) * batch_size] _, summary, loss_value_tr, acc_tr, micro_f1_tr = sess.run( [train_op, merged, loss, accuracy, micro_f1], feed_dict={ ftr_in: features[tr_step * batch_size:(tr_step + 1) * batch_size], # bias_in: bbias, lbl_in: y_train[tr_step * batch_size:(tr_step + 1) * batch_size], msk_in: train_mask[tr_step * batch_size:(tr_step + 1) * batch_size], is_train: True, attn_drop: attention_drop, ffd_drop: in_drop }) print(loss_value_tr) train_microf1_avg += micro_f1_tr train_loss_avg += loss_value_tr train_acc_avg += acc_tr tr_step += 1 train_summary_writer.add_summary(summary, epoch) vl_step = 0 vl_size = features.shape[0] while vl_step * batch_size < vl_size: if sparse: bbias = biases else: bbias = biases[vl_step * batch_size:(vl_step + 1) * batch_size] summary, loss_value_vl, acc_vl, micro_f1_vl = sess.run( [merged, loss, accuracy, micro_f1], feed_dict={ ftr_in: features[vl_step * batch_size:(vl_step + 1) * batch_size], # bias_in: bbias, lbl_in: y_val[vl_step * batch_size:(vl_step + 1) * batch_size], msk_in: val_mask[vl_step * batch_size:(vl_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0 }) val_microf1_avg += micro_f1_vl val_loss_avg += loss_value_vl val_acc_avg += acc_vl vl_step += 1 test_summary_writer.add_summary(summary, epoch) print( 'Training: loss = %.5f, acc = %.5f, micro_f1 = %.5f | Val: loss = %.5f, acc = %.5f, micro_f1 = %.5f' % (train_loss_avg / tr_step, train_acc_avg / tr_step, train_microf1_avg / tr_step, val_loss_avg / vl_step, val_acc_avg / vl_step, val_microf1_avg / vl_step)) if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn: if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn: vacc_early_model = val_acc_avg / vl_step vlss_early_model = val_loss_avg / vl_step saver.save(sess, checkpt_file) vmf1_mx = np.max((val_microf1_avg / vl_step, vmf1_mx)) vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx)) vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn)) curr_step = 0 else: curr_step += 1 if curr_step == patience: print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx, ', Max Micro-f1', vmf1_mx) print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model) break train_loss_avg = 0 train_acc_avg = 0 train_microf1_avg = 0 val_loss_avg = 0 val_acc_avg = 0 val_microf1_avg = 0 saver.restore(sess, checkpt_file) ts_size = features.shape[0] ts_step = 0 ts_loss = 0.0 ts_acc = 0.0 while ts_step * batch_size < ts_size: if sparse: bbias = biases else: bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size] loss_value_ts, acc_ts = sess.run( [loss, accuracy], feed_dict={ ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size], # bias_in: bbias, lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size], msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0 }) ts_loss += loss_value_ts ts_acc += acc_ts ts_step += 1 print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step) sess.close()
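# The patience logic in the loop above (track the best validation accuracy/loss, checkpoint on
# improvement, stop after `patience` epochs without one) is repeated in every training function in
# this file. A small helper like the hedged sketch below could factor it out; `EarlyStopperSketch`
# is an illustrative name, not something the repository defines.
import numpy as np


class EarlyStopperSketch(object):
    def __init__(self, patience):
        self.patience = patience
        self.best_acc = 0.0
        self.best_loss = np.inf
        self.bad_epochs = 0

    def step(self, val_loss, val_acc):
        """Return True when training should stop."""
        if val_acc >= self.best_acc or val_loss <= self.best_loss:
            self.best_acc = max(val_acc, self.best_acc)
            self.best_loss = min(val_loss, self.best_loss)
            self.bad_epochs = 0
            return False
        self.bad_epochs += 1
        return self.bad_epochs >= self.patience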
def main(args): os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu config = tf.ConfigProto() config.gpu_options.allow_growth = True dataset = args.dataset # training params batch_size = 1 nb_epochs = 100000 patience = 100 lr = args.lr l2_coef = args.l2 hid_units = [args.units] n_heads = [1, 1] # layers drop_out = args.drop residual = False nonlinearity = tf.nn.elu model = SpHGAT print('Dataset: ' + dataset) print('----- Opt. hyperparams -----') print('lr: ' + str(lr)) print('l2_coef: ' + str(l2_coef)) sparse = True adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(dataset) features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] if sparse: biases = process.preprocess_adj_bias(adj) else: adj = adj.todense() adj = adj[np.newaxis] biases = process.adj_to_bias(adj, [nb_nodes], nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) if sparse: bias_in = tf.sparse_placeholder(dtype=tf.float32) else: bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop, bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads, activation=nonlinearity) log_resh = tf.reshape(logits, [-1, nb_classes]) lab_resh = tf.reshape(lbl_in, [-1, nb_classes]) msk_resh = tf.reshape(msk_in, [-1]) loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh) accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh) pred_all = tf.cast(tf.argmax(log_resh, 1), dtype=tf.int32) real_all = tf.cast(tf.argmax(lab_resh, 1), dtype=tf.int32) train_op = model.my_training(loss, lr, l2_coef) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) vlss_mn = np.inf vacc_mx = 0.0 tlss_mn = 0.0 tacc_mx = 0.0 curr_step = 0 with tf.Session(config=config) as sess: sess.run(init_op) train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 for epoch in range(nb_epochs): tr_step = 0 tr_size = features.shape[0] while tr_step * batch_size < tr_size: if sparse: bbias = biases else: bbias = biases[tr_step*batch_size:(tr_step+1)*batch_size] _, loss_value_tr, acc_tr = sess.run([train_op, loss, accuracy], feed_dict={ ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size], bias_in: bbias, lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size], msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size], is_train: True, attn_drop: drop_out, ffd_drop: drop_out}) train_loss_avg += loss_value_tr train_acc_avg += acc_tr tr_step += 1 vl_step = 0 vl_size = features.shape[0] while vl_step * batch_size < vl_size: if sparse: bbias = biases else: bbias = biases[vl_step*batch_size:(vl_step+1)*batch_size] loss_value_vl, acc_vl = sess.run([loss, accuracy], feed_dict={ ftr_in: features[vl_step*batch_size:(vl_step+1)*batch_size], bias_in: bbias, lbl_in: 
y_val[vl_step*batch_size:(vl_step+1)*batch_size], msk_in: val_mask[vl_step*batch_size:(vl_step+1)*batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0}) val_loss_avg += loss_value_vl val_acc_avg += acc_vl vl_step += 1 ts_size = features.shape[0] ts_step = 0 ts_loss = 0.0 ts_acc = 0.0 print(epoch, 'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' % (train_loss_avg/tr_step, train_acc_avg/tr_step, val_loss_avg/vl_step, val_acc_avg/vl_step)) if val_acc_avg/vl_step >= vacc_mx or val_loss_avg/vl_step <= vlss_mn: vacc_mx = np.max((val_acc_avg/vl_step, vacc_mx)) vlss_mn = np.min((val_loss_avg/vl_step, vlss_mn)) curr_step = 0 while ts_step * batch_size < ts_size: if sparse: bbias = biases else: bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size] loss_value_ts, acc_ts = sess.run( [loss, accuracy], feed_dict={ ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size], bias_in: bbias, lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size], msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0}) ts_loss += loss_value_ts ts_acc += acc_ts ts_step += 1 tlss_mn = ts_loss / ts_step tacc_mx = ts_acc / ts_step else: curr_step += 1 if curr_step == patience: print('Early stop! Min validation loss: ', vlss_mn, ', Max accuracy: ', vacc_mx) print('Test loss:', tlss_mn, ', accuracy {}'.format(tacc_mx)) break train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 sess.close()
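# When `sparse` is True, the scripts above feed the graph structure through
# process.preprocess_adj_bias, which returns the (indices, values, dense_shape) triple expected by
# tf.sparse_placeholder / tf.SparseTensor. The function below is a hedged sketch of that
# conversion, modelled on the reference GAT preprocessing (self-loops added, weights binarised,
# COO indices returned); the real helper in process.py may differ, e.g. in index ordering.
import numpy as np
import scipy.sparse as sp


def preprocess_adj_bias_sketch(adj):
    num_nodes = adj.shape[0]
    adj = adj + sp.eye(num_nodes)            # add self-loops
    adj[adj > 0.0] = 1.0                     # binarise edge weights
    if not sp.isspmatrix_coo(adj):
        adj = adj.tocoo()
    adj = adj.astype(np.float32)
    indices = np.vstack((adj.col, adj.row)).transpose()
    return indices, adj.data, adj.shape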
def run_gat(dataset, batch_size, nb_epochs, patience, lr, l2_coef, hid_units, n_heads, residual, nonlinearity, model, checkpt_file, nhood): # redirect output to file import sys orig_stdout = sys.stdout if os.path.isfile(os.path.dirname(checkpt_file) + 'out.txt'): f = open(os.path.dirname(checkpt_file) + 'out.txt', 'a') print('\n\n\n\n') else: f = open(os.path.dirname(checkpt_file) + 'out.txt', 'w') sys.stdout = f print('Dataset: ' + dataset) print('batch_size: ' + str(batch_size)) print('----- Opt. hyperparams -----') print('lr: ' + str(lr)) print('l2_coef: ' + str(l2_coef)) print('----- Archi. hyperparams -----') print('nb. layers: ' + str(len(hid_units))) print('nb. units per layer: ' + str(hid_units)) print('nb. attention heads: ' + str(n_heads)) print('residual: ' + str(residual)) print('nonlinearity: ' + str(nonlinearity)) print('model: ' + str(model)) print('nhood: ' + str(nhood)) adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data( dataset) features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] adj = adj.todense() features = features[np.newaxis] adj = adj[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop, bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads, residual=residual, activation=nonlinearity) log_resh = tf.reshape(logits, [-1, nb_classes]) lab_resh = tf.reshape(lbl_in, [-1, nb_classes]) msk_resh = tf.reshape(msk_in, [-1]) loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh) accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh) train_op = model.training(loss, lr, l2_coef) saver = tf.train.Saver() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) vlss_mn = np.inf vacc_mx = 0.0 curr_step = 0 start = time.time() with tf.Session() as sess: sess.run(init_op) train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 for epoch in range(nb_epochs): tr_step = 0 tr_size = features.shape[0] while tr_step * batch_size < tr_size: _, loss_value_tr, acc_tr = sess.run( [train_op, loss, accuracy], feed_dict={ ftr_in: features[tr_step * batch_size:(tr_step + 1) * batch_size], bias_in: biases[tr_step * batch_size:(tr_step + 1) * batch_size], lbl_in: y_train[tr_step * batch_size:(tr_step + 1) * batch_size], msk_in: train_mask[tr_step * batch_size:(tr_step + 1) * batch_size], is_train: True, attn_drop: 0.6, ffd_drop: 0.6 }) train_loss_avg += loss_value_tr train_acc_avg += acc_tr tr_step += 1 vl_step = 0 vl_size = features.shape[0] while vl_step * batch_size < vl_size: loss_value_vl, acc_vl = sess.run( [loss, accuracy], feed_dict={ ftr_in: features[vl_step * batch_size:(vl_step + 1) 
* batch_size], bias_in: biases[vl_step * batch_size:(vl_step + 1) * batch_size], lbl_in: y_val[vl_step * batch_size:(vl_step + 1) * batch_size], msk_in: val_mask[vl_step * batch_size:(vl_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0 }) val_loss_avg += loss_value_vl val_acc_avg += acc_vl vl_step += 1 print( 'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' % (train_loss_avg / tr_step, train_acc_avg / tr_step, val_loss_avg / vl_step, val_acc_avg / vl_step)) if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn: if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn: vacc_early_model = val_acc_avg / vl_step vlss_early_model = val_loss_avg / vl_step saver.save(sess, checkpt_file) vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx)) vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn)) curr_step = 0 else: curr_step += 1 if curr_step == patience: print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx) print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model) break train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 saver.restore(sess, checkpt_file) ts_size = features.shape[0] ts_step = 0 ts_loss = 0.0 ts_acc = 0.0 while ts_step * batch_size < ts_size: loss_value_ts, acc_ts = sess.run( [loss, accuracy], feed_dict={ ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size], bias_in: biases[ts_step * batch_size:(ts_step + 1) * batch_size], lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size], msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0 }) ts_loss += loss_value_ts ts_acc += acc_ts ts_step += 1 print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step, ' at epoch: ', epoch, ' elapsed time', time.time() - start) sess.close() sys.stdout = orig_stdout f.close()
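# model.training(loss, lr, l2_coef) builds the optimisation step used throughout this file. As a
# hedged sketch of the usual GAT-style definition (Adam plus an explicit L2 penalty on trainable
# weights), under the assumption that bias-like variables are excluded from the penalty; the
# project's own model class may filter variables differently.
import tensorflow as tf


def training_sketch(loss, lr, l2_coef):
    weights = tf.trainable_variables()
    # explicit L2 penalty added to the task loss
    l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in weights
                        if 'bias' not in v.name.lower()]) * l2_coef
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    return optimizer.minimize(loss + l2_loss)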
train_nodes, val_nodes, test_nodes,\ tr_msk, vl_msk, ts_msk\ = process_ppi.process_p2p(is_toy_model) # for i in range(train_feat.shape[0]): # train_feat[i] = process.preprocess_features2(train_feat[i]) # for i in range(val_feat.shape[0]): # val_feat[i] = process.preprocess_features2(val_feat[i]) # for i in range(test_feat.shape[0]): # test_feat[i] = process.preprocess_features2(test_feat[i]) nb_nodes = train_feat.shape[1] ft_size = train_feat.shape[2] nb_classes = train_labels.shape[2] tr_biases = process.adj_to_bias(train_adj, train_nodes, nhood=1) vl_biases = process.adj_to_bias(val_adj, val_nodes, nhood=1) ts_biases = process.adj_to_bias(test_adj, test_nodes, nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop,
def run_gat(dataset, batch_size, nb_epochs, patience, lr, l2_coef, hid_units, n_heads, residual, nonlinearity, model, checkpt_file, nhood, param_attn_drop=0.6, param_ffd_drop=0.6, sparse=False): ''' Function that runs all the experiments. :param dataset: The string name of the dataset. :param batch_size: Number of samples per batch. Has to be one for spartial execution. :param nb_epochs: Number of epochs that the method runs :param patience: The number of epochs with no improvement in validation accuracy that stops the training. :param lr: Learning rate. :param l2_coef: The L2 regularization strength. :param hid_units: List. Number of features the respecting layer produces from the input features. :param n_heads: List. Number of entries is the number of layers. The elements value is the number of attention heads. :param residual: Whether or not to use residual connections in the hidden layers. :param nonlinearity: tensorflow function for non-linearity :param model: Model that inherits from BasGAttn and implements the inference method :param checkpt_file: Location where the logs, output and model checkpoints are saved :param nhood: The neighborhood to consider. One for direct neighborhood and two for neighbors of neighbors. :param param_attn_drop: Drops a percent of attention coefficients. :param param_ffd_drop: Drops a percent of inputs from the previous layer. :param sparse: If True, the model has to be SpGAT :return: Prints and logs results. ''' # necessary work around to run on GPU ''' from tensorflow.compat.v1 import ConfigProto from tensorflow.compat.v1 import InteractiveSession config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) ''' # redirect output to file import sys orig_stdout = sys.stdout if os.path.isfile(os.path.dirname(checkpt_file) + 'out.txt'): f = open(os.path.dirname(checkpt_file) + 'out.txt', 'a') sys.stdout = f print('\n\n\n\n') else: f = open(os.path.dirname(checkpt_file) + 'out.txt', 'w') sys.stdout = f print('Dataset: ' + dataset) print('batch_size: ' + str(batch_size)) print('----- Opt. hyperparams -----') print('lr: ' + str(lr)) print('l2_coef: ' + str(l2_coef)) print('----- Archi. hyperparams -----') print('nb. layers: ' + str(len(hid_units))) print('nb. units per layer: ' + str(hid_units)) print('nb. attention heads: ' + str(n_heads)) print('residual: ' + str(residual)) print('nonlinearity: ' + str(nonlinearity)) print('model: ' + str(model)) print('nhood: ' + str(nhood)) print('attn_drop: ' + str(param_attn_drop)) print('ffd_drop: ' + str(param_ffd_drop)) # load any of the three transductive datasets # adj has information about the connections # features is a node node x features matrix with the features for each node # y_... 
has the label for each class in a node x class matrix # mask has 0 or 1 as value in a node vector, this is used to mask train, val and test set # so for all nodes all information is calculated, but only nodes masked with 1 are evaluated adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data( dataset) # preprocessing steps features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] # adj = adj[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] # the adjacency matrix is transformed into a bias that is added. # when no connection between nodes exist in the specified neighborhood, the value of the attention between # both nodes is set to a big negative value, pratically canceling out the effect. if sparse: biases = process.preprocess_adj_bias(adj) else: adj = adj.todense() adj = adj[np.newaxis] biases = process.adj_to_bias(adj, [nb_nodes], nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): # initialization ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) if sparse: bias_in = tf.sparse_placeholder(dtype=tf.float32) else: bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) # bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop, bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads, residual=residual, activation=nonlinearity) log_resh = tf.reshape(logits, [-1, nb_classes]) lab_resh = tf.reshape(lbl_in, [-1, nb_classes]) msk_resh = tf.reshape(msk_in, [-1]) loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh) accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh) train_op = model.training(loss, lr, l2_coef) saver = tf.train.Saver() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) vlss_mn = np.inf vacc_mx = 0.0 curr_step = 0 start = time.time() with tf.Session() as sess: sess.run(init_op) train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 for epoch in range(nb_epochs): tr_step = 0 tr_size = features.shape[0] # training steps while tr_step * batch_size < tr_size: if sparse: bbias = biases else: bbias = biases[tr_step * batch_size:(tr_step + 1) * batch_size] _, loss_value_tr, acc_tr = sess.run( [train_op, loss, accuracy], feed_dict={ ftr_in: features[tr_step * batch_size:(tr_step + 1) * batch_size], bias_in: bbias, lbl_in: y_train[tr_step * batch_size:(tr_step + 1) * batch_size], msk_in: train_mask[tr_step * batch_size:(tr_step + 1) * batch_size], is_train: True, attn_drop: param_attn_drop, ffd_drop: param_ffd_drop }) train_loss_avg += loss_value_tr train_acc_avg += acc_tr tr_step += 1 vl_step = 0 vl_size = features.shape[0] # validation steps while vl_step * batch_size < vl_size: if sparse: bbias = biases else: bbias = biases[vl_step * batch_size:(vl_step + 1) * batch_size] loss_value_vl, acc_vl = sess.run( [loss, accuracy], 
feed_dict={ ftr_in: features[vl_step * batch_size:(vl_step + 1) * batch_size], bias_in: bbias, lbl_in: y_val[vl_step * batch_size:(vl_step + 1) * batch_size], msk_in: val_mask[vl_step * batch_size:(vl_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0 }) val_loss_avg += loss_value_vl val_acc_avg += acc_vl vl_step += 1 print( 'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' % (train_loss_avg / tr_step, train_acc_avg / tr_step, val_loss_avg / vl_step, val_acc_avg / vl_step)) # patience step if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn: if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn: vacc_early_model = val_acc_avg / vl_step vlss_early_model = val_loss_avg / vl_step saver.save(sess, checkpt_file) vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx)) vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn)) curr_step = 0 else: curr_step += 1 if curr_step == patience: print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx) print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model) break train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 saver.restore(sess, checkpt_file) ts_size = features.shape[0] ts_step = 0 ts_loss = 0.0 ts_acc = 0.0 # evaluate on the training set while ts_step * batch_size < ts_size: if sparse: bbias = biases else: bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size] loss_value_ts, acc_ts = sess.run( [loss, accuracy], feed_dict={ ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size], bias_in: bbias, lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size], msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0 }) ts_loss += loss_value_ts ts_acc += acc_ts ts_step += 1 print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step, ' at epoch: ', epoch, ' elapsed time', time.time() - start) # log information about the training if os.path.isfile(os.path.dirname(checkpt_file) + 'log.csv'): print('loading existing log') df = pd.read_csv(os.path.dirname(checkpt_file) + 'log.csv', index_col=['run']) print('log: ' + str(df)) else: print('Creating new log') df = pd.DataFrame(columns=tracking_params + result_cols) log = dict( zip(tracking_params + result_cols, [ dataset, lr, l2_coef, hid_units, n_heads, residual, str(nonlinearity).split(' ')[1], param_attn_drop, param_ffd_drop, nhood ] + [ epoch, time.time() - start, vlss_mn, vacc_mx, ts_loss / ts_step, ts_acc / ts_step ])) print('Adding entry: ' + str(log)) df = df.append(log, ignore_index=True) print('saving logs') df.to_csv(os.path.dirname(checkpt_file) + 'log.csv', index_label='run') print('log save succesfull') sess.close() # restore standard output sys.stdout = orig_stdout f.close()
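# Note on the CSV logging block above: DataFrame.append was deprecated in pandas 1.4 and removed
# in pandas 2.x. If this script runs against a recent pandas, the equivalent call is pd.concat
# with a single-row frame, as in this hedged sketch (`log` is the dict built above from
# tracking_params + result_cols):
import pandas as pd


def append_log_row_sketch(df, log):
    return pd.concat([df, pd.DataFrame([log])], ignore_index=True)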
def train(sparse, hid_units, n_heads, residual, edge_attr_directory, node_features_path, label_path, train_ratio, model_path): # flags = tf.app.flags # FLAGS = flags.FLAGS # flags.DEFINE_string('summaries_dir', log_path, 'Summaries directory') # if tf.gfile.Exists(log_path): # tf.gfile.DeleteRecursively(log_path) # tf.gfile.MakeDirs(log_path) checkpt_file = model_path dataset = 'know' # training params batch_size = 1 nonlinearity = tf.nn.elu if sparse: model = SpGAT else: model = GAT nhood = 1 # in_drop = attention_drop print('Dataset: ' + dataset) print('----- Archi. hyperparams -----') print('nb. layers: ' + str(len(hid_units))) print('nb. units per layer: ' + str(hid_units)) print('nb. attention heads: ' + str(n_heads)) print('residual: ' + str(residual)) print('nonlinearity: ' + str(nonlinearity)) print('model: ' + str(model)) adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_attr_name = process.load_data( edge_attr_directory, node_features_path, label_path, train_ratio) features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] # adj = adj[np.newaxis] # adjs = [adj[np.newaxis] for adj in adjs] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] if sparse: biases = process.preprocess_adj_bias( adjs[0], to_unweighted=True ) # sparse (indices, values, dense_shape), the graph topologies (unweighted) adjs = [ tf.SparseTensor( *process.preprocess_adj_bias(adj, to_unweighted=False)) for adj in adjs ] else: biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood) # biases = process.get_bias_mat(adjs[0], [nb_nodes], nhood=nhood) print(biases) # with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) if sparse: # bias_in = tf.sparse_placeholder(dtype=tf.float32) bias_in = tf.SparseTensor(*biases) else: bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(inputs=ftr_in, edge_adjs=adjs, nb_classes=nb_classes, nb_nodes=nb_nodes, training=is_train, attn_drop=attn_drop, ffd_drop=ffd_drop, bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads, residual=residual, activation=nonlinearity, edge_attr_name=edge_attr_name) log_resh = tf.reshape(logits, [-1, nb_classes]) lab_resh = tf.reshape(lbl_in, [-1, nb_classes]) msk_resh = tf.reshape(msk_in, [-1]) loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh) micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh) accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh) # train_op = model.training(loss, lr, l2_coef) saver = tf.train.Saver() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) with tf.Session() as sess: sess.run(init_op) saver.restore(sess, checkpt_file) ts_size = features.shape[0] ts_step = 0 ts_loss = 0.0 ts_acc = 0.0 while ts_step * batch_size < ts_size: if sparse: bbias = biases else: bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size] 
loss_value_ts, acc_ts = sess.run( [loss, accuracy], feed_dict={ ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size], # bias_in: bbias, lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size], msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0 }) ts_loss += loss_value_ts ts_acc += acc_ts ts_step += 1 print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step) sess.close()
def train(): sparse = True adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(dataset, train_size, validation_size) features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] if sparse: biases = process.preprocess_adj_bias(adj) else: adj = adj.todense() adj = adj[np.newaxis] biases = process.adj_to_bias(adj, [nb_nodes], nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) if sparse: #bias_idx = tf.placeholder(tf.int64) #bias_val = tf.placeholder(tf.float32) #bias_shape = tf.placeholder(tf.int64) bias_in = tf.sparse_placeholder(dtype=tf.float32) else: bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes)) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=()) is_train = tf.placeholder(dtype=tf.bool, shape=()) logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop, bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads, residual=residual, activation=nonlinearity) log_resh = tf.reshape(logits, [-1, nb_classes]) lab_resh = tf.reshape(lbl_in, [-1, nb_classes]) msk_resh = tf.reshape(msk_in, [-1]) loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh) accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh) train_op = model.training(loss, lr, l2_coef) saver = tf.train.Saver() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) vlss_mn = np.inf vacc_mx = 0.0 curr_step = 0 gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: sess.run(init_op) train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 for epoch in range(nb_epochs): tr_step = 0 tr_size = features.shape[0] while tr_step * batch_size < tr_size: if sparse: bbias = biases else: bbias = biases[tr_step*batch_size:(tr_step+1)*batch_size] _, loss_value_tr, acc_tr = sess.run([train_op, loss, accuracy], feed_dict={ ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size], bias_in: bbias, lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size], msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size], is_train: True, attn_drop: 0.6, ffd_drop: 0.6}) train_loss_avg += loss_value_tr train_acc_avg += acc_tr tr_step += 1 if args.validate: vl_step = 0 vl_size = features.shape[0] while vl_step * batch_size < vl_size: if sparse: bbias = biases else: bbias = biases[vl_step*batch_size:(vl_step+1)*batch_size] loss_value_vl, acc_vl = sess.run([loss, accuracy], feed_dict={ ftr_in: features[vl_step*batch_size:(vl_step+1)*batch_size], bias_in: bbias, lbl_in: y_val[vl_step*batch_size:(vl_step+1)*batch_size], msk_in: val_mask[vl_step*batch_size:(vl_step+1)*batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0}) val_loss_avg += loss_value_vl val_acc_avg += acc_vl vl_step += 1 else: tr_step = 0 vl_step = 0 vl_size = features.shape[0] val_loss_avg = 0 val_acc_avg = 0 while tr_step * batch_size < tr_size: if sparse: bbias = 
biases else: bbias = biases[tr_step*batch_size:(tr_step+1)*batch_size] loss_value_vl, acc_vl = sess.run([loss, accuracy], feed_dict={ ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size], bias_in: bbias, lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size], msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size], is_train: False, attn_drop: 0., ffd_drop: 0.}) val_loss_avg += loss_value_vl val_acc_avg += acc_vl vl_step += 1 tr_step += 1 print('%d Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' % (epoch, train_loss_avg/tr_step, train_acc_avg/tr_step, val_loss_avg/vl_step, val_acc_avg/vl_step)) if val_acc_avg/vl_step > vacc_mx or val_loss_avg/vl_step < vlss_mn: if val_acc_avg/vl_step >= vacc_mx and val_loss_avg/vl_step <= vlss_mn: vacc_early_model = val_acc_avg/vl_step vlss_early_model = val_loss_avg/vl_step saver.save(sess, checkpt_file) ts_size = features.shape[0] ts_step = 0 ts_loss = 0.0 ts_acc = 0.0 while ts_step * batch_size < ts_size: if sparse: bbias = biases else: bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size] loss_value_ts, acc_ts = sess.run([loss, accuracy], feed_dict={ ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size], bias_in: bbias, lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size], msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0}) ts_loss += loss_value_ts ts_acc += acc_ts ts_step += 1 print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step) vacc_mx = np.max((val_acc_avg/vl_step, vacc_mx)) vlss_mn = np.min((val_loss_avg/vl_step, vlss_mn)) curr_step = 0 else: curr_step += 1 if curr_step == patience: print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx) print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model) break train_loss_avg = 0 train_acc_avg = 0 val_loss_avg = 0 val_acc_avg = 0 saver.restore(sess, checkpt_file) ts_size = features.shape[0] ts_step = 0 ts_loss = 0.0 ts_acc = 0.0 while ts_step * batch_size < ts_size: if sparse: bbias = biases else: bbias = biases[ts_step*batch_size:(ts_step+1)*batch_size] loss_value_ts, acc_ts = sess.run([loss, accuracy], feed_dict={ ftr_in: features[ts_step*batch_size:(ts_step+1)*batch_size], bias_in: bbias, lbl_in: y_test[ts_step*batch_size:(ts_step+1)*batch_size], msk_in: test_mask[ts_step*batch_size:(ts_step+1)*batch_size], is_train: False, attn_drop: 0.0, ffd_drop: 0.0}) ts_loss += loss_value_ts ts_acc += acc_ts ts_step += 1 print('Test loss:', ts_loss/ts_step, '; Test accuracy:', ts_acc/ts_step) sess.close() return ts_loss/ts_step, ts_acc/ts_step