def main(args):
    """Run the Graph Attention Autoencoder pipeline.

    Loads the dataset named by ``args.dataset``, trains a ``Trainer`` on it,
    infers node embeddings and prints whatever ``Classifier`` returns for
    the train/test/validation split.

    :param args: namespace providing ``dataset`` and ``hidden_dims``.
        ``args.hidden_dims`` is rebound with the input feature width
        prepended.
    """
    G, X, Y, idx_train, idx_val, idx_test = process.load_data(args.dataset)

    # add feature dimension size to the beginning of hidden_dims
    feature_dim = X.shape[1]
    args.hidden_dims = [feature_dim] + args.hidden_dims

    # prepare the data
    G_tf, S, R = process.prepare_graph_data(G)

    # Train the Model
    trainer = Trainer(args)
    trainer(G_tf, X, S, R)
    # infer() also returns the attention values, which are not needed here.
    embeddings, _ = trainer.infer(G_tf, X, S, R)

    # Evaluate the quality of embeddings
    classifier = Classifier(vectors=embeddings)
    f1s = classifier(idx_train, idx_test, idx_val, Y, seed=0)
    # Call form of print: valid on Python 3 and prints the same single
    # value on Python 2.
    print(f1s)
def main():
    """Train (or reload) a DGI encoder on Cora and score its embeddings.

    If no encoder checkpoint exists at ``assets/saved_graphs/best_dgi.pickle``
    the encoder is trained with early stopping on the training loss and the
    best state is saved there. The checkpoint is then loaded, node embeddings
    are computed, and 50 independent logistic-regression classifiers are
    trained on them; their mean accuracy and standard deviation are printed.
    The last classifier is saved and scored once more on the remaining split.

    NOTE(review): the split indices are unpacked as (train, test, val). The
    50 runs are scored on ``idx_test`` under the "Validation" banner and the
    final classifier on ``idx_val`` under "Testing" - confirm this matches
    the order ``process.load_data`` returns.
    """
    saved_graph = os.path.join('assets', 'saved_graphs', 'best_dgi.pickle')
    saved_logreg = os.path.join('assets', 'saved_graphs', 'best_logreg.pickle')
    dataset = 'cora'

    # training params
    batch_size = 1
    nb_epochs = 10000
    patience = 25
    lr = 0.001
    l2_coef = 0.0
    hid_units = 512
    sparse = True
    nonlinearity = 'prelu'  # special name to separate parameters

    adj, features, labels, idx_train, idx_test, idx_val = process.load_data(dataset)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = labels.shape[1]

    adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))
    if sparse:
        adj = process.sparse_mx_to_torch_sparse_tensor(adj)
    else:
        adj = (adj + sp.eye(adj.shape[0])).todense()

    features = torch.FloatTensor(features[np.newaxis])
    if not sparse:
        adj = torch.FloatTensor(adj[np.newaxis])
    labels = torch.FloatTensor(labels[np.newaxis])
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    print("Training Nodes: {}, Testing Nodes: {}, Validation Nodes: {}".format(
        len(idx_train), len(idx_test), len(idx_val)))

    model = DGI(ft_size, hid_units, nonlinearity)
    optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print('Using CUDA')
        model.cuda()
        features = features.cuda()
        # The adjacency lives in ``adj`` for both the sparse and the dense
        # representation, so a single move covers both cases.
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    b_xent = nn.BCEWithLogitsLoss()
    xent = nn.CrossEntropyLoss()
    cant_wait = 0
    best = 1e9
    best_t = 0

    if not os.path.exists(saved_graph):
        pbar = trange(nb_epochs)
        for epoch in pbar:
            model.train()
            optimiser.zero_grad()

            # Corrupted input: the same features with the node order shuffled.
            idx = np.random.permutation(nb_nodes)
            shuf_fts = features[:, idx, :]

            # Target 1 for the real nodes, 0 for the shuffled ones.
            lbl_1 = torch.ones(batch_size, nb_nodes)
            lbl_2 = torch.zeros(batch_size, nb_nodes)
            lbl = torch.cat((lbl_1, lbl_2), 1)

            if use_cuda:
                shuf_fts = shuf_fts.cuda()
                lbl = lbl.cuda()

            logits = model(features, shuf_fts, adj, sparse, None, None, None)
            loss = b_xent(logits, lbl)
            pbar.desc = 'Loss: {:.4f}'.format(loss)

            if loss < best:
                best = loss
                best_t = epoch
                # Reset the patience counter on every improvement.
                cant_wait = 0
                torch.save(model.state_dict(), saved_graph)
            else:
                cant_wait += 1

            if cant_wait == patience:
                tqdm.write('Early stopping!')
                break

            loss.backward()
            optimiser.step()

    print('Loading {}th Epoch'.format(best_t) if best_t else 'Loading Existing Graph')
    model.load_state_dict(torch.load(saved_graph))

    embeds, _ = model.embed(features, adj, sparse, None)
    train_embs = embeds[0, idx_train]
    val_embs = embeds[0, idx_val]
    test_embs = embeds[0, idx_test]

    # Labels are one-hot rows; argmax turns them into class indices.
    train_lbls = torch.argmax(labels[0, idx_train], dim=1)
    val_lbls = torch.argmax(labels[0, idx_val], dim=1)
    test_lbls = torch.argmax(labels[0, idx_test], dim=1)

    accs = []

    print("\nValidation:")
    pbar = trange(50)
    for _ in pbar:
        log = LogReg(hid_units, nb_classes)
        opt = torch.optim.Adam(log.parameters(), lr=0.01, weight_decay=0.0)
        if use_cuda:
            log.cuda()

        for _ in range(100):
            log.train()
            opt.zero_grad()

            logits = log(train_embs)
            loss = xent(logits, train_lbls)

            loss.backward()
            opt.step()

        logits = log(test_embs)
        preds = torch.argmax(logits, dim=1)
        acc = torch.sum(preds == test_lbls).float() / test_lbls.shape[0]
        accs.append(acc * 100)
        pbar.desc = "Accuracy: {:.2f}%".format(100 * acc)

    # Only the classifier from the final run is kept on disk.
    torch.save(log.state_dict(), saved_logreg)

    accs = torch.stack(accs)
    print('Average Accuracy: {:.2f}%'.format(accs.mean()))
    print('Standard Deviation: {:.3f}'.format(accs.std()))

    print("\nTesting")
    logits = log(val_embs)
    preds = torch.argmax(logits, dim=1)
    acc = torch.sum(preds == val_lbls).float() / val_lbls.shape[0]
    print("Accuracy: {:.2f}%".format(100 * acc))
from utils import process
import numpy as np

# Print the column-wise totals of the label matrix for each dataset.
for name in ('cora', 'pubmed', 'citeseer'):
    _, _, labels, _, _, _ = process.load_data(name)
    # Summing over axis 0 yields one total per label column.
    stat = np.sum(labels, axis=0)
    print('{}: '.format(name), stat)
print('----- Archi. hyperparams -----') print('nb. layers: ' + str(len(hid_units))) print('nb. units per layer: ' + str(hid_units)) print('nb. attention heads: ' + str(n_heads)) print('residual: ' + str(residual)) print('nonlinearity: ' + str(nonlinearity)) print('model: ' + str(model)) validation = True # Load data if validation: adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data_2( dataset, seed, unlabel_prob) else: adj, features, y_train, y_test, train_mask, test_mask = process.load_data( dataset, seed, unlabel_prob) features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] if sparse:
def main(args):
    """Pipeline for the two-view Graph Attention Auto-encoder.

    Loads ``args.dataset``, derives a second feature view by applying
    ``fft`` to the node features, prepends each view's feature width to its
    hidden-dimension list, assigns both views to the trainer and runs it
    with the fine-tuning flag set.

    An earlier variant read two pre-computed views and a graph from
    ``.mat`` files under ``database/HW``; only the ``process.load_data``
    path below is active.

    :param args: namespace providing ``dataset``, ``hidden_dims1`` and
        ``hidden_dims2``; both hidden-dimension lists are rebound.
    """
    graph, X, Y, idx_train, idx_val, idx_test = process.load_data(args.dataset)
    print('Graph的维度：' + str(graph.shape))
    print('Content的维度：' + str(X.shape))

    # Collapse each label row to the index of its largest entry.
    labels = np.array([np.argmax(row) for row in Y])
    print('Label的维度：' + str(labels.shape))

    # add feature dimension size to the beginning of hidden_dims
    args.hidden_dims1 = [X.shape[1]] + args.hidden_dims1

    # The second view is the transform of the first.
    X2 = fft(X)
    args.hidden_dims2 = [X2.shape[1]] + args.hidden_dims2

    print('隐层单元1的维度：' + str(args.hidden_dims1))
    print('隐层单元2的维度：' + str(args.hidden_dims2))

    # prepare the data
    G_tf, S, R = process.prepare_graph_data(graph)

    # Both views share the same graph structure.
    trainer = Trainer(args)
    trainer.assign(G_tf, X, S, R, G_tf, X2, S, R)

    # Fine-tune the model (last positional argument is the fine-tune flag).
    trainer(G_tf, X, S, R, G_tf, X2, S, R, labels, True)
from keras.optimizers import Adam
from keras.regularizers import l2
from keras_gat import GraphAttention
# from keras_gat.utils import load_data, preprocess_features
from utils.process import load_data, preprocess_features

tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True

# Read data
# The dataset name is the first command-line argument; a missing or unknown
# name is reported and the script exits with a non-zero status.
if len(sys.argv) < 2:
    print('usage: {} <cora|citeseer|pubmed|wiki>'.format(sys.argv[0]))
    sys.exit(1)
dataset = sys.argv[1]
if dataset not in ['cora', 'citeseer', 'pubmed', 'wiki']:
    print('invalid dataset')
    sys.exit(1)
A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = load_data(dataset)

# Parameters
N = X.shape[0]                # Number of nodes in the graph
F = X.shape[1]                # Original feature dimension
n_classes = Y_train.shape[1]  # Number of classes
F_ = 100                      # Output size of first GraphAttention layer
n_attn_heads = 8              # Number of attention heads in first GAT layer
dropout_rate = 0.5            # Dropout rate (between and inside GAT layers)
l2_reg = 5e-4/2               # Factor for l2 regularization
learning_rate = 0.0005        # Learning rate for Adam
epochs = 10000                # Number of training epochs
es_patience = 100             # Patience for early stopping

# Preprocessing operations
X = preprocess_features(X)
parser.add_argument('--beta', type=float, default=1.0,
                    help='parameter for I(h_i; x_j), node j is a neighbor (default: 1.0)')
parser.add_argument('--gamma', type=float, default=1.0,
                    help='parameter for I(w_ij; a_ij) (default: 1.0)')
parser.add_argument('--activation', default='prelu', help='activation function')

###############################################
# This section of code adapted from Petar Veličković/DGI #
###############################################

args = parser.parse_args()
# Select the requested GPU only when CUDA is usable, so the script can still
# start on a CPU-only machine.
if torch.cuda.is_available():
    torch.cuda.set_device(args.gpu)

print('Loading ', args.dataset)
adj_ori, features, labels, idx_train, idx_val, idx_test = process.load_data(args.dataset)
features, _ = process.preprocess_features(features)

nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = labels.shape[1]

# Add self-loops before normalising, then convert to a torch sparse tensor.
adj = process.normalize_adj(adj_ori + sp.eye(adj_ori.shape[0]))
sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj)

# A leading axis of size one is added to features and labels.
features = torch.FloatTensor(features[np.newaxis])
labels = torch.FloatTensor(labels[np.newaxis])
idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

model = GMI(ft_size, args.hid_units, args.activation)
def train():
    """Train the module-level ``model`` with early stopping and test it.

    Reads its configuration from module scope (``dataset``, ``train_size``,
    ``validation_size``, ``batch_size``, ``model``, ``hid_units``,
    ``n_heads``, ``residual``, ``nonlinearity``, ``lr``, ``l2_coef``,
    ``nb_epochs``, ``patience``, ``checkpt_file`` and ``args.validate``).

    Each epoch runs one training pass and one monitoring pass. The
    monitoring pass uses the validation split when ``args.validate`` is
    set, otherwise the training split with dropout disabled. Whenever the
    monitored accuracy and loss are both at least as good as the best seen
    so far, the model is checkpointed and its test metrics are printed.
    Training stops after ``patience`` consecutive epochs without
    improvement; the checkpoint is then restored and tested.

    :return: ``(test_loss, test_accuracy)`` of the restored checkpoint.
    """
    sparse = True

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(
        dataset, train_size, validation_size)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Add a leading axis of size one to every array that is sliced per batch.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units, n_heads=n_heads,
                                 residual=residual, activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0
        # Defined up front so the early-stop report cannot hit an unbound name
        # if no checkpoint was ever written.
        vacc_early_model = 0.0
        vlss_early_model = np.inf

        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init_op)

            def run_pass(labels, mask, training, drop, extra_ops=()):
                """Run every batch once and return (mean loss, mean accuracy)."""
                loss_sum = 0.0
                acc_sum = 0.0
                steps = 0
                for lo in range(0, features.shape[0], batch_size):
                    hi = lo + batch_size
                    fetched = sess.run(
                        list(extra_ops) + [loss, accuracy],
                        feed_dict={
                            ftr_in: features[lo:hi],
                            # The sparse bias is one object shared by all batches.
                            bias_in: biases if sparse else biases[lo:hi],
                            lbl_in: labels[lo:hi],
                            msk_in: mask[lo:hi],
                            is_train: training,
                            attn_drop: drop,
                            ffd_drop: drop})
                    loss_sum += fetched[-2]
                    acc_sum += fetched[-1]
                    steps += 1
                return loss_sum / steps, acc_sum / steps

            for epoch in range(nb_epochs):
                train_loss, train_acc = run_pass(y_train, train_mask, True, 0.6,
                                                 extra_ops=[train_op])

                if args.validate:
                    val_loss, val_acc = run_pass(y_val, val_mask, False, 0.0)
                else:
                    # No validation requested: monitor the training split
                    # with dropout switched off instead.
                    val_loss, val_acc = run_pass(y_train, train_mask, False, 0.)

                print('%d Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                      (epoch, train_loss, train_acc, val_loss, val_acc))

                if val_acc > vacc_mx or val_loss < vlss_mn:
                    if val_acc >= vacc_mx and val_loss <= vlss_mn:
                        vacc_early_model = val_acc
                        vlss_early_model = val_loss
                        saver.save(sess, checkpt_file)

                        # Report how the freshly checkpointed model does on
                        # the test split.
                        ts_loss, ts_acc = run_pass(y_test, test_mask, False, 0.0)
                        print('Test loss:', ts_loss, '; Test accuracy:', ts_acc)
                    vacc_mx = np.max((val_acc, vacc_mx))
                    vlss_mn = np.min((val_loss, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ', vlss_early_model,
                              ', accuracy: ', vacc_early_model)
                        break

            saver.restore(sess, checkpt_file)

            ts_loss, ts_acc = run_pass(y_test, test_mask, False, 0.0)
            print('Test loss:', ts_loss, '; Test accuracy:', ts_acc)

            return ts_loss, ts_acc
def main(args):
    """Train ``SpHGAT`` on ``args.dataset`` and report metrics at early stop.

    Every epoch runs one training pass and one validation pass. When the
    validation accuracy or loss is at least as good as the best seen so
    far, the test split is evaluated and its metrics are remembered.
    After ``patience`` consecutive epochs without such an improvement the
    best validation figures and the remembered test figures are printed.

    :param args: namespace providing ``gpu``, ``dataset``, ``lr``, ``l2``,
        ``units`` and ``drop``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    session_conf = tf.ConfigProto()
    session_conf.gpu_options.allow_growth = True

    dataset = args.dataset

    # training params
    batch_size = 1
    nb_epochs = 100000
    patience = 100
    lr = args.lr
    l2_coef = args.l2
    hid_units = [args.units]
    n_heads = [1, 1]  # layers
    drop_out = args.drop
    nonlinearity = tf.nn.elu
    model = SpHGAT
    sparse = True

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))

    (adj, features, y_train, y_val, y_test,
     train_mask, val_mask, test_mask) = process.load_data(dataset)
    features, spars = process.preprocess_features(features)

    nb_nodes, ft_size = features.shape[0], features.shape[1]
    nb_classes = y_train.shape[1]

    # Give every per-batch array a leading axis of size one.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units, n_heads=n_heads,
                                 activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
        pred_all = tf.cast(tf.argmax(log_resh, 1), dtype=tf.int32)
        real_all = tf.cast(tf.argmax(lab_resh, 1), dtype=tf.int32)
        train_op = model.my_training(loss, lr, l2_coef)

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        tlss_mn = 0.0
        tacc_mx = 0.0
        curr_step = 0

        with tf.Session(config=session_conf) as sess:
            sess.run(init_op)

            def sweep(targets, mask, training, drop, prefix_ops=()):
                """Visit each batch once; return (mean loss, mean accuracy)."""
                total_loss = 0.0
                total_acc = 0.0
                count = 0
                for start in range(0, features.shape[0], batch_size):
                    stop = start + batch_size
                    values = sess.run(
                        list(prefix_ops) + [loss, accuracy],
                        feed_dict={
                            ftr_in: features[start:stop],
                            # the sparse bias is shared by every batch
                            bias_in: biases if sparse else biases[start:stop],
                            lbl_in: targets[start:stop],
                            msk_in: mask[start:stop],
                            is_train: training,
                            attn_drop: drop,
                            ffd_drop: drop})
                    total_loss += values[-2]
                    total_acc += values[-1]
                    count += 1
                return total_loss / count, total_acc / count

            for epoch in range(nb_epochs):
                train_loss, train_acc = sweep(y_train, train_mask, True, drop_out,
                                              prefix_ops=[train_op])
                val_loss, val_acc = sweep(y_val, val_mask, False, 0.0)

                print(epoch, 'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                      (train_loss, train_acc, val_loss, val_acc))

                improved = val_acc >= vacc_mx or val_loss <= vlss_mn
                if not improved:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min validation loss: ', vlss_mn,
                              ', Max accuracy: ', vacc_mx)
                        print('Test loss:', tlss_mn, ', accuracy {}'.format(tacc_mx))
                        break
                    continue

                vacc_mx = np.max((val_acc, vacc_mx))
                vlss_mn = np.min((val_loss, vlss_mn))
                curr_step = 0
                # Remember the test metrics that go with this improvement.
                tlss_mn, tacc_mx = sweep(y_test, test_mask, False, 0.0)

            sess.close()
def train(sparse, epochs, lr, patience, l2_coef, hid_units, n_heads, residual,
          attention_drop, edge_attr_directory, node_features_path, label_path,
          log_path, train_ratio):
    """Train a GAT over edge-attributed graphs with early stopping, then test it.

    ``log_path`` is wiped and recreated, the data is loaded through
    ``process.load_data``, and ``SpGAT`` (``sparse`` true) or ``GAT`` is
    trained for at most ``epochs`` epochs. The model is checkpointed to
    ``pre_trained/mod_test.ckpt`` whenever validation accuracy and loss are
    both at least as good as the best seen so far. After ``patience``
    consecutive epochs without improvement training stops, the checkpoint
    is restored and the test loss and accuracy are printed.

    :param sparse: use the sparse model and a constant sparse bias tensor.
    :param epochs: maximum number of training epochs.
    :param lr: learning rate passed to ``model.training``.
    :param patience: epochs without improvement before stopping.
    :param l2_coef: L2 coefficient passed to ``model.training``.
    :param hid_units: hidden unit sizes passed to ``model.inference``.
    :param n_heads: attention head counts passed to ``model.inference``.
    :param residual: residual flag passed to ``model.inference``.
    :param attention_drop: dropout rate fed to both dropout placeholders
        during training.
    :param edge_attr_directory: forwarded to ``process.load_data``.
    :param node_features_path: forwarded to ``process.load_data``.
    :param label_path: forwarded to ``process.load_data``.
    :param log_path: directory for the train/test summary writers; any
        existing content is deleted.
    :param train_ratio: forwarded to ``process.load_data``.
    """
    nb_epochs = epochs

    # Start from an empty summary directory.
    if tf.gfile.Exists(log_path):
        tf.gfile.DeleteRecursively(log_path)
    tf.gfile.MakeDirs(log_path)

    checkpt_file = 'pre_trained/mod_test.ckpt'
    dataset = 'know'

    # training params
    batch_size = 1
    nonlinearity = tf.nn.elu
    model = SpGAT if sparse else GAT
    nhood = 1
    in_drop = attention_drop

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    (adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask,
     edge_attr_name) = process.load_data(
        edge_attr_directory, node_features_path, label_path, train_ratio)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Add a leading axis of size one to every array that is sliced per batch.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        # sparse (indices, values, dense_shape), the graph topologies (unweighted)
        biases = process.preprocess_adj_bias(adjs[0], to_unweighted=True)
        adjs = [
            tf.SparseTensor(*process.preprocess_adj_bias(adj, to_unweighted=False))
            for adj in adjs
        ]
    else:
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)
    print(biases)

    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # Baked into the graph as a constant, so it is never fed.
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    train_op = model.training(loss, lr, l2_coef)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    vlss_mn = np.inf
    vacc_mx = 0.0
    vmf1_mx = 0.0
    curr_step = 0
    # Defined up front so the early-stop report cannot hit an unbound name.
    vacc_early_model = 0.0
    vlss_early_model = np.inf

    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        train_summary_writer = tf.summary.FileWriter(log_path + '/train')
        test_summary_writer = tf.summary.FileWriter(log_path + '/test')
        sess.run(init_op)

        def run_batches(ops, labels, mask, training, attn_rate, ffd_rate):
            """Run ``ops`` on every batch and return the per-batch results."""
            results = []
            for lo in range(0, features.shape[0], batch_size):
                hi = lo + batch_size
                feed = {
                    ftr_in: features[lo:hi],
                    lbl_in: labels[lo:hi],
                    msk_in: mask[lo:hi],
                    is_train: training,
                    attn_drop: attn_rate,
                    ffd_drop: ffd_rate,
                }
                if not sparse:
                    # The dense bias is a placeholder and must be fed.
                    feed[bias_in] = biases[lo:hi]
                results.append(sess.run(ops, feed_dict=feed))
            return results

        def mean_of(results, index):
            """Average one fetched value over all batches."""
            return sum(r[index] for r in results) / len(results)

        for epoch in range(nb_epochs):
            tr_out = run_batches([train_op, merged, loss, accuracy, micro_f1],
                                 y_train, train_mask, True, attention_drop, in_drop)
            for batch_result in tr_out:
                print(batch_result[2])
            # Only the summary of the last batch is written for the epoch.
            train_summary_writer.add_summary(tr_out[-1][1], epoch)

            vl_out = run_batches([merged, loss, accuracy, micro_f1],
                                 y_val, val_mask, False, 0.0, 0.0)
            test_summary_writer.add_summary(vl_out[-1][0], epoch)

            val_loss = mean_of(vl_out, 1)
            val_acc = mean_of(vl_out, 2)
            val_f1 = mean_of(vl_out, 3)

            print(
                'Training: loss = %.5f, acc = %.5f, micro_f1 = %.5f | Val: loss = %.5f, acc = %.5f, micro_f1 = %.5f'
                % (mean_of(tr_out, 2), mean_of(tr_out, 3), mean_of(tr_out, 4),
                   val_loss, val_acc, val_f1))

            if val_acc >= vacc_mx or val_loss <= vlss_mn:
                if val_acc >= vacc_mx and val_loss <= vlss_mn:
                    vacc_early_model = val_acc
                    vlss_early_model = val_loss
                    saver.save(sess, checkpt_file)
                vmf1_mx = np.max((val_f1, vmf1_mx))
                vacc_mx = np.max((val_acc, vacc_mx))
                vlss_mn = np.min((val_loss, vlss_mn))
                curr_step = 0
            else:
                curr_step += 1
                if curr_step == patience:
                    print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ',
                          vacc_mx, ', Max Micro-f1', vmf1_mx)
                    print('Early stop model validation loss: ', vlss_early_model,
                          ', accuracy: ', vacc_early_model)
                    break

        saver.restore(sess, checkpt_file)

        ts_out = run_batches([loss, accuracy], y_test, test_mask, False, 0.0, 0.0)
        print('Test loss:', mean_of(ts_out, 0), '; Test accuracy:', mean_of(ts_out, 1))

        # Flush and release the summary files.
        train_summary_writer.close()
        test_summary_writer.close()
def run_gat(dataset, batch_size, nb_epochs, patience, lr, l2_coef, hid_units,
            n_heads, residual, nonlinearity, model, checkpt_file, nhood):
    """Train and test a dense GAT, logging all output next to the checkpoint.

    Everything printed during the run goes to ``out.txt`` in the directory
    of ``checkpt_file``. If that file already exists the run is appended
    after a block of blank lines. ``sys.stdout`` is restored and the log
    file closed even when training raises.

    :param dataset: dataset name passed to ``process.load_data``.
    :param batch_size: number of graphs per batch.
    :param nb_epochs: maximum number of training epochs.
    :param patience: epochs without improvement before stopping.
    :param lr: learning rate passed to ``model.training``.
    :param l2_coef: L2 coefficient passed to ``model.training``.
    :param hid_units: hidden unit sizes passed to ``model.inference``.
    :param n_heads: attention head counts passed to ``model.inference``.
    :param residual: residual flag passed to ``model.inference``.
    :param nonlinearity: activation passed to ``model.inference``.
    :param model: object providing ``inference``, ``training``,
        ``masked_softmax_cross_entropy`` and ``masked_accuracy``.
    :param checkpt_file: checkpoint path; its directory also holds the log.
    :param nhood: neighbourhood size passed to ``process.adj_to_bias``.
    """
    # redirect output to file
    import sys

    log_file = os.path.join(os.path.dirname(checkpt_file), 'out.txt')
    appending = os.path.isfile(log_file)
    orig_stdout = sys.stdout

    # Mode 'a' creates the file when it is missing, so one mode covers both cases.
    with open(log_file, 'a') as f:
        if appending:
            # Separate this run from the previous one inside the log itself.
            f.write('\n\n\n\n\n')
        sys.stdout = f
        try:
            _run_gat_logged(dataset, batch_size, nb_epochs, patience, lr, l2_coef,
                            hid_units, n_heads, residual, nonlinearity, model,
                            checkpt_file, nhood)
        finally:
            sys.stdout = orig_stdout


def _run_gat_logged(dataset, batch_size, nb_epochs, patience, lr, l2_coef,
                    hid_units, n_heads, residual, nonlinearity, model,
                    checkpt_file, nhood):
    """Print the configuration, train with early stopping and print test metrics."""
    print('Dataset: ' + dataset)
    print('batch_size: ' + str(batch_size))
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))
    print('nhood: ' + str(nhood))

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(
        dataset)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    adj = adj.todense()

    # Add a leading axis of size one to every array that is sliced per batch.
    features = features[np.newaxis]
    adj = adj[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units, n_heads=n_heads,
                                 residual=residual, activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0
        # Defined up front so the early-stop report cannot hit an unbound name.
        vacc_early_model = 0.0
        vlss_early_model = np.inf

        start = time.time()

        with tf.Session() as sess:
            sess.run(init_op)

            def run_pass(labels, mask, training, drop, extra_ops=()):
                """Run every batch once and return (mean loss, mean accuracy)."""
                loss_sum = 0.0
                acc_sum = 0.0
                steps = 0
                for lo in range(0, features.shape[0], batch_size):
                    hi = lo + batch_size
                    fetched = sess.run(
                        list(extra_ops) + [loss, accuracy],
                        feed_dict={
                            ftr_in: features[lo:hi],
                            bias_in: biases[lo:hi],
                            lbl_in: labels[lo:hi],
                            msk_in: mask[lo:hi],
                            is_train: training,
                            attn_drop: drop,
                            ffd_drop: drop})
                    loss_sum += fetched[-2]
                    acc_sum += fetched[-1]
                    steps += 1
                return loss_sum / steps, acc_sum / steps

            for epoch in range(nb_epochs):
                train_loss, train_acc = run_pass(y_train, train_mask, True, 0.6,
                                                 extra_ops=[train_op])
                val_loss, val_acc = run_pass(y_val, val_mask, False, 0.0)

                print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                      (train_loss, train_acc, val_loss, val_acc))

                if val_acc >= vacc_mx or val_loss <= vlss_mn:
                    if val_acc >= vacc_mx and val_loss <= vlss_mn:
                        vacc_early_model = val_acc
                        vlss_early_model = val_loss
                        saver.save(sess, checkpt_file)
                    vacc_mx = np.max((val_acc, vacc_mx))
                    vlss_mn = np.min((val_loss, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ', vlss_early_model,
                              ', accuracy: ', vacc_early_model)
                        break

            saver.restore(sess, checkpt_file)

            ts_loss, ts_acc = run_pass(y_test, test_mask, False, 0.0)

            # ``epoch`` is the index of the last epoch that ran.
            print('Test loss:', ts_loss, '; Test accuracy:', ts_acc,
                  ' at epoch: ', epoch, ' elapsed time', time.time() - start)
from utils import process dataset = 'cora' # training params batch_size = 1 nb_epochs = 10000 patience = 20 lr = 0.001 l2_coef = 0.0 drop_prob = 0.0 hid_units = 512 sparse = True nonlinearity = 'prelu' # special name to separate parameters adj, features, labels, idx_train, idx_val, idx_test = process.load_data(dataset, use_dgi_data=False) features, _ = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = labels.shape[1] adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) if sparse: sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj) else: adj = (adj + sp.eye(adj.shape[0])).todense() features = torch.FloatTensor(features[np.newaxis]) if not sparse:
def run_gat(dataset, batch_size, nb_epochs, patience, lr, l2_coef, hid_units, n_heads, residual, nonlinearity, model,
            checkpt_file, nhood, param_attn_drop=0.6, param_ffd_drop=0.6, sparse=False):
    '''
    Train a GAT model on one dataset with early stopping, evaluate the best
    checkpoint on the test mask and append the outcome to a CSV log.

    While the function runs, ``sys.stdout`` is redirected to an ``out.txt``
    file derived from ``checkpt_file``; it is restored (and the file closed)
    even if training raises.

    :param dataset: The string name of the dataset, passed to ``process.load_data``.
    :param batch_size: Number of samples per batch (original note: has to be one for sparse execution).
    :param nb_epochs: Maximum number of epochs to run.
    :param patience: Number of consecutive epochs in which neither validation accuracy nor validation
        loss improves before training stops.
    :param lr: Learning rate.
    :param l2_coef: The L2 regularization strength.
    :param hid_units: List. Number of features the respective layer produces from the input features.
    :param n_heads: List. Number of entries is the number of layers. Each value is the number of attention heads.
    :param residual: Whether or not to use residual connections in the hidden layers.
    :param nonlinearity: tensorflow function for non-linearity.
    :param model: Model class providing ``inference``, ``masked_softmax_cross_entropy``,
        ``masked_accuracy`` and ``training``.
    :param checkpt_file: Checkpoint path; the text output and CSV log paths are derived from its directory name.
    :param nhood: The neighborhood to consider when building the dense attention bias. Not used when
        ``sparse`` is True, because ``process.preprocess_adj_bias`` is called without it.
    :param param_attn_drop: Value fed to the attention-dropout placeholder during training.
    :param param_ffd_drop: Value fed to the feed-forward-dropout placeholder during training.
    :param sparse: If True, a sparse bias placeholder is used (the model has to be SpGAT).
    :return: None. Results are printed and logged.
    '''
    # Work-around that was once necessary to run on GPU (disabled, kept for reference):
    #   from tensorflow.compat.v1 import ConfigProto
    #   from tensorflow.compat.v1 import InteractiveSession
    #   config = ConfigProto()
    #   config.gpu_options.allow_growth = True
    #   session = InteractiveSession(config=config)
    import sys

    # NOTE(review): dirname() carries no trailing separator, so these resolve to
    # '<dir>out.txt' / '<dir>log.csv' beside the checkpoint directory, not inside
    # it. Left unchanged so that logs written by earlier runs are still found.
    out_path = os.path.dirname(checkpt_file) + 'out.txt'
    log_path = os.path.dirname(checkpt_file) + 'log.csv'

    # redirect output to file (append to an existing one, otherwise create it)
    orig_stdout = sys.stdout
    appending = os.path.isfile(out_path)
    f = open(out_path, 'a' if appending else 'w')
    sys.stdout = f
    try:
        if appending:
            print('\n\n\n\n')

        print('Dataset: ' + dataset)
        print('batch_size: ' + str(batch_size))
        print('----- Opt. hyperparams -----')
        print('lr: ' + str(lr))
        print('l2_coef: ' + str(l2_coef))
        print('----- Archi. hyperparams -----')
        print('nb. layers: ' + str(len(hid_units)))
        print('nb. units per layer: ' + str(hid_units))
        print('nb. attention heads: ' + str(n_heads))
        print('residual: ' + str(residual))
        print('nonlinearity: ' + str(nonlinearity))
        print('model: ' + str(model))
        print('nhood: ' + str(nhood))
        print('attn_drop: ' + str(param_attn_drop))
        print('ffd_drop: ' + str(param_ffd_drop))

        # load any of the three transductive datasets
        # adj has information about the connections
        # features is a node x features matrix with the features for each node
        # y_... has the label for each class in a node x class matrix
        # mask has 0 or 1 as value in a node vector, this is used to mask train, val and test set
        # so for all nodes all information is calculated, but only nodes masked with 1 are evaluated
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(dataset)

        # preprocessing steps
        features, _ = process.preprocess_features(features)

        nb_nodes = features.shape[0]
        ft_size = features.shape[1]
        nb_classes = y_train.shape[1]

        # add a leading batch axis of size one
        features = features[np.newaxis]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]

        # the adjacency matrix is transformed into a bias that is added.
        # when no connection between nodes exists in the specified neighborhood, the value of the attention
        # between both nodes is set to a big negative value, practically canceling out the effect.
        if sparse:
            biases = process.preprocess_adj_bias(adj)
        else:
            adj = adj.todense()
            adj = adj[np.newaxis]
            # honour the caller's neighborhood size (previously hard-coded to 1)
            biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood)

        with tf.Graph().as_default():
            with tf.name_scope('input'):
                ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
                if sparse:
                    bias_in = tf.sparse_placeholder(dtype=tf.float32)
                else:
                    bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
                attn_drop = tf.placeholder(dtype=tf.float32, shape=())
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
                is_train = tf.placeholder(dtype=tf.bool, shape=())

            logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop,
                                     bias_mat=bias_in, hid_units=hid_units, n_heads=n_heads,
                                     residual=residual, activation=nonlinearity)
            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])
            loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

            train_op = model.training(loss, lr, l2_coef)

            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

            def _run_pass(sess, labels, mask, training):
                '''Run one pass over all batches; return (summed loss, summed accuracy, number of steps).'''
                fetches = [loss, accuracy]
                if training:
                    fetches = [train_op] + fetches
                loss_sum = 0.0
                acc_sum = 0.0
                step = 0
                size = features.shape[0]
                while step * batch_size < size:
                    lo = step * batch_size
                    hi = (step + 1) * batch_size
                    # the sparse bias is fed whole; the dense one is sliced per batch
                    bbias = biases if sparse else biases[lo:hi]
                    values = sess.run(
                        fetches,
                        feed_dict={
                            ftr_in: features[lo:hi],
                            bias_in: bbias,
                            lbl_in: labels[lo:hi],
                            msk_in: mask[lo:hi],
                            is_train: training,
                            # dropout is only active while training
                            attn_drop: param_attn_drop if training else 0.0,
                            ffd_drop: param_ffd_drop if training else 0.0
                        })
                    loss_sum += values[-2]
                    acc_sum += values[-1]
                    step += 1
                return loss_sum, acc_sum, step

            vlss_mn = np.inf
            vacc_mx = 0.0
            curr_step = 0
            # metrics of the last checkpointed model; pre-bound so the early-stop
            # message cannot hit an unbound name
            vacc_early_model = None
            vlss_early_model = None

            start = time.time()

            with tf.Session() as sess:
                sess.run(init_op)

                for epoch in range(nb_epochs):
                    # training steps
                    tr_loss_sum, tr_acc_sum, tr_step = _run_pass(sess, y_train, train_mask, True)
                    # validation steps
                    vl_loss_sum, vl_acc_sum, vl_step = _run_pass(sess, y_val, val_mask, False)

                    val_loss = vl_loss_sum / vl_step
                    val_acc = vl_acc_sum / vl_step

                    print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                          (tr_loss_sum / tr_step, tr_acc_sum / tr_step, val_loss, val_acc))

                    # patience step: any improvement resets the counter, but the
                    # checkpoint is only written when both metrics are at their best
                    if val_acc >= vacc_mx or val_loss <= vlss_mn:
                        if val_acc >= vacc_mx and val_loss <= vlss_mn:
                            vacc_early_model = val_acc
                            vlss_early_model = val_loss
                            saver.save(sess, checkpt_file)
                        vacc_mx = np.max((val_acc, vacc_mx))
                        vlss_mn = np.min((val_loss, vlss_mn))
                        curr_step = 0
                    else:
                        curr_step += 1
                        if curr_step == patience:
                            print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                            print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ',
                                  vacc_early_model)
                            break

                saver.restore(sess, checkpt_file)

                # evaluate the restored checkpoint on the test set
                ts_loss, ts_acc, ts_step = _run_pass(sess, y_test, test_mask, False)

                print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step, ' at epoch: ', epoch,
                      ' elapsed time', time.time() - start)

                # log information about the training
                if os.path.isfile(log_path):
                    print('loading existing log')
                    df = pd.read_csv(log_path, index_col=['run'])
                    print('log: ' + str(df))
                else:
                    print('Creating new log')
                    df = pd.DataFrame(columns=tracking_params + result_cols)
                log = dict(
                    zip(tracking_params + result_cols, [
                        dataset, lr, l2_coef, hid_units, n_heads, residual,
                        str(nonlinearity).split(' ')[1], param_attn_drop, param_ffd_drop, nhood
                    ] + [epoch, time.time() - start, vlss_mn, vacc_mx, ts_loss / ts_step, ts_acc / ts_step]))

                print('Adding entry: ' + str(log))
                # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead
                df = pd.concat([df, pd.DataFrame([log])], ignore_index=True)
                print('saving logs')
                df.to_csv(log_path, index_label='run')
                print('log save succesfull')
    finally:
        # restore standard output, also when training failed
        sys.stdout = orig_stdout
        f.close()
def train(sparse, hid_units, n_heads, residual, edge_attr_directory, node_features_path, label_path, train_ratio,
          model_path):
    '''
    Restore a saved (Sp)GAT checkpoint and print its loss and accuracy on the test mask.

    Despite the name, no optimisation step is run here: the graph is rebuilt,
    variables are restored from ``model_path`` and only the test split is evaluated.

    :param sparse: If True use ``SpGAT`` with a constant sparse bias tensor, otherwise ``GAT`` with a
        dense bias placeholder.
    :param hid_units: List. Number of units per hidden layer.
    :param n_heads: List. Number of attention heads per layer.
    :param residual: Whether or not to use residual connections.
    :param edge_attr_directory: Passed through to ``process.load_data``.
    :param node_features_path: Passed through to ``process.load_data``.
    :param label_path: Passed through to ``process.load_data``.
    :param train_ratio: Passed through to ``process.load_data``.
    :param model_path: Checkpoint file to restore.
    :return: None. Results are printed.
    '''
    checkpt_file = model_path

    dataset = 'know'

    # evaluation params
    batch_size = 1
    nonlinearity = tf.nn.elu
    model = SpGAT if sparse else GAT
    nhood = 1

    print('Dataset: ' + dataset)
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_attr_name = process.load_data(
        edge_attr_directory, node_features_path, label_path, train_ratio)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # add a leading batch axis of size one (only the test split is evaluated here)
    features = features[np.newaxis]
    y_test = y_test[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        # sparse (indices, values, dense_shape), the graph topologies (unweighted)
        biases = process.preprocess_adj_bias(adjs[0], to_unweighted=True)
        adjs = [tf.SparseTensor(*process.preprocess_adj_bias(adj, to_unweighted=False)) for adj in adjs]
    else:
        # NOTE(review): adjs[0] is passed as loaded; confirm adj_to_bias returns a
        # (batch, nodes, nodes) array matching the bias placeholder below.
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)

    print(biases)

    # the ops below are added to the default graph
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # the sparse bias is baked into the graph as a constant tensor
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    # built as in the original graph but never fetched below
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    with tf.Session() as sess:
        sess.run(init_op)
        saver.restore(sess, checkpt_file)

        ts_size = features.shape[0]
        ts_step = 0
        ts_loss = 0.0
        ts_acc = 0.0

        while ts_step * batch_size < ts_size:
            lo = ts_step * batch_size
            hi = (ts_step + 1) * batch_size
            feed = {
                ftr_in: features[lo:hi],
                lbl_in: y_test[lo:hi],
                msk_in: test_mask[lo:hi],
                is_train: False,
                attn_drop: 0.0,
                ffd_drop: 0.0
            }
            if not sparse:
                # the dense bias is a placeholder and must be fed; it was left
                # unfed before, which made the dense path fail at run time
                feed[bias_in] = biases[lo:hi]
            loss_value_ts, acc_ts = sess.run([loss, accuracy], feed_dict=feed)
            ts_loss += loss_value_ts
            ts_acc += acc_ts
            ts_step += 1

        print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step)
# Script fragment: choose activation/model, echo the run configuration, then
# load the dataset and add a leading batch axis to the arrays.
# `dataset`, `lr`, `l2_coef`, `hid_units`, `n_heads` and `residual` are defined
# earlier in the script, outside this fragment.
nonlinearity = tf.nn.elu
model = GAT

print('Dataset: ' + dataset)
print('----- Opt. hyperparams -----')
print('lr: ' + str(lr))
print('l2_coef: ' + str(l2_coef))
print('----- Archi. hyperparams -----')
print('nb. layers: ' + str(len(hid_units)))
print('nb. units per layer: ' + str(hid_units))
print('nb. attention heads: ' + str(n_heads))
print('residual: ' + str(residual))
print('nonlinearity: ' + str(nonlinearity))
print('model: ' + str(model))

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(dataset)
features, spars = process.preprocess_features(features)

# Sizes are read before the batch axis is added below.
nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = y_train.shape[1]

# The adjacency is densified here, unlike the sparse-capable variants.
adj = adj.todense()

# Prepend a batch axis of size one to every array.
features = features[np.newaxis]
adj = adj[np.newaxis]
y_train = y_train[np.newaxis]
y_val = y_val[np.newaxis]
y_test = y_test[np.newaxis]
train_mask = train_mask[np.newaxis]
val_mask = val_mask[np.newaxis]
# Script fragment: hyper-parameters and data preparation for a DGI run on
# citeseer. `sp`, `np`, `torch` and `DGI` are imported outside this fragment.
from utils import process
from utils import metrics

dataset = "citeseer"

# training params
batch_size = 1
nb_epochs = 200
patience = 20
lr = 0.001
l2_coef = 0.0
drop_prob = 0.0
hid_units = 32
out_units = 16
sparse = False
nonlinearity = "prelu"

adj, features, labels, g = process.load_data(dataset)
features, _ = process.preprocess_features(features)

nb_nodes = features.shape[0]
ft_size = features.shape[1]
# NOTE(review): len(labels) is the length of the first axis of `labels`; confirm
# this is really the number of classes and not the number of labelled nodes.
nb_classes = len(labels)

# Normalise the adjacency with self-loops added.
adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))
# NOTE(review): the identity is added a second time here, after normalisation —
# confirm this is intended.
adj = (adj + sp.eye(adj.shape[0])).todense()

# Convert to float tensors with a leading batch axis of size one.
adj = torch.FloatTensor(adj[np.newaxis])
features = torch.FloatTensor(features[np.newaxis])
# labels = torch.FloatTensor(labels[np.newaxis])

model = DGI(ft_size, hid_units, nonlinearity)
# model = GAE(ft_size, hid_units, out_units, nonlinearity)
parser.add_argument('--b', dest='beta', type=int, default=100,help='') parser.add_argument('--c', dest='num_clusters', type=float, default=128,help='') parser.add_argument('--a', dest='alpha', type=float, default=0.5,help='') parser.add_argument('--test_rate', dest='test_rate', type=float, default=0.1,help='') args = parser.parse_args() return args if __name__ == "__main__": args = parser_loader() pruning_gcn.print_args(args) rewind_weight = None dataset = args.dataset adj, features, labels, idx_train, idx_val, idx_test = process.load_data(dataset) adj_sparse = adj adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \ test_edges, test_edges_false = process.mask_test_edges(adj, test_frac=args.test_rate, val_frac=0.05) adj = adj_train features, _ = process.preprocess_features(features) features = torch.FloatTensor(features[np.newaxis]).cuda() adj = torch.FloatTensor(adj.todense()).cuda() labels = torch.FloatTensor(labels[np.newaxis]).cuda() dataset_dict = {} dataset_dict['adj'] = adj dataset_dict['adj_sparse'] = adj_sparse dataset_dict['features'] = features dataset_dict['labels'] = labels dataset_dict['val_edges'] = val_edges
print('nb. units per layer: ' + str(hid_units)) print('nb. attention heads: ' + str(n_heads)) print('residual: ' + str(residual)) print('nonlinearity: ' + str(nonlinearity)) print('model: ' + str(model)) sparse = True for per_class in [200]: result_path = 'result/' + dataset + str(per_class) + '/' if not os.path.exists('result'): os.mkdir('result') if not os.path.exists(result_path): os.mkdir(result_path) metapaths, metapaths_name, adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data( dataset, per_class) features, spars = process.preprocess_features(features) nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] all_metapaths_best = (0, 0, 0, 0)