def __init__(self, params):
    """Copy hyper-parameters from ``params`` and prepare the sparse GAT inputs.

    Besides mirroring the configuration values onto the instance, this
    instantiates ``SpGAT`` and converts ``params.adjacency_matrix`` with
    ``process.preprocess_adj_bias``.

    :param params: configuration object exposing the attributes read below.
    """
    self.params = params

    # Settings that are stored under the same name they have on ``params``.
    for attr in ('num_factors', 'num_users', 'num_items', 'num_doms',
                 'reg_lam', 'reg_w', 'reg_b', 'initializer',
                 'num_nodes', 'feature_dim', 'social_attr_dim'):
        setattr(self, attr, getattr(params, attr))
    print('feature dim, social_attr_dim: ', self.feature_dim, self.social_attr_dim)

    # Graph-attention model and its inputs.
    self.model = SpGAT()
    self.biases = process.preprocess_adj_bias(params.adjacency_matrix)
    self.user_item_features = params.user_item_embed_mat  # used as-is, no extra leading axis
    self.social_features = params.social_embed_mat  # used as-is, no extra leading axis

    # Attention-network hyper-parameters.
    self.out_size = params.num_factors
    for attr in ('hid_units', 'n_heads', 'attn_keep', 'ffd_keep', 'proj_keep'):
        setattr(self, attr, getattr(params, attr))
    self.residual = False
    self.nonlinearity = tf.nn.elu
def main(args):
    """Train ``SpHGAT`` on ``args.dataset`` with early stopping and report test metrics.

    Reads ``args.gpu`` (exported as ``CUDA_VISIBLE_DEVICES``), ``args.dataset``,
    ``args.lr``, ``args.l2``, ``args.units`` and ``args.drop``.

    Each epoch runs one training pass and one validation pass. Whenever the
    validation accuracy or the validation loss is at least as good as the best
    value seen so far, the test set is evaluated and those test metrics are
    remembered. Training stops after ``patience`` consecutive epochs without
    such an improvement, or when the epoch budget is exhausted; in both cases
    the remembered test metrics are printed.

    :param args: parsed command-line arguments (see attributes listed above).
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    dataset = args.dataset

    # training params
    batch_size = 1
    nb_epochs = 100000
    patience = 100
    lr = args.lr
    l2_coef = args.l2
    hid_units = [args.units]
    n_heads = [1, 1]  # layers
    drop_out = args.drop
    nonlinearity = tf.nn.elu
    model = SpHGAT

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))

    sparse = True

    (adj, features, y_train, y_val, y_test,
     train_mask, val_mask, test_mask) = process.load_data(dataset)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Prepend a length-1 axis so the arrays match the (batch_size, ...) placeholders.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units, n_heads=n_heads,
                                 activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        # Not fetched anywhere below; kept so the constructed graph is unchanged.
        pred_all = tf.cast(tf.argmax(log_resh, 1), dtype=tf.int32)
        real_all = tf.cast(tf.argmax(lab_resh, 1), dtype=tf.int32)

        train_op = model.my_training(loss, lr, l2_coef)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        def _run_pass(sess, labels, mask, training, drop_rate):
            """Run one pass over all batches; return (loss sum, accuracy sum, batch count)."""
            fetches = [loss, accuracy]
            if training:
                # Fetching train_op applies the parameter update for the batch.
                fetches.append(train_op)
            loss_sum = 0.0
            acc_sum = 0.0
            step = 0
            size = features.shape[0]
            while step * batch_size < size:
                lo = step * batch_size
                hi = lo + batch_size
                values = sess.run(fetches, feed_dict={
                    ftr_in: features[lo:hi],
                    bias_in: biases if sparse else biases[lo:hi],
                    lbl_in: labels[lo:hi],
                    msk_in: mask[lo:hi],
                    is_train: training,
                    attn_drop: drop_rate,
                    ffd_drop: drop_rate})
                loss_sum += values[0]
                acc_sum += values[1]
                step += 1
            return loss_sum, acc_sum, step

        vlss_mn = np.inf
        vacc_mx = 0.0
        # Test metrics measured at the most recent validation improvement.
        test_loss_at_best = 0.0
        test_acc_at_best = 0.0
        curr_step = 0

        # The context manager closes the session; no explicit close() is needed.
        with tf.Session(config=config) as sess:
            sess.run(init_op)

            for epoch in range(nb_epochs):
                tr_loss, tr_acc, tr_step = _run_pass(sess, y_train, train_mask, True, drop_out)
                vl_loss, vl_acc, vl_step = _run_pass(sess, y_val, val_mask, False, 0.0)
                val_loss = vl_loss / vl_step
                val_acc = vl_acc / vl_step

                print(epoch, 'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                      (tr_loss / tr_step, tr_acc / tr_step, val_loss, val_acc))

                if val_acc >= vacc_mx or val_loss <= vlss_mn:
                    vacc_mx = np.max((val_acc, vacc_mx))
                    vlss_mn = np.min((val_loss, vlss_mn))
                    curr_step = 0
                    ts_loss, ts_acc, ts_step = _run_pass(sess, y_test, test_mask, False, 0.0)
                    test_loss_at_best = ts_loss / ts_step
                    test_acc_at_best = ts_acc / ts_step
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min validation loss: ', vlss_mn,
                              ', Max accuracy: ', vacc_mx)
                        print('Test loss:', test_loss_at_best,
                              ', accuracy {}'.format(test_acc_at_best))
                        break
            else:
                # Epoch budget exhausted without early stopping: still report the
                # results instead of finishing silently.
                print('Epoch limit reached. Min validation loss: ', vlss_mn,
                      ', Max accuracy: ', vacc_mx)
                print('Test loss:', test_loss_at_best,
                      ', accuracy {}'.format(test_acc_at_best))
nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test_real = y_test test_mask_real = test_mask y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] if sparse: biases = process.preprocess_adj_bias(adj) else: adj = adj.todense() adj = adj[np.newaxis] biases = process.adj_to_bias(adj, [nb_nodes], nhood=1) with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) if sparse: #bias_idx = tf.placeholder(tf.int64) #bias_val = tf.placeholder(tf.float32) #bias_shape = tf.placeholder(tf.int64) bias_in = tf.sparse_placeholder(dtype=tf.float32) else:
adj = sp.csr_matrix(adj) ### nb_nodes = features.shape[0] ft_size = features.shape[1] nb_classes = y_train.shape[1] features = features[np.newaxis] y_train = y_train[np.newaxis] y_val = y_val[np.newaxis] y_test = y_test[np.newaxis] train_mask = train_mask[np.newaxis] val_mask = val_mask[np.newaxis] test_mask = test_mask[np.newaxis] biases = process.preprocess_adj_bias( adj) if args.model == 'gat' else process.preprocess_adj(adj) nnz = len(biases[1]) def run_once(run_id): with tf.Graph().as_default(): with tf.name_scope('input'): ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size)) bias_in = tf.sparse_placeholder(dtype=tf.float32) lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes)) msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes)) attn_drop = tf.placeholder(dtype=tf.float32, shape=()) ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
def train(sparse, epochs, lr, patience, l2_coef, hid_units, n_heads, residual,
          attention_drop, edge_attr_directory, node_features_path, label_path,
          log_path, train_ratio):
    """Train a (Sp)GAT model with edge attributes, early-stop, then print test metrics.

    ``log_path`` is deleted if it exists and recreated; summaries are written
    to ``log_path + '/train'`` and ``log_path + '/test'``. The best model is
    checkpointed to ``'pre_trained/mod_test.ckpt'`` and restored before the
    final test evaluation.

    :param sparse: if True use ``SpGAT`` with a constant sparse bias tensor,
        otherwise use ``GAT`` with a dense bias placeholder.
    :param epochs: maximum number of epochs.
    :param lr: learning rate passed to ``model.training``.
    :param patience: number of consecutive epochs in which neither validation
        accuracy nor validation loss improves before training stops.
    :param l2_coef: L2 coefficient passed to ``model.training``.
    :param hid_units: list of hidden sizes, passed to ``model.inference``.
    :param n_heads: list of attention-head counts, passed to ``model.inference``.
    :param residual: passed to ``model.inference``.
    :param attention_drop: value fed to both the attention and the feed-forward
        dropout placeholders during training.
    :param edge_attr_directory: passed to ``process.load_data``.
    :param node_features_path: passed to ``process.load_data``.
    :param label_path: passed to ``process.load_data``.
    :param log_path: directory for TensorBoard summaries (wiped on entry).
    :param train_ratio: passed to ``process.load_data``.
    """
    nb_epochs = epochs

    if tf.gfile.Exists(log_path):
        tf.gfile.DeleteRecursively(log_path)
    tf.gfile.MakeDirs(log_path)

    checkpt_file = 'pre_trained/mod_test.ckpt'

    dataset = 'know'

    # training params
    batch_size = 1
    nonlinearity = tf.nn.elu
    model = SpGAT if sparse else GAT
    nhood = 1
    in_drop = attention_drop

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    (adjs, features, y_train, y_val, y_test,
     train_mask, val_mask, test_mask, edge_attr_name) = process.load_data(
        edge_attr_directory, node_features_path, label_path, train_ratio)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Prepend a length-1 axis so the arrays match the (batch_size, ...) placeholders.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        # sparse (indices, values, dense_shape), the graph topology (unweighted)
        biases = process.preprocess_adj_bias(adjs[0], to_unweighted=True)
        adjs = [
            tf.SparseTensor(*process.preprocess_adj_bias(adj, to_unweighted=False))
            for adj in adjs
        ]
    else:
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)

    print(biases)

    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # The sparse bias is baked into the graph, so it is never fed.
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    train_op = model.training(loss, lr, l2_coef)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    def _feed(step, labels, mask, training, attn_rate, ffd_rate):
        """Build the feed dict for batch ``step``."""
        lo = step * batch_size
        hi = lo + batch_size
        feed = {
            ftr_in: features[lo:hi],
            lbl_in: labels[lo:hi],
            msk_in: mask[lo:hi],
            is_train: training,
            attn_drop: attn_rate,
            ffd_drop: ffd_rate,
        }
        if not sparse:
            # The dense bias is a placeholder and must be fed on every run.
            feed[bias_in] = biases[lo:hi]
        return feed

    vlss_mn = np.inf
    vacc_mx = 0.0
    vmf1_mx = 0.0
    curr_step = 0
    # Validation metrics of the checkpointed model; None until a checkpoint is written.
    vacc_early_model = None
    vlss_early_model = None

    # The context manager closes the session; no explicit close() is needed.
    with tf.Session() as sess:
        # merge_all() returns None when no summaries were collected.
        merged = tf.summary.merge_all()
        metric_ops = [loss, accuracy, micro_f1]
        if merged is not None:
            metric_ops.append(merged)

        train_summary_writer = tf.summary.FileWriter(log_path + '/train')
        # Receives the validation-pass summaries (directory name kept as-is).
        test_summary_writer = tf.summary.FileWriter(log_path + '/test')
        try:
            sess.run(init_op)

            for epoch in range(nb_epochs):
                train_loss_avg = 0
                train_acc_avg = 0
                train_microf1_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0
                val_microf1_avg = 0

                tr_step = 0
                tr_size = features.shape[0]
                summary = None
                while tr_step * batch_size < tr_size:
                    values = sess.run(
                        metric_ops + [train_op],
                        feed_dict=_feed(tr_step, y_train, train_mask, True,
                                        attention_drop, in_drop))
                    loss_value_tr, acc_tr, micro_f1_tr = values[:3]
                    if merged is not None:
                        summary = values[3]
                    print(loss_value_tr)
                    train_microf1_avg += micro_f1_tr
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1
                if summary is not None:
                    train_summary_writer.add_summary(summary, epoch)

                vl_step = 0
                vl_size = features.shape[0]
                summary = None
                while vl_step * batch_size < vl_size:
                    values = sess.run(
                        metric_ops,
                        feed_dict=_feed(vl_step, y_val, val_mask, False, 0.0, 0.0))
                    loss_value_vl, acc_vl, micro_f1_vl = values[:3]
                    if merged is not None:
                        summary = values[3]
                    val_microf1_avg += micro_f1_vl
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1
                if summary is not None:
                    test_summary_writer.add_summary(summary, epoch)

                val_loss = val_loss_avg / vl_step
                val_acc = val_acc_avg / vl_step
                val_f1 = val_microf1_avg / vl_step

                print(
                    'Training: loss = %.5f, acc = %.5f, micro_f1 = %.5f | Val: loss = %.5f, acc = %.5f, micro_f1 = %.5f'
                    % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                       train_microf1_avg / tr_step, val_loss, val_acc, val_f1))

                if val_acc >= vacc_mx or val_loss <= vlss_mn:
                    # Checkpoint only when both metrics are at least as good as their best.
                    if val_acc >= vacc_mx and val_loss <= vlss_mn:
                        vacc_early_model = val_acc
                        vlss_early_model = val_loss
                        saver.save(sess, checkpt_file)
                    vmf1_mx = np.max((val_f1, vmf1_mx))
                    vacc_mx = np.max((val_acc, vacc_mx))
                    vlss_mn = np.min((val_loss, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn,
                              ', Max accuracy: ', vacc_mx,
                              ', Max Micro-f1', vmf1_mx)
                        print('Early stop model validation loss: ',
                              vlss_early_model, ', accuracy: ',
                              vacc_early_model)
                        break

            saver.restore(sess, checkpt_file)

            ts_size = features.shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            while ts_step * batch_size < ts_size:
                loss_value_ts, acc_ts = sess.run(
                    [loss, accuracy],
                    feed_dict=_feed(ts_step, y_test, test_mask, False, 0.0, 0.0))
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1

            print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
                  ts_acc / ts_step)
        finally:
            # Flush and release the event files even if training raised.
            train_summary_writer.close()
            test_summary_writer.close()
# Load the dataset selected by FLAGS.dataset and preprocess the node features.
(adj, features, y_train, y_val, y_test,
 train_mask, val_mask, test_mask) = process.load_data(FLAGS.dataset)
features, spars = process.preprocess_features(features)

nb_nodes, ft_size = features.shape[0], features.shape[1]
nb_classes = y_train.shape[1]

# Prepend a length-1 axis to the features, labels and masks.
features = features[np.newaxis]
y_train, y_val, y_test = y_train[np.newaxis], y_val[np.newaxis], y_test[np.newaxis]
train_mask, val_mask, test_mask = (train_mask[np.newaxis],
                                   val_mask[np.newaxis],
                                   test_mask[np.newaxis])

# Only the 1-hop adjacency matrix is used for the bias; the earlier variant
# process.preprocess_adj_gat(adj), which looked up 1-hop and 2-hop neighbors,
# is disabled.
biases = process.preprocess_adj_bias(adj)
(adj_hop1_all, adj_hop2_all,
 adj_hop1_neig, adj_hop2_neig,
 N_hop1_target, N_hop2_target) = process.preprocess_bilinear(adj)

t_rep_s = time.time()

# Graph inputs: dense features, then seven sparse inputs, then labels and mask.
ftr_in = tf.placeholder(dtype=tf.float32, shape=(1, nb_nodes, ft_size))
bias_in = tf.sparse_placeholder(dtype=tf.float32)
(adj_hop1_all_in, adj_hop2_all_in,
 adj_hop1_neig_in, adj_hop2_neig_in,
 N_hop1_target_in, N_hop2_target_in) = [
    tf.sparse_placeholder(dtype=tf.float32) for _ in range(6)
]
lbl_in = tf.placeholder(dtype=tf.int32, shape=(1, nb_nodes, nb_classes))
msk_in = tf.placeholder(dtype=tf.int32, shape=(1, nb_nodes))
def _run_gat_experiment(dataset, batch_size, nb_epochs, patience, lr, l2_coef,
                        hid_units, n_heads, residual, nonlinearity, model,
                        checkpt_file, nhood, param_attn_drop, param_ffd_drop,
                        sparse):
    """Train, early-stop, test and log one experiment; prints go to the current stdout."""
    print('Dataset: ' + dataset)
    print('batch_size: ' + str(batch_size))
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))
    print('nhood: ' + str(nhood))
    print('attn_drop: ' + str(param_attn_drop))
    print('ffd_drop: ' + str(param_ffd_drop))

    # load any of the three transductive datasets
    # adj has information about the connections
    # features is a node x features matrix with the features for each node
    # y_... has the label for each class in a node x class matrix
    # mask has 0 or 1 as value in a node vector, this is used to mask train, val and test set
    # so for all nodes all information is calculated, but only nodes masked with 1 are evaluated
    (adj, features, y_train, y_val, y_test,
     train_mask, val_mask, test_mask) = process.load_data(dataset)

    # preprocessing steps
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Prepend a length-1 axis to the features, labels and masks.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    # the adjacency matrix is transformed into a bias that is added.
    # when no connection between nodes exists in the specified neighborhood, the value of the
    # attention between both nodes is set to a big negative value, practically canceling out
    # the effect.
    if sparse:
        # preprocess_adj_bias takes no neighborhood argument, so nhood is not applied here.
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        # Honor the caller's nhood instead of a hard-coded 1.
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            # initialization
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units,
                                 n_heads=n_heads,
                                 residual=residual,
                                 activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        def _feed(step, labels, mask, training, attn_rate, ffd_rate):
            """Build the feed dict for batch ``step``."""
            lo = step * batch_size
            hi = lo + batch_size
            return {
                ftr_in: features[lo:hi],
                bias_in: biases if sparse else biases[lo:hi],
                lbl_in: labels[lo:hi],
                msk_in: mask[lo:hi],
                is_train: training,
                attn_drop: attn_rate,
                ffd_drop: ffd_rate,
            }

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0
        # Validation metrics of the checkpointed model; None until a checkpoint is written.
        vacc_early_model = None
        vlss_early_model = None

        start = time.time()

        # The context manager closes the session; no explicit close() is needed.
        with tf.Session() as sess:
            sess.run(init_op)

            for epoch in range(nb_epochs):
                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

                # training steps
                tr_step = 0
                tr_size = features.shape[0]
                while tr_step * batch_size < tr_size:
                    _, loss_value_tr, acc_tr = sess.run(
                        [train_op, loss, accuracy],
                        feed_dict=_feed(tr_step, y_train, train_mask, True,
                                        param_attn_drop, param_ffd_drop))
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1

                # validation steps
                vl_step = 0
                vl_size = features.shape[0]
                while vl_step * batch_size < vl_size:
                    loss_value_vl, acc_vl = sess.run(
                        [loss, accuracy],
                        feed_dict=_feed(vl_step, y_val, val_mask, False, 0.0, 0.0))
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1

                val_loss = val_loss_avg / vl_step
                val_acc = val_acc_avg / vl_step

                print(
                    'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f'
                    % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                       val_loss, val_acc))

                # patience step
                if val_acc >= vacc_mx or val_loss <= vlss_mn:
                    # Checkpoint only when both metrics are at least as good as their best.
                    if val_acc >= vacc_mx and val_loss <= vlss_mn:
                        vacc_early_model = val_acc
                        vlss_early_model = val_loss
                        saver.save(sess, checkpt_file)
                    vacc_mx = np.max((val_acc, vacc_mx))
                    vlss_mn = np.min((val_loss, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn,
                              ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ',
                              vlss_early_model, ', accuracy: ',
                              vacc_early_model)
                        break

            saver.restore(sess, checkpt_file)

            ts_size = features.shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            # evaluate the restored model on the test set
            while ts_step * batch_size < ts_size:
                loss_value_ts, acc_ts = sess.run(
                    [loss, accuracy],
                    feed_dict=_feed(ts_step, y_test, test_mask, False, 0.0, 0.0))
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1

            print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
                  ts_acc / ts_step, ' at epoch: ', epoch, ' elapsed time',
                  time.time() - start)

            # log information about the training
            # NOTE(review): no path separator is inserted after dirname(), so the file is
            # '<dirname>log.csv'; kept unchanged so existing logs are still found.
            log_file = os.path.dirname(checkpt_file) + 'log.csv'
            if os.path.isfile(log_file):
                print('loading existing log')
                df = pd.read_csv(log_file, index_col=['run'])
                print('log: ' + str(df))
            else:
                print('Creating new log')
                df = pd.DataFrame(columns=tracking_params + result_cols)

            log = dict(
                zip(tracking_params + result_cols, [
                    dataset, lr, l2_coef, hid_units, n_heads, residual,
                    str(nonlinearity).split(' ')[1], param_attn_drop,
                    param_ffd_drop, nhood
                ] + [
                    epoch,
                    time.time() - start, vlss_mn, vacc_mx, ts_loss / ts_step,
                    ts_acc / ts_step
                ]))

            print('Adding entry: ' + str(log))

            # DataFrame.append was removed in pandas 2.0; concat adds the same single row.
            df = pd.concat([df, pd.DataFrame([log])], ignore_index=True)
            print('saving logs')
            df.to_csv(log_file, index_label='run')
            print('log save succesfull')


def run_gat(dataset, batch_size, nb_epochs, patience, lr, l2_coef, hid_units,
            n_heads, residual, nonlinearity, model, checkpt_file, nhood,
            param_attn_drop=0.6, param_ffd_drop=0.6, sparse=False):
    '''
    Function that runs all the experiments.

    While it runs, standard output is redirected to ``dirname(checkpt_file) + 'out.txt'``
    (appended to if the file already exists); stdout is restored and the file closed
    afterwards, even if the experiment raises. A result row is added to
    ``dirname(checkpt_file) + 'log.csv'``.

    :param dataset: The string name of the dataset.
    :param batch_size: Number of samples per batch (the original author notes it has to be
        one for sparse execution).
    :param nb_epochs: Maximum number of epochs that the method runs.
    :param patience: The number of consecutive epochs in which neither validation accuracy
        nor validation loss improves before training stops.
    :param lr: Learning rate.
    :param l2_coef: The L2 regularization strength.
    :param hid_units: List. Number of features the respective layer produces from the input
        features.
    :param n_heads: List. Number of entries is the number of layers. The elements value is
        the number of attention heads.
    :param residual: Whether or not to use residual connections in the hidden layers.
    :param nonlinearity: tensorflow function for non-linearity
    :param model: Model class providing ``inference``, ``masked_softmax_cross_entropy``,
        ``masked_accuracy`` and ``training``.
    :param checkpt_file: Checkpoint path; its directory name is also the prefix of the
        output and log files.
    :param nhood: The neighborhood to consider. One for direct neighborhood and two for
        neighbors of neighbors. Only applied on the dense path (``sparse=False``).
    :param param_attn_drop: Value fed to the attention dropout placeholder during training.
    :param param_ffd_drop: Value fed to the feed-forward dropout placeholder during training.
    :param sparse: If True, the model has to be SpGAT
    :return: None. Prints and logs results.
    '''
    # Workaround noted by the original author for running on GPU (disabled): create a
    # tensorflow.compat.v1 InteractiveSession from a ConfigProto with
    # gpu_options.allow_growth = True before running.
    import sys

    # NOTE(review): no path separator is inserted after dirname(), so the file is
    # '<dirname>out.txt'; kept unchanged so existing output files are still found.
    out_path = os.path.dirname(checkpt_file) + 'out.txt'
    appending = os.path.isfile(out_path)

    # redirect output to file
    orig_stdout = sys.stdout
    with open(out_path, 'a' if appending else 'w') as f:
        sys.stdout = f
        try:
            if appending:
                # Visually separate this run from the previous one.
                print('\n\n\n\n')
            _run_gat_experiment(
                dataset, batch_size, nb_epochs, patience, lr, l2_coef,
                hid_units, n_heads, residual, nonlinearity, model,
                checkpt_file, nhood, param_attn_drop, param_ffd_drop, sparse)
        finally:
            # restore standard output
            sys.stdout = orig_stdout
def train(sparse, hid_units, n_heads, residual, edge_attr_directory,
          node_features_path, label_path, train_ratio, model_path):
    """Restore a trained (Sp)GAT model and print its test loss and accuracy.

    Despite its name, this function performs no training: it rebuilds the
    graph, restores the variables from ``model_path`` and evaluates on the
    test split returned by ``process.load_data``.

    :param sparse: if True use ``SpGAT`` with a constant sparse bias tensor,
        otherwise use ``GAT`` with a dense bias placeholder.
    :param hid_units: list of hidden sizes, passed to ``model.inference``.
    :param n_heads: list of attention-head counts, passed to ``model.inference``.
    :param residual: passed to ``model.inference``.
    :param edge_attr_directory: passed to ``process.load_data``.
    :param node_features_path: passed to ``process.load_data``.
    :param label_path: passed to ``process.load_data``.
    :param train_ratio: passed to ``process.load_data``.
    :param model_path: checkpoint to restore.
    """
    checkpt_file = model_path

    dataset = 'know'

    # evaluation params
    batch_size = 1
    nonlinearity = tf.nn.elu
    model = SpGAT if sparse else GAT
    nhood = 1

    print('Dataset: ' + dataset)
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    (adjs, features, y_train, y_val, y_test,
     train_mask, val_mask, test_mask, edge_attr_name) = process.load_data(
        edge_attr_directory, node_features_path, label_path, train_ratio)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Prepend a length-1 axis so the arrays match the (batch_size, ...) placeholders.
    features = features[np.newaxis]
    y_test = y_test[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        # sparse (indices, values, dense_shape), the graph topology (unweighted)
        biases = process.preprocess_adj_bias(adjs[0], to_unweighted=True)
        adjs = [
            tf.SparseTensor(*process.preprocess_adj_bias(adj, to_unweighted=False))
            for adj in adjs
        ]
    else:
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)

    print(biases)

    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # The sparse bias is baked into the graph, so it is never fed.
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    # Not fetched below; kept so the constructed graph is unchanged.
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # The context manager closes the session; no explicit close() is needed.
    with tf.Session() as sess:
        sess.run(init_op)
        saver.restore(sess, checkpt_file)

        ts_size = features.shape[0]
        ts_step = 0
        ts_loss = 0.0
        ts_acc = 0.0

        while ts_step * batch_size < ts_size:
            lo = ts_step * batch_size
            hi = lo + batch_size
            feed = {
                ftr_in: features[lo:hi],
                lbl_in: y_test[lo:hi],
                msk_in: test_mask[lo:hi],
                is_train: False,
                attn_drop: 0.0,
                ffd_drop: 0.0,
            }
            if not sparse:
                # The dense bias is a placeholder and must be fed on every run.
                feed[bias_in] = biases[lo:hi]
            loss_value_ts, acc_ts = sess.run([loss, accuracy], feed_dict=feed)
            ts_loss += loss_value_ts
            ts_acc += acc_ts
            ts_step += 1

        print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
              ts_acc / ts_step)
def train():
    """Train the model with early stopping and report test-set metrics.

    The function takes no arguments; it reads its configuration from names
    defined outside this block: ``dataset``, ``train_size``,
    ``validation_size``, ``batch_size``, ``nb_epochs``, ``patience``, ``lr``,
    ``l2_coef``, ``hid_units``, ``n_heads``, ``residual``, ``nonlinearity``,
    ``model``, ``checkpt_file`` and ``args.validate``.

    Each epoch runs one optimisation pass over the training batches and then
    scores the model without dropout, either on the validation split
    (``args.validate`` truthy) or on the training split again.  Whenever an
    epoch is at least as good as the best seen so far on both accuracy and
    loss, the weights are written to ``checkpt_file`` and the test split is
    scored.  Training stops after ``patience`` consecutive epochs without an
    improvement in either metric.

    Returns:
        A ``(test_loss, test_accuracy)`` tuple, each averaged over the test
        batches, measured after restoring the last saved checkpoint.
    """
    sparse = True

    (adj, features, y_train, y_val, y_test,
     train_mask, val_mask, test_mask) = process.load_data(
         dataset, train_size, validation_size)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Prepend an axis so that axis 0 can be sliced per batch below.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32,
                                    shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32,
                                         shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units, n_heads=n_heads,
                                 residual=residual, activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        # Number of entries along the leading axis; every split shares it.
        data_size = features.shape[0]

        def batch_feed(step, labels, mask, training, drop):
            """Build the feed dict for batch number ``step``."""
            lo = step * batch_size
            hi = (step + 1) * batch_size
            return {
                ftr_in: features[lo:hi],
                # The sparse bias is fed whole; only the dense one is sliced.
                bias_in: biases if sparse else biases[lo:hi],
                lbl_in: labels[lo:hi],
                msk_in: mask[lo:hi],
                is_train: training,
                attn_drop: drop,
                ffd_drop: drop,
            }

        def evaluate(sess, labels, mask):
            """Score ``labels``/``mask`` without dropout.

            Returns the summed loss, summed accuracy and the batch count.
            """
            loss_sum = 0.0
            acc_sum = 0.0
            step = 0
            while step * batch_size < data_size:
                loss_value, acc_value = sess.run(
                    [loss, accuracy],
                    feed_dict=batch_feed(step, labels, mask, False, 0.0))
                loss_sum += loss_value
                acc_sum += acc_value
                step += 1
            return loss_sum, acc_sum, step

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0
        # Pre-bound so the early-stop report cannot hit an unbound local when
        # no epoch ever qualified for a checkpoint (e.g. NaN/inf loss).
        vacc_early_model = None
        vlss_early_model = None
        checkpoint_saved = False

        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init_op)

            for epoch in range(nb_epochs):
                # One optimisation pass over the training batches.
                train_loss_avg = 0
                train_acc_avg = 0
                tr_step = 0
                while tr_step * batch_size < data_size:
                    _, loss_value_tr, acc_tr = sess.run(
                        [train_op, loss, accuracy],
                        feed_dict=batch_feed(tr_step, y_train, train_mask,
                                             True, 0.6))
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1

                if args.validate:
                    val_loss_avg, val_acc_avg, vl_step = evaluate(
                        sess, y_val, val_mask)
                else:
                    # Without validation, the training split (scored without
                    # dropout) stands in for model selection.
                    val_loss_avg, val_acc_avg, vl_step = evaluate(
                        sess, y_train, train_mask)

                epoch_val_loss = val_loss_avg / vl_step
                epoch_val_acc = val_acc_avg / vl_step

                print('%d Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                      (epoch, train_loss_avg/tr_step, train_acc_avg/tr_step,
                       epoch_val_loss, epoch_val_acc))

                if epoch_val_acc > vacc_mx or epoch_val_loss < vlss_mn:
                    if epoch_val_acc >= vacc_mx and epoch_val_loss <= vlss_mn:
                        # Best on both metrics: checkpoint and score the test split.
                        vacc_early_model = epoch_val_acc
                        vlss_early_model = epoch_val_loss
                        saver.save(sess, checkpt_file)
                        checkpoint_saved = True

                        ts_loss, ts_acc, ts_step = evaluate(
                            sess, y_test, test_mask)
                        print('Test loss:', ts_loss / ts_step,
                              '; Test accuracy:', ts_acc / ts_step)
                    vacc_mx = np.max((epoch_val_acc, vacc_mx))
                    vlss_mn = np.min((epoch_val_loss, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model)
                        break

            if checkpoint_saved:
                saver.restore(sess, checkpt_file)
            else:
                # Restoring here would fail or load a stale file from an
                # earlier run, so fall back to the in-memory weights.
                print('No checkpoint was saved during training; evaluating the final weights.')

            ts_loss, ts_acc, ts_step = evaluate(sess, y_test, test_mask)
            print('Test loss:', ts_loss/ts_step, '; Test accuracy:', ts_acc/ts_step)

        return ts_loss/ts_step, ts_acc/ts_step
def __init__(self, params):
    """Copy hyper-parameters from ``params`` and set up per-view GAT state.

    Args:
        params: configuration object.  The attributes read here are
            ``num_factors``, ``num_users``, ``num_items``, ``num_doms``,
            ``attn_head_size``, ``reg_lam``, ``reg_w``, ``reg_b``,
            ``initializer``, ``num_views``, ``num_attn_heads``,
            ``num_memory_heads``, ``memory_head_size``, ``proj_keep``,
            ``attention_keep``, ``adjacency_view_matrix``, ``hid_units``,
            ``n_heads``, ``attn_keep`` and ``ffd_keep``.
    """
    self.num_factors = params.num_factors
    self.num_users = params.num_users
    self.num_items = params.num_items
    self.num_doms = params.num_doms
    # attr_dim mirrors the attention head size.
    self.attr_dim = params.attn_head_size
    self.reg_lam = params.reg_lam
    self.reg_w = params.reg_w
    self.reg_b = params.reg_b
    self.initializer = params.initializer
    self.num_views = params.num_views
    self.num_attn_heads = params.num_attn_heads
    self.num_memory_heads = params.num_memory_heads
    self.attn_head_size = params.attn_head_size
    self.memory_head_size = params.memory_head_size
    self.proj_keep = params.proj_keep
    self.attention_keep = params.attention_keep
    self.params = params

    # list initialization ====================================
    # One slot per view; entries other than model/biases/num_nodes stay None
    # here (presumably filled when the graph is built -- not shown in this
    # block, confirm against the rest of the class).
    self.memnet = [None] * self.num_views
    self.mult_views = [None] * self.num_views
    self.item_attr_mat = [None] * self.num_views
    self.user_embeds_view = [None] * self.num_views
    self.proj_item_view = [None] * self.num_views
    self.mhead_item_view_output = [None] * self.num_views
    self.m3_item_view_output = [None] * self.num_views

    # gat ====
    self.model = [None] * self.num_views
    self.biases = [None] * self.num_views
    self.num_nodes = [None] * self.num_views
    self.X_features_view = [None] * self.num_views
    self.bias_in = [None] * self.num_views
    self.logits = [None] * self.num_views
    self.item_view_embeddings = [None] * self.num_views
    self.item_view_embeds = [None] * self.num_views
    self.X_features_item_entity_view = [None] * self.num_views
    # =====================================================

    self.adjacency_view_matrix = params.adjacency_view_matrix
    for view in range(self.num_views):
        view_adjacency = params.adjacency_view_matrix[view]
        self.model[view] = SpGAT()
        self.biases[view] = process.preprocess_adj_bias(view_adjacency)
        # Node count is the first dimension of the view's adjacency matrix.
        self.num_nodes[view] = view_adjacency.shape[0]

    self.out_size = params.num_factors
    self.hid_units = params.hid_units
    self.n_heads = params.n_heads
    self.attn_keep = params.attn_keep
    self.ffd_keep = params.ffd_keep
    self.residual = False
    self.nonlinearity = tf.nn.elu

    self.dense = tf.keras.layers.Dense(self.attn_head_size, use_bias=True,
                                       activation='elu')
    # Floor division: Dense expects an integer unit count, and `/` would
    # produce a float under Python 3.
    self.dense_attn = tf.keras.layers.Dense(self.attn_head_size // 2,
                                            use_bias=True, activation='elu')
    self.dense_w = tf.keras.layers.Dense(1, use_bias=False,
                                         activation='sigmoid')