def evaluate(sess, model, placeholders, minibatch_iter, candidates, q_1_dict, N_steps, N_negs, valid_data, support, feats, size=None):
    """Run a single validation pass of the model.

    Builds a validation feed dict from ``minibatch_iter``, draws fresh
    negative examples via MCNS sampling (no warm start: the chain start is
    always ``None`` here), and evaluates loss/ranks/MRR in one session run.

    Returns:
        (loss, ranks, mrr, elapsed_seconds)

    NOTE(review): relies on a module-level ``args`` inside
    ``negative_sampling`` — confirm it is defined at import scope.
    """
    started = time.time()
    feed_dict, node1, node2 = minibatch_iter.val_feed_dict(valid_data, size)

    # Negative sampling is restarted from scratch for every evaluation call.
    negatives = negative_sampling(model, sess, candidates, None, q_1_dict,
                                  N_steps, N_negs, node1, node2, args,
                                  support, feats, placeholders)

    feed_dict[placeholders['batch3']] = negatives
    feed_dict[placeholders['batch4']] = size
    for idx, sup in enumerate(support):
        feed_dict[placeholders['support'][idx]] = sup
    feed_dict[placeholders['feats']] = (feats[0], feats[1], feats[2])

    loss, ranks, mrr = sess.run([model.loss, model.ranks, model.mrr],
                                feed_dict=feed_dict)
    return loss, ranks, mrr, time.time() - started
def train(G, train_data, valid_data, test_data, args):
    """Train the GCN recommender with MCNS negative sampling.

    Builds the TF graph and placeholders, generates Markov-chain negative
    candidates by DFS, then runs epoch/minibatch training with periodic
    validation, epoch-level early stopping on validation MRR, embedding
    export, and a final recommendation test.

    Args:
        G: input graph consumed by the minibatch iterator and DFS sampler.
        train_data / valid_data / test_data: dataset splits.
        args: namespace of hyperparameters (batch_size, epochs, patience,
            learning_rate, dim, print_step, validate_iter, ...).
    """
    # ----- read data and build placeholders -----
    nodes_all = args.user_num + args.item_num
    id_map = construct_id_map(nodes_all)
    feats = preprocess_features(nodes_all)
    num_supports = 1
    placeholders = {
        'batch1': tf.placeholder(tf.int32, shape=(None), name='batch1'),
        'batch2': tf.placeholder(tf.int32, shape=(None), name='batch2'),
        'batch3': tf.placeholder(tf.int32, shape=(None), name='batch3'),
        'batch4': tf.placeholder(tf.int32, shape=(None), name='batch4'),
        'feats': tf.sparse_placeholder(tf.float32),
        'support': [tf.sparse_placeholder(tf.float32)
                    for _ in range(num_supports)],
        'dropout': tf.placeholder_with_default(0., shape=(), name='dropout'),
        'batch_size': tf.placeholder(tf.int32, name='batch_size'),
    }

    context_pairs = load_walks(train_data)
    minibatch = EdgeMinibatchIterator(G, id_map, placeholders,
                                      batch_size=args.batch_size,
                                      max_degree=args.max_degree,
                                      context_pairs=context_pairs)
    adjs = load_adj(train_data, nodes_all)
    support = [preprocess_adj(adjs)]
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    # ----- build model -----
    print("build model....")
    if args.input == './data/ml-100k/':
        model = GCN(placeholders, input_dim=feats[2][1],
                    embedding_dim=args.dim, lr=args.learning_rate,
                    args=args, logging=True)
    else:
        raise Exception('Error: data cannot be evaluated')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess = tf.Session(config=config)
    # tensorboard
    summary_writer = tf.summary.FileWriter("logs/", sess.graph)
    sess.run(init_op, feed_dict={adj_info_ph: minibatch.adj})
    # NOTE(review): this assign op is built but never run via sess.run in
    # this function — kept to preserve the original graph; confirm whether
    # it is needed elsewhere.
    train_adj_info = tf.assign(adj_info, minibatch.adj)

    # ----- negative-candidate generation -----
    q_1_dict, mask = load_item_pop(train_data)
    # DFS for each node to generate markov chain
    print("generating markov chain by DFS......")
    tt = time.time()
    candidates = candidate_choose(G, mask, args)
    print("time for generating negative examples", time.time() - tt)

    N_steps = 10
    N_negs = 1
    best_mrr = 0
    # FIX: patience must be initialized before the epoch loop; previously it
    # was only set inside the improving branch, so a first epoch whose
    # validation MRR did not beat best_mrr raised UnboundLocalError.
    patience = 0

    # ----- training loop -----
    for epoch in range(args.epochs):
        print("epoch %d" % epoch)
        data_loader = Data_Loader(args.batch_size)
        data_loader.load_train_data(train_data)
        data_loader.reset_pointer()
        for it in range(data_loader.num_batch):
            batch_data = data_loader.next_batch()
            node1 = [x[0] for x in batch_data]
            node2 = [x[1] for x in batch_data]
            # generate negative examples with MCNS; after the first batch,
            # warm-start the Markov chain from the previous batch's samples
            if it == 0:
                start_given = None
            else:
                start_given = generate_examples
            generate_examples = negative_sampling(model, sess, candidates,
                                                  start_given, q_1_dict,
                                                  N_steps, N_negs, node1,
                                                  node2, args, support,
                                                  feats, placeholders)
            # update model params
            feed_dict = {
                model.inputs1: node1,
                model.inputs2: node2,
                model.neg_samples: generate_examples,
                model.batch_size: args.batch_size,
                model.number: args.batch_size,
                model.inputs: (feats[0], feats[1], feats[2])
            }
            feed_dict.update({
                placeholders['support'][i]: support[i]
                for i in range(len(support))
            })
            outs = sess.run([
                model.merged_loss, model.merged_mrr, model.loss, model.mrr,
                model.opt_op, model.outputs1, model.outputs2,
                model.neg_outputs
            ], feed_dict=feed_dict)

            # add_summary for tensorboard show
            if it % args.print_step == 0:
                step = epoch * data_loader.num_batch + it
                summary_writer.add_summary(outs[0], step)
                summary_writer.add_summary(outs[1], step)
            if it % args.validate_iter == 0:
                t2 = time.time()
                val_cost, ranks, val_mrr, duration = evaluate(
                    sess, model, placeholders, minibatch, candidates,
                    q_1_dict, N_steps, N_negs, valid_data, support, feats,
                    size=args.validate_batch_size)
                print("evaluate time", time.time() - t2)
            if it % args.print_step == 0:
                print("model model", "Iter:", '%04d' % it,
                      "d_loss=", "{:.5f}".format(outs[2]),
                      "d_mrr=", "{:.5f}".format(outs[3]))
                print("validation model", "Iter:", '%04d' % it,
                      "val_loss=", "{:.5f}".format(val_cost),
                      "val_mrr=", "{:.5f}".format(val_mrr))

        # validation for early stopping......
        val_cost, ranks, val_mrr, duration = evaluate(
            sess, model, placeholders, minibatch, candidates, q_1_dict,
            N_steps, N_negs, valid_data, support, feats,
            size=args.validate_batch_size)
        curr_mrr = val_mrr
        if curr_mrr > best_mrr:
            best_mrr = curr_mrr
            patience = 0
        else:
            patience += 1
            if patience > args.patience:
                print("Early Stopping...")
                break

    # save model embeddings for downstream task
    save_embeddings(sess, model, minibatch, args.validate_batch_size,
                    support, feats, placeholders, args.save_dir)
    print("training complete......")

    # test for recommendation......
    mrr, hit30 = recommend(train_data, valid_data, test_data, args)
    print("test_mrr=", "{:.5f}".format(mrr),
          "test_hit30=", "{:.5f}".format(hit30))