# Example #1
def evaluate(sess,
             model,
             placeholders,
             minibatch_iter,
             candidates,
             q_1_dict,
             N_steps,
             N_negs,
             valid_data,
             support,
             feats,
             size=None):
    """Run one validation pass and return (loss, ranks, mrr, elapsed_secs).

    Builds a validation feed dict from ``minibatch_iter``, draws fresh
    negative examples via MCNS ``negative_sampling`` (no warm-started chain),
    and evaluates the model's loss / ranks / MRR in a single session run.

    NOTE(review): relies on a module-level ``args`` rather than a parameter —
    presumably the script's parsed CLI arguments; confirm against the caller.
    """
    started = time.time()
    feed, left_nodes, right_nodes = minibatch_iter.val_feed_dict(
        valid_data, size)
    # Validation always starts the Markov chain from scratch (no warm start).
    negatives = negative_sampling(model, sess, candidates, None, q_1_dict,
                                  N_steps, N_negs, left_nodes, right_nodes,
                                  args, support, feats, placeholders)
    extra = {
        placeholders['batch3']: negatives,
        placeholders['batch4']: size,
        placeholders['feats']: (feats[0], feats[1], feats[2]),
    }
    extra.update({placeholders['support'][i]: s for i, s in enumerate(support)})
    feed.update(extra)
    loss, ranks, mrr = sess.run([model.loss, model.ranks, model.mrr],
                                feed_dict=feed)
    return loss, ranks, mrr, time.time() - started
# Example #2
def train(G, train_data, valid_data, test_data, args):
    """Train the GCN recommender with MCNS negative sampling.

    Builds the TF1 graph and placeholders, then for each epoch runs
    mini-batch updates where negatives are drawn by Metropolis-Hastings
    chains warm-started from the previous batch's samples. Periodically
    validates, early-stops on validation MRR, saves embeddings, and finally
    reports test MRR / Hit@30.

    Args:
        G: graph object consumed by EdgeMinibatchIterator / candidate_choose.
        train_data, valid_data, test_data: dataset splits (paths or loaded
            structures — whatever load_walks / load_adj / Data_Loader expect).
        args: parsed CLI arguments (user_num, item_num, batch_size, epochs,
            patience, learning_rate, dim, save_dir, ...).
    """
    # read data
    nodes_all = args.user_num + args.item_num
    id_map = construct_id_map(nodes_all)
    feats = preprocess_features(nodes_all)
    num_supports = 1
    placeholders = {
        'batch1': tf.placeholder(tf.int32, shape=(None), name='batch1'),
        'batch2': tf.placeholder(tf.int32, shape=(None), name='batch2'),
        'batch3': tf.placeholder(tf.int32, shape=(None), name='batch3'),
        'batch4': tf.placeholder(tf.int32, shape=(None), name='batch4'),
        'feats': tf.sparse_placeholder(tf.float32),
        'support':
        [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'dropout': tf.placeholder_with_default(0., shape=(), name='dropout'),
        'batch_size': tf.placeholder(tf.int32, name='batch_size'),
    }
    context_pairs = load_walks(train_data)
    minibatch = EdgeMinibatchIterator(G,
                                      id_map,
                                      placeholders,
                                      batch_size=args.batch_size,
                                      max_degree=args.max_degree,
                                      context_pairs=context_pairs)

    adjs = load_adj(train_data, nodes_all)
    support = [preprocess_adj(adjs)]
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    print("build model....")
    # Only the ml-100k layout is wired up; anything else is rejected early.
    if args.input == './data/ml-100k/':
        model = GCN(placeholders,
                    input_dim=feats[2][1],
                    embedding_dim=args.dim,
                    lr=args.learning_rate,
                    args=args,
                    logging=True)
    else:
        raise Exception('Error: data cannot evaluated')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess = tf.Session(config=config)
    # tensorboard
    summary_writer = tf.summary.FileWriter("logs/", sess.graph)
    sess.run(init_op, feed_dict={adj_info_ph: minibatch.adj})
    train_adj_info = tf.assign(adj_info, minibatch.adj)

    # training
    t1 = time.time()
    q_1_dict, mask = load_item_pop(train_data)

    # DFS for each node to generate markov chain
    print("generating markov chain by DFS......")
    tt = time.time()
    candidates = candidate_choose(G, mask, args)
    print("time for generating negative examples", time.time() - tt)

    N_steps = 10
    N_negs = 1
    best_mrr = 0
    # BUGFIX: patience must be initialized before the epoch loop; otherwise
    # the first non-improving epoch hits `patience += 1` on an unbound local.
    patience = 0

    for epoch in range(args.epochs):
        print("epoch %d" % epoch)
        data_loader = Data_Loader(args.batch_size)
        data_loader.load_train_data(train_data)
        data_loader.reset_pointer()
        for it in range(data_loader.num_batch):
            batch_data = data_loader.next_batch()
            node1 = [x[0] for x in batch_data]
            node2 = [x[1] for x in batch_data]

            # generate negative examples with MCNS; after the first batch,
            # warm-start the Markov chain from the previous batch's samples
            t0 = time.time()
            if it == 0:
                start_given = None
            else:
                start_given = generate_examples
            generate_examples = negative_sampling(model, sess, candidates,
                                                  start_given, q_1_dict,
                                                  N_steps, N_negs, node1,
                                                  node2, args, support, feats,
                                                  placeholders)

            # update model params

            feed_dict = {
                model.inputs1: node1,
                model.inputs2: node2,
                model.neg_samples: generate_examples,
                model.batch_size: args.batch_size,
                model.number: args.batch_size,
                model.inputs: (feats[0], feats[1], feats[2])
            }
            feed_dict.update({
                placeholders['support'][i]: support[i]
                for i in range(len(support))
            })
            outs = sess.run([
                model.merged_loss, model.merged_mrr, model.loss, model.mrr,
                model.opt_op, model.outputs1, model.outputs2, model.neg_outputs
            ],
                            feed_dict=feed_dict)

            # add_summary for tensorboard show
            if it % args.print_step == 0:
                summary_writer.add_summary(outs[0],
                                           epoch * data_loader.num_batch + it)
                summary_writer.add_summary(outs[1],
                                           epoch * data_loader.num_batch + it)
            # NOTE: at it == 0 this always runs, so val_cost / val_mrr are
            # defined before the print below reads them.
            if it % args.validate_iter == 0:
                t2 = time.time()
                val_cost, ranks, val_mrr, duration = evaluate(
                    sess,
                    model,
                    placeholders,
                    minibatch,
                    candidates,
                    q_1_dict,
                    N_steps,
                    N_negs,
                    valid_data,
                    support,
                    feats,
                    size=args.validate_batch_size)
                print("evaluate time", time.time() - t2)
            if it % args.print_step == 0:
                print("model model", "Iter:", '%04d' % it, "d_loss=",
                      "{:.5f}".format(outs[2]), "d_mrr=",
                      "{:.5f}".format(outs[3]))
                print("validation model", "Iter:", '%04d' % it, "val_loss=",
                      "{:.5f}".format(val_cost), "val_mrr=",
                      "{:.5f}".format(val_mrr))

        # validation for early stopping......
        val_cost, ranks, val_mrr, duration = evaluate(
            sess,
            model,
            placeholders,
            minibatch,
            candidates,
            q_1_dict,
            N_steps,
            N_negs,
            valid_data,
            support,
            feats,
            size=args.validate_batch_size)

        curr_mrr = val_mrr
        if curr_mrr > best_mrr:
            best_mrr = curr_mrr
            patience = 0
        else:
            patience += 1
            if patience > args.patience:
                print("Early Stopping...")
                break

    # save model embeddings for downstream task
    save_embeddings(sess, model, minibatch, args.validate_batch_size, support,
                    feats, placeholders, args.save_dir)
    print("training complete......")

    # test for recommendation......
    # mrr, hit30 = recommend(test_data, args)
    mrr, hit30 = recommend(train_data, valid_data, test_data, args)
    print("test_mrr=", "{:.5f}".format(mrr), "test_hit30=",
          "{:.5f}".format(hit30))