Example 1
import logging
import time

import numpy as np


# sample_top_ks and get_metric are project helpers defined elsewhere.
def test_model(model, sess, test_set, model_args):
    batch_size = model_args["batch_size"]
    total_steps = int(test_set.shape[0] / batch_size)

    mrr_list = {5: [], 20: []}
    hr_list = {5: [], 20: []}
    ndcg_list = {5: [], 20: []}

    time_buffer = []
    for batch_step in range(total_steps):
        test_batch = test_set[batch_step * batch_size:(batch_step + 1) *
                              batch_size, :]

        tic = time.time()
        pred_probs = sess.run(model.probs_test,
                              feed_dict={model.input_test: test_batch})
        toc = time.time()
        time_buffer.append(toc - tic)

        ground_truth = test_batch[:, -1]

        top_5_rank, top_20_rank = sample_top_ks(pred_probs, [5, 20])

        indices_5 = [
            np.argwhere(line == item)
            for line, item in zip(top_5_rank, ground_truth)
        ]
        indices_20 = [
            np.argwhere(line == item)
            for line, item in zip(top_20_rank, ground_truth)
        ]

        mrr5_sub, hr5_sub, ndcg5_sub = get_metric(indices_5)
        mrr20_sub, hr20_sub, ndcg20_sub = get_metric(indices_20)

        mrr_list[5].extend(mrr5_sub)
        mrr_list[20].extend(mrr20_sub)
        hr_list[5].extend(hr5_sub)
        hr_list[20].extend(hr20_sub)
        ndcg_list[5].extend(ndcg5_sub)
        ndcg_list[20].extend(ndcg20_sub)

    logging.info("[Test] Time: {:.3f}s +- {:.3f}s per batch".format(
        np.mean(time_buffer), np.std(time_buffer)))

    ndcg_5, ndcg_20 = np.mean(ndcg_list[5]), np.mean(ndcg_list[20])
    mrr_5, mrr_20 = np.mean(mrr_list[5]), np.mean(mrr_list[20])
    hr_5, hr_20 = np.mean(hr_list[5]), np.mean(hr_list[20])

    logging.info("\t MRR@5: {:.4f},  HIT@5: {:.4f},  NDCG@5: {:.4f}".format(
        mrr_5, hr_5, ndcg_5))
    logging.info("\tMRR@20: {:.4f}, HIT@20: {:.4f}, NDCG@20: {:.4f}".format(
        mrr_20, hr_20, ndcg_20))

    return mrr_5
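
Example 1 relies on two project helpers, sample_top_ks and get_metric, that are not shown on this page. A minimal sketch of what they could look like, assuming pred_probs is a (batch, n_items) score matrix and each metric is computed per sample from the rank of the ground-truth item; the project's actual implementations may differ:

import numpy as np


def sample_top_ks(pred_probs, ks):
    """For each k in ks, return the top-k item ids per row, best first (sketch)."""
    order = np.argsort(-pred_probs, axis=1)  # indices sorted by descending score
    return [order[:, :k] for k in ks]


def get_metric(indices):
    """Per-sample MRR, HIT and NDCG from np.argwhere hits on a top-k list (sketch)."""
    mrr, hr, ndcg = [], [], []
    for hit in indices:
        if hit.size == 0:  # ground-truth item missing from the top-k list
            mrr.append(0.0)
            hr.append(0.0)
            ndcg.append(0.0)
        else:
            rank = int(hit[0][0]) + 1  # 1-based rank of the ground-truth item
            mrr.append(1.0 / rank)
            hr.append(1.0)
            ndcg.append(1.0 / np.log2(rank + 1))
    return mrr, hr, ndcg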
Example 2
import logging
import os
import time
from time import localtime, strftime

import numpy as np
import tensorflow as tf


# preset, get_data_and_config, get_proto_config(_with_occupy), PolicyNetGumbelGru,
# NextItNetGumbel, summary_block, sample_top_ks and get_metric are project helpers
# defined elsewhere.
def start(global_args):
    preset(global_args)

    model_args, train_set, test_set = get_data_and_config(global_args)

    ratio = global_args["occupy"]
    if ratio is None:
        gpu_config = get_proto_config()
        logging.info("Auto-growth GPU memory.")
    else:
        gpu_config = get_proto_config_with_occupy(ratio)
        logging.info("{:.1f}% GPU memory occupied.".format(ratio * 100))

    sess = tf.Session(config=gpu_config)

    with tf.variable_scope("policy_net"):
        policy_net = PolicyNetGumbelGru(model_args)
        policy_net.build_policy()

    with tf.variable_scope(tf.get_variable_scope()):
        model = NextItNetGumbel(model_args)
        model.build_train_graph(policy_action=policy_net.action_predict)
        model.build_test_graph(policy_action=policy_net.action_predict)

    variables = tf.contrib.framework.get_variables_to_restore()
    model_variables = [
        v for v in variables if not v.name.startswith("policy_net")
    ]
    policy_variables = [
        v for v in variables if v.name.startswith("policy_net")
    ]

    with tf.variable_scope(tf.get_variable_scope()):
        optimizer_finetune = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_finetune")
        train_model = optimizer_finetune.minimize(model.loss,
                                                  var_list=model_variables)
    with tf.variable_scope("policy_net"):
        optimizer_policy = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_policy")
        train_policy = optimizer_policy.minimize(model.loss,
                                                 var_list=policy_variables)

    init = tf.global_variables_initializer()
    sess.run(init)

    # restore if needed
    if global_args["use_pre"]:
        restore_op = tf.train.Saver(var_list=model_variables)
        restore_op.restore(sess, global_args["pre"])
        sess.run(tf.assign(policy_net.item_embedding, model.item_embedding))
        logging.info(">>>>> Parameters loaded from pre-trained model.")
    else:
        logging.info(">>>>> Training without pre-trained model.")

    logging.info("Start @ {}".format(strftime("%m.%d-%H:%M:%S", localtime())))

    saver = tf.train.Saver(max_to_keep=3)

    batch_size = model_args["batch_size"]
    log_meter = model_args["log_every"]
    total_iters = model_args["iterations"]
    total_steps = int(train_set.shape[0] / batch_size)
    test_steps = int(test_set.shape[0] / batch_size)

    model_save_path = global_args["store_path"]
    model_name = global_args["name"]

    logging.info("Batch size = {}, Batches = {}".format(
        batch_size, total_steps))

    best_mrr_at5 = 0.0

    for idx in range(total_iters):
        logging.info("-" * 30)
        logging.info("Iter: {} / {}".format(idx + 1, total_iters))
        num_iter = 1
        tic = time.time()

        train_usage_sample = []
        for batch_step in range(total_steps):
            train_batch = train_set[batch_step * batch_size:(batch_step + 1) *
                                    batch_size, :]
            _, _, loss, action = sess.run(
                [
                    train_model, train_policy, model.loss,
                    policy_net.action_predict
                ],
                feed_dict={
                    model.input_train: train_batch,
                    policy_net.input: train_batch,
                },
            )
            train_usage_sample.extend(np.array(action).tolist())

            if num_iter % log_meter == 0:
                logging.info("\t{:5d} /{:5d} Loss: {:.3f}".format(
                    batch_step + 1, total_steps, loss))
            num_iter += 1

        summary_block(train_usage_sample, len(model_args["dilations"]),
                      "Train")

        # 1. eval model
        mrr_list = {5: [], 20: []}
        hr_list = {5: [], 20: []}
        ndcg_list = {5: [], 20: []}

        test_usage_sample = []
        for batch_step in range(test_steps):
            test_batch = test_set[batch_step * batch_size:(batch_step + 1) *
                                  batch_size, :]

            action, pred_probs = sess.run(
                [policy_net.action_predict, model.probs],
                feed_dict={
                    model.input_test: test_batch,
                    policy_net.input: test_batch,
                },
            )

            test_usage_sample.extend(np.array(action).tolist())

            ground_truth = test_batch[:, -1]
            top_5_rank, top_20_rank = sample_top_ks(pred_probs, [5, 20])
            indices_5 = [
                np.argwhere(line == item)
                for line, item in zip(top_5_rank, ground_truth)
            ]
            indices_20 = [
                np.argwhere(line == item)
                for line, item in zip(top_20_rank, ground_truth)
            ]

            mrr5_sub, hr5_sub, ndcg5_sub = get_metric(indices_5)
            mrr20_sub, hr20_sub, ndcg20_sub = get_metric(indices_20)

            mrr_list[5].extend(mrr5_sub)
            mrr_list[20].extend(mrr20_sub)
            hr_list[5].extend(hr5_sub)
            hr_list[20].extend(hr20_sub)
            ndcg_list[5].extend(ndcg5_sub)
            ndcg_list[20].extend(ndcg20_sub)

        summary_block(test_usage_sample, len(model_args["dilations"]), "Test")

        ndcg_5, ndcg_20 = np.mean(ndcg_list[5]), np.mean(ndcg_list[20])
        mrr_5, mrr_20 = np.mean(mrr_list[5]), np.mean(mrr_list[20])
        hr_5, hr_20 = np.mean(hr_list[5]), np.mean(hr_list[20])

        logging.info("<Metric>::TestSet")
        logging.info(
            "\t MRR@5: {:.4f},  HIT@5: {:.4f},  NDCG@5: {:.4f}".format(
                mrr_5, hr_5, ndcg_5))
        logging.info(
            "\tMRR@20: {:.4f}, HIT@20: {:.4f}, NDCG@20: {:.4f}".format(
                mrr_20, hr_20, ndcg_20))

        mrr_at5 = mrr_5

        # 2. save model
        if mrr_at5 > best_mrr_at5:
            logging.info(
                ">>>>> Saving model due to better MRR@5: {:.4f} <<<<< ".format(
                    mrr_at5))
            saver.save(
                sess,
                os.path.join(model_save_path,
                             "{}_{}.tfkpt".format(model_name, num_iter)),
            )
            best_mrr_at5 = mrr_at5

        toc = time.time()
        logging.info("Iter: {} / {} finish. Time: {:.2f} min".format(
            idx + 1, total_iters, (toc - tic) / 60))

    sess.close()
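
Both start variants build their tf.Session config through get_proto_config and get_proto_config_with_occupy, which are also project helpers not shown here. A plausible TF1-style sketch, assuming they only wrap tf.ConfigProto (the real versions may set additional options):

import tensorflow as tf


def get_proto_config():
    """Session config that grows GPU memory on demand (sketch)."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return config


def get_proto_config_with_occupy(ratio):
    """Session config that pre-allocates a fixed fraction of GPU memory (sketch)."""
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = ratio
    return config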
Example 3
import logging
import os
import time
from time import localtime, strftime

import numpy as np
import tensorflow as tf


# preset, get_data_and_config, get_proto_config(_with_occupy), PolicyNetGumbelRL,
# NextItNetGumbelRL, summary_block, sample_top_ks, get_metric and reward_fn are
# project helpers defined elsewhere.
def start(global_args):
    preset(global_args)

    model_args, train_set, test_set = get_data_and_config(global_args)

    # ----------------------
    # Part.1 Build Model(s)
    # ----------------------
    ratio = global_args["occupy"]
    if ratio is None:
        gpu_config = get_proto_config()
        logging.info("Auto-growth GPU memory.")
    else:
        gpu_config = get_proto_config_with_occupy(ratio)
        logging.info("{:.1f}% GPU memory occupied.".format(ratio * 100))

    sess = tf.Session(config=gpu_config)

    with tf.variable_scope("policy_net"):
        policy_net = PolicyNetGumbelRL(model_args)
        policy_net.build_policy()

    with tf.variable_scope(tf.get_variable_scope()):
        model = NextItNetGumbelRL(model_args)
        model.build_train_graph(policy_action=policy_net.action_predict)
        model.build_test_graph(policy_action=policy_net.action_predict)

    # step-1, prepare parameter names
    variables = tf.contrib.framework.get_variables_to_restore()
    model_variables = [
        v for v in variables if not v.name.startswith("policy_net")
    ]
    policy_variables = [
        v for v in variables if v.name.startswith("policy_net")
    ]

    # step-2, create optimizer
    with tf.variable_scope(tf.get_variable_scope()):
        optimizer_finetune = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_finetune")
        train_model = optimizer_finetune.minimize(model.loss,
                                                  var_list=model_variables)
        # train_model_rl = optimizer_finetune.minimize(
        #     policy_net.rl_loss, var_list=model_variables
        # )
    with tf.variable_scope("policy_net"):
        optimizer_policy = tf.train.AdamOptimizer(
            learning_rate=model_args["lr"], name="Adam_policy")
        train_policy = optimizer_policy.minimize(model.loss,
                                                 var_list=policy_variables)
        train_policy_rl = optimizer_policy.minimize(policy_net.rl_loss,
                                                    var_list=policy_variables)

    # step-4, restore parameters if needed
    if not global_args["resume"]:
        init = tf.global_variables_initializer()
        sess.run(init)
        start_at = 0
        if global_args["use_pre"]:
            # step-4.1 restore pre-trained parameters
            restore_op = tf.train.Saver(var_list=model_variables)
            restore_op.restore(sess, global_args["pre"])
            # step-4.2 copy embedding to policy-net
            sess.run(tf.assign(policy_net.item_embedding,
                               model.item_embedding))
            logging.info(">>>>> Parameters loaded from pre-trained model.")
        else:
            logging.info(">>>>> Training without pre-trained model.")
    else:
        resume_op = tf.train.Saver()
        resume_op.restore(sess, global_args["resume_path"])
        start_at = global_args["resume_at"]
        logging.info(
            ">>>>> Resume from checkpoint, start at epoch {}".format(start_at))

    # ----------------------
    # Part.2 Train
    # ----------------------
    logging.info("Start @ {}".format(strftime("%m.%d-%H:%M:%S", localtime())))

    saver = tf.train.Saver(max_to_keep=3)

    batch_size = model_args["batch_size"]
    log_meter = model_args["log_every"]
    total_iters = model_args["iter"]
    total_steps = int(train_set.shape[0] / batch_size)
    test_steps = int(test_set.shape[0] / batch_size)

    model_save_path = global_args["store_path"]
    model_name = global_args["name"]

    logging.info("Batch size = {}, Batches = {}".format(
        batch_size, total_steps))

    best_mrr_at5 = 0.0

    action_nums = len(model_args["dilations"])

    for idx in range(start_at, total_iters):
        logging.info("-" * 30)
        if idx < global_args["rl_iter"]:
            rl_str = "OFF"
        else:
            rl_str = " ON"
        logging.info("[RL-{}] Iter: {} / {}".format(rl_str, idx + 1,
                                                    total_iters))
        num_iter = 1
        tic = time.time()

        train_usage_sample = []
        for batch_step in range(total_steps):
            train_batch = train_set[batch_step * batch_size:(batch_step + 1) *
                                    batch_size, :]

            if idx >= global_args["rl_iter"]:
                # 1. soft_result
                # 2. map_result
                # 3. advantage -> reward -> optimize
                [soft_probs, soft_action] = sess.run(
                    [model.probs, policy_net.action_predict],
                    feed_dict={
                        model.input_test: train_batch,
                        policy_net.input: train_batch,
                        policy_net.method: np.array(0),
                        policy_net.sample_action: np.ones(
                            (batch_size, action_nums)),
                    },
                )
                [hard_probs, hard_action] = sess.run(
                    [model.probs, policy_net.action_predict],
                    feed_dict={
                        model.input_test: train_batch,
                        policy_net.input: train_batch,
                        policy_net.method: np.array(1),
                        policy_net.sample_action: np.ones(
                            (batch_size, action_nums)),
                    },
                )
                ground_truth = train_batch[:, -1]
                reward_soft = reward_fn(soft_probs, ground_truth, soft_action,
                                        global_args["gamma"])
                reward_hard = reward_fn(hard_probs, ground_truth, hard_action,
                                        global_args["gamma"])
                reward_train = reward_soft - reward_hard
                _, _, _, action, loss, rl_loss = sess.run(
                    [
                        train_policy_rl,
                        train_policy,
                        train_model,
                        policy_net.action_predict,
                        model.loss,
                        policy_net.rl_loss,
                    ],
                    feed_dict={
                        model.input_train: train_batch,
                        policy_net.input: train_batch,
                        policy_net.method: np.array(-1),
                        policy_net.sample_action: soft_action,
                        policy_net.reward: reward_train,
                    },
                )
                train_usage_sample.extend(np.array(action).tolist())
                if num_iter % log_meter == 0:
                    logging.info(
                        "\t{:5d} /{:5d} Loss: {:.3f}, RL-Loss: {:.3f}, Reward-Avg: {:.3f}"
                        .format(
                            batch_step + 1,
                            total_steps,
                            loss,
                            rl_loss,
                            np.mean(reward_train),
                        ))
                num_iter += 1
            else:
                [hard_action] = sess.run(
                    [policy_net.action_predict],
                    feed_dict={
                        policy_net.method: np.array(1),
                        policy_net.input: train_batch,
                        policy_net.sample_action: np.ones(
                            (batch_size, action_nums)),
                    },
                )
                [_, _, action, loss] = sess.run(
                    [
                        train_model, train_policy, policy_net.action_predict,
                        model.loss
                    ],
                    feed_dict={
                        model.input_train: train_batch,
                        policy_net.input: train_batch,
                        policy_net.sample_action: hard_action,
                        policy_net.method: np.array(-1),
                    },
                )
                train_usage_sample.extend(np.array(action).tolist())

                if num_iter % log_meter == 0:
                    logging.info("\t{:5d} /{:5d} Loss: {:.3f}".format(
                        batch_step + 1, total_steps, loss))
                num_iter += 1

        summary_block(train_usage_sample, len(model_args["dilations"]),
                      "Train")

        # 1. eval model
        mrr_list = {5: [], 20: []}
        hr_list = {5: [], 20: []}
        ndcg_list = {5: [], 20: []}

        test_usage_sample = []
        for batch_step in range(test_steps):
            test_batch = test_set[batch_step * batch_size:(batch_step + 1) *
                                  batch_size, :]
            action, pred_probs = sess.run(
                [policy_net.action_predict, model.probs],
                feed_dict={
                    model.input_test: test_batch,
                    policy_net.input: test_batch,
                    policy_net.method: np.array(1),
                    policy_net.sample_action: np.ones(
                        (batch_size, action_nums)),
                },
            )
            test_usage_sample.extend(np.array(action).tolist())

            ground_truth = test_batch[:, -1]
            top_5_rank, top_20_rank = sample_top_ks(pred_probs, [5, 20])
            indices_5 = [
                np.argwhere(line == item)
                for line, item in zip(top_5_rank, ground_truth)
            ]
            indices_20 = [
                np.argwhere(line == item)
                for line, item in zip(top_20_rank, ground_truth)
            ]

            mrr5_sub, hr5_sub, ndcg5_sub = get_metric(indices_5)
            mrr20_sub, hr20_sub, ndcg20_sub = get_metric(indices_20)

            mrr_list[5].extend(mrr5_sub)
            mrr_list[20].extend(mrr20_sub)
            hr_list[5].extend(hr5_sub)
            hr_list[20].extend(hr20_sub)
            ndcg_list[5].extend(ndcg5_sub)
            ndcg_list[20].extend(ndcg20_sub)

        summary_block(test_usage_sample, len(model_args["dilations"]), "Test")

        ndcg_5, ndcg_20 = np.mean(ndcg_list[5]), np.mean(ndcg_list[20])
        mrr_5, mrr_20 = np.mean(mrr_list[5]), np.mean(mrr_list[20])
        hr_5, hr_20 = np.mean(hr_list[5]), np.mean(hr_list[20])

        logging.info("<Metric>::TestSet")
        logging.info(
            "\t MRR@5: {:.4f},  HIT@5: {:.4f},  NDCG@5: {:.4f}".format(
                mrr_5, hr_5, ndcg_5))
        logging.info(
            "\tMRR@20: {:.4f}, HIT@20: {:.4f}, NDCG@20: {:.4f}".format(
                mrr_20, hr_20, ndcg_20))

        mrr_at5 = mrr_5

        # 2. save model
        if mrr_at5 > best_mrr_at5:
            logging.info(
                ">>>>> Saving model due to better MRR@5: {:.4f} <<<<< ".format(
                    mrr_at5))
            saver.save(
                sess,
                os.path.join(model_save_path,
                             "{}_{}.tfkpt".format(model_name, num_iter)),
            )
            best_mrr_at5 = mrr_at5

        toc = time.time()
        logging.info("Iter: {} / {} finish. Time: {:.2f} min".format(
            idx + 1, total_iters, (toc - tic) / 60))

    sess.close()
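
All keys that Example 3 reads from global_args appear in the code above. A hypothetical driver showing how start might be invoked; the values are placeholders, and any extra keys consumed by preset or get_data_and_config (dataset paths, logging options, and so on) are omitted:

if __name__ == "__main__":
    global_args = {
        "occupy": None,          # None -> auto-growth GPU memory
        "use_pre": False,        # whether to load pre-trained model parameters
        "pre": "",               # checkpoint path used when use_pre is True
        "resume": False,         # resume from an interrupted run
        "resume_path": "",       # checkpoint path used when resume is True
        "resume_at": 0,          # epoch to resume from
        "rl_iter": 10,           # epoch at which RL training switches on
        "gamma": 0.2,            # reward trade-off passed to reward_fn
        "store_path": "./ckpt",  # directory for saved checkpoints
        "name": "nextitnet_rl",  # checkpoint name prefix
    }
    start(global_args)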