Example 1: train()
def train():
    """Run one supervised epoch over train_set: optimize train_op on the
    target model's loss and record the policy model's block-selection actions."""
    batch_size = configs["batch_size"]
    log_meter = configs["log_every"]
    total_steps = int(train_set.shape[0] / batch_size)

    train_usage_sample = []

    first, last = data_loader.first_target, data_loader.last_target
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        item_batch = train_set[f:t, :]  # [B, L+1]

        context_batch = item_batch[:, :-1]  # [B, L]
        pos_target = item_batch[:, -1:]  # [B, 1]
        neg_target = np.array([[random_neg(first, last, s[0])]
                               for s in pos_target])  # [B, 1]

        _, loss_out, action = sess.run(
            [train_op, target_model.train_loss, policy_model.actions_train],
            feed_dict={
                policy_model.input: context_batch,
                source_model.input_source_train: context_batch,
                target_model.input_train_pos: pos_target,
                target_model.input_train_neg: neg_target,
            },
        )
        train_usage_sample.extend(np.array(action).tolist())
        if (batch_step + 1) % log_meter == 0:
            logging.info("\t<{:5d}/{:5d}> Loss: {:.4f}".format(
                batch_step + 1, total_steps, loss_out))

    if configs["method"] == "hard":
        summary_block(train_usage_sample, len(configs["dilations"]), "Train")
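
The helper random_neg is called above but not shown. A minimal sketch, assuming it draws a single item id from the inclusive range [first, last] that differs from the positive target (bounds and id conventions are an assumption, not confirmed by the example):

import numpy as np

def random_neg(first, last, pos):
    # Resample until the candidate differs from the positive item `pos`.
    neg = np.random.randint(first, last + 1)
    while neg == pos:
        neg = np.random.randint(first, last + 1)
    return neg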
Example 2: train_rl_off()
def train_rl_off():
    """Run one off-policy epoch: query the policy for hard block-selection
    actions, then run train_finetune with those actions held fixed."""
    batch_size = configs["batch_size"]
    log_meter = configs["log_every"]

    total_steps = int(train_set.shape[0] / batch_size)

    action_nums = len(configs["dilations"])
    first, last = data_loader.first_target, data_loader.last_target

    train_usage_sample = []
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        item_batch = train_set[f:t, :]  # [B, L+1]

        context_batch = item_batch[:, :-1]  # [B, L]

        pos_target = item_batch[:, -1:]  # [B, 1]
        neg_target = np.array([[random_neg(first, last, s[0])]
                               for s in pos_target])

        hard_action = sess.run(
            policy_model.test_action,
            feed_dict={
                policy_model.input: context_batch,
                policy_model.method: np.array(1),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
            },
        )

        _, action, loss = sess.run(
            [
                train_finetune, policy_model.train_action,
                target_model.train_loss
            ],
            feed_dict={
                source_model.input_source_train: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(-1),
                policy_model.sample_action: hard_action,
                target_model.input_train_pos: pos_target,
                target_model.input_train_neg: neg_target,
            },
        )

        train_usage_sample.extend(np.array(action).tolist())
        if (batch_step + 1) % log_meter == 0:
            logging.info("\t<{:5d}/{:5d}> Loss: {:.4f}".format(
                batch_step + 1, total_steps, loss))

    summary_block(train_usage_sample, action_nums, "Train")
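
summary_block is also external to these examples. A plausible sketch, assuming it logs the mean keep-rate of each of the action_nums blocks over the collected per-sample action vectors (the exact reporting format in the repository may differ):

import logging
import numpy as np

def summary_block(usage_sample, action_nums, prefix):
    # usage_sample is a list of [#Blocks] binary keep/skip vectors.
    usage = np.asarray(usage_sample, dtype=np.float32).reshape(-1, action_nums)
    per_block = usage.mean(axis=0)
    logging.info("[%s] block usage: %s", prefix,
                 ", ".join("{:.3f}".format(u) for u in per_block))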
Example 3: evaluate()
def evaluate():
    """Rank the positive target against n_neg sampled negatives for each test
    sequence, log top-5/top-20 metrics, and return MRR@5."""
    batch_size = configs["batch_size"]
    n_neg = configs["n_neg"]

    total_steps = int(test_set.shape[0] / batch_size)
    action_nums = len(configs["dilations"])

    meter = SRSMetric(k_list=[5, 20])
    meter.setup_and_clean()

    test_usage_sample = []
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        batch = test_set[f:t, :]  # [B, L+1]

        context = batch[:, :-1]
        pos_target = batch[:, -1:]
        neg_target = [
            random_negs(l=1, r=data_loader.target_nums, size=n_neg, pos=s[0])
            for s in pos_target
        ]
        target = np.concatenate([neg_target, pos_target], 1)  # [B, n_neg + 1]

        test_probs, action = sess.run(
            [target_model.test_probs, policy_model.test_action],
            feed_dict={
                source_model.input_source_test: context,
                policy_model.input: context,
                policy_model.method: np.array(1),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
                target_model.input_test: target,
            },
        )
        ground_truth = [[n_neg]] * batch_size  # positive item sits at column n_neg
        meter.submit(test_probs, ground_truth)

        test_usage_sample.extend(np.array(action).tolist())

    summary_block(test_usage_sample, len(configs["dilations"]), "Test")

    meter.calc()
    meter.output_to_logger()

    return meter.mrr[5]
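
random_negs (plural) supplies the n_neg negatives used for ranking. A minimal sketch, assuming ids are drawn from [l, r) and only need to differ from the positive item pos; whether r is inclusive and whether duplicate negatives are allowed depends on the actual data loader:

import numpy as np

def random_negs(l, r, size, pos):
    # Collect `size` item ids in [l, r) that differ from the positive item.
    negs = []
    while len(negs) < size:
        cand = np.random.randint(l, r)
        if cand != pos:
            negs.append(cand)
    return negs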
Example 4: train_rl_on()
def train_rl_on():
    """Run one on-policy RL epoch: score the batch under soft and hard policy
    actions, use the reward difference as the training signal, and update via
    train_rl and train_finetune."""
    batch_size = configs["batch_size"]
    log_meter = configs["log_every"]
    reward_k = configs["reward_k"]
    n_neg = configs["n_neg"]
    gamma = configs["gamma"]

    action_nums = len(configs["dilations"])
    total_steps = int(train_set.shape[0] / batch_size)
    first, last = data_loader.first_target, data_loader.last_target

    train_usage_sample = []
    for batch_step in range(total_steps):
        f, t = batch_step * batch_size, (batch_step + 1) * batch_size
        item_batch = train_set[f:t, :]  # [B, L+1]

        context_batch = item_batch[:, :-1]  # [B, L]

        pos_target = item_batch[:, -1:]  # [B, 1]
        neg_target_train = np.array([[random_neg(first, last, s[0])]
                                     for s in pos_target])

        neg_target_test = [
            random_negs(l=1, r=data_loader.target_nums, size=n_neg, pos=s[0])
            for s in pos_target
        ]
        target = np.concatenate([neg_target_test, pos_target],
                                1)  # [B, n_neg + 1]

        # Soft-action pass (method=0): probs [B, n_neg + 1], actions [B, #Blocks]
        [soft_probs, soft_action] = sess.run(
            [target_model.test_probs, policy_model.test_action],
            feed_dict={
                source_model.input_source_test: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(0),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
                target_model.input_test: target,
            },
        )
        # Hard-action pass (method=1): probs [B, n_neg + 1], actions [B, #Blocks]
        [hard_probs, hard_action] = sess.run(
            [target_model.test_probs, policy_model.test_action],
            feed_dict={
                source_model.input_source_test: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(1),
                policy_model.sample_action: np.ones((batch_size, action_nums)),
                target_model.input_test: target,
            },
        )

        reward_soft = reward_fn(soft_probs,
                                n_neg,
                                soft_action,
                                gamma,
                                k=reward_k)
        reward_hard = reward_fn(hard_probs,
                                n_neg,
                                hard_action,
                                gamma,
                                k=reward_k)
        reward_train = reward_soft - reward_hard

        _, _, action, loss, rl_loss = sess.run(
            [
                train_rl, train_finetune, policy_model.train_action,
                target_model.train_loss, policy_model.rl_loss
            ],
            feed_dict={
                source_model.input_source_train: context_batch,
                policy_model.input: context_batch,
                policy_model.method: np.array(-1),
                policy_model.sample_action: soft_action,
                policy_model.reward: reward_train,
                target_model.input_train_pos: pos_target,
                target_model.input_train_neg: neg_target_train,
            },
        )

        train_usage_sample.extend(np.array(action).tolist())
        if (batch_step + 1) % log_meter == 0:
            logging.info(
                "\t<{:5d}/{:5d}> Loss: {:.4f}, RL-Loss: {:+.4f}, Reward-Avg: {:+.4f}"
                .format(batch_step + 1, total_steps, loss, rl_loss,
                        np.mean(reward_train)))

    summary_block(train_usage_sample, len(configs["dilations"]), "Train")
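
reward_fn is the only piece of the RL step not shown. A plausible sketch, assuming the reward is a hit@k indicator on the positive item (which sits at column n_neg of the score matrix) minus a gamma-weighted penalty on the fraction of blocks the policy keeps; the difference reward_soft - reward_hard above then acts as a self-critical baseline. The real implementation may differ:

import numpy as np

def reward_fn(probs, n_neg, action, gamma, k):
    # probs:  [B, n_neg + 1] scores, positive item in column n_neg
    # action: [B, #Blocks] binary keep/skip decisions
    pos_scores = probs[:, n_neg:n_neg + 1]
    ranks = (probs > pos_scores).sum(axis=1)        # items ranked above the positive
    hit = (ranks < k).astype(np.float32)            # 1.0 if the positive is in the top-k
    usage = np.asarray(action, dtype=np.float32).mean(axis=1)
    return hit - gamma * usage                      # per-sample reward, shape [B]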