Code example #1
import numpy as np

# `Bleu`, `samples_no_padding`, and `_compute_rl_rewards_01` are helpers
# defined elsewhere in this code base and are assumed to be in scope here.


def _get_rewards_01(config, data_loader, x_fake_for_rewards, eof_code, sess,
                    first, all_bleu_metrics):
    batch_size = config['batch_size']
    gan_type = config['gan_type']
    seq_len = config['seq_len']
    vocab_size = config['vocab_size']
    rl_bleu_ref_count = data_loader.num_batch * batch_size  # the whole training set (3000 references)
    # rl_n_grams = 4
    rl_mc_samples = 1      # number of Monte Carlo samples averaged per reward
    gamma_discount = 0.9   # per-step discount applied to the BLEU-based reward

    rewards = np.zeros((batch_size, seq_len), np.float32)

    if first:
        # Build the BLEU scorers only once, on the first call.
        bleu_metric_2 = list()
        bleu_metric_3 = list()
        bleu_metric_4 = list()
        bleu_metric_5 = list()

        # train_refs = data_loader.random_some(rl_bleu_ref_count, seq_len+1)
        train_refs = data_loader.get_as_lol_no_padding()
        # np_train_refs = np.array(train_refs)

        # For each n-gram order, build one scorer per prefix length t so that
        # partial sequences of length t can be scored against references
        # truncated to the same length.
        for t in range(2, seq_len + 1):
            # train_refs = data_loader.random_some(rl_bleu_ref_count, t)
            # bleu_metric_2.append(Bleu.from_references_indices(2, train_refs))
            bleu_metric_2.append(
                Bleu.from_references_indices(2, [l[:t] for l in train_refs]))

        for t in range(3, seq_len + 1):
            # train_refs = data_loader.random_some(rl_bleu_ref_count, t)
            bleu_metric_3.append(
                Bleu.from_references_indices(3, [l[:t] for l in train_refs]))

        for t in range(4, seq_len + 1):
            # train_refs = data_loader.random_some(rl_bleu_ref_count, t)
            bleu_metric_4.append(
                Bleu.from_references_indices(4, [l[:t] for l in train_refs]))

        for t in range(5, seq_len + 1):
            # train_refs = data_loader.random_some(rl_bleu_ref_count, t)
            bleu_metric_5.append(
                Bleu.from_references_indices(5, [l[:t] for l in train_refs]))

        # Collect the BLEU-2 ... BLEU-5 scorers.
        all_bleu_metrics = [
            bleu_metric_2, bleu_metric_3, bleu_metric_4, bleu_metric_5
        ]

        first = False

    for _ in range(rl_mc_samples):
        # samples_for_rewards, _ = self.generator.generate_from_noise(self.sess, batch_size, self.current_tau, Config.args.BATCH_SIZE)
        # Draw a batch of generator samples and strip everything after the
        # end-of-sequence token before scoring.
        samples_for_rewards = sess.run(x_fake_for_rewards)
        gen_seq_list = samples_no_padding(samples_for_rewards, eof_code)
        for b in range(len(gen_seq_list)):
            rewards[b, :] = rewards[b, :] + _compute_rl_rewards_01(
                gen_seq_list[b], all_bleu_metrics, gamma_discount, seq_len)
    # Average the accumulated rewards over the Monte Carlo samples.
    rewards = rewards / (1.0 * rl_mc_samples)
    return samples_for_rewards, rewards, first, all_bleu_metrics
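
For context, here is a minimal sketch of how _get_rewards_01 might be driven from an adversarial training loop. The names adv_steps, g_pg_update, x_sample_ph, and rewards_ph are hypothetical, used only to show how the first / all_bleu_metrics state is threaded between calls; the actual graph and feeds depend on the surrounding model code.

# Usage sketch only; adv_steps, g_pg_update, x_sample_ph and rewards_ph are hypothetical.
first = True
all_bleu_metrics = None
for _ in range(adv_steps):
    samples, rewards, first, all_bleu_metrics = _get_rewards_01(
        config, data_loader, x_fake_for_rewards, eof_code, sess,
        first, all_bleu_metrics)
    # The BLEU scorers are built on the first call and reused afterwards.
    sess.run(g_pg_update, feed_dict={x_sample_ph: samples, rewards_ph: rewards})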
Code example #2
import numpy as np

# `Bleu` and `_compute_rl_rewards_02` are helpers defined elsewhere in this
# code base and are assumed to be in scope here.


def _get_rewards_02(config, data_loader, x_fake_for_rewards, given_num, r_x,
                    r_gen_x, r_gen_x_sample, eof_code, sess, first,
                    all_bleu_metrics):
    # print("Start computing rewards ...")
    batch_size = config['batch_size']
    gan_type = config['gan_type']
    seq_len = config['seq_len']
    vocab_size = config['vocab_size']
    rl_bleu_ref_count = data_loader.num_batch * batch_size  # the whole training set
    # rl_n_grams = 4
    rl_mc_samples = config['mc_samples']
    gamma_discount = 0.5   # per-step discount applied to the BLEU-based reward

    # rewards = np.zeros((batch_size, seq_len), np.float32)

    if first:
        # Build the full-length BLEU scorers only once, on the first call.
        train_refs = data_loader.get_as_lol_no_padding()

        # train_refs = data_loader.random_some(rl_bleu_ref_count, seq_len + 1)
        bleu_metric_2 = Bleu.from_references_indices(2, train_refs)

        # train_refs = data_loader.random_some(rl_bleu_ref_count, seq_len + 1)
        bleu_metric_3 = Bleu.from_references_indices(3, train_refs)

        # train_refs = data_loader.random_some(rl_bleu_ref_count, seq_len + 1)
        bleu_metric_4 = Bleu.from_references_indices(4, train_refs)

        # train_refs = data_loader.random_some(rl_bleu_ref_count, seq_len + 1)
        bleu_metric_5 = Bleu.from_references_indices(5, train_refs)

        all_bleu_metrics = [
            bleu_metric_2, bleu_metric_3, bleu_metric_4, bleu_metric_5
        ]

        first = False

    rewards = list()
    samples_for_rewards = sess.run(x_fake_for_rewards)

    for i in range(rl_mc_samples):
        for given_num_i in range(1, seq_len):
            # Keep the first `given_num_i` tokens fixed, let the roll-out
            # network complete the rest, and score the completed sequences.
            feed = {r_x: samples_for_rewards, given_num: given_num_i}
            roll_out_samples = sess.run(r_gen_x, feed)
            # feed = {discriminator.input_x: samples}
            # ypred_for_auc = sess.run(discriminator.ypred_for_auc, feed)
            ypred = _compute_rl_rewards_02(roll_out_samples, all_bleu_metrics,
                                           gamma_discount, eof_code)
            # ypred = np.array([item[1] for item in ypred_for_auc])
            if i == 0:
                rewards.append(ypred)
            else:
                rewards[given_num_i - 1] += ypred

        # The reward for the last token comes from the sampled sequences
        # themselves; no roll-out is needed for an already complete sequence.
        # feed = {discriminator.input_x: input_x}
        # ypred_for_auc = sess.run(discriminator.ypred_for_auc, feed)
        # ypred = np.array([item[1] for item in ypred_for_auc])
        ypred = _compute_rl_rewards_02(samples_for_rewards, all_bleu_metrics,
                                       gamma_discount, eof_code)
        if i == 0:
            rewards.append(ypred)
        else:
            rewards[(len(samples_for_rewards[0]) - 1)] += ypred

    # for _ in range(rl_mc_samples):
    #     # samples_for_rewards, _ = self.generator.generate_from_noise(self.sess, batch_size, self.current_tau, Config.args.BATCH_SIZE)
    #     samples_for_rewards = sess.run(x_fake_for_rewards)
    #     for b in range(len(samples_for_rewards)):
    #         rewards[b, :] = rewards[b, :] + _compute_rl_rewards(samples_for_rewards[b], all_bleu_metrics, gamma_discount)
    # rewards = rewards / (1.0 * rl_mc_samples)
    # return samples_for_rewards, rewards

    reward_res = np.transpose(np.array(rewards)) / (
        1.0 * rl_mc_samples)  # batch_size x seq_length
    if config['pg_baseline']:
        # Subtract a constant baseline from the rewards (e.g. 2.0 for the EMNLP data set).
        reward_res -= config['pg_baseline_val']
    # print("Rewards computed.")
    return samples_for_rewards, reward_res, first, all_bleu_metrics
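
As a small sanity check on the final reshaping step (not part of the original code): rewards ends up as a list of seq_len vectors, one per time step, each holding one reward per batch element, so stacking and transposing yields the batch_size x seq_len matrix the generator update expects. The toy sizes below are illustrative only.

import numpy as np

batch_size, seq_len = 4, 3          # toy sizes, for illustration only
# One reward vector per time step, each of shape (batch_size,).
rewards = [np.full(batch_size, t, dtype=np.float32) for t in range(seq_len)]
reward_res = np.transpose(np.array(rewards)) / 1.0
print(reward_res.shape)             # (4, 3): batch_size x seq_length
print(reward_res[0])                # [0. 1. 2.]: one reward per time step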