Exemple #1
0
def eps_greedy_sampling(td_ct, eval_td_ct, args, conf, summary_writer,
                        epoch_id):
    """Sample item orders on the test set with eps=0 and log the mean reward.

    For every test batch, `td_ct.eps_greedy_sampling` is run with `eps=0`
    to obtain an item order, the batch is reordered accordingly and scored
    by `eval_td_ct.inference`. The per-batch mean of column 1 of the fetched
    'click_prob' is collected, and the mean over all processed batches is
    written as the scalar 'eps_greedy_sampling/reward-<args.eval_exp>'.

    Args:
        td_ct: task object exposing `eps_greedy_sampling(feed)`.
        eval_td_ct: task object exposing `inference(feed)`.
        args: only `args.eval_exp` is read (used in the summary tag).
        conf: provides the test npz list, npz config path, requested npz
            names and the batch size.
        summary_writer: passed through to `add_scalar_summary`.
        epoch_id: step value under which the summary is written.

    Raises:
        StopIteration: if the data generator yields no batch at all.
    """
    dataset = NpzDataset(conf.test_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=False)
    data_gen = dataset.get_data_generator(conf.batch_size)

    list_reward = []
    # The first batch is consumed up front and only serves as the "previous
    # batch" for expand_candidates. The builtin next() works on both
    # Python 2 and Python 3 iterators, unlike the `.next()` method.
    last_batch_data = BatchData(conf, next(data_gen))
    batch_id = 0
    for tensor_dict in data_gen:
        ### eps_greedy_sampling
        batch_data = BatchData(conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())

        fetch_dict = td_ct.eps_greedy_sampling(
            GenRLFeedConvertor.eps_greedy_sampling(batch_data, eps=0))
        sampled_id = np.array(fetch_dict['sampled_id']).reshape([-1])
        # Split the flat sampled ids back into one order per sequence.
        order = sequence_unconcat(sampled_id, batch_data.decode_len())

        ### get reward
        reordered_batch_data = batch_data.get_reordered(order)
        fetch_dict = eval_td_ct.inference(
            EvalFeedConvertor.inference(reordered_batch_data))
        # Column 1 of 'click_prob' is used as the reward
        # (presumably the probability of a click -- confirm with the model).
        reward = np.array(fetch_dict['click_prob'])[:, 1]

        ### logging
        list_reward.append(np.mean(reward))

        # Stop once the batch with index 100 has been evaluated.
        if batch_id == 100:
            break

        # A fresh BatchData is built from the raw tensors rather than reusing
        # `batch_data`, which has been modified by the calls above.
        last_batch_data = BatchData(conf, tensor_dict)
        batch_id += 1

    add_scalar_summary(summary_writer, epoch_id,
                       'eps_greedy_sampling/reward-%s' % args.eval_exp,
                       np.mean(list_reward))
Exemple #2
0
def test(td_ct, args, conf, summary_writer, epoch_id):
    """Run `td_ct.test` over the whole test dataset and log three metrics.

    Writes 'test/auc' (over all flattened predictions), 'test/seq_rmse' and
    'test/seq_correlation' (both accumulated per sequence) under `epoch_id`.
    `args` is accepted but not used.
    """
    test_set = NpzDataset(conf.test_npz_list,
                          conf.npz_config_path,
                          conf.requested_npz_names,
                          if_random_shuffle=False)
    batches = test_set.get_data_generator(conf.batch_size)

    auc = AUCMetrics()
    seq_rmse = SequenceRMSEMetrics()
    seq_corr = SequenceCorrelationMetrics()
    for tensor_dict in batches:
        batch = BatchData(conf, tensor_dict)
        fetched = td_ct.test(EvalFeedConvertor.train_test(batch))
        labels = np.array(fetched['click_id']).flatten()
        # Column 1 of 'click_prob' is the score compared against the labels.
        scores = np.array(fetched['click_prob'])[:, 1]

        # Split the flat arrays back into per-sequence pieces.
        labels_by_seq = sequence_unconcat(labels, batch.seq_lens())
        scores_by_seq = sequence_unconcat(scores, batch.seq_lens())

        auc.add(labels=labels, y_scores=scores)
        for seq_labels, seq_scores in zip(labels_by_seq, scores_by_seq):
            seq_rmse.add(labels=seq_labels, preds=seq_scores)
            seq_corr.add(labels=seq_labels, preds=seq_scores)

    # Each metric is computed right before it is written, in this order.
    reports = (
        ('test/auc', auc.overall_auc),
        ('test/seq_rmse', seq_rmse.overall_rmse),
        ('test/seq_correlation', seq_corr.overall_correlation),
    )
    for tag, compute in reports:
        add_scalar_summary(summary_writer, epoch_id, tag, compute())
Exemple #3
0
def test(ct, args, conf, summary_writer, epoch_id, item_shuffle=False):
    """Evaluate click RMSE and click accuracy on the full test dataset.

    Writes 'test/click_rmse', 'test/click_accuracy' and one 'test/<key>'
    scalar per entry of `click_accu_metric.overall_metrics()` under
    `epoch_id`.

    Args:
        ct: task object; its `test` method is called on every batch.
        args: only `args.test_npz_list` is read.
        conf: provides the npz config path, requested names and batch size.
        summary_writer: passed through to `add_scalar_summary`.
        epoch_id: step value under which the summaries are written.
        item_shuffle: accepted for interface compatibility; currently unused.

    NOTE(review): this function previously called `ct.train(...)`, which
    runs a training step on every test batch. It now calls `ct.test(...)`;
    this assumes `ct.test` fetches 'click_id' and 'click_prob' like the
    training fetch does -- confirm against the task definition.
    NOTE(review): the dataset is opened with `if_random_shuffle=True`, which
    is unusual for evaluation; left unchanged.
    """
    dataset = NpzDataset(args.test_npz_list,
                         conf.npz_config_path,
                         conf.requested_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    click_rmse_metric = RMSEMetrics()
    click_accu_metric = AccuracyMetrics()
    for tensor_dict in threaded_generator(data_gen, capacity=100):
        batch_data = BatchData(conf, tensor_dict)
        # Evaluation only: do not update the model on test data.
        fetch_dict = ct.test(SimFeedConvertor.train_test(batch_data))
        click_id = np.array(fetch_dict['click_id']).flatten()
        click_score = click_prob_2_score(np.array(
            fetch_dict['click_prob'])).flatten()
        click_rmse_metric.add(labels=click_id, preds=click_score)
        click_accu_metric.add(labels=click_id,
                              probs=np.array(fetch_dict['click_prob']))

    add_scalar_summary(summary_writer, epoch_id, 'test/click_rmse',
                       click_rmse_metric.overall_rmse())
    add_scalar_summary(summary_writer, epoch_id, 'test/click_accuracy',
                       click_accu_metric.overall_accuracy())
    for key, value in click_accu_metric.overall_metrics().items():
        add_scalar_summary(summary_writer, epoch_id, 'test/%s' % key, value)
Exemple #4
0
def online_inference_for_test(args, epoch_id, max_steps, ct_sim, dict_gen_ct, summary_writer):
    """
    Do inference on the test set.

    Batches are read once (no shuffle, `one_pass=True`) from
    `args.test_npz_list`. For each batch an order and the simulator
    responses are obtained from `inference_one_batch` with `eps=0`. The
    responses are summed along axis 1 and their mean over all processed
    batches is written as 'inference/test_sim_responses' under `epoch_id`.

    Args:
        args: `args.test_npz_list` and `args.gen_type` are read.
        epoch_id: step value under which the summary is written.
        max_steps: the loop stops when the batch index exceeds this value.
        ct_sim: simulator task; its `alg.model.conf` supplies the data config.
        dict_gen_ct: passed through to `inference_one_batch`.
        summary_writer: passed through to `add_scalar_summary`.

    Raises:
        StopIteration: if the dataset yields no batch at all.
    """
    sim_conf = ct_sim.alg.model.conf
    dataset = NpzDataset(args.test_npz_list,
                         sim_conf.npz_config_path,
                         sim_conf.requested_names,
                         if_random_shuffle=False,
                         one_pass=True)
    data_gen = dataset.get_data_generator(sim_conf.batch_size)
    thread_data_gen = threaded_generator(data_gen, capacity=100)

    list_sim_responses = []
    ### online inference
    # The first batch only serves as the "previous batch" for
    # expand_candidates. The builtin next() works on both Python 2 and
    # Python 3 iterators, unlike the `.next()` method.
    last_batch_data = BatchData(sim_conf, next(thread_data_gen))
    for batch_id, tensor_dict in enumerate(thread_data_gen):
        # NOTE(review): with `>` this processes up to max_steps + 1 batches;
        # kept as-is so reported numbers stay comparable.
        if batch_id > max_steps:
            break
        # Fixed seed per batch while the batch is prepared; seed(None)
        # afterwards re-seeds numpy's global RNG from fresh entropy.
        np.random.seed(batch_id)
        batch_data = BatchData(sim_conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())
        np.random.seed(None)
        del batch_data.tensor_dict['click_id']

        orders, sim_responses = inference_one_batch(args.gen_type, ct_sim, dict_gen_ct, batch_data, eps=0) # , (b, decode_len)

        # NOTE(review): the reordered batch is never used in this function;
        # the call is kept in case get_reordered has side effects -- confirm
        # and drop it if it is pure.
        sim_batch_data = batch_data.get_reordered(orders, sim_responses)
        list_sim_responses.append(sim_responses)
        last_batch_data = BatchData(sim_conf, tensor_dict)

        if batch_id % 100 == 0:
            logging.info('inference test batch %d' % batch_id)

    # np.concatenate raises ValueError on an empty list, so skip the summary
    # when no batch was processed (e.g. the dataset held a single batch).
    if not list_sim_responses:
        logging.warning('inference test produced no batches, summary skipped')
        return

    list_sum_response = np.sum(np.concatenate(list_sim_responses, 0), 1)    # (b,)
    add_scalar_summary(summary_writer, epoch_id, 'inference/test_sim_responses', np.mean(list_sum_response))
Exemple #5
0
def train(ct, args, conf, summary_writer, epoch_id):
    """Run one pass of `ct.train` over the shuffled training data.

    Logs the mean loss since the previous log line every
    `conf.prt_interval` batches and finally writes the mean of the
    per-batch mean losses as 'train/loss' under `epoch_id`.
    """
    train_set = NpzDataset(args.train_npz_list,
                           conf.npz_config_path,
                           conf.requested_names,
                           if_random_shuffle=True)
    batches = threaded_generator(
        train_set.get_data_generator(conf.batch_size), capacity=100)

    window_losses = []   # losses since the last log line
    epoch_losses = []    # per-batch mean loss for the whole pass
    for step, tensor_dict in enumerate(batches):
        feed = SimFeedConvertor.train_test(BatchData(conf, tensor_dict))
        loss = np.array(ct.train(feed)['loss'])
        window_losses.append(loss)
        epoch_losses.append(np.mean(loss))

        if step % conf.prt_interval != 0:
            continue
        logging.info('batch_id:%d loss:%f' % (step, np.mean(window_losses)))
        window_losses = []

    add_scalar_summary(summary_writer, epoch_id, 'train/loss',
                       np.mean(epoch_losses))
Exemple #6
0
def online_inference(args, epoch_id, max_steps, data_gen, ct_sim, dict_gen_ct, summary_writer, if_print=True):
    """
    Do inference for `max_steps` batches.

    One batch is first drawn from `data_gen` to serve as the "previous
    batch"; then `max_steps` further batches are drawn. For each of them an
    order and the simulator responses are obtained from
    `inference_one_batch` with `eps=args.infer_eps`, and the reordered batch
    is appended to the returned replay memory. Batches of size 1 are skipped.

    Args:
        args: `args.gen_type` and `args.infer_eps` are read.
        epoch_id: used for seeding, logging and as the summary step.
        max_steps: number of batches drawn inside the loop.
        data_gen: iterator yielding tensor dicts.
        ct_sim: simulator task; its `alg.model.conf` configures BatchData.
        dict_gen_ct: passed through to `inference_one_batch`.
        summary_writer: passed to `add_scalar_summary` when `if_print` is set.
        if_print: when true, log progress and write the
            'inference/sim_responses' summary.

    Returns:
        list of reordered batches, one per batch that was not skipped.

    Raises:
        StopIteration: if `data_gen` runs out of batches.
    """
    sim_conf = ct_sim.alg.model.conf

    replay_memory = []
    list_sim_responses = []
    ### online inference
    # The builtin next() works on both Python 2 and Python 3 iterators,
    # unlike the `.next()` method.
    last_batch_data = BatchData(sim_conf, next(data_gen))
    for batch_id in range(max_steps):
        # Fixed seed per (epoch, batch) while the batch is prepared;
        # seed(None) afterwards re-seeds numpy's global RNG from fresh entropy.
        np.random.seed(epoch_id * max_steps + batch_id)
        tensor_dict = next(data_gen)
        batch_data = BatchData(sim_conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())
        np.random.seed(None)
        del batch_data.tensor_dict['click_id']

        if batch_data.batch_size() == 1:    # otherwise, rl will crash
            continue

        orders, sim_responses = inference_one_batch(args.gen_type, ct_sim, dict_gen_ct, batch_data, eps=args.infer_eps) # , (b, decode_len)

        # save to replay memory
        sim_batch_data = batch_data.get_reordered(orders, sim_responses)
        replay_memory.append(sim_batch_data)
        list_sim_responses.append(sim_responses)
        last_batch_data = BatchData(sim_conf, tensor_dict)

        if batch_id % 100 == 0 and if_print:
            logging.info('inference epoch %d batch %d' % (epoch_id, batch_id))

    if if_print:
        if list_sim_responses:
            list_sum_response = np.sum(np.concatenate(list_sim_responses, 0), 1)    # (b,)
            add_scalar_summary(summary_writer, epoch_id, 'inference/sim_responses', np.mean(list_sum_response))
        else:
            # np.concatenate raises ValueError on an empty list; this happens
            # when max_steps is 0 or every batch was skipped.
            logging.warning('inference epoch %d produced no batches, summary skipped' % epoch_id)
    return replay_memory
Exemple #7
0
def train(td_ct, args, conf, summary_writer, epoch_id):
    """Run one pass of `td_ct.train` over the shuffled training data.

    Every `conf.prt_interval` batches the mean loss accumulated since the
    previous log line is logged. At the end, the mean of the per-batch mean
    losses is written as 'train/loss' under `epoch_id`. `args` is unused.
    """
    train_set = NpzDataset(conf.train_npz_list,
                           conf.npz_config_path,
                           conf.requested_npz_names,
                           if_random_shuffle=True)
    batches = train_set.get_data_generator(conf.batch_size)

    per_batch_mean = []   # one mean loss per batch, for the epoch summary
    recent_losses = []    # raw losses since the last log line
    for step, tensor_dict in enumerate(batches):
        batch = BatchData(conf, tensor_dict)
        fetched = td_ct.train(GenSLFeedConvertor.train_test(batch))
        loss = np.array(fetched['loss'])
        recent_losses.append(loss)
        per_batch_mean.append(np.mean(loss))

        is_log_step = step % conf.prt_interval == 0
        if is_log_step:
            logging.info('batch_id:%d loss:%f' %
                         (step, np.mean(recent_losses)))
            recent_losses = []

    epoch_loss = np.mean(per_batch_mean)
    add_scalar_summary(summary_writer, epoch_id, 'train/loss', epoch_loss)
Exemple #8
0
def log_train(td_ct, args, conf, summary_writer, replay_memory, epoch_id):
    """Train `td_ct` from logged data, keeping the logged item order.

    For every training batch the items are kept in their original order
    (`np.arange` per sequence) and the batch's 'click_id' values are used as
    the reward. The (batch, reward) pair is appended to `replay_memory`, one
    entry is drawn uniformly at random from `replay_memory`, and `td_ct` is
    trained on it. Every 10 batches the running means of reward, loss and
    the first 'c_Q' entry are written as 'train/rl_reward', 'train/rl_loss'
    and 'train/rl_1st_Q'.

    Args:
        td_ct: task object exposing `train(feed)`.
        args: unused.
        conf: provides the train npz list, npz config path, requested npz
            names and the batch size.
        summary_writer: passed through to `add_scalar_summary`.
        replay_memory: list that is appended to in place and sampled from.
        epoch_id: used to derive the global summary step.

    Raises:
        StopIteration: if the data generator yields no batch at all.
    """
    dataset = NpzDataset(conf.train_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    list_reward = []
    list_loss = []
    list_first_Q = []
    # Assumed number of batches per epoch, used only to turn
    # (epoch_id, batch_id) into a single increasing summary step.
    guessed_batch_num = 11500
    # The first batch is consumed and discarded so the batches seen by the
    # loop are the same as before. The "previous batch" object that used to
    # be built from it (and rebuilt every iteration) was never read, so it is
    # no longer constructed.
    next(data_gen)
    for batch_id, tensor_dict in enumerate(data_gen):
        ### eps_greedy_sampling
        batch_data = BatchData(conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        # Identity order: keep the logged item order of every sequence.
        order = [np.arange(d) for d in batch_data.decode_len()]

        ### get reward
        reordered_batch_data = batch_data.get_reordered(order)
        reordered_batch_data.set_decode_len(batch_data.decode_len())
        reward = batch_data.tensor_dict['click_id'].values

        ### save to replay_memory
        replay_memory.append((reordered_batch_data, reward))

        ### train
        # Train on a uniformly sampled entry, not necessarily the newest one;
        # `reward` is rebound to the sampled entry's reward.
        memory_batch_data, reward = replay_memory[np.random.randint(
            len(replay_memory))]
        feed_dict = GenRLFeedConvertor.train_test(memory_batch_data, reward)
        fetch_dict = td_ct.train(feed_dict)

        ### logging
        list_reward.append(np.mean(reward))
        list_loss.append(np.array(fetch_dict['loss']))
        list_first_Q.append(np.mean(np.array(fetch_dict['c_Q'])[0]))
        if batch_id % 10 == 0:
            global_batch_id = epoch_id * guessed_batch_num + batch_id
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_reward', np.mean(list_reward))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_loss', np.mean(list_loss))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_1st_Q', np.mean(list_first_Q))
            list_reward = []
            list_loss = []
            list_first_Q = []
Exemple #9
0
def train(td_ct, eval_td_ct, args, conf, summary_writer, replay_memory,
          epoch_id):
    """Train `td_ct` with eps-greedy sampling and evaluator-based rewards.

    For every training batch: candidates are expanded with the previous
    batch, `td_ct.eps_greedy_sampling` (eps=0.2) produces an item order, the
    reordered batch is scored by `eval_td_ct.inference`, and column 1 of the
    fetched 'click_prob' is used as the reward. The reordered batch (with
    candidates kept) and its reward are appended to `replay_memory`; one
    entry is then drawn uniformly at random from `replay_memory` and used
    for a `td_ct.train` step. Every 10 batches the running means of reward,
    loss and the first 'c_Q' entry are written as 'train/rl_reward',
    'train/rl_loss' and 'train/rl_1st_Q'.

    Args:
        td_ct: task object exposing `eps_greedy_sampling(feed)` and
            `train(feed)`.
        eval_td_ct: task object exposing `inference(feed)`.
        args: unused.
        conf: provides the train npz list, npz config path, requested npz
            names and the batch size.
        summary_writer: passed through to `add_scalar_summary`.
        replay_memory: list that is appended to in place and sampled from.
        epoch_id: used to derive the global summary step.

    Raises:
        StopIteration: if the data generator yields no batch at all.
    """
    dataset = NpzDataset(conf.train_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    list_reward = []
    list_loss = []
    list_first_Q = []
    # Assumed number of batches per epoch, used only to turn
    # (epoch_id, batch_id) into a single increasing summary step.
    guessed_batch_num = 11500
    # The first batch only serves as the "previous batch" for
    # expand_candidates. The builtin next() works on both Python 2 and
    # Python 3 iterators, unlike the `.next()` method.
    last_batch_data = BatchData(conf, next(data_gen))
    for batch_id, tensor_dict in enumerate(data_gen):
        ### eps_greedy_sampling
        batch_data = BatchData(conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())

        fetch_dict = td_ct.eps_greedy_sampling(
            GenRLFeedConvertor.eps_greedy_sampling(batch_data, eps=0.2))
        sampled_id = np.array(fetch_dict['sampled_id']).reshape([-1])
        # Split the flat sampled ids back into one order per sequence.
        order = sequence_unconcat(sampled_id, batch_data.decode_len())

        ### get reward
        reordered_batch_data = batch_data.get_reordered(order)
        fetch_dict = eval_td_ct.inference(
            EvalFeedConvertor.inference(reordered_batch_data))
        reward = np.array(fetch_dict['click_prob'])[:, 1]

        ### save to replay_memory
        reordered_batch_data2 = batch_data.get_reordered_keep_candidate(order)
        reordered_batch_data2.set_decode_len(batch_data.decode_len())
        replay_memory.append((reordered_batch_data2, reward))

        ### train
        # Train on a uniformly sampled entry, not necessarily the newest one;
        # `reward` is rebound to the sampled entry's reward.
        memory_batch_data, reward = replay_memory[np.random.randint(
            len(replay_memory))]
        feed_dict = GenRLFeedConvertor.train_test(memory_batch_data, reward)
        fetch_dict = td_ct.train(feed_dict)

        ### logging
        list_reward.append(np.mean(reward))
        list_loss.append(np.array(fetch_dict['loss']))
        list_first_Q.append(np.mean(np.array(fetch_dict['c_Q'])[0]))
        if batch_id % 10 == 0:
            global_batch_id = epoch_id * guessed_batch_num + batch_id
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_reward', np.mean(list_reward))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_loss', np.mean(list_loss))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_1st_Q', np.mean(list_first_Q))
            list_reward = []
            list_loss = []
            list_first_Q = []

        # A fresh BatchData is built from the raw tensors rather than reusing
        # `batch_data`, which has been modified by the calls above.
        last_batch_data = BatchData(conf, tensor_dict)
Exemple #10
0
def offline_training(args, epoch_id, replay_memory, dict_gen_ct, summary_writer, if_save=True, env_rl_data_gen=None):
    """
    Do offline train on the replay_memory.

    Which model(s) are trained is selected by `args.gen_type`:

    - 'env', 'env_credit', 'env_rl': train `dict_gen_ct['env']`.
    - 'env_credit': additionally train `dict_gen_ct['credit']` on credits
      generated with `args.credit_type`.
    - 'mc_credit': train `dict_gen_ct['credit']` on 'mc' credits.
    - 'rl': train `dict_gen_ct['rl']`.
    - 'ddpg': train `dict_gen_ct['ddpg']`.
    - 'cf': train `dict_gen_ct['cf']`.
    - 'env_rl': additionally train `dict_gen_ct['rl']` on `replay_memory`
      plus the same number of batches generated by `online_inference`
      with the env model in the simulator role.

    Args:
        args: `gen_type`, `env_item_dropout_rate`, `credit_type` and
            `credit_gamma` are read, depending on the branch taken.
        epoch_id: summary step and model-save id.
        replay_memory: list of batches to train on.
        dict_gen_ct: dict mapping model name to its task object.
        summary_writer: passed through to `add_scalar_summary`.
        if_save: when true, write the loss summary and save the model after
            each trained section.
        env_rl_data_gen: data iterator handed to `online_inference`; only
            used when `args.gen_type == 'env_rl'`.

    The task lookups are done once before each loop instead of inside it, so
    the `save_model` calls no longer fail with an unbound local when
    `replay_memory` is empty.
    """
    ### offline train env model
    if args.gen_type in ['env', 'env_credit', 'env_rl']:
        ct_env = dict_gen_ct['env']
        list_loss = []
        for sim_batch_data in replay_memory:
            fetch_dict = ct_env.train(EnvFeedConvertor.train_test(sim_batch_data, args.env_item_dropout_rate, ct_env.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/env_loss', np.mean(list_loss))
            ct_env.save_model(epoch_id)

    ### offline train credit
    if args.gen_type == 'env_credit':
        if args.credit_type == 'gt_globbase':
            globbase = get_globbase(replay_memory)  # (seq_len,)
            print('globbase', globbase.tolist())
        else:
            globbase = None
        ct_env = dict_gen_ct['env']
        ct_credit = dict_gen_ct['credit']
        list_loss = []
        for sim_batch_data in replay_memory:
            credit = generate_credit_one_batch(ct_env,
                                               sim_batch_data,
                                               credit_type=args.credit_type,
                                               credit_gamma=args.credit_gamma,
                                               globbase=globbase)
            fetch_dict = ct_credit.train(CreditFeedConvertor.train_test(sim_batch_data, credit, ct_credit.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/credit_loss', np.mean(list_loss))
            ct_credit.save_model(epoch_id)

    ### offline train mc_credit
    if args.gen_type == 'mc_credit':
        ct_credit = dict_gen_ct['credit']
        list_loss = []
        for sim_batch_data in replay_memory:
            # No env model is passed for 'mc' credits.
            credit = generate_credit_one_batch(None,
                                               sim_batch_data,
                                               credit_type='mc',
                                               credit_gamma=args.credit_gamma,
                                               globbase=None)
            fetch_dict = ct_credit.train(CreditFeedConvertor.train_test(sim_batch_data, credit, ct_credit.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/credit_loss', np.mean(list_loss))
            ct_credit.save_model(epoch_id)

    ### offline train rl
    if args.gen_type == 'rl':
        ct_rl = dict_gen_ct['rl']
        list_loss = []
        for sim_batch_data in replay_memory:
            fetch_dict = ct_rl.train(RLFeedConvertor.train_test(sim_batch_data, ct_rl.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/rl_loss', np.mean(list_loss))
            ct_rl.save_model(epoch_id)

    ### offline train ddpg
    if args.gen_type == 'ddpg':
        ct_ddpg = dict_gen_ct['ddpg']
        list_actor_loss = []
        list_critic_loss = []
        for batch_id, sim_batch_data in enumerate(replay_memory):
            fetch_dict = ct_ddpg.train(DDPGFeedConvertor.train_test(sim_batch_data, ct_ddpg.alg.model.conf))
            list_actor_loss.append(np.array(fetch_dict['actor_loss']))
            list_critic_loss.append(np.array(fetch_dict['critic_loss']))
            if batch_id % 100 == 0:
                print(epoch_id, batch_id, 'train/ddpg_actor_loss', np.mean(np.array(fetch_dict['actor_loss'])))
                print(epoch_id, batch_id, 'train/ddpg_critic_loss', np.mean(np.array(fetch_dict['critic_loss'])))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/ddpg_total_loss', np.mean(list_actor_loss) + np.mean(list_critic_loss))
            add_scalar_summary(summary_writer, epoch_id, 'train/ddpg_actor_loss', np.mean(list_actor_loss))
            add_scalar_summary(summary_writer, epoch_id, 'train/ddpg_critic_loss', np.mean(list_critic_loss))
            ct_ddpg.save_model(epoch_id)

    ### offline train cf model
    if args.gen_type == 'cf':
        ct_cf = dict_gen_ct['cf']
        list_loss = []
        for sim_batch_data in replay_memory:
            fetch_dict = ct_cf.train(CFFeedConvertor.train_test(sim_batch_data, ct_cf.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/cf_loss', np.mean(list_loss))
            ct_cf.save_model(epoch_id)

    ### offline train rl with additional data from env
    if args.gen_type == 'env_rl':
        ct_env = dict_gen_ct['env']
        ct_rl = dict_gen_ct['rl']
        # Generate as many extra batches as the replay memory holds, passing
        # the env model where online_inference expects the simulator.
        # No summary is written there (if_print=False, writer is None).
        max_env_train_steps = len(replay_memory)
        env_replay_memory = online_inference(args, 0, max_env_train_steps, env_rl_data_gen, ct_env, dict_gen_ct, None, if_print=False)
        list_loss = []
        for sim_batch_data in replay_memory + env_replay_memory:
            fetch_dict = ct_rl.train(RLFeedConvertor.train_test(sim_batch_data, ct_rl.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/rl_loss', np.mean(list_loss))
            ct_rl.save_model(epoch_id)
Exemple #11
0
                val_loss = compute_loss_fn(val_network_output,
                                           val_item["bboxes_preprocessed"],
                                           val_item["labels_preprocessed"])
                val_loss_sum += val_loss

            train_loss = train_loss_sum / config["train"]["val_iter"]
            val_loss = val_loss_sum / config["train"]["val_batch"]

            utils.add_item_summary(train_item, train_network_output,
                                   train_summary_writer, anchors,
                                   config["dataset"]["num_classes"])
            utils.add_item_summary(val_item, val_network_output,
                                   val_summary_writer, anchors,
                                   config["dataset"]["num_classes"])

            utils.add_scalar_summary([train_loss], ["train_loss"],
                                     train_summary_writer)
            utils.add_scalar_summary([val_loss], ["val_loss"],
                                     val_summary_writer)

            print("Loss at step {:04d}: train loss: {:.3f}, val loss: {:3f}".
                  format(train_index, train_loss, val_loss))

            train_loss_sum = 0

        if train_index != 0 and train_index % config["test"]["test_iter"] == 0:
            for test_index, test_item in enumerate(test_ds):
                if (config["test"]["test_batch"] is not None
                        and test_index % config["test"]["test_batch"]):
                    break
                test_network_output = od_model(test_item["image"],
                                               training=False)
Exemple #12
0
            train_c_loss = train_c_loss_sum / config["train"]["val_iter"]
            train_r_loss = train_r_loss_sum / config["train"]["val_iter"]

            val_loss = val_loss_sum / config["train"]["val_batch"]
            val_c_loss = val_c_loss_sum / config["train"]["val_batch"]
            val_r_loss = val_r_loss_sum / config["train"]["val_batch"]

            utils.add_item_summary(train_item, train_network_output,
                                   train_summary_writer, anchors,
                                   config["dataset"]["num_classes"])
            utils.add_item_summary(val_item, val_network_output,
                                   val_summary_writer, anchors,
                                   config["dataset"]["num_classes"])

            utils.add_scalar_summary(
                [train_loss, train_c_loss, train_r_loss],
                ["loss", "classificaition_loss", "regression_loss"],
                train_summary_writer)

            utils.add_scalar_summary(
                [val_loss, val_c_loss, val_r_loss],
                ["loss", "classificaition_loss", "regression_loss"],
                val_summary_writer)

            print("Loss at step {:04d}: train loss: {:.3f}, val loss: {:3f}".
                  format(train_index, train_loss, val_loss))

            train_loss_sum = 0
            train_c_loss_sum = 0
            train_r_loss_sum = 0

        if train_index != 0 and train_index % config["test"]["test_iter"] == 0: