Example #1
def main(argv):
    # if len(argv) > 1:
    #   raise app.UsageError('Too many command-line arguments.')
    #
    # runner_lib.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
    seed = 0
    slate_size = 2
    np.random.seed(seed)
    env_config = {
        'num_candidates': 5,
        'slate_size': slate_size,
        'resample_documents': True,
        'seed': seed,
    }
    tmp_base_dir = 'C:/Users/Moham/PycharmProjects/ExpertDrivenRec/testing/'

    runner = runner_lib.TrainRunner(
        base_dir=tmp_base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        episode_log_file='',
        max_training_steps=50,
        num_iterations=10)
    runner.run_experiment()

    runner = runner_lib.EvalRunner(
        base_dir=tmp_base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        max_eval_episodes=5,
        test_mode=True)
    runner.run_experiment()
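
This snippet assumes the standard RecSim imports and a project-specific create_agent factory. A minimal sketch of the missing preamble, with a placeholder factory that mirrors the RecSim tutorial (the original project may wire up a different agent):

import numpy as np

from recsim.agents import full_slate_q_agent
from recsim.environments import interest_evolution
from recsim.simulator import runner_lib


def create_agent(sess, environment, eval_mode, summary_writer=None):
    # Placeholder only: build a full-slate Q agent on top of the
    # environment's observation and action spaces.
    kwargs = {
        'observation_space': environment.observation_space,
        'action_space': environment.action_space,
        'summary_writer': summary_writer,
        'eval_mode': eval_mode,
    }
    return full_slate_q_agent.FullSlateQAgent(sess, **kwargs)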
Example #2
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    runner_lib.load_gin_configs(FLAGS.gin_files, FLAGS.gin_bindings)
    seed = 0
    slate_size = 2
    np.random.seed(seed)
    env_config = {
        'num_candidates': 5,
        'slate_size': slate_size,
        'resample_documents': True,
        'seed': seed,
    }

    runner = runner_lib.TrainRunner(
        base_dir=FLAGS.base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        episode_log_file=FLAGS.episode_log_file,
        max_training_steps=50,
        num_iterations=10)
    runner.run_experiment()

    runner = runner_lib.EvalRunner(
        base_dir=FLAGS.base_dir,
        create_agent_fn=create_agent,
        env=interest_evolution.create_environment(env_config),
        max_eval_episodes=5,
        test_mode=True)
    runner.run_experiment()
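
This variant reads its output directory and gin configuration from absl flags rather than hard-coding them. The flag names come from the snippet itself; the declarations it assumes look roughly like this (defaults and help strings are guesses):

from absl import app, flags

FLAGS = flags.FLAGS

flags.DEFINE_string('base_dir', None,
                    'Base directory for writing training and eval output.')
flags.DEFINE_string('episode_log_file', '',
                    'Optional file for logging episodes.')
flags.DEFINE_multi_string('gin_files', [],
                          'Paths to gin configuration files.')
flags.DEFINE_multi_string('gin_bindings', [],
                          'Gin bindings to override values in the config files.')

if __name__ == '__main__':
    app.run(main)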
Example #3
    def test_recsim_interest_evolution(self):
        num_candidate = 10
        env_config = {
            "num_candidates": num_candidate,
            "slate_size": 3,
            "resample_documents": False,
            "seed": 1,
        }
        env = interest_evolution.create_environment(env_config)
        env = ValueWrapper(env, ValueMode.INNER_PROD)
        obs_preprocessor = make_default_obs_preprocessor(env)
        obs = env.reset()
        state = obs_preprocessor(obs)
        self.assertFalse(state.has_float_features_only)
        self.assertEqual(state.float_features.shape, (1, obs["user"].shape[0]))
        self.assertEqual(state.float_features.dtype, torch.float32)
        self.assertEqual(state.float_features.device, torch.device("cpu"))
        npt.assert_array_almost_equal(obs["user"], state.float_features.squeeze(0))
        doc_float_features = state.candidate_docs.float_features
        self.assertIsNotNone(doc_float_features)
        self.assertEqual(
            doc_float_features.shape, (1, num_candidate, obs["doc"]["0"].shape[0])
        )
        self.assertEqual(doc_float_features.dtype, torch.float32)
        self.assertEqual(doc_float_features.device, torch.device("cpu"))
        for i, v in enumerate(obs["doc"].values()):
            npt.assert_array_almost_equal(v, doc_float_features[0, i])
Example #4
def make_recsim_env(config):
    DEFAULT_ENV_CONFIG = {
        "num_candidates": 10,
        "slate_size": 2,
        "resample_documents": True,
        "seed": 0,
        "convert_to_discrete_action_space": False,
    }
    env_config = DEFAULT_ENV_CONFIG.copy()
    if config:
        env_config.update(config)
    env = interest_evolution.create_environment(env_config)
    env = RecSimResetWrapper(env)
    env = RecSimObservationSpaceWrapper(env)
    if env_config["convert_to_discrete_action_space"]:
        env = MultiDiscreteToDiscreteActionWrapper(env)
    return env
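
A hypothetical call to the factory above; any key not supplied falls back to DEFAULT_ENV_CONFIG:

# Illustrative only: override a couple of defaults and flatten the slate
# action space into a single Discrete action.
env = make_recsim_env({
    "num_candidates": 20,
    "slate_size": 3,
    "convert_to_discrete_action_space": True,
})
obs = env.reset()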
Example #5
    def test_recsim_interest_evolution(self):
        num_candidate = 10
        env_config = {
            "num_candidates": num_candidate,
            "slate_size": 3,
            "resample_documents": False,
            "seed": 1,
        }
        env = interest_evolution.create_environment(env_config)
        replay_buffer, inserted = _create_replay_buffer_and_insert(env)
        batch = replay_buffer.sample_transition_batch_tensor(indices=np.array([0]))
        npt.assert_array_almost_equal(
            inserted[0]["observation"]["user"].astype(np.float32),
            batch.state.squeeze(0),
        )
        npt.assert_array_almost_equal(
            inserted[1]["observation"]["user"], batch.next_state.squeeze(0)
        )
        docs = list(inserted[0]["observation"]["doc"].values())
        next_docs = list(inserted[1]["observation"]["doc"].values())
        for i in range(num_candidate):
            npt.assert_array_equal(docs[i], batch.doc.squeeze(0)[i])
            npt.assert_array_equal(next_docs[i], batch.next_doc.squeeze(0)[i])
        npt.assert_array_equal(inserted[0]["action"], batch.action.squeeze(0))
        npt.assert_array_equal(inserted[1]["action"], batch.next_action.squeeze(0))
        npt.assert_array_equal([0, 0, 0], batch.response_click.squeeze(0))
        npt.assert_array_equal([0, 0, 0], batch.response_cluster_id.squeeze(0))
        npt.assert_array_equal([0, 0, 0], batch.response_liked.squeeze(0))
        npt.assert_array_equal([0.0, 0.0, 0.0], batch.response_quality.squeeze(0))
        npt.assert_array_equal([0.0, 0.0, 0.0], batch.response_watch_time.squeeze(0))
        resp = inserted[1]["observation"]["response"]
        for i in range(env_config["slate_size"]):
            npt.assert_array_equal(
                resp[i]["click"], batch.next_response_click.squeeze(0)[i]
            )
            npt.assert_array_equal(
                resp[i]["cluster_id"], batch.next_response_cluster_id.squeeze(0)[i]
            )
            npt.assert_array_equal(
                resp[i]["liked"], batch.next_response_liked.squeeze(0)[i]
            )
            npt.assert_array_almost_equal(
                resp[i]["quality"], batch.next_response_quality.squeeze(0)[i]
            )
            npt.assert_array_almost_equal(
                resp[i]["watch_time"], batch.next_response_watch_time.squeeze(0)[i]
            )
Example #6
    def make(self) -> gym.Env:
        env_config = {
            "slate_size": self.slate_size,
            "seed": self.initial_seed,
            "num_candidates": self.num_candidates,
            "resample_documents": self.resample_documents,
        }
        if self.is_interest_exploration:
            env = interest_exploration.create_environment(env_config)
            return ValueWrapper(env, lambda user, doc: 0.0)

        if self.single_selection:
            env = interest_evolution.create_environment(env_config)
            return ValueWrapper(env, dot_value_fn)
        else:
            env = create_multiclick_environment(env_config)
            return ValueWrapper(env, multi_selection_value_fn)
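
The value functions referenced above (dot_value_fn, multi_selection_value_fn) are defined elsewhere in the project; a minimal sketch of what they plausibly look like, assuming inner-product scoring of a document against the user's interest vector:

import numpy as np

def dot_value_fn(user, doc):
    # Assumed definition: score a document by the inner product of the
    # user and document feature vectors.
    return np.inner(user, doc)

def multi_selection_value_fn(user, doc):
    # Assumed definition for the multi-click environment: the same score
    # rescaled into [0, 1].
    return (np.inner(user, doc) + 1.0) / 2.0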
Example #7
    def test_create_from_recsim_interest_evolution(self):
        env_config = {
            "num_candidates": 20,
            "slate_size": 3,
            "resample_documents": False,
            "seed": 1,
        }
        env = interest_evolution.create_environment(env_config)
        replay_buffer = ReplayBuffer.create_from_env(env,
                                                     replay_memory_size=100,
                                                     batch_size=10,
                                                     store_log_prob=True)
        obs = env.reset()
        observation = obs["user"]
        action = env.action_space.sample()
        log_prob = -1.0
        doc_features = np.stack(list(obs["doc"].values()), axis=0)

        next_obs, reward, terminal, _env = env.step(action)

        response = next_obs["response"]
        click = np.array([r["click"] for r in response])
        response_quality = np.stack([r["quality"] for r in response], axis=0)
        response_cluster_id = np.array([r["cluster_id"] for r in response])
        response_watch_time = np.stack([r["watch_time"] for r in response],
                                       axis=0)
        response_liked = np.array([r["liked"] for r in response])
        replay_buffer.add(
            observation,
            action,
            reward,
            terminal,
            mdp_id=0,
            sequence_number=0,
            doc=doc_features,
            response_click=click,
            response_cluster_id=response_cluster_id,
            response_quality=response_quality,
            response_liked=response_liked,
            response_watch_time=response_watch_time,
            log_prob=log_prob,
        )
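
After the add() call, the stored transition can be read back the same way Example #5 does. A sketch (whether index 0 is immediately sampleable depends on the buffer's update horizon and stack size):

# Read the single stored transition back out (mirrors Example #5).
batch = replay_buffer.sample_transition_batch_tensor(indices=np.array([0]))
npt.assert_array_almost_equal(observation, batch.state.squeeze(0))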
Example #8
    env_config = {
        'num_candidates': num_candidates,
        'slate_size': slate_size,
        'resample_documents': True,
        'seed': 0,
    }

    env_config1 = {
        'num_candidates': num_candidates,
        'slate_size': slate_size,
        'resample_documents': True,
        'seed': 0,
        'reward_function': clicked_quality_reward
    }
    # User simulation environment: the interest evolution model presented in the SlateQ paper
    recsim_gym_env = interest_evolution.create_environment(env_config1)

    results_f = []

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        #agent = full_slate_q_agent.FullSlateQAgent(sess,
        #recsim_gym_env.observation_space, recsim_gym_env.action_space)

        agent = greedy_pctr_agent.GreedyPCTRAgent(
            sess, recsim_gym_env.observation_space,
            recsim_gym_env.action_space)
        #agent = cluster_bandit_agent.ClusterBanditAgent(sess,recsim_gym_env.observation_space,recsim_gym_env.action_space)

        for i in range(10):

            steps_f, watch, time_f, q, q_vid, w_vid = evalRun_one_episode(