예제 #1
0
 def setUp(self):
     """Build the interest-exploration environment fixture used by the tests.

     Stores the topic count on ``self._num_topics`` and the environment
     (20 candidates, slate size 2, no document resampling, seed 1) on
     ``self._env``.
     """
     super(InterestExplorationTest, self).setUp()
     self._num_topics = 2
     self._env = interest_exploration.create_environment({
         'num_candidates': 20,
         'slate_size': 2,
         'resample_documents': False,
         'seed': 1,
     })
예제 #2
0
 def test_interest_exploration_can_run_with_resampling(self):
   """Runs the test simulation on a wrapped env with document resampling on."""
   recsim_env = interest_exploration.create_environment({
       'num_candidates': 5,
       'slate_size': 2,
       'resample_documents': True,
       'seed': 100,
   })
   wrapper_params = recsim_wrapper.Params(recsim_env=recsim_env)
   wrapped_env = recsim_wrapper.RecsimWrapper(wrapper_params)
   test_util.run_test_simulation(env=wrapped_env, stackelberg=True)
예제 #3
0
    def make(self) -> gym.Env:
        """Create the configured RecSim environment wrapped in ``ValueWrapper``.

        The environment config is assembled from ``slate_size``,
        ``initial_seed``, ``num_candidates`` and ``resample_documents``.

        Returns:
            * interest exploration wrapped with a constant ``0.0`` value
              function when ``is_interest_exploration`` is set;
            * otherwise interest evolution with ``dot_value_fn`` when
              ``single_selection`` is set;
            * otherwise the multiclick environment with
              ``multi_selection_value_fn``.
        """
        env_config = {
            "slate_size": self.slate_size,
            "seed": self.initial_seed,
            "num_candidates": self.num_candidates,
            "resample_documents": self.resample_documents,
        }

        # Interest exploration takes priority over the selection mode.
        if self.is_interest_exploration:
            return ValueWrapper(
                interest_exploration.create_environment(env_config),
                lambda user, doc: 0.0,
            )

        if self.single_selection:
            build_env = interest_evolution.create_environment
            value_fn = dot_value_fn
        else:
            build_env = create_multiclick_environment
            value_fn = multi_selection_value_fn
        return ValueWrapper(build_env(env_config), value_fn)
 def test_recsim_intereset_exploration(self):
     """Check that a sampled replay-buffer batch mirrors the inserted data.

     Transitions are inserted by ``_create_replay_buffer_and_insert``; the
     batch sampled at index 0 is compared field by field against the first
     two inserted transitions (user state, doc quality, action, response).
     """
     num_candidate = 10
     env_config = {
         "num_candidates": num_candidate,
         "slate_size": 3,
         "resample_documents": False,
         "seed": 1,
     }
     env = interest_exploration.create_environment(env_config)
     replay_buffer, inserted = _create_replay_buffer_and_insert(env)
     batch = replay_buffer.sample_transition_batch_tensor(indices=np.array([0]))

     first, second = inserted[0], inserted[1]

     # User features: current and next state.
     npt.assert_array_almost_equal(
         first["observation"]["user"].astype(np.float32),
         batch.state.squeeze(0),
     )
     npt.assert_array_almost_equal(
         second["observation"]["user"], batch.next_state.squeeze(0)
     )

     # Per-candidate document quality: current and next step.
     docs = list(first["observation"]["doc"].values())
     next_docs = list(second["observation"]["doc"].values())
     for idx in range(num_candidate):
         npt.assert_array_almost_equal(
             docs[idx]["quality"], batch.doc_quality.squeeze(0)[idx]
         )
         npt.assert_array_almost_equal(
             next_docs[idx]["quality"], batch.next_doc_quality.squeeze(0)[idx]
         )

     # Actions: current and next step.
     npt.assert_array_equal(first["action"], batch.action.squeeze(0))
     npt.assert_array_equal(second["action"], batch.next_action.squeeze(0))

     # The response stored for the first transition is expected to be zeros.
     npt.assert_array_equal([0, 0, 0], batch.response_click.squeeze(0))
     npt.assert_array_equal([0, 0, 0], batch.response_cluster_id.squeeze(0))
     npt.assert_array_equal([0.0, 0.0, 0.0], batch.response_quality.squeeze(0))

     # The next response must match the second transition's response.
     next_response = second["observation"]["response"]
     for slot in range(env_config["slate_size"]):
         item = next_response[slot]
         npt.assert_array_equal(
             item["click"], batch.next_response_click.squeeze(0)[slot]
         )
         npt.assert_array_equal(
             item["cluster_id"], batch.next_response_cluster_id.squeeze(0)[slot]
         )
         npt.assert_array_almost_equal(
             item["quality"].astype(np.float32),
             batch.next_response_quality.squeeze(0)[slot],
         )
예제 #5
0
    def test_recsim_interest_exploration(self):
        """Check the default obs preprocessor on an interest-exploration env.

        Verifies that the preprocessed state exposes the user observation as
        float features and that each candidate document is encoded as the
        one-hot of its ``cluster_id`` followed by its scalar ``quality``.
        """
        num_candidate = 10
        env_config = {
            "num_candidates": num_candidate,
            "slate_size": 3,
            "resample_documents": False,
            "seed": 1,
        }
        env = interest_exploration.create_environment(env_config)
        env = ValueWrapper(env, ValueMode.CONST)
        obs_preprocessor = make_default_obs_preprocessor(env)
        obs = env.reset()
        state = obs_preprocessor(obs)

        # User features.
        self.assertFalse(state.has_float_features_only)
        self.assertEqual(state.float_features.shape, (1, obs["user"].shape[0]))
        self.assertEqual(state.float_features.dtype, torch.float32)
        self.assertEqual(state.float_features.device, torch.device("cpu"))
        npt.assert_array_almost_equal(obs["user"], state.float_features.squeeze(0))

        # Candidate document features.
        doc_float_features = state.candidate_docs.float_features
        self.assertIsNotNone(doc_float_features)

        # Take the cluster count from the observation space once, so that the
        # expected feature length and the one-hot width below cannot disagree.
        num_clusters = int(env.observation_space["doc"]["0"]["cluster_id"].n)
        quality_len = 1
        expected_doc_feature_length = num_clusters + quality_len

        self.assertEqual(
            doc_float_features.shape, (1, num_candidate, expected_doc_feature_length)
        )
        self.assertEqual(doc_float_features.dtype, torch.float32)
        self.assertEqual(doc_float_features.device, torch.device("cpu"))
        for i, v in enumerate(obs["doc"].values()):
            expected_doc_feature = torch.cat(
                [
                    F.one_hot(torch.tensor(v["cluster_id"]), num_clusters).float(),
                    # This needs unsqueeze because it's a scalar
                    torch.tensor(v["quality"]).unsqueeze(0).float(),
                ],
                dim=0,
            )
            npt.assert_array_almost_equal(
                expected_doc_feature, doc_float_features[0, i]
            )
예제 #6
0
    def test_create_from_recsim_interest_exploration(self):
        """Create a ReplayBuffer from the env and add one transition to it.

        The transition carries the user observation, a sampled action, the
        per-document quality / cluster id from the reset observation and the
        per-slot click / quality / cluster id from the step response.
        """
        env = interest_exploration.create_environment({
            "num_candidates": 20,
            "slate_size": 3,
            "resample_documents": False,
            "seed": 1,
        })
        replay_buffer = ReplayBuffer.create_from_env(
            env,
            replay_memory_size=100,
            batch_size=10,
            store_log_prob=True,
        )

        obs = env.reset()
        user_observation = obs["user"]
        action = env.action_space.sample()
        log_prob = -1.0

        # Document features come from the observation taken before stepping.
        docs = list(obs["doc"].values())
        doc_quality = np.stack([doc["quality"] for doc in docs], axis=0)
        doc_cluster_id = np.array([doc["cluster_id"] for doc in docs])

        next_obs, reward, terminal, _ = env.step(action)

        # Response features come from the observation returned by the step.
        responses = next_obs["response"]
        response_click = np.array([resp["click"] for resp in responses])
        response_quality = np.stack(
            [resp["quality"] for resp in responses], axis=0
        )
        response_cluster_id = np.array(
            [resp["cluster_id"] for resp in responses]
        )

        replay_buffer.add(
            user_observation,
            action,
            reward,
            terminal,
            mdp_id=0,
            sequence_number=0,
            doc_quality=doc_quality,
            doc_cluster_id=doc_cluster_id,
            response_click=response_click,
            response_cluster_id=response_cluster_id,
            response_quality=response_quality,
            log_prob=log_prob,
        )
예제 #7
0
  def test_step(self):
    """Checks the slates chosen by a cluster bandit agent over three steps.

    A ClusterBanditAgent wrapped in ClusterClickStatsLayer is stepped against
    an interest-exploration environment with slate size 1. After each step the
    chosen slate is compared with the best-quality document of the expected
    cluster, and after `end_episode` the agent is expected to reproduce its
    very first slate.
    """
    # Initialize agent.
    env_config = {
        'slate_size': 1,
        'num_candidates': 5,
        'resample_documents': True,
        'seed': 1,
    }
    env = ie.create_environment(env_config)
    kwargs = {
        'observation_space': env.observation_space,
        'action_space': env.action_space,
    }
    agent = cluster_click_statistics.ClusterClickStatsLayer(
        cluster_bandit_agent.ClusterBanditAgent, **kwargs)

    observation1, documents1 = env.environment.reset()
    slate1 = agent.step(0,
                        dict(user=observation1, doc=documents1, response=None))
    # The first slate must be the best-quality document of either cluster.
    scores_c0 = [(features['quality'] if features['cluster_id'] == 0 else 0)
                 for features in documents1.values()]
    scores_c1 = [(features['quality'] if features['cluster_id'] == 1 else 0)
                 for features in documents1.values()]
    self.assertIn(slate1[0], [np.argmax(scores_c0), np.argmax(scores_c1)])
    picked_cluster = list(documents1.values())[slate1[0]]['cluster_id']

    observation2, documents, response1, _ = env.environment.step(slate1)
    response1_obs = [response.create_observation() for response in response1]
    # Attribute the response to the cluster that was actually shown.
    response1_obs[0]['cluster_id'] = picked_cluster
    slate2 = agent.step(
        ie.total_clicks_reward(response1),
        dict(user=observation2, doc=documents, response=response1_obs))
    # Pick the other topic because we have no observation about it.
    # Pick the document with the best quality there.
    doc_qualities = [
        (features['quality'] if features['cluster_id'] != picked_cluster else 0)
        for features in documents.values()
    ]
    self.assertAllEqual(slate2, [
        np.argmax(doc_qualities),
    ])

    self.assertNotEqual(
        list(documents.values())[slate2[0]]['cluster_id'], picked_cluster)

    observation3, documents, response2, _ = env.environment.step(slate2)
    response2_obs = [response.create_observation() for response in response2]
    # Make a clicked response.
    response2_obs[0]['click'] = 1
    response2_obs[0]['cluster_id'] = 1 - picked_cluster
    slate3 = agent.step(
        ie.total_clicks_reward(response2),
        dict(user=observation3, doc=documents, response=response2_obs))
    # Pick the topic which has the best UCB and then pick the document
    # with the best quality in it.
    # `np.float` was removed in NumPy 1.24; the builtin `float` is the
    # equivalent dtype.
    pulls = np.array([1, 1], dtype=float)
    rewards = np.array([0, 0], dtype=float)
    rewards[1 - picked_cluster] = 1
    ct = np.sqrt(2.0 * np.log(2.0))
    topic_index = rewards / pulls + ct * np.sqrt(1.0 / pulls)
    best_topic = np.argmax(topic_index)
    doc_qualities = [
        (features['quality'] if features['cluster_id'] == best_topic else 0)
        for features in documents.values()
    ]
    self.assertAllEqual(slate3, [np.argmax(doc_qualities)])

    # After the episode ends, replaying the initial observation must yield the
    # same slate as the very first step.
    agent.end_episode(
        ie.total_clicks_reward(response2),
        dict(user=observation3, doc=documents, response=response2_obs))
    slate4 = agent.step(0,
                        dict(user=observation1, doc=documents1, response=None))
    self.assertAllEqual(slate4, slate1)