def setUp(self):
    """Builds mock spaces and a ClusterClickStatsLayer around a mock agent.

    Stores on the test case: the slate size and cluster count (both 2), a
    mocked action space, a gym-style observation space with 'user',
    'response' and 'doc' entries, an autospecced ClusterBanditAgent, and the
    ClusterClickStatsLayer under test.
    """
    self.slate_size = 2
    self.num_clusters = 2
    super(ClusterClickStatisticsTest, self).setUp()

    # The action space is mocked; only `nvec.shape` is given a value here.
    nvec = mock.Mock(shape=[2])
    self.test_action_space = mock.Mock(nvec=nvec)

    # Each slate position reports a click flag and the clicked cluster id.
    per_item_response = spaces.Dict({
        'click': spaces.Discrete(2),
        'cluster_id': spaces.Discrete(self.num_clusters),
    })
    response_space = spaces.Tuple((per_item_response,) * self.slate_size)
    doc_space = spaces.Tuple((spaces.Discrete(4),))
    self.test_observation_space = spaces.Dict({
        'user': spaces.Discrete(2),
        'response': response_space,
        'doc': doc_space,
    })

    self.mock_agent = mock.create_autospec(
        cluster_bandit_agent.ClusterBanditAgent)
    # `kwarg_for_agent` is an extra keyword handed to the layer constructor.
    self.click_stats = cluster_click_statistics.ClusterClickStatsLayer(
        self.mock_agent,
        self.test_observation_space,
        self.test_action_space,
        kwarg_for_agent=-1)
예제 #2
0
  def test_step(self):
    """Checks the slates picked by the layered cluster bandit over an episode.

    The test drives a ClusterClickStatsLayer wrapping ClusterBanditAgent
    through three steps of a single-item-slate environment and verifies:

    1. The first slate holds the best-quality document of cluster 0 or 1.
    2. The second slate holds the best-quality document outside the cluster
       picked first, and that document belongs to a different cluster.
    3. After a (forced) click on the second cluster, the third slate holds the
       best-quality document of the cluster with the highest expected index,
       computed here as mean reward plus an exploration bonus.
    4. After `end_episode`, replaying the initial observation reproduces the
       first slate.
    """
    # Initialize agent.
    env_config = {
        'slate_size': 1,
        'num_candidates': 5,
        'resample_documents': True,
        'seed': 1,
    }
    env = ie.create_environment(env_config)
    kwargs = {
        'observation_space': env.observation_space,
        'action_space': env.action_space,
    }
    agent = cluster_click_statistics.ClusterClickStatsLayer(
        cluster_bandit_agent.ClusterBanditAgent, **kwargs)

    observation1, documents1 = env.environment.reset()
    slate1 = agent.step(0,
                        dict(user=observation1, doc=documents1, response=None))
    # With no responses seen yet, either cluster may be chosen; the slate must
    # hold the best-quality document of whichever cluster was chosen.
    scores_c0 = [(features['quality'] if features['cluster_id'] == 0 else 0)
                 for features in documents1.values()]
    scores_c1 = [(features['quality'] if features['cluster_id'] == 1 else 0)
                 for features in documents1.values()]
    self.assertIn(slate1[0], [np.argmax(scores_c0), np.argmax(scores_c1)])
    picked_cluster = list(documents1.values())[slate1[0]]['cluster_id']

    observation2, documents, response1, _ = env.environment.step(slate1)
    response1_obs = [response.create_observation() for response in response1]
    # Attribute the first response to the cluster that was actually shown.
    response1_obs[0]['cluster_id'] = picked_cluster
    slate2 = agent.step(
        ie.total_clicks_reward(response1),
        dict(user=observation2, doc=documents, response=response1_obs))
    # Pick the cluster that was not picked first because we have no
    # observation about it, then pick the document with the best quality
    # there.
    doc_qualities = [
        (features['quality'] if features['cluster_id'] != picked_cluster else 0)
        for features in documents.values()
    ]
    self.assertAllEqual(slate2, [
        np.argmax(doc_qualities),
    ])

    self.assertNotEqual(
        list(documents.values())[slate2[0]]['cluster_id'], picked_cluster)

    observation3, documents, response2, _ = env.environment.step(slate2)
    response2_obs = [response.create_observation() for response in response2]
    # Make a clicked response.
    response2_obs[0]['click'] = 1
    response2_obs[0]['cluster_id'] = 1 - picked_cluster
    slate3 = agent.step(
        ie.total_clicks_reward(response2),
        dict(user=observation3, doc=documents, response=response2_obs))
    # Pick the topic which has the best UCB and then pick the document with
    # the best quality in it. Each cluster has been pulled once; only the
    # second cluster received a click.
    # NOTE: np.float64 replaces the `np.float` alias, which was removed in
    # NumPy 1.24; the resulting dtype is unchanged.
    pulls = np.array([1, 1], dtype=np.float64)
    rewards = np.array([0, 0], dtype=np.float64)
    rewards[1 - picked_cluster] = 1
    ct = np.sqrt(2.0 * np.log(2.0))
    topic_index = rewards / pulls + ct * np.sqrt(1.0 / pulls)
    best_topic = np.argmax(topic_index)
    doc_qualities = [
        (features['quality'] if features['cluster_id'] == best_topic else 0)
        for features in documents.values()
    ]
    self.assertAllEqual(slate3, [np.argmax(doc_qualities)])

    # Ending the episode and replaying the initial observation is expected to
    # reproduce the very first slate.
    agent.end_episode(
        ie.total_clicks_reward(response2),
        dict(user=observation3, doc=documents, response=response2_obs))
    slate4 = agent.step(0,
                        dict(user=observation1, doc=documents1, response=None))
    self.assertAllEqual(slate4, slate1)