Ejemplo n.º 1
0
    def test_bundle_and_unbundle(self):
        """Check that a checkpoint bundle survives an unbundle round trip.

        The agent takes one step, its state is bundled, restored from that
        bundle, and bundled again; the two bundles must compare equal.
        """
        # Build a single-item-slate random agent over a three-document corpus.
        slate_size = 1
        num_candidates = 3
        nvec = num_candidates * np.ones((slate_size, ))
        action_space = spaces.MultiDiscrete(nvec)

        user_model = ie.IEUserModel(
            slate_size,
            user_state_ctor=ie.IEUserState,
            response_model_ctor=ie.IEResponse,
        )
        agent = random_agent.RandomAgent(action_space, random_seed=0)

        # Sample one document observation per candidate index.
        document_sampler = ie.IETopicDocumentSampler()
        documents = {
            index: document_sampler.sample_document().create_observation()
            for index in range(num_candidates)
        }

        # Take one step before checkpointing; the returned slate is not
        # inspected by this test.
        observation = {
            'user': user_model.create_observation(),
            'doc': documents,
        }
        agent.step(1, observation)

        # Bundle, restore from the bundle, and bundle again.
        first_bundle = agent.bundle_and_checkpoint('', 0)
        restored = agent.unbundle('', 0, first_bundle)
        self.assertTrue(restored)
        self.assertEqual(first_bundle, agent.bundle_and_checkpoint('', 0))
Ejemplo n.º 2
0
 def test_bundle_and_unbundle_trivial(self):
   """Check bundling on a fresh agent and unbundling from an empty dict."""
   nvec = np.ones((1,))
   agent = random_agent.RandomAgent(spaces.MultiDiscrete(nvec), random_seed=0)

   # Restoring from an empty bundle is expected to report failure.
   restored = agent.unbundle('', 0, {})
   self.assertFalse(restored)

   # A freshly created agent is expected to bundle only its episode counter.
   expected_bundle = {'episode_num': 0}
   self.assertEqual(expected_bundle, agent.bundle_and_checkpoint('', 0))
Ejemplo n.º 3
0
def generate_trajectories(env,
                          n_paths,
                          n_steps=100,
                          num_candidates=5,
                          slate_size=2):
    """Generate expert episodes by stepping a random-slate agent through env.

    The environment is wrapped in ``recsim_gym.RecSimGymEnv`` and driven by a
    ``RandomAgent`` seeded with 0. Each episode runs until the environment
    reports ``done`` or ``n_steps`` steps have been taken, whichever is first.

    Args:
        env: environment object passed to ``recsim_gym.RecSimGymEnv``.
        n_paths: number of trajectories (episodes) to generate.
        n_steps: maximum number of environment steps per trajectory.
        num_candidates: number of candidate documents the agent's action
            space ranges over; presumably must match the corpus size that
            ``env`` was built with -- TODO confirm against the caller.
        slate_size: number of documents in each recommendation slate;
            presumably must match the slate size of ``env`` -- TODO confirm.

    Returns:
        A ``(trajectories, states)`` tuple. ``trajectories`` holds one list
        per episode, each containing ``Observation`` records with fields
        ``(user_state, responses, documents, done)`` in step order.
        ``states`` is the flat list of the same records over all episodes.
    """
    Observations = namedtuple('Observation',
                              ['user_state', 'responses', 'documents', 'done'])
    expert_gym_env = recsim_gym.RecSimGymEnv(env)

    # Create agent
    action_space = spaces.MultiDiscrete(num_candidates * np.ones(
        (slate_size, )))
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    trajectories = []
    states = []
    for _ in range(n_paths):
        # The termination flag must be reset for every episode; otherwise the
        # first finished episode would leave all later trajectories empty.
        done = False
        observation = expert_gym_env.reset()
        slate = agent.step(0, observation)
        path = []
        steps_taken = 0
        while not done and steps_taken < n_steps:
            # NOTE(review): five values are unpacked here as in the original
            # code; the stock RecSimGymEnv.step returns four -- confirm the
            # wrapper version in use. The second element is taken to be the
            # reward, following the Gym step convention.
            observation, reward, done, _, _ = expert_gym_env.step(slate)

            # One record per step, shared by the per-episode path and the
            # flat state list (namedtuples are immutable, so sharing is safe).
            record = Observations(observation['user'],
                                  observation['response'],
                                  observation['doc'], done)
            path.append(record)
            states.append(record)
            steps_taken += 1

            # The agent's step takes (reward, observation), as in the initial
            # call above.
            slate = agent.step(reward, observation)

        trajectories.append(path)

    return trajectories, states
Ejemplo n.º 4
0
  def test_slate_indices_and_length(self):
    """Check the slate has slate_size entries, all valid candidate indices."""
    # Action space: two slate slots, each choosing among 100 candidates.
    slate_size = 2
    num_candidates = 100
    nvec = num_candidates * np.ones((slate_size,))
    action_space = spaces.MultiDiscrete(nvec)

    user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse)
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    # Environment that supplies the user observation and candidate documents.
    video_sampler = iev.IEvVideoSampler()
    ievenv = environment.Environment(
        user_model, video_sampler, num_candidates, slate_size)

    # Ask for one slate and validate its length and index range.
    user_obs, doc_obs = ievenv.reset()
    agent_input = {'user': user_obs, 'doc': doc_obs}
    slate = agent.step(1, agent_input)
    self.assertLen(slate, slate_size)
    self.assertAllInSet(slate, range(num_candidates))
Ejemplo n.º 5
0
  def test_step(self):
    """Check that the seed-0 agent's first slate is [2, 0]."""
    # Simple user model with a slate of two items.
    slate_size = 2
    user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse)

    # Environment with a candidate set of five documents.
    num_candidates = 5
    video_sampler = iev.IEvVideoSampler()
    ievsim = environment.Environment(
        user_model, video_sampler, num_candidates, slate_size)

    # Random agent over the matching action space, with a fixed seed.
    nvec = num_candidates * np.ones((slate_size,))
    agent = random_agent.RandomAgent(spaces.MultiDiscrete(nvec), random_seed=0)

    # This agent doesn't use the previous user response.
    user_obs, doc_obs = ievsim.reset()
    agent_input = {'user': user_obs, 'doc': doc_obs}
    slate = agent.step(1, agent_input)
    self.assertAllEqual(slate, [2, 0])
Ejemplo n.º 6
0
def create_agent_random(slate_size, random_seed=0):
    """Build a RandomAgent whose slates contain ``slate_size`` items.

    NOTE(review): ``num_candidates`` is not a parameter or local of this
    function; it must be resolvable from an enclosing scope (presumably a
    module-level variable) when the function is called -- confirm.

    Args:
        slate_size: number of entries in the agent's MultiDiscrete action
            space.
        random_seed: seed forwarded to ``RandomAgent``.

    Returns:
        The constructed ``random_agent.RandomAgent``.
    """
    nvec = num_candidates * np.ones((slate_size, ))
    action_space = spaces.MultiDiscrete(nvec)
    return random_agent.RandomAgent(action_space, random_seed=random_seed)