def test_bundle_and_unbundle(self):
    """Check that a bundle taken after one agent step survives a round trip.

    The agent is stepped once on a hand-built observation, its state is
    bundled, restored through ``unbundle``, and bundled again; the two
    bundles must compare equal.
    """
    # Agent over a one-item slate drawn from three candidates.
    slate_size = 1
    num_candidates = 3
    nvec = num_candidates * np.ones((slate_size,))
    action_space = spaces.MultiDiscrete(nvec)
    user_model = ie.IEUserModel(
        slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse,
    )
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    # Candidate documents, keyed by their index in the corpus.
    document_sampler = ie.IETopicDocumentSampler()
    documents = {
        doc_id: document_sampler.sample_document().create_observation()
        for doc_id in range(num_candidates)
    }

    # Step once so the agent has state to bundle.
    observation = {'user': user_model.create_observation(), 'doc': documents}
    agent.step(1, observation)

    # Restoring the bundle must succeed and leave the bundle unchanged.
    bundle_dict = agent.bundle_and_checkpoint('', 0)
    restored = agent.unbundle('', 0, bundle_dict)
    self.assertTrue(restored)
    self.assertEqual(bundle_dict, agent.bundle_and_checkpoint('', 0))
def test_bundle_and_unbundle_trivial(self):
    """Check bundling on a freshly constructed agent.

    Unbundling from an empty dictionary must report failure, and the bundle
    produced by the untouched agent must contain only ``episode_num`` set
    to 0.
    """
    nvec = np.ones((1,))
    agent = random_agent.RandomAgent(spaces.MultiDiscrete(nvec), random_seed=0)

    # An empty bundle carries nothing to restore from.
    restored = agent.unbundle('', 0, {})
    self.assertFalse(restored)

    expected_bundle = {'episode_num': 0}
    self.assertEqual(expected_bundle, agent.bundle_and_checkpoint('', 0))
def generate_trajectories(env, n_paths, n_steps=100, num_candidates=5,
                          slate_size=2):
    """Generate expert episodes by rolling a random agent through ``env``.

    ``env`` is wrapped in ``recsim_gym.RecSimGymEnv`` and driven by a
    ``RandomAgent`` seeded with 0. Each episode runs until the environment
    reports ``done`` or ``n_steps`` steps have been taken, whichever comes
    first.

    Args:
        env: the simulation environment to wrap with
            ``recsim_gym.RecSimGymEnv``.
        n_paths: number of trajectories (episodes) to generate.
        n_steps: maximum length of a trajectory.
        num_candidates: size of the corpus of each state; used to build the
            agent's action space.
        slate_size: the size of a recommendation slate.

    Returns:
        trajectories: list of ``n_paths`` episodes, each a list of
            ``Observation(user_state, responses, documents, done)`` tuples.
        states: flat list of every observation tuple across all episodes, in
            the order they were produced.
    """
    Observations = namedtuple(
        'Observation', ['user_state', 'responses', 'documents', 'done'])
    expert_gym_env = recsim_gym.RecSimGymEnv(env)

    # Create agent
    action_space = spaces.MultiDiscrete(
        num_candidates * np.ones((slate_size,)))
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    trajectories = []
    states = []
    for _ in range(n_paths):
        observation = expert_gym_env.reset()
        slate = agent.step(0, observation)
        path = []
        # Reset per episode: a stale ``done`` from the previous episode would
        # otherwise make every later episode empty.
        done = False
        steps_taken = 0
        while not done and steps_taken < n_steps:
            step_result = expert_gym_env.step(slate)
            # Only the leading (observation, reward, done) entries are used,
            # so both 4- and 5-element step results are accepted.
            observation, reward, done = step_result[:3]
            record = Observations(observation['user'],
                                  observation['response'],
                                  observation['doc'],
                                  done)
            path.append(record)
            states.append(record)
            steps_taken += 1
            # Same (reward, observation) call shape as the initial step.
            slate = agent.step(reward, observation)
        trajectories.append(path)
    return trajectories, states
def test_slate_indices_and_length(self):
    """Check the size and index range of a slate returned by the agent.

    With 100 candidates and a slate size of 2, one call to ``step`` must
    return exactly ``slate_size`` entries, each within
    ``range(num_candidates)``.
    """
    slate_size = 2
    num_candidates = 100

    # Agent and user model.
    nvec = num_candidates * np.ones((slate_size,))
    action_space = spaces.MultiDiscrete(nvec)
    user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse,
    )
    agent = random_agent.RandomAgent(action_space, random_seed=0)

    # Environment that supplies the user observation and the documents.
    document_sampler = iev.IEvVideoSampler()
    ievenv = environment.Environment(
        user_model, document_sampler, num_candidates, slate_size)

    user_obs, doc_obs = ievenv.reset()
    slate = agent.step(1, {'user': user_obs, 'doc': doc_obs})

    self.assertLen(slate, slate_size)
    valid_indices = range(num_candidates)
    self.assertAllInSet(slate, valid_indices)
def test_step(self):
    """Check the slate produced by a seed-0 agent on its first step.

    With 5 candidates, a slate size of 2 and ``random_seed=0``, the first
    call to ``step`` is expected to return ``[2, 0]``.
    """
    slate_size = 2
    num_candidates = 5

    # Simple user and a five-item candidate set.
    user_model = iev.IEvUserModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialLogitChoiceModel,
        response_model_ctor=iev.IEvResponse,
    )
    document_sampler = iev.IEvVideoSampler()
    ievsim = environment.Environment(
        user_model, document_sampler, num_candidates, slate_size)

    # Agent under test.
    nvec = num_candidates * np.ones((slate_size,))
    agent = random_agent.RandomAgent(
        spaces.MultiDiscrete(nvec), random_seed=0)

    # This agent doesn't use the previous user response.
    user_obs, doc_obs = ievsim.reset()
    slate = agent.step(1, {'user': user_obs, 'doc': doc_obs})

    expected_slate = [2, 0]
    self.assertAllEqual(slate, expected_slate)
def create_agent_random(slate_size, random_seed=0):
    """Build a ``RandomAgent`` over a ``MultiDiscrete`` slate action space.

    Args:
        slate_size: number of entries in the action space vector.
        random_seed: seed forwarded to ``RandomAgent``.

    Returns:
        The constructed ``random_agent.RandomAgent``.
    """
    # NOTE: ``num_candidates`` is not a parameter; it is looked up from the
    # enclosing scope when the function is called.
    nvec = num_candidates * np.ones((slate_size,))
    slate_space = spaces.MultiDiscrete(nvec)
    return random_agent.RandomAgent(slate_space, random_seed)