def test_no_user_id(self):
    """Smoke-test a stateful agent constructed without user-id input.

    The agent is built with ``stateful=True``, ``batch_size=1``,
    ``user_id_input=False`` and ``user_embedding_size=0``. Three rounds are
    run; every round simulates seven two-step episodes one after another,
    then sets the batch size to seven for the model update, empties the
    buffer and sets the batch size back to one.

    The test contains no assertions: it passes when no call raises.
    """
    num_rounds = 3
    episodes_per_round = 7
    steps_per_episode = 2

    environment = movie_lens_dynamic.create_gym_environment(self.env_config)
    rnn_agent = batched_movielens_rnn_agent.MovieLensRNNAgent(
        environment.observation_space,
        environment.action_space,
        stateful=True,
        batch_size=1,
        user_id_input=False,
        user_embedding_size=0,
        max_episode_length=None,
    )

    for _round in range(num_rounds):
        for _episode in range(episodes_per_round):
            last_reward = 0
            last_observation = environment.reset()
            for _step in range(steps_per_episode):
                chosen_slate = rnn_agent.step(last_reward, last_observation)
                step_result = environment.step(chosen_slate)
                last_observation, last_reward, _, _ = step_result
            rnn_agent.end_episode(
                last_reward, last_observation, eval_mode=True)

        # The model update is run over a batch made of the seven episodes
        # simulated in this round.
        rnn_agent.set_batch_size(episodes_per_round)
        rnn_agent.model_update(
            learning_rate=0.1,
            lambda_learning_rate=0.1,
            var_learning_rate=0.1,
        )
        rnn_agent.empty_buffer()
        # Simulation against the environment proceeds one episode at a
        # time, so restore the single-episode batch size.
        rnn_agent.set_batch_size(1)
def test_interaction(self):
    """Smoke-test the agent against a single environment.

    An agent is built with ``max_episode_length=None`` and otherwise default
    constructor arguments. Three rounds are run; every round simulates two
    two-step episodes, then calls ``model_update`` and ``empty_buffer``.

    The test contains no assertions: it passes when no call raises.
    """
    num_rounds = 3
    episodes_per_round = 2
    steps_per_episode = 2
    update_kwargs = {
        'learning_rate': 0.1,
        'lambda_learning_rate': 0.1,
        'var_learning_rate': 0.1,
    }

    environment = movie_lens_dynamic.create_gym_environment(self.env_config)
    rnn_agent = batched_movielens_rnn_agent.MovieLensRNNAgent(
        environment.observation_space,
        environment.action_space,
        max_episode_length=None,
    )

    for _round in range(num_rounds):
        for _episode in range(episodes_per_round):
            last_reward = 0
            last_observation = environment.reset()
            for _step in range(steps_per_episode):
                chosen_slate = rnn_agent.step(last_reward, last_observation)
                step_result = environment.step(chosen_slate)
                last_observation, last_reward, _, _ = step_result
            rnn_agent.end_episode(
                last_reward, last_observation, eval_mode=True)

        rnn_agent.model_update(**update_kwargs)
        rnn_agent.empty_buffer()
def test_batch_interaction(self):
    """Smoke-test the agent stepping five environments in lockstep.

    Five environments are created from the same config, and one agent is
    built from the first environment's observation and action spaces. Three
    rounds are run; every round resets all environments, takes two batched
    steps (one slate per environment), ends the episode with the per
    environment rewards and observations, then calls ``model_update`` and
    ``empty_buffer``.

    The test contains no assertions: it passes when no call raises.
    """
    num_envs = 5
    num_rounds = 3
    steps_per_episode = 2

    environments = []
    for _ in range(num_envs):
        environments.append(
            movie_lens_dynamic.create_gym_environment(self.env_config))

    first_env = environments[0]
    rnn_agent = batched_movielens_rnn_agent.MovieLensRNNAgent(
        first_env.observation_space,
        first_env.action_space,
        max_episode_length=None,
    )

    for _round in range(num_rounds):
        batch_rewards = [0] * len(environments)
        batch_observations = [
            environment.reset() for environment in environments
        ]
        for _step in range(steps_per_episode):
            batch_slates = rnn_agent.step(batch_rewards, batch_observations)
            step_results = [
                environment.step(slate)
                for environment, slate in zip(environments, batch_slates)
            ]
            # Transpose the per-environment 4-tuples into per-field tuples.
            batch_observations, batch_rewards, _, _ = zip(*step_results)
        rnn_agent.end_episode(
            batch_rewards, batch_observations, eval_mode=True)
        rnn_agent.model_update(
            learning_rate=0.1,
            lambda_learning_rate=0.1,
            var_learning_rate=0.1,
        )
        rnn_agent.empty_buffer()