Esempio n. 1
0
 def test_no_user_id(self):
     """Smoke-test a stateful agent built with ``user_id_input=False``.

     The agent is created with ``user_embedding_size=0`` and a batch size
     of 1. Three update rounds are run; each round simulates seven episodes
     of two steps, one episode at a time, then switches the agent to a
     batch size of 7 for the model update and back to 1 afterwards.

     The test contains no assertions: it passes when none of the calls
     raise.
     """
     num_updates = 3
     episodes_per_update = 7
     steps_per_episode = 2
     update_kwargs = {
         'learning_rate': 0.1,
         'lambda_learning_rate': 0.1,
         'var_learning_rate': 0.1,
     }

     environment = movie_lens_dynamic.create_gym_environment(self.env_config)
     rnn_agent = batched_movielens_rnn_agent.MovieLensRNNAgent(
         environment.observation_space,
         environment.action_space,
         stateful=True,
         batch_size=1,
         user_id_input=False,
         user_embedding_size=0,
         max_episode_length=None,
     )

     for _update in range(num_updates):
         for _episode in range(episodes_per_update):
             last_reward = 0
             obs = environment.reset()
             for _step in range(steps_per_episode):
                 chosen_slate = rnn_agent.step(last_reward, obs)
                 obs, last_reward, _, _ = environment.step(chosen_slate)
             rnn_agent.end_episode(last_reward, obs, eval_mode=True)

         # The update consumes all episodes collected in this round at once,
         # so the batch size has to match the number of buffered episodes.
         rnn_agent.set_batch_size(episodes_per_update)
         rnn_agent.model_update(**update_kwargs)
         rnn_agent.empty_buffer()

         # Simulation proceeds one episode at a time, so restore batch size 1.
         rnn_agent.set_batch_size(1)
Esempio n. 2
0
 def test_interaction(self):
     """Smoke-test a default-configured agent against a single environment.

     Three update rounds are run. Each round simulates two episodes of two
     steps, ends each episode with ``eval_mode=True``, then updates the
     model and empties the agent's buffer.

     The test contains no assertions: it passes when none of the calls
     raise.
     """
     num_updates = 3
     episodes_per_update = 2
     steps_per_episode = 2
     update_kwargs = {
         'learning_rate': 0.1,
         'lambda_learning_rate': 0.1,
         'var_learning_rate': 0.1,
     }

     environment = movie_lens_dynamic.create_gym_environment(self.env_config)
     rnn_agent = batched_movielens_rnn_agent.MovieLensRNNAgent(
         environment.observation_space,
         environment.action_space,
         max_episode_length=None,
     )

     for _update in range(num_updates):
         for _episode in range(episodes_per_update):
             last_reward = 0
             obs = environment.reset()
             for _step in range(steps_per_episode):
                 chosen_slate = rnn_agent.step(last_reward, obs)
                 obs, last_reward, _, _ = environment.step(chosen_slate)
             rnn_agent.end_episode(last_reward, obs, eval_mode=True)

         rnn_agent.model_update(**update_kwargs)
         rnn_agent.empty_buffer()
Esempio n. 3
0
 def test_batch_interaction(self):
     """Smoke-test one agent stepping five environments in lockstep.

     Five environments are created from the same config. In each of three
     rounds every environment is reset, the agent is stepped twice with the
     per-environment rewards and observations, and the returned slates are
     applied to the environments pairwise. The episode is then ended with
     ``eval_mode=True``, the model is updated and the buffer is emptied.

     The test contains no assertions: it passes when none of the calls
     raise.
     """
     num_envs = 5
     num_updates = 3
     steps_per_episode = 2
     update_kwargs = {
         'learning_rate': 0.1,
         'lambda_learning_rate': 0.1,
         'var_learning_rate': 0.1,
     }

     envs = []
     for _ in range(num_envs):
         envs.append(
             movie_lens_dynamic.create_gym_environment(self.env_config))

     first_env = envs[0]
     rnn_agent = batched_movielens_rnn_agent.MovieLensRNNAgent(
         first_env.observation_space,
         first_env.action_space,
         max_episode_length=None,
     )

     for _update in range(num_updates):
         rewards = [0] * len(envs)
         observations = [single_env.reset() for single_env in envs]
         for _step in range(steps_per_episode):
             slates = rnn_agent.step(rewards, observations)
             step_results = [
                 single_env.step(slate)
                 for single_env, slate in zip(envs, slates)
             ]
             # Transpose the per-environment result tuples into
             # per-field tuples.
             observations, rewards, _, _ = zip(*step_results)
         rnn_agent.end_episode(rewards, observations, eval_mode=True)
         rnn_agent.model_update(**update_kwargs)
         rnn_agent.empty_buffer()