Example #1
0
 def make_monte_carlo_batch(self,
                            nb_episodes,
                            render,
                            policy,
                            weights_flag=False,
                            weights=None):
     """
     Build a batch by running several episodes under a given policy.

     Intended for Monte Carlo approaches. Before the episodes are run, the
     environment's reward flag and duration flag are both set to False.

     :param nb_episodes: how many episodes to run and add to the batch
     :param render: whether each episode is displayed (True or False);
         forwarded to ``train_on_one_episode``
     :param policy: the policy controlling the agent
     :param weights_flag: when truthy, the batch is constructed as
         ``Batch(weights)``; otherwise as ``Batch()``
     :param weights: argument handed to ``Batch`` when ``weights_flag`` is
         truthy; ignored otherwise
     :return: the batch containing the ``nb_episodes`` collected episodes
     """
     batch = Batch(weights) if weights_flag else Batch()

     env = self.env
     env.set_reward_flag(False)
     env.set_duration_flag(False)

     for _ in range(nb_episodes):
         # NOTE(review): the literal False is the second positional argument
         # of train_on_one_episode; its meaning is not visible here -- confirm
         # against that method's signature.
         batch.add_episode(self.train_on_one_episode(policy, False, render))
     return batch