def perform_expert_episodes_continuous(simu, batch, nb_trajs, render=False):
    """
    Fill a batch with expert episodes played by a simple, slightly noisy policy
    close to bang-bang: push left for up to 50 steps, then push right until the
    simulation reports the end of the episode.

    Each trajectory contributes exactly one episode to the batch. If the
    simulation signals termination during the initial "push left" phase, the
    episode is stopped there instead of stepping a finished simulation.

    :param simu: the simulation; must provide reset(render) and
        take_step(state, action, episode, render) -> (state, reward, done)
    :param batch: the batch to be filled; must provide add_episode(episode)
    :param nb_trajs: the number of expert episodes to generate
    :param render: whether the step is displayed or not (True or False)
    :return: the batch (the same object that was passed in)
    """
    for _traj in range(nb_trajs):
        state = simu.reset(render)
        # The episode is handed to take_step at every step; presumably that is
        # where the samples get recorded (take_step is not visible here).
        episode = Episode()
        done = False

        # Phase 1: push (almost) fully left, action in (-1.0, -0.95].
        for _step in range(50):
            action = [-1.0 + random.random() / 20]
            state, _reward, done = simu.take_step(state, action, episode,
                                                  render)
            if done:
                # Do not keep acting in an episode that is already over.
                break

        # Phase 2: push (almost) fully right, action in (0.9, 1.0], until done.
        while not done:
            action = [1.0 - random.random() / 10]
            state, _reward, done = simu.take_step(state, action, episode,
                                                  render)

        batch.add_episode(episode)
    return batch
Esempio n. 2
0
    def train_on_one_episode(self, policy, deterministic, render=False):
        """
        Roll out one full episode under the given policy and hand back the
        episode structure that was passed to every step.

        :param policy: the policy controlling the agent; queried through
            select_action(state, deterministic) at each step
        :param deterministic: forwarded to the policy to choose between
            deterministic and non-deterministic action selection
        :param render: whether the episode is displayed or not (True or False)
        :return: the episode object given to take_step at each step
        """
        trajectory = Episode()
        current_state = self.reset(render)
        finished = False
        # Keep acting until the simulation reports the end of the episode;
        # the per-step reward is not needed here.
        while not finished:
            chosen_action = policy.select_action(current_state, deterministic)
            current_state, _reward, finished = self.take_step(
                current_state, chosen_action, trajectory, render)
        return trajectory