def perform_expert_episodes_continuous(simu, batch, nb_trajs, render=False):
    """
    Fill a batch with expert episodes playing a simple policy close to bang-bang
    (go left first, then go right)
    :param simu: the simulation; must provide reset(render) and take_step(state, action, episode, render)
    :param batch: the batch to be filled; must provide add_episode(episode)
    :param nb_trajs: the number of expert episodes to perform
    :param render: whether the step is displayed or not (True or False)
    :return: the batch
    """
    for _traj in range(nb_trajs):
        state = simu.reset(render)
        episode = Episode()
        done = False

        # Phase 1: push (almost) fully left for at most 50 steps, with a small
        # random variation in [0, 0.05) so that the trajectories are not identical.
        for _step in range(50):
            variation = random.random() / 20
            action = [-1.0 + variation]
            state, _reward, done = simu.take_step(state, action, episode, render)
            if done:
                # Do not keep stepping a simulation that has already terminated.
                break

        # Phase 2: push (almost) fully right, with a random variation in
        # [0, 0.1), until the simulation reports the end of the episode.
        while not done:
            variation = random.random() / 10
            action = [1.0 - variation]
            state, _reward, done = simu.take_step(state, action, episode, render)

        batch.add_episode(episode)
    return batch
def train_on_one_episode(self, policy, deterministic, render=False):
    """
    Run one complete episode driven by the given policy and collect its samples
    :param policy: the policy controlling the agent; queried through select_action(state, deterministic)
    :param deterministic: whether the policy should select its actions deterministically or not
    :param render: whether the episode is displayed or not (True or False)
    :return: the episode structure that was handed to every take_step call
    """
    episode = Episode()
    state = self.reset(render)
    done = False
    # Keep interacting until take_step reports the end of the episode.
    while not done:
        action = policy.select_action(state, deterministic)
        state, _reward, done = self.take_step(state, action, episode, render)
    return episode