Exemple #1
0
 def test_0_donothing(self):
     """Check the outcome of running a do-nothing agent on ``self.env``.

     The agent is built from ``self.env.helper_action_player`` and handed to
     ``self._aux_test_agent``. The first returned value must equal 31 and
     the second (the cumulative reward) must be within ``self.tol_one`` of
     the reference value 619.994619.
     """
     expected_reward = 619.994619
     do_nothing = DoNothingAgent(self.env.helper_action_player)
     nb_steps, total_reward = self._aux_test_agent(do_nothing)
     assert nb_steps == 31, "The powerflow diverged before step 30 for do nothing"
     assert np.abs(total_reward - expected_reward) <= self.tol_one, (
         "The reward has not been properly computed"
     )
Exemple #2
0
        """
        Close all the environments and all the processes.
        """
        for remote in self._remotes:
            remote.send(('c', None))


if __name__ == "__main__":
    # Demo run: drive `nb_env` copies of the environment through
    # `MultiEnvironment` with a do-nothing agent and accumulate the rewards
    # returned at every step.
    from tqdm import tqdm

    env = make()

    nb_env = 8  # change that to adapt to your system
    NB_STEP = 1000  # number of step for each environment

    agent = DoNothingAgent(env.action_space)
    multi_envs = MultiEnvironment(env=env, nb_env=nb_env)

    # Inputs for the very first `agent.act` call: the observations returned
    # by reset(), the first entry of `env.reward_range` (presumably the
    # minimum reward -- confirm), and "not done" for every environment.
    obs = multi_envs.reset()
    rews = [env.reward_range[0] for _ in range(nb_env)]
    dones = [False for _ in range(nb_env)]

    total_reward = 0.
    for _ in tqdm(range(NB_STEP)):
        # One action per environment, each computed from that environment's
        # latest observation, reward and done flag.
        acts = [
            agent.act(obs[env_id], rews[env_id], dones[env_id])
            for env_id in range(nb_env)
        ]
        obs, rews, dones, infos = multi_envs.step(acts)
        total_reward += np.sum(rews)
    # NOTE(review): nothing in the lines above closes the environments held
    # by `multi_envs`; confirm it is closed once the run is over.