def test_0_donothing(self):
    """Check the outcome of running a ``DoNothingAgent`` through the test helper.

    The agent is built from ``self.env.helper_action_player`` and handed to
    ``self._aux_test_agent``, which returns a pair. The first element must be
    exactly 31 and the second must lie within ``self.tol_one`` of 619.994619.
    """
    do_nothing = DoNothingAgent(self.env.helper_action_player)
    # presumably (number of steps played, cumulative reward) -- confirm
    # against _aux_test_agent
    nb_steps, total_reward = self._aux_test_agent(do_nothing)

    # NOTE(review): the message mentions step 30 while the check is on 31.
    assert nb_steps == 31, "The powerflow diverged before step 30 for do nothing"

    reward_gap = np.abs(total_reward - 619.994619)
    assert reward_gap <= self.tol_one, "The reward has not been properly computed"
"""
Close all the environments and all the processes.
"""
# NOTE(review): these statements are the end of a method whose ``def`` line
# sits above this span; re-indent them to match it.
# Every remote receives the ('c', None) message.
for remote in self._remotes:
    remote.send(('c', None))


if __name__ == "__main__":
    # Demo run: one agent acts on `nb_env` environments stepped together.
    from tqdm import tqdm

    env = make()

    nb_env = 8  # change that to adapt to your system
    NB_STEP = 1000  # number of step for each environment

    agent = DoNothingAgent(env.action_space)
    multi_envs = MultiEnvironment(env=env, nb_env=nb_env)

    obs = multi_envs.reset()
    # Before the first step, each environment starts from the first entry of
    # the reward range and is flagged as not done.
    rews = [env.reward_range[0] for _ in range(nb_env)]
    dones = [False] * nb_env

    total_reward = 0.
    for i in tqdm(range(NB_STEP)):
        # One action per environment, from that environment's latest
        # observation, reward and done flag.
        acts = [
            agent.act(obs[env_act_id], rews[env_act_id], dones[env_act_id])
            for env_act_id in range(nb_env)
        ]
        obs, rews, dones, infos = multi_envs.step(acts)
        total_reward += np.sum(rews)
        # NOTE(review): the result of this call is discarded.
        len(rews)