def multiprocessing_example():
    """Train PPO on CartPole-v1 over several subprocess environments, then replay it.

    Builds ``num_cpu`` environments inside a ``SubprocVecEnv``, trains a PPO
    ``MlpPolicy`` for 25 000 timesteps, and finally steps the trained policy
    for 1000 iterations while rendering. The vectorized environment is always
    closed on exit so its worker processes do not outlive the example.
    """

    def make_env(env_id, rank, seed=0):
        """
        Utility function for multiprocessed env.

        :param env_id: (str) the environment ID.
        :param rank: (int) index of the subprocess.
        :param seed: (int) the initial seed for RNG.
        :return: (callable) zero-argument factory that builds and seeds the env.
        """

        def _init():
            env = gym.make(env_id)
            # Offset the seed by the rank so each environment gets its own seed.
            env.seed(seed + rank)
            return env

        set_random_seed(seed)
        return _init

    env_id = "CartPole-v1"
    num_cpu = 4  # Number of processes to use.

    # Create the vectorized environment.
    # Stable Baselines also provides a make_vec_env() helper that performs the
    # same steps, and lets you choose between 'DummyVecEnv' (usually faster)
    # and 'SubprocVecEnv':
    #   env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)
    env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])

    try:
        model = PPO("MlpPolicy", env, verbose=1)
        model.learn(total_timesteps=25_000)

        obs = env.reset()
        for _ in range(1000):
            action, _states = model.predict(obs)
            obs, rewards, dones, info = env.step(action)
            env.render()
    finally:
        # Shut the worker processes down even if training or rendering fails.
        env.close()
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed


def make_env(env_id, rank, seed=0):
    """
    Build a factory for one environment of a multiprocessed vectorized env.

    :param env_id: (str) the environment ID passed to ``gym.make``.
    :param rank: (int) index of the subprocess; added to ``seed`` so each
        environment is seeded differently.
    :param seed: (int) the base seed for RNG.
    :return: (callable) zero-argument function that creates and seeds the env.
    """

    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env

    # Runs in the calling process when the factory is built, not inside _init.
    set_random_seed(seed)
    return _init


if __name__ == '__main__':
    env_id = "CartPole-v1"
    num_cpu = 4  # Number of processes to use

    env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
    try:
        model = PPO('MlpPolicy', env, verbose=1)
        model.learn(total_timesteps=25000)

        obs = env.reset()
        for _ in range(1000):
            action, _states = model.predict(obs)
            obs, rewards, dones, info = env.step(action)
            env.render()
    finally:
        # Always terminate the worker processes, even after an error.
        env.close()
def _export_policy_weights(model, env):
    """Write every policy tensor and the observation statistics to ./result/.

    Moves the policy to the CPU, then saves each entry of the policy's
    ``state_dict`` to ``./result/<entry name>.txt`` and the running observation
    mean/variance of ``env`` to ``obs_mean.txt`` / ``obs_variance.txt``.

    :param model: trained model exposing ``policy`` (with ``to`` and
        ``state_dict``).
    :param env: normalizing env wrapper exposing ``obs_rms.mean`` and
        ``obs_rms.var``.
    """
    import os

    # np.savetxt does not create missing directories.
    os.makedirs("./result/", exist_ok=True)

    model.policy.to("cpu")
    for name, param in model.policy.state_dict().items():
        weight_file_name = "./result/" + name + ".txt"
        np.savetxt(weight_file_name, param.data)

    np.savetxt("./result/obs_mean.txt", env.obs_rms.mean)
    np.savetxt("./result/obs_variance.txt", env.obs_rms.var)


def main():
    """Train PPO on DYROSTocabi-v1, export the result, then replay it forever.

    Steps:
      1. Train on ``n_cpu`` subprocess environments wrapped in ``VecNormalize``.
      2. Save the model and normalization statistics under a timestamped name
         and export the policy weights as text files.
      3. Reload both into a single-process environment, export the weights
         again, and run the deterministic policy in an endless render loop,
         printing the accumulated reward whenever an episode ends.

    The final loop never returns on its own; it ends only when an exception
    (for example ``KeyboardInterrupt``) propagates out.
    """
    # Multiprocess environment.
    # Single-process alternative:
    #   env = DummyVecEnv([lambda: gym.make('DYROSTocabi-v1')])
    n_cpu = 8
    env = SubprocVecEnv(
        [lambda: gym.make('DYROSTocabi-v1') for _ in range(n_cpu)])
    env = VecNormalize(env, norm_obs=True, clip_obs=2.0, norm_reward=False,
                       training=True)

    try:
        # NOTE(review): `wandb_use` is passed to PPO here and to PPO.load below;
        # presumably provided by a project-specific PPO build — confirm.
        model = PPO('MlpPolicy', env, verbose=1, n_steps=4096 // n_cpu,
                    wandb_use=True)
        model.learn(total_timesteps=40000000)

        # Timestamp taken after training, so the name marks when the run ended.
        file_name = "ppo2_DYROSTocabi_" + str(datetime.datetime.now())
        model.save(file_name)
        env.save(file_name + "_env.pkl")

        _export_policy_weights(model, env)
    finally:
        # `del` alone does not stop the worker processes.
        env.close()

    del model  # remove to demonstrate saving and loading
    del env

    # To replay an earlier run, set file_name to its saved prefix instead, e.g.
    #   file_name = "ppo2_DYROSTocabi_2021-02-27 02:20:20.015346"
    raw_env = gym.make('DYROSTocabi-v1')
    # A separate name keeps the factory from closing over the rebound `env`.
    env = DummyVecEnv([lambda: raw_env])
    env = VecNormalize.load(file_name + "_env.pkl", env)
    env.training = False  # freeze the normalization statistics for evaluation

    try:
        model = PPO.load(file_name, env=env, wandb_use=False)
        _export_policy_weights(model, env)

        # Enjoy trained agent
        obs = np.copy(env.reset())
        epi_reward = 0
        while True:
            action, _states = model.predict(obs, deterministic=True)
            obs, rewards, dones, info = env.step(action)
            env.render()
            epi_reward += rewards

            # `dones` holds one flag because a single environment is wrapped.
            if dones:
                print("Episode Reward: ", epi_reward)
                epi_reward = 0
    finally:
        env.close()