import numpy as np

from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize


def test_vec_env(tmp_path, make_env):
    """Test the VecNormalize object: clipping plus a save/load round-trip."""
    clip_obs = 0.5
    clip_reward = 5.0

    orig_venv = DummyVecEnv([make_env])
    norm_venv = VecNormalize(
        orig_venv,
        norm_obs=True,
        norm_reward=True,
        clip_obs=clip_obs,
        clip_reward=clip_reward,
    )

    _, done = norm_venv.reset(), [False]
    while not done[0]:
        actions = [norm_venv.action_space.sample()]
        obs, rew, done, _ = norm_venv.step(actions)
        # Normalized observations and rewards must stay within the clip bounds.
        if isinstance(obs, dict):
            for key in obs.keys():
                assert np.max(np.abs(obs[key])) <= clip_obs
        else:
            assert np.max(np.abs(obs)) <= clip_obs
        assert np.max(np.abs(rew)) <= clip_reward

    # Saving and reloading must preserve the normalization state.
    path = tmp_path / "vec_normalize"
    norm_venv.save(path)
    deserialized = VecNormalize.load(path, venv=orig_venv)
    check_vec_norm_equal(norm_venv, deserialized)
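# For completeness, a minimal sketch of the two pieces the test above assumes
# but does not define. The Pendulum-v1 choice and the attribute-by-attribute
# comparison in check_vec_norm_equal are illustrative assumptions, not
# necessarily the originals.
import gym
import pytest


@pytest.fixture
def make_env():
    # DummyVecEnv expects a list of callables, so the fixture returns a thunk
    # that constructs a fresh environment.
    return lambda: gym.make("Pendulum-v1")


def check_vec_norm_equal(norm_venv, deserialized):
    # Compare the state that VecNormalize serializes: the clip settings and the
    # running mean/variance statistics for observations and returns.
    assert norm_venv.clip_obs == deserialized.clip_obs
    assert norm_venv.clip_reward == deserialized.clip_reward
    assert np.allclose(norm_venv.obs_rms.mean, deserialized.obs_rms.mean)
    assert np.allclose(norm_venv.obs_rms.var, deserialized.obs_rms.var)
    assert np.allclose(norm_venv.ret_rms.mean, deserialized.ret_rms.mean)
    assert np.allclose(norm_venv.ret_rms.var, deserialized.ret_rms.var)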
import os
import time

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, VecNormalize

# NOTE: the branch condition is not part of the original snippet; keying it
# off env_params['train_or_test'] is an assumption.
if env_params['train_or_test'] == 'train':
    model = PPO('MlpPolicy', env, policy_kwargs=policy_kwargs,
                n_steps=4000, verbose=1, batch_size=10000, n_epochs=4)
    if not os.path.exists(policy_save_dir):
        os.makedirs(policy_save_dir)
    # model = PPO('MlpPolicy', env, policy_kwargs=policy_kwargs, verbose=1, n_steps=2048)
    # model.set_parameters(os.path.join(policy_save_dir, 'ppo_model_6_S_PV_4096_3000w_21-03-2021_02-50-06.zip'))
    t1 = time.time()
    model.learn(total_timesteps=12000000)
    print(f"Training took {time.time() - t1} s.")
    model.save(policy_save_path)
    # Persist the VecNormalize running statistics alongside the policy.
    env.save(env_stats_path)
else:
    # env = env_change_input(time_step=env_params['time_step'],
    #                        robot_class=env_params['robot_class'],
    #                        on_rack=env_params['on_rack'],
    #                        enable_self_collision=env_params['enable_self_collision'],
    #                        motor_control_mode=env_params['motor_control_mode'],
    #                        train_or_test=env_params['train_or_test'])
    # env = env_change_input(**env_params)
    env = SubprocVecEnv([lambda: env_change_input(**env_params)])
    env_stats_load_path = os.path.join(
        policy_save_dir, 'ppo_env_8_S_PV_4096_12w_21-03-2021_20-46-02.pkl')
    env = VecNormalize.load(env_stats_load_path, env)
    # Evaluation mode: freeze the running statistics and report raw rewards.
    env.training = False
    env.norm_reward = False
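# Under the assumption that policy_save_path points at the .zip saved by the
# training branch above, a minimal rollout with the frozen normalizer would
# look like this (the loop itself is illustrative, not from the original
# script):
model = PPO.load(policy_save_path, env=env)
obs = env.reset()
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)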