verbose=1, tensorboard_log=str(common.output_data_folder / f"tensorboard" / saved_model_filename), seed=common.seed, **hyperparams) if len(args.initialize_weights_from) > 0: try: load_model = PPO.load(args.initialize_weights_from) load_weights = load_model.policy.state_dict() model.policy.load_state_dict(load_weights) print(f"Weights loaded from {args.initialize_weights_from}") except Exception: print("Initialize weights error.") raise Exception try: model.learn(total_timesteps=args.train_steps, callback=all_callbacks) except KeyboardInterrupt: pass model.save(str(common.output_data_folder / "models" / saved_model_filename)) if args.vec_normalize: # Important: save the running average, for testing the agent we need that normalization model.get_vec_normalize_env().save( str(common.output_data_folder / "models" / f"{saved_model_filename}.vnorm.pkl")) venv.close()
else: body = args.body_id print(body) env = DummyVecEnv([utils.make_env(rank=i, seed=utils.seed, render=args.render, robot_body=body, body_info=0) for i in range(train_num_envs)]) save_filename = f"model-ant-single-{body}" env = VecNormalize(env, **normalize_kwargs) keys_remove =["normalize", "n_envs", "n_timesteps", "policy"] for key in keys_remove: del hyperparams[key] eval_env = DummyVecEnv([utils.make_env(rank=0, seed=utils.seed+1, render=False, robot_body=2, body_info=0)]) eval_env = VecNormalize(eval_env, norm_reward=False, **normalize_kwargs) eval_callback = EvalCallback( eval_env=eval_env, n_eval_episodes=3, eval_freq=1e4, # will implicitly multiplied by 16 (train_num_envs) deterministic=True, ) # eval_callback = None model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f"{folder}/tb/{save_filename}", seed=utils.seed, **hyperparams) model.learn(total_timesteps=total_timesteps, callback=eval_callback) model.save(f"{folder}/{save_filename}") # Important: save the running average, for testing the agent we need that normalization model.get_vec_normalize_env().save(f"{folder}/{save_filename}-vecnormalize.pkl") env.close()