def run_experiment(args):
    # Again could have used the SB3 tools here, buuuut...
    vecEnv = []
    for i in range(args.n_envs):
        # Bit of trickery here to avoid every closure
        # referring to the same "i"
        vecEnv.append((lambda idx: lambda: create_env(args, idx))(i))
    vecEnv = DummyVecEnv(vecEnv)

    constraint = AVAILABLE_CONSTRAINTS[args.constraint]
    agent = None
    if constraint == "ClipPPO":
        # Create a vanilla PPO
        agent = PPO(
            "MlpPolicy", vecEnv, verbose=2, device="cpu",
            n_steps=args.n_steps, clip_range=args.clip_range,
            learning_rate=args.learning_rate, gamma=args.gamma,
            ent_coef=args.ent_coef, gae_lambda=1.0,
            n_epochs=args.n_epochs
        )
    else:
        constraint = constraint(args)
        agent = SmallStepPPO(
            "MlpPolicy", vecEnv, verbose=2, device="cpu",
            n_steps=args.n_steps, step_constraint=constraint,
            learning_rate=args.learning_rate,
            step_constraint_max_updates=args.max_updates,
            gamma=args.gamma, ent_coef=args.ent_coef, gae_lambda=1.0
        )

    output_log_file = None
    if args.output_log:
        output_log_file = open(args.output_log, "w")
        logger.Logger.CURRENT = logger.Logger(
            folder=None,
            output_formats=[logger.HumanOutputFormat(output_log_file)]
        )

    agent.learn(total_timesteps=args.total_timesteps)

    if args.output is not None:
        agent.save(os.path.join(args.output, AGENT_FILE))

    vecEnv.close()
    if output_log_file:
        output_log_file.close()
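# A minimal driver sketch, not part of the original module: it assumes the
# argparse flag names below (hypothetical) map onto the attributes that
# run_experiment() actually reads (n_envs, constraint, n_steps, clip_range,
# learning_rate, gamma, ent_coef, n_epochs, max_updates, total_timesteps,
# output_log, output). Defaults are illustrative only.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--n-envs", type=int, default=8)
    parser.add_argument("--constraint", type=str, default="ClipPPO")  # key into AVAILABLE_CONSTRAINTS
    parser.add_argument("--n-steps", type=int, default=2048)
    parser.add_argument("--clip-range", type=float, default=0.2)
    parser.add_argument("--learning-rate", type=float, default=3e-4)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--ent-coef", type=float, default=0.0)
    parser.add_argument("--n-epochs", type=int, default=10)
    parser.add_argument("--max-updates", type=int, default=100)
    parser.add_argument("--total-timesteps", type=int, default=1_000_000)
    parser.add_argument("--output-log", type=str, default=None)
    parser.add_argument("--output", type=str, default=None)
    run_experiment(parser.parse_args())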
    clip_range_vf=float('inf'),
    max_grad_norm=0.5  # float('inf')
)

# Create the learning agent according to the chosen algorithm
agent = PPO(MlpPolicy, env, **config,
            tensorboard_log=tensorboard_data_path,
            verbose=True)

# Load an agent if desired
# agent = PPO2.load("cartpole_ppo2_baseline.pkl")

# Run the learning process
agent.learn(total_timesteps=400000,
            log_interval=5,
            reset_num_timesteps=False)

# Save the agent if desired
# agent.save("cartpole_ppo2_baseline.pkl")

### Enjoy a trained agent

# Duration of the simulations in seconds
t_end = 20.0

# Get the time step of Jiminy
env.remotes[0].send(('get_attr', 'dt'))
dt = env.remotes[0].recv()

# Run the simulation in real-time
obs = env.reset()
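# Hedged sketch only: the original snippet stops right after env.reset(), so
# the replay loop below is an assumption. It relies on the standard SB3
# predict()/step() API and sleeps for one Jiminy time step per iteration to
# keep the rollout roughly real-time over t_end seconds.
import time
for _ in range(int(t_end / dt)):
    action, _ = agent.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    time.sleep(dt)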
# env = DummyVecEnv([lambda: get_env(game, state, scenario)])
# env = VecNormalize(env, norm_obs=True, norm_reward=False)
env = VecCheckNan(env, raise_exception=True)

# Create a callback to save every n timesteps
prefix = "ppo_" + game + "_" + experiment_id
checkpoint_callback = CheckpointCallback(
    save_freq=100000,
    save_path="C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models",
    name_prefix=prefix)

savefile_name = prefix + "_final"
savefile_name = os.path.join(
    "C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models", savefile_name)

model = PPO(
    CnnPolicy,
    env,
    verbose=1,
    n_steps=128,
    n_epochs=3,
    learning_rate=2.5e-4,
    batch_size=32,
    ent_coef=0.01,
    vf_coef=1.0,
    tensorboard_log="C:\\Projects\\OpenAI Games\\retro-ai-hacking\\tb_logs"
)

model.learn(total_timesteps=1000000, callback=checkpoint_callback)
model.save(savefile_name)
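# Optional follow-up, not in the original script: reload the saved checkpoint
# and estimate its return with SB3's evaluate_policy helper; n_eval_episodes
# is an illustrative value.
from stable_baselines3.common.evaluation import evaluate_policy

trained_model = PPO.load(savefile_name, env=env)
mean_reward, std_reward = evaluate_policy(trained_model, env, n_eval_episodes=10)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")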