Example 1
def run_experiment(args):
    # Again, could have used the SB3 vec-env helpers here, but...
    vecEnv = []
    for i in range(args.n_envs):
        # A bit of trickery here so that each closure
        # captures its own "i" instead of the shared loop variable
        vecEnv.append((lambda idx: lambda: create_env(args, idx))(i))

    vecEnv = DummyVecEnv(vecEnv)

    constraint = AVAILABLE_CONSTRAINTS[args.constraint]
    agent = None
    if constraint == "ClipPPO":
        # Create a vanilla PPO
        agent = PPO("MlpPolicy",
                    vecEnv,
                    verbose=2,
                    device="cpu",
                    n_steps=args.n_steps,
                    clip_range=args.clip_range,
                    learning_rate=args.learning_rate,
                    gamma=args.gamma,
                    ent_coef=args.ent_coef,
                    gae_lambda=1.0,
                    n_epochs=args.n_epochs)
    else:
        constraint = constraint(args)

        agent = SmallStepPPO("MlpPolicy",
                             vecEnv,
                             verbose=2,
                             device="cpu",
                             n_steps=args.n_steps,
                             step_constraint=constraint,
                             learning_rate=args.learning_rate,
                             step_constraint_max_updates=args.max_updates,
                             gamma=args.gamma,
                             ent_coef=args.ent_coef,
                             gae_lambda=1.0)

    output_log_file = None
    if args.output_log:
        output_log_file = open(args.output_log, "w")
        logger.Logger.CURRENT = logger.Logger(
            folder=None,
            output_formats=[logger.HumanOutputFormat(output_log_file)])

    agent.learn(total_timesteps=args.total_timesteps)

    if args.output is not None:
        agent.save(os.path.join(args.output, AGENT_FILE))

    vecEnv.close()
    if output_log_file:
        output_log_file.close()
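The nested lambda in the loop above is needed because a plain "lambda: create_env(args, i)" would capture the loop variable by reference, so every factory would end up building the environment with the last value of "i". A minimal standalone sketch of the difference (plain Python, no SB3 involved):

# Pitfall: each factory closes over the same "i", so after the loop
# they all return the final value.
factories_wrong = []
for i in range(3):
    factories_wrong.append(lambda: i)
print([f() for f in factories_wrong])   # [2, 2, 2]

# The double-lambda trick binds the current value of "i" to "idx"
# at append time, giving each factory its own copy.
factories_right = []
for i in range(3):
    factories_right.append((lambda idx: lambda: idx)(i))
print([f() for f in factories_right])   # [0, 1, 2]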
Example 2
    clip_range_vf=float('inf'),  # an infinite range effectively disables value-function clipping
    max_grad_norm=0.5  # float('inf') would likewise disable gradient clipping
)

# Create the learning agent according to the chosen algorithm
agent = PPO(MlpPolicy,
            env,
            **config,
            tensorboard_log=tensorboard_data_path,
            verbose=True)

# Load an agent if desired
# agent = PPO2.load("cartpole_ppo2_baseline.pkl")

# Run the learning process
agent.learn(total_timesteps=400000, log_interval=5, reset_num_timesteps=False)

# Save the agent if desired
# agent.save("cartpole_ppo2_baseline.pkl")

### Enjoy a trained agent

# duration of the simulations in seconds
t_end = 20.0

# Get the time step of Jiminy
env.remotes[0].send(('get_attr', 'dt'))
dt = env.remotes[0].recv()

# Run the simulation in real-time
obs = env.reset()
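
The snippet stops right after the reset. A playback loop consistent with the t_end and dt values above might look like the sketch below (an assumption, relying only on the standard SB3 predict/step API and on the environment exposing render()):

import time

obs = env.reset()
for _ in range(int(t_end / dt)):
    # Query the trained policy and advance the simulation by one step
    action, _states = agent.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    time.sleep(dt)  # crude pacing to keep playback roughly real-time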
Example 3
    # env = DummyVecEnv([lambda: get_env(game, state, scenario)])
    # env = VecNormalize(env, norm_obs=True, norm_reward=False)
    env = VecCheckNan(env, raise_exception=True)

    # Create a callback to save every n timesteps
    prefix = "ppo_" + game + "_" + experiment_id
    checkpoint_callback = CheckpointCallback(
        save_freq=100000,
        save_path="C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models",
        name_prefix=prefix)

    savefile_name = prefix + "_final"

    savefile_name = os.path.join(
        "C:\\Projects\\OpenAI Games\\retro-ai-hacking\\models", savefile_name)

    model = PPO(
        CnnPolicy,
        env,
        verbose=1,
        n_steps=128,
        n_epochs=3,
        learning_rate=2.5e-4,
        batch_size=32,
        ent_coef=0.01,
        vf_coef=1.0,
        tensorboard_log="C:\\Projects\\OpenAI Games\\retro-ai-hacking\\tb_logs"
    )
    model.learn(total_timesteps=1000000, callback=checkpoint_callback)
    model.save(savefile_name)
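
As a usage note, the final save (and any checkpoint written by CheckpointCallback) can be restored later with the standard SB3 loading API; a short sketch reusing the savefile_name defined above:

from stable_baselines3 import PPO

# Reload the saved agent; passing the env allows further learn() calls
model = PPO.load(savefile_name, env=env)

obs = env.reset()
action, _states = model.predict(obs, deterministic=True)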