Example 1
# External dependencies (OpenAI Baselines utilities); PPO_RARL, policy_fn,
# plot_callback and log_dir are project-level names defined elsewhere.
import logging

import gym
import baselines.common.tf_util as U
from baselines.common import set_global_seeds
from baselines.bench import Monitor


def train(env_id, num_iters, seed, n=1, success_reward=1000, save_path='model/new_model'):

    # start and enter a TensorFlow session backed by 4 CPU threads
    U.make_session(num_cpu=4).__enter__()
    set_global_seeds(seed)

    env = gym.make(env_id)
    # env.update_adversary(n)
    # wrap the training env with a Monitor that logs episode stats to log_dir
    env = Monitor(env, log_dir, allow_early_resets=True)
    env.seed(seed)

    # separate environment used for evaluation
    test_env = gym.make(env_id)
    test_env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    # debug: the vectorised variant below is not working yet
    # num_cpu = 4
    # env = SubprocVecEnv([make_env(env_id, i, seed) for i in range(num_cpu)])

    rew = PPO_RARL.learn(env, test_env, policy_fn,
                         timesteps_per_batch=2048,
                         clip_param=0.2, entcoeff=0.0,
                         optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                         gamma=0.99, lam=0.95, schedule='constant', success_reward=success_reward,
                         save_path=save_path, max_iters=num_iters, callback=plot_callback
                         )

    env.close()

    return rew
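
A minimal usage sketch (not from the original project): it assumes the imports and module-level names above are in place; the environment id and numeric values are illustrative placeholders.

# Hypothetical driver for Example 1; env id, iteration count and reward
# threshold are placeholders, not values from the original project.
if __name__ == '__main__':
    final_reward = train(env_id='Hopper-v2',
                         num_iters=500,
                         seed=0,
                         n=1,
                         success_reward=3000,
                         save_path='model/hopper_rarl')
    print('reward returned by PPO_RARL.learn:', final_reward)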
Example 2
# External dependencies (OpenAI Baselines utilities); PPO, policy_fn,
# plot_callback and log_dir are project-level names defined elsewhere.
import logging
import os.path as osp

import gym
import baselines.common.tf_util as U
from baselines import bench, logger
from baselines.common import set_global_seeds
from baselines.bench import Monitor


def train(env_id, num_iters, seed, success_reward, save_path, q):
    # start and enter a single-CPU TensorFlow session
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    # wrap the training env with a Monitor that logs episode stats to log_dir
    env = Monitor(env, log_dir, allow_early_resets=True)
    test_env = gym.make(env_id)

    # wrap again with bench.Monitor so monitor.json is written to the logger
    # directory (note: the env ends up wrapped by two monitors)
    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), "monitor.json"))
    env.seed(seed)
    test_env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    rew = PPO.learn_with_human(env, test_env, policy_fn,
                    max_iters=num_iters,
                    timesteps_per_batch=2048,
                    clip_param=0.2, entcoeff=0.0,
                    optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                    gamma=0.99, lam=0.95, schedule='constant', success_reward=success_reward,
                    save_path=save_path, callback=plot_callback, data_queue=q,
                    )
    env.close()
    return rew
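
A hedged usage sketch for the human-in-the-loop variant, assuming q is a multiprocessing queue that carries per-iteration data out of the training process; every name and value below is illustrative, not taken from the original project.

# Hypothetical driver for Example 2: run training in a child process and
# drain the data queue from the parent. All values are placeholders.
import queue
from multiprocessing import Process, Queue

if __name__ == '__main__':
    q = Queue()
    worker = Process(target=train,
                     args=('Pendulum-v0', 300, 0, 1000, 'model/pendulum_human', q))
    worker.start()
    while worker.is_alive() or not q.empty():
        try:
            print('progress item from the learner:', q.get(timeout=1.0))
        except queue.Empty:
            continue
    worker.join()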