def train(env_id, num_iters, seed, n=1, success_reward=1000, save_path='model/new_model'):
    U.make_session(num_cpu=4).__enter__()
    set_global_seeds(seed)

    # Training environment, wrapped in a Monitor so episode rewards/lengths
    # are logged to log_dir for plot_callback.
    env = gym.make(env_id)
    # env.update_adversary(n)
    env = Monitor(env, log_dir, allow_early_resets=True)
    env.seed(seed)

    # Separate environment used for evaluation during training.
    test_env = gym.make(env_id)
    test_env.seed(seed)

    gym.logger.setLevel(logging.WARN)

    # Vectorized rollouts -- disabled for now (debugging not working):
    # num_cpu = 4
    # env = SubprocVecEnv([make_env(env_id, i, seed) for i in range(num_cpu)])

    rew = PPO_RARL.learn(env, test_env, policy_fn,
                         timesteps_per_batch=2048,
                         clip_param=0.2, entcoeff=0.0,
                         optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                         gamma=0.99, lam=0.95,
                         schedule='constant',
                         success_reward=success_reward,
                         save_path=save_path,
                         max_iters=num_iters,
                         callback=plot_callback)
    env.close()
    return rew
def train(env_id, num_iters, seed, success_reward, save_path, q):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)

    # Training environment, wrapped in a Monitor so episode statistics are
    # logged to log_dir for plot_callback.
    env = gym.make(env_id)
    env = Monitor(env, log_dir, allow_early_resets=True)

    # Separate environment used for evaluation during training.
    test_env = gym.make(env_id)

    # Additional baselines Monitor that writes monitor.json to the logger
    # directory (on top of the log_dir Monitor above).
    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(seed)
    test_env.seed(seed)

    gym.logger.setLevel(logging.WARN)

    rew = PPO.learn_with_human(env, test_env, policy_fn,
                               max_iters=num_iters,
                               timesteps_per_batch=2048,
                               clip_param=0.2, entcoeff=0.0,
                               optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                               gamma=0.99, lam=0.95,
                               schedule='constant',
                               success_reward=success_reward,
                               save_path=save_path,
                               callback=plot_callback,
                               data_queue=q)
    env.close()
    return rew
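# Hypothetical usage sketch, not part of the original scripts: the environment
# id, seed, iteration count, and save path below are illustrative assumptions.
# The human-in-the-loop variant expects a multiprocessing Queue that a separate
# process can fill; it is handed to PPO.learn_with_human as data_queue.
if __name__ == '__main__':
    from multiprocessing import Queue

    feedback_queue = Queue()
    train('Hopper-v2', num_iters=500, seed=0,
          success_reward=1000, save_path='model/new_model',
          q=feedback_queue)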