import multiprocessing
import sys

import gym
import numpy as np
import tensorflow as tf
import torch

# DummyVecEnv, SubprocVecEnv, VecFrameStack, VecNormalize, Monitor, logger,
# get_session, make_env/make_vec_env/make_atari_env, get_env_type, the arg
# parsers, Agent, fit, and the policy classes come from the surrounding
# project, which follows the OpenAI baselines API.


def make_mujoco_env(env_id, seed, normalize=False, training=True):
    """Build a single seeded MuJoCo env wrapped in a DummyVecEnv,
    optionally with observation/return normalization."""
    def make_env():
        env = gym.make(env_id)
        env.seed(seed)
        return env

    env = DummyVecEnv([make_env])
    # Seed the global RNGs too, so the whole run is reproducible.
    np.random.seed(seed)
    torch.manual_seed(seed)
    if normalize:
        # training=False is typically passed at evaluation time to freeze
        # the running normalization statistics.
        env = VecNormalize(env, training=training)
    return env
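# Usage sketch (not in the original source; 'HalfCheetah-v2' is just an
# example env id):
#
#   venv = make_mujoco_env('HalfCheetah-v2', seed=0, normalize=True)
#   obs = venv.reset()                  # shape: (1,) + observation_space.shape
#   obs, rews, dones, infos = venv.step([venv.action_space.sample()])
#   venv.close()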
def build_env4gail(args, nenv):
    """Build `nenv` monitored, normalized envs in subprocesses for GAIL."""
    def make_env(rank):
        def _thunk():
            env = gym.make(args.env_id)
            # Per-worker seed, to make results reproducible; offsetting by
            # rank keeps the parallel envs from replaying identical episodes.
            env.seed(args.seed + rank)
            env = Monitor(env, logger.get_dir(), allow_early_resets=True)
            return env
        return _thunk

    envs = SubprocVecEnv([make_env(i) for i in range(nenv)])
    envs = VecNormalize(envs)
    return envs
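# Usage sketch (hypothetical; `args` only needs the two fields read above).
# SubprocVecEnv starts worker processes, so call this under an
# `if __name__ == '__main__':` guard on spawn-based platforms.
#
#   from argparse import Namespace
#   envs = build_env4gail(Namespace(env_id='Hopper-v2', seed=0), nenv=4)
#   obs = envs.reset()                  # shape: (4,) + observation_space.shape
#   envs.close()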
def build_env(args):
    """Build envs the way baselines' run.py does: frame-stacked vec envs for
    Atari/Retro, and a (normalized, for MuJoCo) vec env otherwise."""
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2  # use half the reported cores on macOS
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        # Single-threaded TF session; defaults to one env unless --num_env is set.
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)
        if env_type == 'mujoco':
            env = VecNormalize(env)
    return env
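# Usage sketch (hypothetical arg values; the field names match what
# build_env reads):
#
#   from argparse import Namespace
#   args = Namespace(env='PongNoFrameskip-v4', alg='ppo2', num_env=8, seed=0,
#                    gamestate=None, reward_scale=1.0)
#   env = build_env(args)   # Atari branch: vec env + 4-frame stacking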
def main():
    parser = arg_parser()
    parser.add_argument('--platform', help='environment choice',
                        choices=['atari', 'mujoco', 'humanoid', 'robotics'],
                        default='atari')
    platform_args, environ_args = parser.parse_known_args()
    platform = platform_args.platform
    logger.configure()

    # atari
    if platform == 'atari':
        parser = atari_arg_parser()
        parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                            default='cnn')
        args = parser.parse_known_args()[0]
        # fit(args.env, num_timesteps=args.num_timesteps, seed=args.seed,
        #     policy=args.policy)
        sess = Agent().init_session().__enter__()
        env = VecFrameStack(make_atari_env(args.env, 8, args.seed), 4)
        policy = {'cnn': Convnet, 'lstm': Lstm,
                  'lnlstm': LnLstm, 'mlp': Mlp}[args.policy]
        fit(
            policy=policy,
            env=env,
            nsteps=128,
            nminibatches=8,
            lam=0.95,
            gamma=0.99,
            noptepochs=4,
            log_interval=1,
            ent_coef=0.01,
            lr=lambda f: f * 2.5e-4,        # linearly annealed learning rate
            cliprange=lambda f: f * 0.1,    # linearly annealed clip range
            total_timesteps=int(args.num_timesteps * 1.1),
        )
        sess.close()
        env.close()
        del sess

    # mujoco
    if platform == 'mujoco':
        args = mujoco_arg_parser().parse_known_args()[0]
        sess = Agent().init_session().__enter__()
        from utils.monitor import Monitor

        def make_env():
            # Build the raw gym env here: make_mujoco_env already returns a
            # DummyVecEnv, which Monitor (a gym.Wrapper) cannot wrap.
            env = gym.make(args.env)
            env.seed(args.seed)
            env = Monitor(env, logger.get_dir(), allow_early_resets=True)
            return env

        env = DummyVecEnv([make_env])
        env = VecNormalize(env)
        model = fit(
            policy=Mlp,
            env=env,
            nsteps=2048,
            nminibatches=32,
            lam=0.95,
            gamma=0.99,
            noptepochs=10,
            log_interval=1,
            ent_coef=0.0,
            lr=3e-4,
            cliprange=0.2,
            total_timesteps=args.num_timesteps,
        )
        # return model, env
        if args.play:
            logger.log("Running trained model")
            obs = np.zeros((env.num_envs,) + env.observation_space.shape)
            obs[:] = env.reset()
            while True:
                actions = model.step(obs)[0]
                obs[:] = env.step(actions)[0]
                env.render()
        sess.close()
        env.close()
        del sess
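# Standard script entry point; the SubprocVecEnv-based builders above also
# need this guard on platforms that spawn worker processes.
if __name__ == '__main__':
    main()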