def thunk_plus():
    """Prepare the environment factory, fork, then invoke ``thunk``.

    ``kwargs``, ``num_cpu``, ``thunk`` and ``mpi_fork`` are not defined in
    this function; they are resolved from the surrounding scope.

    If ``kwargs`` carries an ``'env_name'`` entry, it is replaced by an
    ``'env_fn'`` entry: a zero-argument callable that returns
    ``gym.make(env_name)``. Afterwards ``mpi_fork(num_cpu)`` is called and
    ``thunk`` is run with the (possibly rewritten) keyword arguments.
    """
    if 'env_name' in kwargs:
        # gym is imported only when an environment actually has to be built
        # from its name.
        import gym

        # Swap the 'env_name' entry for an 'env_fn' factory that closes
        # over the popped name.
        env_id = kwargs.pop('env_name')
        kwargs['env_fn'] = lambda: gym.make(env_id)

    # Fork into multiple processes
    mpi_fork(num_cpu)

    # Run thunk
    thunk(**kwargs)
if __name__ == "__main__":
    # Script entry point: parse the command-line options, call mpi_fork with
    # the requested CPU count, then launch trpo with the parsed settings.
    import argparse

    # One row per command-line option: (option strings, value type, default).
    # Rows are registered in this order.
    option_table = (
        (("--env",), str, "HalfCheetah-v2"),
        (("--hid",), int, 64),
        (("--l",), int, 2),
        (("--gamma",), float, 0.99),
        (("--seed", "-s"), int, 0),
        (("--cpu",), int, 4),
        (("--steps",), int, 4000),
        (("--epochs",), int, 50),
        (("--exp_name",), str, "trpo"),
    )

    parser = argparse.ArgumentParser()
    for flags, value_type, fallback in option_table:
        parser.add_argument(*flags, type=value_type, default=fallback)
    args = parser.parse_args()

    mpi_fork(args.cpu)  # run parallel code with mpi

    # Deliberately imported after the mpi_fork call, keeping the original
    # statement order.
    from fireup.utils.run_utils import setup_logger_kwargs

    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    # --hid gives the width of each hidden layer, --l the number of layers.
    ac_kwargs = {"hidden_sizes": [args.hid] * args.l}

    trpo(
        # Kept as a lambda so the callable handed to trpo is unchanged.
        lambda: gym.make(args.env),
        actor_critic=core.ActorCritic,
        ac_kwargs=ac_kwargs,
        gamma=args.gamma,
        seed=args.seed,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        logger_kwargs=logger_kwargs,
    )