def train(self, **kwargs):
    """Run the training algorithm to optimize model parameters for the
    environment provided.

    Keyword arguments override the algorithm's preset defaults. The special
    key ``render_saves`` (bool, default False) is consumed here rather than
    passed to the training function: when True, every saved checkpoint is
    rendered to an mp4 after training finishes.
    """
    # Copy the preset dict before applying user overrides: the original
    # code called .update() on PRESETS[...] itself, mutating the shared
    # defaults table and corrupting every subsequent train() call.
    preset_kwargs = dict(PRESETS[self.training_alg])
    preset_kwargs.update(kwargs)

    # pop() reads and removes the flag in one step; it is not a parameter
    # of the training function and must not be forwarded to it.
    render_saves = preset_kwargs.pop('render_saves', False)

    # Dynamically import the algorithm entry point,
    # e.g. from algos.vpg.vpg import vpg
    mod = import_module("algos.{}.{}".format(self.training_alg,
                                             self.training_alg))
    method = getattr(mod, self.training_alg)

    if self.actorCritic is None:
        # Fall back to the algorithm's default actor-critic,
        # e.g. from algos.vpg.core import MLPActorCritic
        core = import_module("algos.{}.core".format(self.training_alg))
        self.actorCritic = getattr(core,
                                   DEFAULT_ACTOR_CRITIC[self.training_alg])

    # Prepare MPI if self.ncpu > 1 (and supported by the chosen algorithm).
    mpi_fork(self.ncpu)  # run parallel code with mpi

    # Update logger kwargs and hand them to the training function.
    logger_kwargs = setup_logger_kwargs(self.exp_name, preset_kwargs['seed'])
    preset_kwargs['logger_kwargs'] = logger_kwargs

    # Begin training.
    method(self.env, actor_critic=self.actorCritic, **preset_kwargs)

    # Render every intermediate checkpoint the run saved, if requested.
    if render_saves:
        log_dir = logger_kwargs['output_dir'] + os.sep + 'pyt_save' + os.sep
        # The first match is the final checkpoint, whose file name carries
        # no iteration number — skip it.
        fnames = glob.glob(log_dir + 'model*.pt')[1:]
        for checkpoint in fnames:
            # Extract the epoch number from a name like 'model40.pt'.
            itr = re.search('model(.*).pt', checkpoint).group(1)
            render_kwargs = {
                'filename': '/gym_animation_' + str(itr) + '.mp4',
                'model_itr': itr
            }
            self.render(save=True, show=False, seed=self.seed,
                        **render_kwargs)
def thunk_plus():
    """Fork MPI workers, then invoke ``thunk`` with the captured kwargs.

    If the enclosing scope supplied an ``env_name`` entry, it is replaced
    by an ``env_fn`` factory before the call.
    """
    if 'env_name' in kwargs:
        import gym
        # pop() removes the key and yields its value in one step; binding
        # env_name locally keeps the lambda immune to later kwargs edits.
        env_name = kwargs.pop('env_name')
        kwargs['env_fn'] = lambda: gym.make(env_name)
    # Fork into multiple processes.
    mpi_fork(num_cpu)
    # Run the wrapped thunk.
    thunk(**kwargs)
def main():
    """Entry point: train a PPO agent on 3DBall or evaluate a saved model.

    Set ``model_path`` to None to train from scratch; point it at an
    experiment directory to run inference on that checkpoint instead.
    """
    model_path = "experiments/20210403_19:22:15_ppo"
    # model_path = None
    agent_file = "environments/3DBall_single/3DBall_single.x86_64"

    if model_path is None:
        # Training: fan out across 8 MPI processes.
        mpi_fork(8)
        agent = PPO(lambda: train_environment(agent_file), PPOActorCritic)
        agent.train()
    else:
        # Inference: a single process suffices.
        mpi_fork(1)
        agent = PPO(lambda: inference_environment(agent_file), PPOActorCritic)
        agent.test_model(model_path, 10)
def main():
    """Parse command-line options and launch VPG on the requested Gym env."""
    import argparse
    p = argparse.ArgumentParser()
    p.add_argument('--env', type=str, default='Pendulum-v0')
    p.add_argument('--hid', type=int, default=64)
    p.add_argument('--l', type=int, default=2)
    p.add_argument('--gamma', type=float, default=0.99)
    p.add_argument('--seed', '-s', type=int, default=0)
    p.add_argument('--cpu', type=int, default=4)
    p.add_argument('--steps', type=int, default=4000)
    p.add_argument('--epochs', type=int, default=70)
    p.add_argument('--exp_name', type=str, default='vpg')
    # parse_known_args tolerates extra flags supplied by the launcher.
    args, _ = p.parse_known_args()

    # Spin up the MPI workers before building the environment.
    mpi_fork(args.cpu)  # run parallel code with mpi

    hidden_sizes = [args.hid] * args.l
    vpg(gym.make(args.env),
        ac_kwargs=dict(hidden_sizes=hidden_sizes),
        gamma=args.gamma,
        seed=args.seed,
        steps_per_epoch=args.steps,
        epochs=args.epochs)
def main():
    """Train a PPO agent on the worm environment, or evaluate a checkpoint.

    ``model_path`` is None -> train (MLflow logging on rank 0 only);
    otherwise load the saved model and run test episodes.
    """
    model_path = None
    agent_file = "worm/worm.x86_64"

    if model_path is not None:
        # Inference path: single process, graphics on, real-time scale.
        mpi_fork(1)
        make_env = lambda: WormGymWrapper(
            agent_file, time_scale=1., no_graphics=False)
        agent = PPO(make_env, PPOActorCritic)
        agent.test_model(model_path, 10)
        return

    # Training path: fan out across 4 MPI processes.
    mpi_fork(4)
    # Only rank 0 gets graphics; every other worker runs headless.
    headless = proc_id() != 0
    make_env = lambda: WormGymWrapper(agent_file, headless)
    agent = PPO(make_env, PPOActorCritic, epochs=5)
    if proc_id() == 0:
        # Rank 0 owns the MLflow run so metrics are logged exactly once.
        with mlflow.start_run():
            agent.train()
    else:
        agent.train()
# NOTE(review): this close() appears to be the tail of a function whose body
# starts before this excerpt — confirm its proper indentation in context.
logger.output_file.close()

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    # Environment and network-architecture options.
    parser.add_argument('--env', type=str, default='HalfCheetah-v2')
    parser.add_argument('--hid', type=int, default=64)  # units per hidden layer
    parser.add_argument('--l', type=int, default=2)  # number of hidden layers
    # Training hyperparameters.
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--cpu', type=int, default=4)
    parser.add_argument('--steps', type=int, default=4000)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--exp_name', type=str, default='ppo')
    args = parser.parse_args()

    # Fork the requested number of MPI processes before training starts.
    mpi_fork(args.cpu)  # run parallel code with mpi

    from spinup.utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    # Launch PPO with an MLP actor-critic sized from the CLI options.
    ppo(lambda: gym.make(args.env),
        actor_critic=core.MLPActorCritic,
        ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
        gamma=args.gamma,
        seed=args.seed,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        logger_kwargs=logger_kwargs)
if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--env', type=str, default='HalfCheetah-v2') parser.add_argument('--hid', type=int, default=64) parser.add_argument('--l', type=int, default=2) parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--lam', type=float, default=0.97) parser.add_argument('--seed', '-s', type=int, default=0) parser.add_argument('--cpu', type=int, default=8) parser.add_argument('--episodes-per-epoch', type=int, default=40) parser.add_argument('--epochs', type=int, default=1000) parser.add_argument('--exp_name', type=str, default='gailt') args = parser.parse_args() mpi_fork(args.cpu) from utils.run_utils import setup_logger_kwargs logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) # policyg(lambda: gym.make(args.env), actor_critic=ActorCritic, ac_kwargs=dict(hidden_dims=[args.hid]*args.l), # gamma=args.gamma, lam=args.lam, seed=args.seed, episodes_per_epoch=args.episodes_per_epoch, # epochs=args.epochs, logger_kwargs=logger_kwargs) gail(lambda: gym.make(args.env), actor_critic=ActorCritic, ac_kwargs=dict(hidden_dims=[args.hid] * args.l), disc=Discriminator, dc_kwargs=dict(hidden_dims=[args.hid] * args.l), gamma=args.gamma, lam=args.lam,