def main():
    """
    Parse the command line and run ``play`` from a previously saved path.

    Extends the parser returned by ``atari_arg_parser()`` with the
    ``--load-path``, ``--policy``, ``--lr_schedule``, ``--sil-update`` and
    ``--sil-beta`` options, configures the logger, and forwards the parsed
    values to ``play`` with ``num_env`` fixed at 16.

    ``--load-path`` is mandatory: if it is missing, argparse prints the usage
    message and exits with status 2.
    """
    parser = atari_arg_parser()
    parser.add_argument('--load-path', default=None, type=str,
                        help='Path to load from (required)')
    parser.add_argument('--policy', choices=['cnn', 'lstm', 'lnlstm'], default='cnn',
                        help='Policy architecture')
    parser.add_argument('--lr_schedule', choices=['constant', 'linear'], default='constant',
                        help='Learning rate schedule')
    parser.add_argument('--sil-update', type=int, default=4,
                        help="Number of updates per iteration")
    parser.add_argument('--sil-beta', type=float, default=0.1,
                        help="Beta for weighted IS")
    args = parser.parse_args()

    # Explicit check instead of `assert`: asserts are removed under `python -O`,
    # which would let a missing path reach play() as None.
    if args.load_path is None:
        parser.error('--load-path is required')

    logger.configure()
    play(args.env,
         num_timesteps=args.num_timesteps,
         seed=args.seed,
         policy=args.policy,
         lr_schedule=args.lr_schedule,
         num_env=16,
         sil_update=args.sil_update,
         sil_beta=args.sil_beta,
         load_path=args.load_path)
def main():
    """
    Parse the command line and launch training.

    Uses the parser returned by ``atari_arg_parser()`` unchanged, configures
    the logger with its defaults, and calls ``train`` with the parsed
    environment, timestep count and seed; ``num_cpu`` is fixed at 32.
    """
    parser = atari_arg_parser()
    cli = parser.parse_args()

    logger.configure()

    train(
        cli.env,
        num_timesteps=cli.num_timesteps,
        seed=cli.seed,
        num_cpu=32,
    )
def main():
    """
    Parse the command line and launch training with a selectable policy.

    Adds a ``--policy`` option (default ``mlp``) to the parser returned by
    ``atari_arg_parser()``, configures the logger with its defaults, and
    forwards the parsed values to ``train``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        default='mlp',
        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
        help='Policy architecture',
    )
    options = arg_parser.parse_args()

    logger.configure()

    train_kwargs = {
        'num_timesteps': options.num_timesteps,
        'seed': options.seed,
        'policy': options.policy,
    }
    train(options.env, **train_kwargs)
def main():
    """
    Parse the command line, configure logging and launch training.

    Adds ``--policy`` (default ``cnn``), ``--n_envs`` (default 8) and
    ``--logdir`` to the parser returned by ``atari_arg_parser()``.

    When ``--logdir`` is not given, the logger folder falls back to the
    historical location ``/serverdata/rohit/stablebaselines/<env>/ppo`` so
    existing invocations keep writing to the same place.
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm', 'mlp'], default='cnn')
    parser.add_argument('--n_envs', default=8, type=int)
    parser.add_argument('--logdir', default=None, type=str,
                        help='Directory for logging '
                             '(default: /serverdata/rohit/stablebaselines/<env>/ppo)')
    args = parser.parse_args()

    # The default path is machine-specific; --logdir lets other users redirect
    # the output without editing the script.
    log_folder = args.logdir
    if log_folder is None:
        log_folder = '/serverdata/rohit/stablebaselines/{}/ppo'.format(args.env)
    logger.configure(folder=log_folder)

    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          n_envs=args.n_envs,
          policy=args.policy)
def main():
    """
    Parse the command line, configure logging and launch training.

    Adds ``--policy``, ``--lr_schedule`` and ``--logdir`` to the parser
    returned by ``atari_arg_parser()``. The parsed ``logdir`` is passed
    positionally to ``logger.configure``; ``train`` is called with
    ``num_cpu`` fixed at 16.
    """
    parser = atari_arg_parser()

    # Registered in this order so the generated help text lists them the same way.
    extra_flags = (
        ('--policy', dict(choices=['cnn', 'lstm', 'lnlstm'],
                          default='cnn',
                          help='Policy architecture')),
        ('--lr_schedule', dict(choices=['constant', 'linear'],
                               default='constant',
                               help='Learning rate schedule')),
        ('--logdir', dict(help='Directory for logging')),
    )
    for flag, spec in extra_flags:
        parser.add_argument(flag, **spec)

    opts = parser.parse_args()
    logger.configure(opts.logdir)

    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lr_schedule=opts.lr_schedule,
        num_cpu=16,
    )
def main():
    """
    Parse the command line and launch training with TensorBoard options.

    Adds ``--policy``, ``--lr_schedule``, ``--sil-update``, ``--sil-beta``,
    ``--tensorboard-log`` and ``--tb`` to the parser returned by
    ``atari_arg_parser()``, configures the logger with its defaults, and
    forwards everything to ``train`` with ``num_env`` fixed at 16.
    """
    cli_parser = atari_arg_parser()
    cli_parser.add_argument(
        '--policy',
        default='cnn',
        choices=['cnn', 'lstm', 'lnlstm'],
        help='Policy architecture',
    )
    cli_parser.add_argument(
        '--lr_schedule',
        default='constant',
        choices=['constant', 'linear'],
        help='Learning rate schedule',
    )
    cli_parser.add_argument(
        '--sil-update',
        default=4,
        type=int,
        help="Number of updates per iteration",
    )
    cli_parser.add_argument(
        '--sil-beta',
        default=0.1,
        type=float,
        help="Beta for weighted IS",
    )
    cli_parser.add_argument('--tensorboard-log', default='./tf_log/SIL', type=str)
    cli_parser.add_argument('--tb', default='SIL_A2C', type=str)
    ns = cli_parser.parse_args()

    logger.configure()

    train_kwargs = dict(
        num_timesteps=ns.num_timesteps,
        seed=ns.seed,
        policy=ns.policy,
        lr_schedule=ns.lr_schedule,
        num_env=16,
        sil_update=ns.sil_update,
        sil_beta=ns.sil_beta,
        tensorboard_log=ns.tensorboard_log,
        tb_log_name=ns.tb,
    )
    train(ns.env, **train_kwargs)
def main():
    """
    Parse the command line, log under ``logs/<env>/baseline`` and train.

    Adds a ``--policy`` option (default ``cnn``) to the parser returned by
    ``atari_arg_parser()`` and forwards the parsed values to ``train``.
    """
    # Kept function-local, as in the original, so the block does not rely on
    # a module-level import.
    import os

    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy',
                            default='cnn',
                            help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm', 'mlp'])
    parsed = arg_parser.parse_args()

    log_dir = os.path.join('logs', parsed.env, 'baseline')
    logger.configure(log_dir)

    train(
        parsed.env,
        num_timesteps=parsed.num_timesteps,
        seed=parsed.seed,
        policy=parsed.policy,
    )
def main():
    """
    Parse the command line, configure logging and launch training.

    Adds ``--policy``, ``--lr_schedule``, ``--sil-update``, ``--sil-beta``,
    ``--tensorboard-log``, ``--tb``, ``--use-sf`` and ``--use-recons`` to the
    parser returned by ``atari_arg_parser()``. The logger folder is
    ``<tensorboard-log>/<tb>``; the value of ``--use-sf`` is logged before
    ``train`` is called with ``num_env`` fixed at 16.
    """
    parser = atari_arg_parser()

    # (flag, add_argument keyword arguments), in help-output order.
    option_table = [
        ('--policy', {'choices': ['cnn', 'lstm', 'lnlstm'],
                      'default': 'cnn',
                      'help': 'Policy architecture'}),
        ('--lr_schedule', {'choices': ['constant', 'linear'],
                           'default': 'constant',
                           'help': 'Learning rate schedule'}),
        ('--sil-update', {'type': int,
                          'default': 4,
                          'help': "Number of updates per iteration"}),
        ('--sil-beta', {'type': float,
                        'default': 0.1,
                        'help': "Beta for weighted IS"}),
        ('--tensorboard-log', {'type': str, 'default': './sf_log/recons2'}),
        ('--tb', {'type': str, 'default': 'SIL_A2C'}),
        ('--use-sf', {'action': 'store_true'}),
        ('--use-recons', {'action': 'store_true'}),
    ]
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    cfg = parser.parse_args()

    log_folder = "{}/{}".format(cfg.tensorboard_log, cfg.tb)
    logger.configure(folder=log_folder)
    logger.info('use SF {}'.format(cfg.use_sf))

    train(
        cfg.env,
        num_timesteps=cfg.num_timesteps,
        seed=cfg.seed,
        policy=cfg.policy,
        lr_schedule=cfg.lr_schedule,
        num_env=16,
        sil_update=cfg.sil_update,
        sil_beta=cfg.sil_beta,
        use_sf=cfg.use_sf,
        use_recons=cfg.use_recons,
        tensorboard_log=cfg.tensorboard_log,
        tb_log_name=cfg.tb,
    )
def main():
    """
    Parse the command line, seed, configure logging and launch training.

    Adds ``--policy``, ``--peer``, ``--note``, ``--individual``,
    ``--start-episode``, ``--end-episode``, ``--decay-type`` and ``--repeat``
    to the parser returned by ``atari_arg_parser()``. After parsing it:

    1. calls ``set_global_seeds`` with the parsed seed,
    2. configures the logger under ``logs/<env>/<note>`` and logs the
       parsed arguments,
    3. builds a ``Scheduler`` from the start/end episodes and decay type,
    4. calls ``train`` with the parsed values and that scheduler.
    """
    cli = atari_arg_parser()
    cli.add_argument(
        '--policy',
        default='cnn',
        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
        help='Policy architecture',
    )
    cli.add_argument(
        '--peer',
        default=0.,
        type=float,
        help='Coefficient of the peer term. (default: 0)',
    )
    cli.add_argument('--note', default='test', type=str, help='Log path')
    cli.add_argument(
        '--individual',
        default=False,
        action='store_true',
        help='If true, no co-training is applied.',
    )
    cli.add_argument(
        '--start-episode',
        default=0,
        type=int,
        help='Add peer term after this episode.',
    )
    cli.add_argument(
        '--end-episode',
        default=10000,
        type=int,
        help='Remove peer term after this episode.',
    )
    cli.add_argument(
        '--decay-type',
        default=None,
        type=str,
        choices=[None, 'inc', 'dec', 'inc_dec'],
        help='Decay type for alpha',
    )
    cli.add_argument(
        '--repeat',
        default=1,
        type=int,
        help='Repeat training on the dataset in one epoch',
    )
    opts = cli.parse_args()

    set_global_seeds(opts.seed)

    log_dir = os.path.join('logs', opts.env, opts.note)
    logger.configure(log_dir)
    logger.info(opts)

    peer_schedule = Scheduler(
        opts.start_episode,
        opts.end_episode,
        decay_type=opts.decay_type,
    )

    train_kwargs = dict(
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        peer=opts.peer,
        scheduler=peer_schedule,
        individual=opts.individual,
        repeat=opts.repeat,
    )
    train(opts.env, **train_kwargs)
def main():
    """
    Parse the command line and launch training.

    Uses the parser returned by ``atari_arg_parser()`` unchanged and passes
    the parsed environment, timestep count and seed straight to ``train``.
    """
    parser = atari_arg_parser()
    parsed = parser.parse_args()
    train(
        parsed.env,
        num_timesteps=parsed.num_timesteps,
        seed=parsed.seed,
    )