def main():
    """Train A2C on Breakout, logging to a fresh timestamped directory."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    cli = arg_parser.parse_args()
    # Microsecond-precision timestamp keeps concurrent runs from colliding.
    run_dir = datetime.datetime.now().strftime("../logs/%Y-%m-%d-%H-%M-%S-%f")
    print(f"Logdir: {os.path.abspath(run_dir)}")
    logger.configure(
        dir=run_dir,
        format_strs=['stdout', 'tensorboard'],
    )
    train('BreakoutNoFrameskip-v4',
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=16)
def main():
    """Launch training on Sonic SpringYardZone.Act1 with a timestamped log dir."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    cli = arg_parser.parse_args()
    # One directory per run, named by launch time.
    run_dir = './logs/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    logger.configure(run_dir)
    game = 'SonicTheHedgehog-Genesis'
    state = 'SpringYardZone.Act1'
    train(game=game,
          state=state,
          num_timesteps=1e8,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_cpu=16,
          logdir=run_dir)
def main():
    """Replay a trained Atari A2C model via ``enjoy``."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--model-filename', help='Trained model filename',
                            default='atari_a2c.gz')
    cli = arg_parser.parse_args()
    logger.configure()
    enjoy(cli.env, cli.seed, cli.policy, cli.model_filename)
def main(): """ Runs the test """ args = atari_arg_parser().parse_args() logger.configure() train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, num_cpu=32)
def main():
    """Train A2C with self-imitation learning (SIL) extensions."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    arg_parser.add_argument('--sil-update', type=int, default=4,
                            help="Number of updates per iteration")
    arg_parser.add_argument('--sil-beta', type=float, default=0.1,
                            help="Beta for weighted IS")
    arg_parser.add_argument('--log', default='/tmp/a2c')
    cli = arg_parser.parse_args()
    logger.configure(dir=cli.log)
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          sil_update=cli.sil_update,
          sil_beta=cli.sil_beta,
          num_env=16)
def main():
    """Train with a configurable policy parameterization and env count."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    arg_parser.add_argument('--param', help='parameters of policy',
                            type=str, default='action')
    arg_parser.add_argument('--nenv', help='num of env', type=int, default=16)
    cli = arg_parser.parse_args()
    print(cli.env)
    # Log path encodes env, seed, policy, and param so runs are self-describing.
    stamp = datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f")
    log_path = (f"./trainlog/{cli.env}/"
                f"seed_{cli.seed}_{cli.policy}_{cli.param}/"
                f"{cli.env}_{stamp}")
    logger.configure(log_path)
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=cli.nenv,
          param=cli.param)
def main():
    """Train with one of four policy architectures (incl. mlp)."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                            default='cnn')
    cli = arg_parser.parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy)
def main():
    """Train from a JSON hyperparameter file, optionally pinning a CUDA GPU."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--hparams_path', help='Load json hparams from this file',
                            type=str, default='')
    arg_parser.add_argument('--gpu_num', help='cuda gpu #', type=str, default='')
    cli = arg_parser.parse_args()
    with open(cli.hparams_path, 'r') as fh:
        hparams = json.load(fh)
    # CLI GPU selection wins over the hparams file; -1 means CPU-only.
    if cli.gpu_num:
        assert (int(cli.gpu_num) >= -1 and int(cli.gpu_num) <= 8)
        os.environ['CUDA_VISIBLE_DEVICES'] = cli.gpu_num
    elif 'gpu_num' in hparams:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(hparams.get('gpu_num'))
    log_path = os.path.join(hparams['base_dir'], 'logs', hparams['experiment_name'])
    logger.configure(dir=log_path)
    print('experiment_params: {}'.format(hparams))
    print('chosen env: {}'.format(hparams['env_id']))
    # Falsy/missing atari_seed falls back to 0, matching the original logic.
    seed = hparams['atari_seed'] if hparams.get('atari_seed') else 0
    train(hparams['env_id'],
          num_timesteps=cli.num_timesteps,
          seed=seed,
          policy=hparams['policy'],
          hparams=hparams)
def main():
    """Train an I2A-style agent; extra hyperparameters are forwarded via args."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['i2a', 'cnn', 'lstm', 'lnlstm'],
                            default='i2a')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    arg_parser.add_argument('--lr', help='Learning rate', type=float, default=7e-4)
    arg_parser.add_argument('--lambda_dist', help='Distillation loss weight',
                            type=float, default=0.01)
    arg_parser.add_argument('--max_grad_norm', help='Max grad norm',
                            type=float, default=0.5)
    cli = arg_parser.parse_args()
    logger.configure()
    # The full namespace is passed through so train() can read lr/lambda_dist/etc.
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=16,
          args=cli)
def main():
    """Train A2C with configurable logging destination and formats.

    Fix: the original re-registered ``--num-timesteps`` even though
    ``atari_arg_parser()`` already defines it (every other launcher in this
    file reads ``args.num_timesteps`` without adding the option), which makes
    argparse raise a conflicting-option error at startup. The duplicate
    registration is removed; the parser's own default applies.
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule',
                        choices=['constant', 'linear'], default='constant')
    # NOTE(review): '--num-timesteps' is provided by atari_arg_parser();
    # re-adding it here raised argparse.ArgumentError.
    parser.add_argument('--log-dir',
                        help='Log directory where all logs will be written',
                        default=None)
    parser.add_argument('--log-formats',
                        help='Formats in which the logs will be written.',
                        default=None)
    args = parser.parse_args()
    logger.configure(args.log_dir, args.log_formats)
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          policy=args.policy,
          lrschedule=args.lrschedule,
          num_cpu=16)
def main():
    """Train A2C and periodically persist the model to a named file."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    arg_parser.add_argument('--model-filename', help='Trained model filename',
                            default='atari_a2c.gz')
    cli = arg_parser.parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=16,
          model_filename=cli.model_filename)
def main():
    """Standard A2C Atari launcher with 16 parallel environments."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    cli = arg_parser.parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=16)
def main():
    """Standard A2C Atari launcher with 16 parallel environments.

    Fix: dropped the legacy Python-2 ``u''`` string prefixes — they are
    redundant on Python 3 and inconsistent with every other launcher in
    this file. Behavior is unchanged.
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule',
                        choices=['constant', 'linear'], default='constant')
    args = parser.parse_args()
    logger.configure()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          policy=args.policy,
          lrschedule=args.lrschedule,
          num_env=16)
def main():
    """Train with 24 CPU workers and 24 parallel environments."""
    cli = atari_arg_parser().parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          num_cpu=24,
          num_env=24)
def main():
    """Train with configurable log directory and log output formats."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--log-dir',
                            help='Log directory where all logs will be written',
                            default=None)
    arg_parser.add_argument('--log-formats',
                            help='Formats in which the logs will be written.',
                            default=None)
    cli = arg_parser.parse_args()
    logger.configure(cli.log_dir, cli.log_formats)
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy)
def main():
    """Train for a fixed 1e8 steps on 2 CPUs, logging to a timestamped dir."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    cli = arg_parser.parse_args()
    run_dir = './logs/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    logger.configure(run_dir)
    # NOTE(review): num_timesteps is pinned to 1e8 here, ignoring the parser's
    # --num-timesteps option — presumably intentional; confirm before reuse.
    train(cli.env,
          num_timesteps=1e8,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_cpu=2,
          logdir=run_dir)
def main():
    """Train with an extra 'caps' policy choice.

    Fix: removed the unused local ``debug_timesteps = int(1e4)`` — it was
    assigned but never read (dead leftover from a debug toggle).
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm', 'caps'],
                        default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule',
                        choices=['constant', 'linear'], default='constant')
    args = parser.parse_args()
    logger.configure()
    run_timesteps = int(args.num_timesteps)
    train(args.env,
          num_timesteps=run_timesteps,
          seed=args.seed,
          policy=args.policy,
          lrschedule=args.lrschedule,
          num_env=16)
def main():
    """Train on Breakout for a fixed 4e7 steps, logging under ./breakout.

    Fixes: the local ``dir`` shadowed the ``dir`` builtin (renamed to
    ``log_dir``), and ``4*1e7`` produced a float step count (now an int,
    same value).
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule',
                        choices=['constant', 'linear'], default='constant')
    args = parser.parse_args()
    # Override whatever --num-timesteps was parsed; this launcher pins 4e7.
    args.num_timesteps = int(4e7)
    log_dir = osp.join('breakout',
                       datetime.datetime.now().strftime("Test-%Y-%m-%d-%H-%M-%S-%f"))
    logger.configure(dir=log_dir)
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          policy=args.policy,
          lrschedule=args.lrschedule,
          num_env=16)
def main():
    """Train A2C with replay-regularization options.

    Fix: ``--replay_lambda`` and ``--ss_rate`` had integer defaults but no
    ``type=``, so any value supplied on the command line arrived as a *string*
    while the default stayed numeric — downstream arithmetic would then
    behave differently depending on whether the flag was passed.
    ``replay_lambda`` is a loss weight, so it is parsed as float; ``ss_rate``
    (a subsampling rate) as int — TODO confirm intended types against train().
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule',
                        choices=['constant', 'linear'], default='constant')
    parser.add_argument('--replay_lambda', help='Replay regularizer parameter',
                        type=float, default=1)
    parser.add_argument('--ss_rate', help='Subsampling rate',
                        type=int, default=1)
    parser.add_argument('--replay_loss', help='Replay loss, if any',
                        choices=['L2', 'Distillation'], default=None)
    parser.add_argument('--thetas', help='List of thetas to invert over',
                        nargs='*', default=None)
    args = parser.parse_args()
    logger.configure()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          policy=args.policy,
          lrschedule=args.lrschedule,
          num_env=16,
          replay_lambda=args.replay_lambda,
          ss_rate=args.ss_rate,
          replay_loss=args.replay_loss,
          thetas=args.thetas)
def main():
    """Train with configurable log directory/formats on 32 CPUs.

    Fix: the option is registered as ``--log-formats`` (attribute
    ``log_formats``) but the code read ``args.log_format`` — an
    AttributeError on every run. Now reads ``args.log_formats``.
    """
    parser = atari_arg_parser()
    parser.add_argument('--log-dir',
                        help='Log directory where all logs will be written',
                        default=None)
    parser.add_argument('--log-formats',
                        help='Formats in which the logs will be written.',
                        default=None)
    args = parser.parse_args()
    logger.configure(args.log_dir, args.log_formats)
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          num_cpu=32)
def main():
    """Run ACER training driven by an optional flags config file."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--flags', '-f', help="flags cfg file", default=None)
    cli = arg_parser.parse_args()
    # Fall back to default flags when no config file is given.
    if cli.flags:
        flags = AcerFlags.from_cfg(cli.flags)
    else:
        flags = AcerFlags()
    logger.configure(flags.log_dir)
    env = make_atari_env(cli.env, num_env=flags.num_env, seed=flags.seed)
    policy_fn = models.get(cli.policy)
    learn(policy_fn, env, flags)
    env.close()
def main():
    """Train with a hard-coded BeamRider log path (Loss_1, Run_0)."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    cli = arg_parser.parse_args()
    # NOTE(review): log path is hard-coded regardless of --env; the resulting
    # directory is identical to the original "./log/BeamRider/Loss_1_Run_0".
    logger.configure("./log/BeamRider/Loss_1_Run_0")
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy)
def main():
    """Resume Sonic training from a saved checkpoint on SpringYardZone.Act1."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    cli = arg_parser.parse_args()
    run_dir = './logs/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    logger.configure(run_dir)
    game = 'SonicTheHedgehog-Genesis'
    state = 'SpringYardZone.Act1'
    # Checkpoint selection: files are named "<steps>_<rewards>" under the
    # directory of the previous run they continue from.
    load_model_steps = 4992
    load_model_rewards = 5163
    load_path = "logs/3833_4992/{}_{}".format(load_model_steps, load_model_rewards)
    load_info = {
        'path': load_path,
        'steps': load_model_steps,
        'rewards': load_model_rewards,
    }
    train(game=game,
          state=state,
          num_timesteps=1e8,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_cpu=1,
          logdir=run_dir,
          load_info=load_info)
def main():
    """Train from a JSON hparams file with optional checkpoint restore."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    arg_parser.add_argument('--hparams_path', help='Load json hparams from this file',
                            type=str, default='')
    arg_parser.add_argument('--gpu_num', help='cuda gpu #', type=str, default='')
    cli = arg_parser.parse_args()
    with open(cli.hparams_path, 'r') as fh:
        hparams = json.load(fh)
    # Explicit CLI GPU wins; otherwise honor the hparams file; -1 means CPU.
    if cli.gpu_num:
        assert (int(cli.gpu_num) >= -1 and int(cli.gpu_num) <= 8)
        os.environ['CUDA_VISIBLE_DEVICES'] = cli.gpu_num
    elif 'gpu_num' in hparams:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(hparams.get('gpu_num'))
    log_path = os.path.join(hparams['base_dir'], 'logs', hparams['experiment_name'])
    print('experiment_params: {}'.format(hparams))
    print('chosen env: {}'.format(hparams['env_id']))
    # Falsy/missing atari_seed falls back to 0, as in the original.
    seed = hparams['atari_seed'] if hparams.get('atari_seed') else 0
    logger.configure(dir=log_path)
    train(env_id=hparams['env_id'],
          num_timesteps=hparams['total_timesteps'],
          seed=seed,
          policy=hparams['policy'],
          lrschedule=cli.lrschedule,
          num_env=hparams['num_env'],
          ckpt_path=hparams['restore_from_ckpt_path'],
          hparams=hparams)
def main():
    """Train for 110M steps with tunable entropy coefficient and LR."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--ent', type=float)
    arg_parser.add_argument('--lr', type=float)
    arg_parser.add_argument('--policy', type=str)
    arg_parser.add_argument('--save_name', default=None, type=str)
    cli = arg_parser.parse_args()
    logger.configure()
    print('saving to:{}'.format(cli.save_name))
    # NOTE(review): 'entrophy' (sic) is the keyword train() expects — the typo
    # lives in the callee's signature and cannot be fixed from here.
    train(num_timesteps=110000000,
          env_name=cli.env,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule='constant',
          num_env=16,
          entrophy=cli.ent,
          lr=cli.lr,
          save_name=cli.save_name)
def main():
    """MPI-aware launcher: only rank 0 writes log output.

    Fix: the non-root branch called ``logger.configure(log_dir=...)``, but
    every other call site in this file uses the keyword ``dir=`` (and the
    rank-0 branch here passes the directory positionally) — so ``log_dir=``
    was an unexpected-keyword TypeError. Changed to ``dir=``.
    """
    parser = atari_arg_parser()
    parser.add_argument('--log-dir',
                        help='Log directory where all logs will be written',
                        default=None)
    parser.add_argument('--log-formats',
                        help='Formats in which the logs will be written.',
                        default=None)
    args = parser.parse_args()
    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        logger.configure(args.log_dir, args.log_formats)
    else:
        # Non-root ranks: empty format list silences their output.
        logger.configure(dir=args.log_dir, format_strs=[])
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          rank=rank)
def main():
    """Train an LSTM-default policy, logging in four formats."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='lstm')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    arg_parser.add_argument('--logdir', help='Directory for logging')
    cli = arg_parser.parse_args()
    logger.configure(cli.logdir, ['stdout', 'log', 'csv', 'tensorboard'])
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_cpu=16)
def main():
    """Train A2C, writing logs to a user-chosen directory (default 'log')."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    arg_parser.add_argument('--log_dir', help='the directory to save log file',
                            default='log')
    cli = arg_parser.parse_args()
    logger.configure(dir=cli.log_dir)
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=16)
def main():
    """Train a single environment with the standard A2C Atari options."""
    # atari_arg_parser() already supplies env, seed, and num_timesteps.
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    cli = arg_parser.parse_args()
    logger.configure()
    # num_env=1: this launcher deliberately runs a single environment.
    train(env_id=cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=1)
def main():
    """Single-environment A2C launcher with a debug print of the env id."""
    # atari_arg_parser() already registers --env and the step-count options.
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy', help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    arg_parser.add_argument('--lrschedule', help='Learning rate schedule',
                            choices=['constant', 'linear'], default='constant')
    cli = arg_parser.parse_args()
    logger.configure()
    print("xxxxxxxxxxxxxxxxxxxxxxxx : " + cli.env)
    # train() builds the environments and hands everything to learn().
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          policy=cli.policy,
          lrschedule=cli.lrschedule,
          num_env=1)
def main():
    """Train with an optional penalization term, pinned to one GPU.

    Fix: ``--use-penal`` was declared with ``default=False`` but no
    ``action``/``type``, so any command-line value — including the literal
    string ``"False"`` — was truthy and silently enabled the penalty. It is
    now a standard ``store_true`` flag: absent → False, ``--use-penal`` →
    True.
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                        default='cnn')
    parser.add_argument('--use-penal', help='enable penal', action='store_true')
    parser.add_argument('--gpu', type=int, default=0, help='GPU selection')
    parser.add_argument('--pg-rate', type=float, default=0.0)
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = '%d' % args.gpu
    print("game %s run on GPU: %d" % (args.env, args.gpu))
    # Log directory name encodes env, seed, penalty on/off, and pg rate.
    if args.use_penal:
        run_name = args.env + '_seed_' + str(args.seed) + '_pen' + '_pg' + str(args.pg_rate)
    else:
        run_name = args.env + '_seed_' + str(args.seed) + '_nopen' + '_pg' + str(args.pg_rate)
    logger.configure(run_name, ['log', 'tensorboard'])
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          policy=args.policy,
          args=args)
def main():
    """Train with default logging on 32 CPU workers."""
    cli = atari_arg_parser().parse_args()
    logger.configure()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed,
          num_cpu=32)
def main():
    """Minimal launcher: parse the standard Atari args and train."""
    cli = atari_arg_parser().parse_args()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed)