Beispiel #1
0
def main():
    """Train on Breakout and log into a fresh, timestamped directory.

    The parser returned by ``atari_arg_parser()`` is extended with
    ``--policy`` and ``--lrschedule``.  The environment id is fixed to
    ``BreakoutNoFrameskip-v4`` (``args.env`` is not read) and ``train`` is
    started with ``num_env=16``.
    """
    arg_parser = atari_arg_parser()
    extra_options = (
        ('--policy', dict(help='Policy architecture',
                          choices=['cnn', 'lstm', 'lnlstm'],
                          default='cnn')),
        ('--lrschedule', dict(help='Learning rate schedule',
                              choices=['constant', 'linear'],
                              default='constant')),
    )
    for flag, spec in extra_options:
        arg_parser.add_argument(flag, **spec)
    args = arg_parser.parse_args()

    # One log directory per run, named after the start time (down to
    # microseconds) and placed under ../logs.
    started_at = datetime.datetime.now()
    logdir = started_at.strftime("../logs/%Y-%m-%d-%H-%M-%S-%f")
    print(f"Logdir: {os.path.abspath(logdir)}")
    logger.configure(dir=logdir, format_strs=['stdout', 'tensorboard'])

    train(
        'BreakoutNoFrameskip-v4',
        num_timesteps=args.num_timesteps,
        seed=args.seed,
        policy=args.policy,
        lrschedule=args.lrschedule,
        num_env=16,
    )
Beispiel #2
0
def main():
    """Train on Sonic the Hedgehog, Spring Yard Zone Act 1.

    ``--policy`` and ``--lrschedule`` are added to the parser returned by
    ``atari_arg_parser()``.  The game, the state and the number of timesteps
    (1e8) are hard-coded; only ``seed``, ``policy`` and ``lrschedule`` come
    from the command line.  The same timestamped log directory is given to
    the logger and to ``train``.
    """
    cli = atari_arg_parser()
    cli.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    cli.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    options = cli.parse_args()

    # Log directory is derived from the start time (second resolution).
    stamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    logdir = './logs/' + stamp
    logger.configure(logdir)

    train(
        game='SonicTheHedgehog-Genesis',
        state='SpringYardZone.Act1',
        num_timesteps=1e8,
        seed=options.seed,
        policy=options.policy,
        lrschedule=options.lrschedule,
        num_cpu=16,
        logdir=logdir,
    )
Beispiel #3
0
def main():
    """Parse the command line, set up the logger and call ``enjoy``.

    Adds ``--policy`` and ``--model-filename`` to the parser returned by
    ``atari_arg_parser()`` and forwards env, seed, policy and model filename
    to ``enjoy`` positionally.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--model-filename',
        default='atari_a2c.gz',
        help='Trained model filename',
    )
    opts = arg_parser.parse_args()

    logger.configure()
    enjoy(opts.env, opts.seed, opts.policy, opts.model_filename)
Beispiel #4
0
def main():
    """
    Parse the Atari command line, configure the logger with its defaults
    and start ``train`` with ``num_cpu=32``.
    """
    cli_args = atari_arg_parser().parse_args()
    logger.configure()
    train(
        cli_args.env,
        num_timesteps=cli_args.num_timesteps,
        seed=cli_args.seed,
        num_cpu=32,
    )
def main():
    """Start training with the SIL options exposed on the command line.

    On top of the parser returned by ``atari_arg_parser()`` this adds
    ``--policy``, ``--lrschedule``, ``--sil-update``, ``--sil-beta`` and
    ``--log`` (the logger directory, ``/tmp/a2c`` by default), then calls
    ``train`` with ``num_env=16``.
    """
    arg_parser = atari_arg_parser()
    option_specs = [
        ('--policy', dict(help='Policy architecture',
                          choices=['cnn', 'lstm', 'lnlstm'],
                          default='cnn')),
        ('--lrschedule', dict(help='Learning rate schedule',
                              choices=['constant', 'linear'],
                              default='constant')),
        ('--sil-update', dict(type=int,
                              default=4,
                              help="Number of updates per iteration")),
        ('--sil-beta', dict(type=float,
                            default=0.1,
                            help="Beta for weighted IS")),
        ('--log', dict(default='/tmp/a2c')),
    ]
    for flag, spec in option_specs:
        arg_parser.add_argument(flag, **spec)
    opts = arg_parser.parse_args()

    logger.configure(dir=opts.log)
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        sil_update=opts.sil_update,
        sil_beta=opts.sil_beta,
        num_env=16,
    )
Beispiel #6
0
def main():
    """Train with a log directory that encodes the run configuration.

    Adds ``--policy``, ``--lrschedule``, ``--param`` and ``--nenv`` to the
    parser returned by ``atari_arg_parser()``.  Logs go to
    ``./trainlog/<env>/seed_<seed>_<policy>_<param>/<env>_openai-<timestamp>``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    arg_parser.add_argument(
        '--param',
        type=str,
        default='action',
        help='parameters of policy',
    )
    arg_parser.add_argument('--nenv', type=int, default=16, help='num of env')
    args = arg_parser.parse_args()
    print(args.env)

    # Directory layout: environment / run configuration / timestamped run.
    config_dir = "seed_" + str(args.seed) + "_" + args.policy + "_" + args.param
    run_dir = args.env + "_" + datetime.datetime.now().strftime(
        "openai-%Y-%m-%d-%H-%M-%S-%f")
    path = "/".join(["./trainlog", args.env, config_dir, run_dir])
    logger.configure(path)

    train(
        args.env,
        num_timesteps=args.num_timesteps,
        seed=args.seed,
        policy=args.policy,
        lrschedule=args.lrschedule,
        num_env=args.nenv,
        param=args.param,
    )
Beispiel #7
0
def main():
    """Parse the command line (adding ``--policy``) and start ``train``.

    The policy may be one of ``cnn``, ``lstm``, ``lnlstm`` or ``mlp``; the
    logger is configured with its defaults.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
        default='cnn',
        help='Policy architecture',
    )
    opts = arg_parser.parse_args()

    logger.configure()
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
    )
Beispiel #8
0
def main():
    """Train using hyper-parameters loaded from a JSON file.

    Adds ``--hparams_path`` and ``--gpu_num`` to the parser returned by
    ``atari_arg_parser()``.  The JSON file supplies ``env_id``, ``policy``,
    ``base_dir``, ``experiment_name`` and, optionally, ``gpu_num`` and
    ``atari_seed``.  ``--gpu_num`` on the command line takes precedence over
    the ``gpu_num`` entry of the file.

    An invalid ``--gpu_num`` (not an integer, or outside ``-1..8``) is
    reported through ``parser.error`` instead of an ``assert``, so the check
    also runs under ``python -O`` and produces a usage message.
    """
    parser = atari_arg_parser()
    parser.add_argument('--hparams_path', help='Load json hparams from this file', type=str, default='')
    parser.add_argument('--gpu_num', help='cuda gpu #', type=str, default='')

    args = parser.parse_args()

    with open(args.hparams_path, 'r') as f:
        hparams = json.load(f)

    if args.gpu_num:
        try:
            gpu_index = int(args.gpu_num)
        except ValueError:
            parser.error('--gpu_num must be an integer, got {!r}'.format(args.gpu_num))
        if not -1 <= gpu_index <= 8:
            parser.error('--gpu_num must be between -1 and 8, got {}'.format(gpu_index))
        # The raw string is exported unchanged, exactly as it was given.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_num
    elif 'gpu_num' in hparams:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(hparams.get('gpu_num'))

    log_path = os.path.join(hparams['base_dir'], 'logs', hparams['experiment_name'])
    logger.configure(dir=log_path)

    print('experiment_params: {}'.format(hparams))
    print('chosen env: {}'.format(hparams['env_id']))

    # A missing or falsy 'atari_seed' entry falls back to seed 0.
    seed = hparams.get('atari_seed') or 0

    train(hparams['env_id'], num_timesteps=args.num_timesteps, seed=seed,
          policy=hparams['policy'], hparams=hparams)
Beispiel #9
0
def main():
    """Start training, handing the full parsed namespace on to ``train``.

    Adds ``--policy`` (default ``i2a``), ``--lrschedule``, ``--lr``,
    ``--lambda_dist`` and ``--max_grad_norm`` to the parser returned by
    ``atari_arg_parser()``.  ``train`` receives the usual keyword arguments
    plus ``args=args`` and ``num_env=16``.
    """
    arg_parser = atari_arg_parser()
    option_specs = [
        ('--policy', dict(help='Policy architecture',
                          choices=['i2a', 'cnn', 'lstm', 'lnlstm'],
                          default='i2a')),
        ('--lrschedule', dict(help='Learning rate schedule',
                              choices=['constant', 'linear'],
                              default='constant')),
        ('--lr', dict(help='Learning rate', type=float, default=7e-4)),
        ('--lambda_dist', dict(help='Distillation loss weight',
                               type=float,
                               default=0.01)),
        ('--max_grad_norm', dict(help='Max grad norm',
                                 type=float,
                                 default=0.5)),
    ]
    for flag, spec in option_specs:
        arg_parser.add_argument(flag, **spec)
    args = arg_parser.parse_args()

    logger.configure()
    train(
        args.env,
        num_timesteps=args.num_timesteps,
        seed=args.seed,
        policy=args.policy,
        lrschedule=args.lrschedule,
        num_env=16,
        args=args,
    )
Beispiel #10
0
def main():
    """Start training with a configurable log directory and log formats.

    Adds ``--policy``, ``--lrschedule``, ``--num-timesteps``, ``--log-dir``
    and ``--log-formats`` to the parser returned by ``atari_arg_parser()``.
    The last two are passed positionally to ``logger.configure``; ``train``
    is called with ``num_cpu=16``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    # NOTE(review): if atari_arg_parser() already declares --num-timesteps,
    # argparse will reject this second declaration - confirm against the
    # helper's definition.
    arg_parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    arg_parser.add_argument(
        '--log-dir',
        default=None,
        help='Log directory where all logs will be written',
    )
    arg_parser.add_argument(
        '--log-formats',
        default=None,
        help='Formats in which the logs will be written.',
    )
    opts = arg_parser.parse_args()

    logger.configure(opts.log_dir, opts.log_formats)
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_cpu=16,
    )
Beispiel #11
0
def main():
    """Start training and pass a model filename through to ``train``.

    Adds ``--policy``, ``--lrschedule`` and ``--model-filename`` to the
    parser returned by ``atari_arg_parser()``; ``train`` is called with
    ``num_env=16`` and ``model_filename`` taken from the command line.
    """
    arg_parser = atari_arg_parser()
    for flag, spec in (
        ('--policy', dict(help='Policy architecture',
                          choices=['cnn', 'lstm', 'lnlstm'],
                          default='cnn')),
        ('--lrschedule', dict(help='Learning rate schedule',
                              choices=['constant', 'linear'],
                              default='constant')),
        ('--model-filename', dict(help='Trained model filename',
                                  default='atari_a2c.gz')),
    ):
        arg_parser.add_argument(flag, **spec)
    opts = arg_parser.parse_args()

    logger.configure()
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed,
          policy=opts.policy,
          lrschedule=opts.lrschedule,
          num_env=16,
          model_filename=opts.model_filename)
Beispiel #12
0
def main():
    """Parse ``--policy`` / ``--lrschedule`` and start ``train`` with 16 envs.

    Both options are added to the parser returned by ``atari_arg_parser()``;
    the logger is configured with its defaults.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    opts = arg_parser.parse_args()

    logger.configure()
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_env=16,
    )
Beispiel #13
0
def main():
    """Parse ``--policy`` / ``--lrschedule`` and start ``train`` with 16 envs.

    All option strings are kept as explicit unicode literals, as in the
    original snippet.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        u'--policy',
        choices=[u'cnn', u'lstm', u'lnlstm'],
        default=u'cnn',
        help=u'Policy architecture',
    )
    arg_parser.add_argument(
        u'--lrschedule',
        choices=[u'constant', u'linear'],
        default=u'constant',
        help=u'Learning rate schedule',
    )
    opts = arg_parser.parse_args()

    logger.configure()
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_env=16,
    )
Beispiel #14
0
def main():
    """Parse the Atari command line and start ``train`` with 24 CPUs / 24 envs."""
    cli_args = atari_arg_parser().parse_args()
    logger.configure()

    train_kwargs = dict(
        num_timesteps=cli_args.num_timesteps,
        seed=cli_args.seed,
        num_cpu=24,
        num_env=24,
    )
    train(cli_args.env, **train_kwargs)
Beispiel #15
0
def main():
    """Start training with a configurable log directory and log formats.

    Adds ``--policy``, ``--log-dir`` and ``--log-formats`` to the parser
    returned by ``atari_arg_parser()``; the two logging options are passed
    positionally to ``logger.configure``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--log-dir',
        default=None,
        help='Log directory where all logs will be written',
    )
    arg_parser.add_argument(
        '--log-formats',
        default=None,
        help='Formats in which the logs will be written.',
    )
    opts = arg_parser.parse_args()

    logger.configure(opts.log_dir, opts.log_formats)
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
    )
Beispiel #16
0
def main():
    """Train for a fixed 1e8 timesteps, logging into a timestamped directory.

    Adds ``--policy`` and ``--lrschedule`` to the parser returned by
    ``atari_arg_parser()``.  The same ``./logs/<timestamp>`` directory is
    given to the logger and to ``train``, which is called with ``num_cpu=2``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    opts = arg_parser.parse_args()

    stamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    logdir = './logs/' + stamp
    logger.configure(logdir)

    train(
        opts.env,
        num_timesteps=1e8,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_cpu=2,
        logdir=logdir,
    )
Beispiel #17
0
def main():
    """Parse ``--policy`` / ``--lrschedule`` and start ``train`` with 16 envs.

    Both options are added to the parser returned by ``atari_arg_parser()``;
    the policy may additionally be ``caps``.  ``args.num_timesteps`` is
    converted with ``int()`` before it is handed to ``train``.
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture', choices=['cnn', 'lstm', 'lnlstm', 'caps'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule', choices=['constant', 'linear'], default='constant')
    args = parser.parse_args()
    logger.configure()
    # The previously defined but never used ``debug_timesteps`` local was dropped.
    run_timesteps = int(args.num_timesteps)
    train(args.env, num_timesteps=run_timesteps, seed=args.seed,
          policy=args.policy, lrschedule=args.lrschedule, num_env=16)
def main():
    """Train for 4e7 timesteps, logging under ``breakout/Test-<timestamp>``.

    Adds ``--policy`` and ``--lrschedule`` to the parser returned by
    ``atari_arg_parser()``.  Whatever ``num_timesteps`` was parsed is
    overwritten with ``4*1e7`` (a float) before training starts.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    args = arg_parser.parse_args()
    args.num_timesteps = 4 * 1e7

    run_name = datetime.datetime.now().strftime("Test-%Y-%m-%d-%H-%M-%S-%f")
    log_dir = osp.join('breakout', run_name)
    logger.configure(dir=log_dir)

    train(
        args.env,
        num_timesteps=args.num_timesteps,
        seed=args.seed,
        policy=args.policy,
        lrschedule=args.lrschedule,
        num_env=16,
    )
Beispiel #19
0
def main():
    """Start training with the replay-related options from the command line.

    Adds ``--policy``, ``--lrschedule``, ``--replay_lambda``, ``--ss_rate``,
    ``--replay_loss`` and ``--thetas`` to the parser returned by
    ``atari_arg_parser()`` and forwards all of them to ``train``
    (``num_env=16``).
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    # NOTE(review): --replay_lambda and --ss_rate declare no ``type=``, so a
    # value given on the command line arrives as ``str`` while the default is
    # the int 1 - confirm what ``train`` expects.
    arg_parser.add_argument(
        '--replay_lambda',
        default=1,
        help='Replay regularizer parameter',
    )
    arg_parser.add_argument('--ss_rate', default=1, help='Subsampling rate')
    arg_parser.add_argument(
        '--replay_loss',
        choices=['L2', 'Distillation'],
        default=None,
        help='Replay loss, if any',
    )
    arg_parser.add_argument(
        '--thetas',
        nargs='*',
        default=None,
        help='List of thetas to invert over',
    )
    opts = arg_parser.parse_args()

    logger.configure()
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_env=16,
        replay_lambda=opts.replay_lambda,
        ss_rate=opts.ss_rate,
        replay_loss=opts.replay_loss,
        thetas=opts.thetas,
    )
Beispiel #20
0
def main():
    """Start training with a configurable log directory and log formats.

    Adds ``--log-dir`` and ``--log-formats`` to the parser returned by
    ``atari_arg_parser()`` and passes both positionally to
    ``logger.configure``; ``train`` is called with ``num_cpu=32``.
    """
    parser = atari_arg_parser()
    parser.add_argument('--log-dir',
                        help='Log directory where all logs will be written',
                        default=None)
    parser.add_argument('--log-formats',
                        help='Formats in which the logs will be written.',
                        default=None)
    args = parser.parse_args()
    # argparse stores ``--log-formats`` as ``args.log_formats``; the previous
    # ``args.log_format`` (singular) is never set by this function and raised
    # AttributeError.
    logger.configure(args.log_dir, args.log_formats)
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          num_cpu=32)
Beispiel #21
0
def main():
    """Run ``learn`` with flags optionally loaded from a cfg file.

    Adds ``--flags``/``-f`` to the parser returned by ``atari_arg_parser()``.
    When a cfg file is given the flags come from ``AcerFlags.from_cfg``,
    otherwise a default ``AcerFlags()`` is used.  The environment is created
    from ``args.env`` together with ``flags.num_env`` and ``flags.seed``, and
    is always closed, even when looking up the policy or ``learn`` raises.
    """
    parser = atari_arg_parser()
    parser.add_argument('--flags', '-f', help="flags cfg file", default=None)
    args = parser.parse_args()

    flags = AcerFlags.from_cfg(args.flags) if args.flags else AcerFlags()
    logger.configure(flags.log_dir)

    env = make_atari_env(args.env, num_env=flags.num_env, seed=flags.seed)
    try:
        # NOTE(review): ``--policy`` is not declared in this function;
        # presumably atari_arg_parser() provides it - confirm.
        policy_fn = models.get(args.policy)
        learn(policy_fn, env, flags)
    finally:
        # Release the environment on failure as well as on success.
        env.close()
Beispiel #22
0
def main():
    """Start training, logging into a fixed BeamRider run directory.

    Adds ``--policy`` to the parser returned by ``atari_arg_parser()``.  The
    logger directory is always ``./log/BeamRider/Loss_1_Run_0`` regardless
    of ``args.env``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    opts = arg_parser.parse_args()

    # Hard-coded run identifiers that make up the log directory name.
    loss_id = 1
    run_id = 0
    log_dir = "./log/" + "BeamRider" + "/" + "Loss_" + str(loss_id) + "_Run_" + str(run_id)
    logger.configure(log_dir)

    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
    )
Beispiel #23
0
def main():
    """Resume Sonic training (Spring Yard Zone, Act 1) from a saved model.

    Adds ``--policy`` and ``--lrschedule`` to the parser returned by
    ``atari_arg_parser()``.  Game, state, timestep budget (1e8) and the
    checkpoint to load are hard-coded; ``train`` is called with
    ``num_cpu=1`` and a ``load_info`` dict describing the checkpoint.
    """
    cli = atari_arg_parser()
    cli.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    cli.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    options = cli.parse_args()

    # Fresh log directory named after the start time (second resolution).
    logdir = './logs/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    logger.configure(logdir)

    # Checkpoint to resume from; the path ends in "<steps>_<rewards>".
    load_model_steps = 4992
    load_model_rewards = 5163
    load_info = dict(
        path="logs/3833_4992/{}_{}".format(load_model_steps,
                                           load_model_rewards),
        steps=load_model_steps,
        rewards=load_model_rewards,
    )

    train(
        game='SonicTheHedgehog-Genesis',
        state='SpringYardZone.Act1',
        num_timesteps=1e8,
        seed=options.seed,
        policy=options.policy,
        lrschedule=options.lrschedule,
        num_cpu=1,
        logdir=logdir,
        load_info=load_info,
    )
Beispiel #24
0
def main():
    """Train using hyper-parameters loaded from a JSON file.

    Adds ``--policy``, ``--lrschedule``, ``--hparams_path`` and ``--gpu_num``
    to the parser returned by ``atari_arg_parser()``.  The JSON file supplies
    ``env_id``, ``total_timesteps``, ``policy``, ``num_env``,
    ``restore_from_ckpt_path``, ``base_dir``, ``experiment_name`` and,
    optionally, ``gpu_num`` and ``atari_seed``.  The policy passed to
    ``train`` comes from the file; ``args.policy`` is parsed but not used.

    An invalid ``--gpu_num`` (not an integer, or outside ``-1..8``) is
    reported through ``parser.error`` instead of an ``assert``, so the check
    also runs under ``python -O`` and produces a usage message.
    """
    parser = atari_arg_parser()
    parser.add_argument('--policy',
                        help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm'],
                        default='cnn')
    parser.add_argument('--lrschedule',
                        help='Learning rate schedule',
                        choices=['constant', 'linear'],
                        default='constant')
    parser.add_argument('--hparams_path',
                        help='Load json hparams from this file',
                        type=str,
                        default='')

    parser.add_argument('--gpu_num', help='cuda gpu #', type=str, default='')

    args = parser.parse_args()

    with open(args.hparams_path, 'r') as f:
        hparams = json.load(f)

    # The command line wins over the 'gpu_num' entry of the hparams file.
    if args.gpu_num:
        try:
            gpu_index = int(args.gpu_num)
        except ValueError:
            parser.error('--gpu_num must be an integer, got {!r}'.format(
                args.gpu_num))
        if not -1 <= gpu_index <= 8:
            parser.error('--gpu_num must be between -1 and 8, got {}'.format(
                gpu_index))
        # The raw string is exported unchanged, exactly as it was given.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_num
    elif 'gpu_num' in hparams:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(hparams.get('gpu_num'))

    log_path = os.path.join(hparams['base_dir'], 'logs',
                            hparams['experiment_name'])

    print('experiment_params: {}'.format(hparams))
    print('chosen env: {}'.format(hparams['env_id']))

    # A missing or falsy 'atari_seed' entry falls back to seed 0.
    seed = hparams.get('atari_seed') or 0

    logger.configure(dir=log_path)
    train(
        env_id=hparams['env_id'],
        num_timesteps=hparams['total_timesteps'],
        seed=seed,
        policy=hparams['policy'],
        lrschedule=args.lrschedule,
        num_env=hparams['num_env'],
        ckpt_path=hparams['restore_from_ckpt_path'],
        hparams=hparams,
    )
Beispiel #25
0
def main():
    """Train for a fixed 110,000,000 timesteps with a constant LR schedule.

    Adds ``--ent``, ``--lr``, ``--policy`` and ``--save_name`` to the parser
    returned by ``atari_arg_parser()``, prints the save name and forwards
    everything to ``train`` as keyword arguments (``num_env=16``).
    """
    arg_parser = atari_arg_parser()
    for flag, spec in (
        ('--ent', dict(type=float)),
        ('--lr', dict(type=float)),
        ('--policy', dict(type=str)),
        ('--save_name', dict(default=None, type=str)),
    ):
        arg_parser.add_argument(flag, **spec)
    opts = arg_parser.parse_args()

    logger.configure()
    print('saving to:{}'.format(opts.save_name))

    # The keyword name ``entrophy`` is part of the call into ``train`` and is
    # passed on as-is.
    train(
        num_timesteps=110000000,
        env_name=opts.env,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule='constant',
        num_env=16,
        entrophy=opts.ent,
        lr=opts.lr,
        save_name=opts.save_name,
    )
Beispiel #26
0
def main():
    """Start MPI training; only rank 0 logs in the requested formats.

    Adds ``--log-dir`` and ``--log-formats`` to the parser returned by
    ``atari_arg_parser()``.  Rank 0 configures the logger with the formats
    from the command line, every other rank with an empty format list.  The
    rank is also passed on to ``train``.
    """
    parser = atari_arg_parser()
    parser.add_argument('--log-dir',
                        help='Log directory where all logs will be written',
                        default=None)
    parser.add_argument('--log-formats',
                        help='Formats in which the logs will be written.',
                        default=None)
    args = parser.parse_args()
    rank = MPI.COMM_WORLD.Get_rank()
    # Both cases now pass the directory positionally.  The non-zero-rank call
    # used to spell it ``log_dir=...``, which disagreed with the positional
    # rank-0 call and fails if the parameter has a different name.
    format_strs = args.log_formats if rank == 0 else []
    logger.configure(args.log_dir, format_strs)
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          rank=rank)
Beispiel #27
0
def main():
    """Start training with stdout, log, csv and tensorboard logging.

    Adds ``--policy`` (default ``lstm``), ``--lrschedule`` and ``--logdir``
    to the parser returned by ``atari_arg_parser()``; ``train`` is called
    with ``num_cpu=16``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='lstm',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    arg_parser.add_argument('--logdir', help='Directory for logging')
    opts = arg_parser.parse_args()

    log_formats = ['stdout', 'log', 'csv', 'tensorboard']
    logger.configure(opts.logdir, log_formats)

    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_cpu=16,
    )
Beispiel #28
0
def main():
    """Start training with a configurable log directory (default ``log``).

    Adds ``--policy``, ``--lrschedule`` and ``--log_dir`` to the parser
    returned by ``atari_arg_parser()``; ``train`` is called with
    ``num_env=16``.
    """
    arg_parser = atari_arg_parser()
    for flag, spec in (
        ('--policy', dict(help='Policy architecture',
                          choices=['cnn', 'lstm', 'lnlstm'],
                          default='cnn')),
        ('--lrschedule', dict(help='Learning rate schedule',
                              choices=['constant', 'linear'],
                              default='constant')),
        ('--log_dir', dict(help='the directory to save log file',
                           default='log')),
    ):
        arg_parser.add_argument(flag, **spec)
    opts = arg_parser.parse_args()

    logger.configure(dir=opts.log_dir)
    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_env=16,
    )
Beispiel #29
0
def main():
    """Parse ``--policy`` / ``--lrschedule`` and train a single environment.

    Both options are added to the parser returned by ``atari_arg_parser()``
    (which is expected to supply the env, seed and num_timesteps arguments
    read below).  ``train`` is called with ``num_env=1``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    opts = arg_parser.parse_args()
    logger.configure()

    # One environment, run for ``num_timesteps`` timesteps with the chosen
    # policy architecture and learning-rate schedule.
    train(
        env_id=opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_env=1,
    )
Beispiel #30
0
def main():
    """Parse ``--policy`` / ``--lrschedule``, print the env and train one env.

    Both options are added to the parser returned by ``atari_arg_parser()``;
    the environment and the number of timesteps are read from that parser's
    result rather than declared here.  ``train`` is called with
    ``num_env=1``.
    """
    arg_parser = atari_arg_parser()
    arg_parser.add_argument(
        '--policy',
        choices=['cnn', 'lstm', 'lnlstm'],
        default='cnn',
        help='Policy architecture',
    )
    arg_parser.add_argument(
        '--lrschedule',
        choices=['constant', 'linear'],
        default='constant',
        help='Learning rate schedule',
    )
    opts = arg_parser.parse_args()
    logger.configure()

    # Debug banner showing which environment was selected.
    banner = "xxxxxxxxxxxxxxxxxxxxxxxx            : " + opts.env
    print(banner)

    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_env=1,
    )
Beispiel #31
0
def main():
    """Start training on a selected GPU with optional "penal" mode.

    Adds ``--policy``, ``--use-penal``, ``--gpu`` and ``--pg-rate`` to the
    parser returned by ``atari_arg_parser()``.  The chosen GPU is exported
    through ``CUDA_VISIBLE_DEVICES``.  The logger directory is named
    ``<env>_seed_<seed>_pen_pg<rate>`` or ``<env>_seed_<seed>_nopen_pg<rate>``
    depending on ``--use-penal``.  The full namespace is passed to ``train``
    as ``args``.

    ``--use-penal`` takes a value.  The spellings ``0``, ``false``, ``f``,
    ``no``, ``n``, ``off`` and the empty string (case-insensitive) turn the
    option off; any other value turns it on.  Previously every non-empty
    value, including ``False``, enabled it.
    """

    def _parse_flag(text):
        # argparse only calls this for values given on the command line; the
        # ``False`` default is used as-is.
        return text.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')

    parser = atari_arg_parser()
    parser.add_argument('--policy',
                        help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                        default='cnn')
    parser.add_argument('--use-penal',
                        help='enable penal',
                        type=_parse_flag,
                        default=False)
    parser.add_argument('--gpu', type=int, default=0, help='GPU selection')
    parser.add_argument('--pg-rate', type=float, default=0.0)
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = '%d' % args.gpu
    print("game %s run on GPU: %d" % (args.env, args.gpu))

    # Only the middle tag of the run name depends on the penal switch.
    penal_tag = '_pen' if args.use_penal else '_nopen'
    run_name = (args.env + '_seed_' + str(args.seed) + penal_tag + '_pg' +
                str(args.pg_rate))
    logger.configure(run_name, ['log', 'tensorboard'])

    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          policy=args.policy,
          args=args)
Beispiel #32
0
def main():
    """Parse the Atari command line and start ``train`` with ``num_cpu=32``."""
    cli_args = atari_arg_parser().parse_args()
    logger.configure()
    train(
        cli_args.env,
        num_timesteps=cli_args.num_timesteps,
        seed=cli_args.seed,
        num_cpu=32,
    )
Beispiel #33
0
def main():
    """Parse the Atari command line and start ``train``.

    The logger is not configured here.
    """
    cli_args = atari_arg_parser().parse_args()
    train(
        cli_args.env,
        num_timesteps=cli_args.num_timesteps,
        seed=cli_args.seed,
    )