Example #1
0
def main():
    """
    Runs the test.

    Parses the standard Atari CLI arguments plus SIL-specific options and
    launches `play` with the parsed configuration. A checkpoint path
    (`--load-path`) is mandatory, since playing requires trained weights.
    """
    parser = atari_arg_parser()
    parser.add_argument('--load-path', default=None, type=str)
    parser.add_argument('--policy',
                        choices=['cnn', 'lstm', 'lnlstm'],
                        default='cnn',
                        help='Policy architecture')
    parser.add_argument('--lr_schedule',
                        choices=['constant', 'linear'],
                        default='constant',
                        help='Learning rate schedule')
    parser.add_argument('--sil-update',
                        type=int,
                        default=4,
                        help="Number of updates per iteration")
    parser.add_argument('--sil-beta',
                        type=float,
                        default=0.1,
                        help="Beta for weighted IS")
    args = parser.parse_args()
    # `assert` is stripped under `python -O` and would silently skip this
    # check; report the missing argument through argparse instead so the
    # user gets a proper usage message and a non-zero exit code.
    if args.load_path is None:
        parser.error('--load-path is required')
    logger.configure()
    play(args.env,
         num_timesteps=args.num_timesteps,
         seed=args.seed,
         policy=args.policy,
         lr_schedule=args.lr_schedule,
         num_env=16,
         sil_update=args.sil_update,
         sil_beta=args.sil_beta,
         load_path=args.load_path)
Example #2
0
def main():
    """Run the test: parse the Atari CLI arguments and start training."""
    parser = atari_arg_parser()
    cli = parser.parse_args()
    logger.configure()
    train(cli.env, num_timesteps=cli.num_timesteps,
          seed=cli.seed, num_cpu=32)
Example #3
0
def main():
    """Run the test: parse CLI options and launch training."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy',
                            help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                            default='mlp')
    parsed = arg_parser.parse_args()
    logger.configure()
    train(parsed.env,
          num_timesteps=parsed.num_timesteps,
          seed=parsed.seed,
          policy=parsed.policy)
Example #4
0
def main():
    """Run the test: parse CLI options, configure logging, and train."""
    arg_parser = atari_arg_parser()
    arg_parser.add_argument('--policy',
                            help='Policy architecture',
                            choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                            default='cnn')
    arg_parser.add_argument('--n_envs', default=8, type=int)
    parsed = arg_parser.parse_args()
    # Per-environment log directory on the shared server volume.
    log_dir = '/serverdata/rohit/stablebaselines/{}/ppo'.format(parsed.env)
    logger.configure(folder=log_dir)
    train(parsed.env,
          num_timesteps=parsed.num_timesteps,
          seed=parsed.seed,
          n_envs=parsed.n_envs,
          policy=parsed.policy)
Example #5
0
def main():
    """Run the test: parse CLI options and launch A2C-style training."""
    parser = atari_arg_parser()
    parser.add_argument('--policy',
                        choices=['cnn', 'lstm', 'lnlstm'],
                        default='cnn',
                        help='Policy architecture')
    parser.add_argument('--lr_schedule',
                        choices=['constant', 'linear'],
                        default='constant',
                        help='Learning rate schedule')
    parser.add_argument('--logdir', help='Directory for logging')
    options = parser.parse_args()
    logger.configure(options.logdir)
    train(options.env,
          num_timesteps=options.num_timesteps,
          seed=options.seed,
          policy=options.policy,
          lr_schedule=options.lr_schedule,
          num_cpu=16)
Example #6
0
def main():
    """Run the test: parse SIL-A2C CLI options and start training."""
    parser = atari_arg_parser()
    parser.add_argument('--policy', choices=['cnn', 'lstm', 'lnlstm'],
                        default='cnn', help='Policy architecture')
    parser.add_argument('--lr_schedule', choices=['constant', 'linear'],
                        default='constant', help='Learning rate schedule')
    parser.add_argument('--sil-update', type=int, default=4,
                        help="Number of updates per iteration")
    parser.add_argument('--sil-beta', type=float, default=0.1,
                        help="Beta for weighted IS")
    parser.add_argument('--tensorboard-log', type=str, default='./tf_log/SIL')
    parser.add_argument('--tb', type=str, default='SIL_A2C')
    opts = parser.parse_args()
    logger.configure()
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          seed=opts.seed,
          policy=opts.policy,
          lr_schedule=opts.lr_schedule,
          num_env=16,
          sil_update=opts.sil_update,
          sil_beta=opts.sil_beta,
          tensorboard_log=opts.tensorboard_log,
          tb_log_name=opts.tb)
Example #7
0
def main():
    """Run the test: parse CLI options, set up the log dir, and train."""
    import os

    parser = atari_arg_parser()
    parser.add_argument('--policy',
                        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                        default='cnn',
                        help='Policy architecture')
    cfg = parser.parse_args()
    # Logs are grouped per environment under a common 'baseline' leaf.
    logger.configure(os.path.join('logs', cfg.env, 'baseline'))
    train(cfg.env,
          num_timesteps=cfg.num_timesteps,
          seed=cfg.seed,
          policy=cfg.policy)
Example #8
0
def main():
    """Run the test: parse SIL/SF CLI options and start training."""
    parser = atari_arg_parser()
    parser.add_argument('--policy', choices=['cnn', 'lstm', 'lnlstm'],
                        default='cnn', help='Policy architecture')
    parser.add_argument('--lr_schedule', choices=['constant', 'linear'],
                        default='constant', help='Learning rate schedule')
    parser.add_argument('--sil-update', type=int, default=4,
                        help="Number of updates per iteration")
    parser.add_argument('--sil-beta', type=float, default=0.1,
                        help="Beta for weighted IS")
    parser.add_argument('--tensorboard-log', type=str,
                        default='./sf_log/recons2')
    parser.add_argument('--tb', type=str, default='SIL_A2C')
    parser.add_argument('--use-sf', action='store_true')
    parser.add_argument('--use-recons', action='store_true')
    cfg = parser.parse_args()
    # Log folder is derived from the tensorboard path plus the run name.
    logger.configure(folder="{}/{}".format(cfg.tensorboard_log, cfg.tb))
    logger.info('use SF {}'.format(cfg.use_sf))
    train(cfg.env,
          num_timesteps=cfg.num_timesteps,
          seed=cfg.seed,
          policy=cfg.policy,
          lr_schedule=cfg.lr_schedule,
          num_env=16,
          sil_update=cfg.sil_update,
          sil_beta=cfg.sil_beta,
          use_sf=cfg.use_sf,
          use_recons=cfg.use_recons,
          tensorboard_log=cfg.tensorboard_log,
          tb_log_name=cfg.tb)
Example #9
0
def main():
    """Run the test: parse peer-training CLI options, seed, and train."""
    parser = atari_arg_parser()
    parser.add_argument('--policy',
                        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                        default='cnn',
                        help='Policy architecture')
    parser.add_argument('--peer',
                        type=float,
                        default=0.,
                        help='Coefficient of the peer term. (default: 0)')
    parser.add_argument('--note',
                        type=str,
                        default='test',
                        help='Log path')
    parser.add_argument('--individual',
                        action='store_true',
                        default=False,
                        help='If true, no co-training is applied.')
    parser.add_argument('--start-episode',
                        type=int,
                        default=0,
                        help='Add peer term after this episode.')
    parser.add_argument('--end-episode',
                        type=int,
                        default=10000,
                        help='Remove peer term after this episode.')
    parser.add_argument('--decay-type',
                        type=str,
                        default=None,
                        choices=[None, 'inc', 'dec', 'inc_dec'],
                        help='Decay type for alpha')
    parser.add_argument('--repeat',
                        type=int,
                        default=1,
                        help='Repeat training on the dataset in one epoch')
    options = parser.parse_args()

    set_global_seeds(options.seed)

    logger.configure(os.path.join('logs', options.env, options.note))
    logger.info(options)
    # Scheduler ramps the peer term in/out over the configured episode window.
    alpha_scheduler = Scheduler(options.start_episode,
                                options.end_episode,
                                decay_type=options.decay_type)
    train(options.env,
          num_timesteps=options.num_timesteps,
          seed=options.seed,
          policy=options.policy,
          peer=options.peer,
          scheduler=alpha_scheduler,
          individual=options.individual,
          repeat=options.repeat)
def main():
    """Run the test: parse the standard Atari arguments and train."""
    cli = atari_arg_parser().parse_args()
    train(cli.env,
          num_timesteps=cli.num_timesteps,
          seed=cli.seed)