Ejemplo n.º 1
0
def main():
    """Call ``render`` on a fixed checkpoint using options from the command line.

    Options are parsed with ``control_arg_parser()``. The checkpoint file and
    the video output directory are hard-coded below; ``seed`` and
    ``give_state`` are always passed as 0. The training path is disabled in
    this variant and kept only as a commented-out reference.
    """
    cli = control_arg_parser().parse_args()

    # Training (disabled)

    # ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    # log_dir = os.path.join(ENV_path, args.method + "-" +
    #                        '{}'.format(args.seed)) + "-" + \
    #           datetime.datetime.now().strftime("%m-%d-%H-%M")
    # logger.configure(dir=log_dir)
    # save_args(args)
    # train_copos(args.env, num_timesteps=args.num_timesteps * 1e6, seed=args.seed, trial=args.seed,
    #             hist_len=args.hist_len, block_high=float(args.block_high), nsteps=args.nsteps,
    #             method=args.method, hid_size=args.hid_size, give_state=bool(args.give_state), vf_iters=args.epoch)

    # Render

    # NOTE(review): both locations are machine-specific absolute paths.
    checkpoint_file = '/Users/zhirong/Documents/ReinforcementLearning/tmp/LunarLanderContinuousPOMDP-v0/copos-guided-try-diffinput-0-07-05-13-34/checkpoints/00976.ckpt'
    video_dir = '/Users/zhirong/Documents/ReinforcementLearning/tmp/LunarLanderContinuousPOMDP-v0/copos-guided-try-diffinput-0-07-05-13-34/'

    render(
        hid_size=cli.hid_size,
        load_path=checkpoint_file,
        video_path=video_dir,
        env_id=cli.env,
        seed=0,
        hist_len=cli.hist_len,
        # Passed through as parsed (no float() conversion here).
        block_high=cli.block_high,
        give_state=0,
    )
Ejemplo n.º 2
0
def main():
    """Train and/or render according to the parsed command-line flags.

    Options are parsed with ``control_arg_parser()``. When ``train`` is
    exactly ``True`` a timestamped log directory is configured, the arguments
    are saved and ``train`` is called. When ``render`` is exactly ``True``,
    ``render`` is called with videos directed two directory levels above
    ``load_path``. Both phases may run in the same invocation.
    """
    opts = control_arg_parser().parse_args()

    # ``newenv`` is only invoked for this specific environment id.
    if opts.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=opts.hist_len)

    # Identity check on purpose: only the literal ``True`` enables the phase.
    if opts.train is True:
        env_root = get_dir(os.path.join(opts.log_dir, opts.env))
        # Log directory name: <method>-<seed>-<month>-<day>-<hour>-<minute>
        run_prefix = os.path.join(
            env_root, opts.method + "-" + '{0}'.format(opts.seed))
        timestamp = datetime.datetime.now().strftime("%m-%d-%H-%M")
        log_dir = run_prefix + "-" + timestamp

        # if MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(dir=log_dir)
        save_args(opts)

        train_kwargs = dict(
            num_timesteps=opts.num_timesteps,
            seed=opts.seed,
            nsteps=opts.nsteps,
            batch_size=opts.batch_size,
            epoch=opts.epoch,
            method=opts.method,
            net_size=opts.net_size,
            ncpu=opts.ncpu,
            # The seed doubles as the trial index.
            i_trial=opts.seed,
            load_path=opts.load_path,
            use_entr=int(opts.use_entr),
        )
        train(opts.env, **train_kwargs)

    if opts.render is True:
        # Parent of the directory that contains the file at ``load_path``.
        video_path = osp.dirname(osp.dirname(opts.load_path))
        render(
            opts.env,
            nsteps=opts.nsteps,
            batch_size=opts.batch_size,
            net_size=opts.net_size,
            load_path=opts.load_path,
            video_path=video_path,
            iters=opts.iters,
        )
Ejemplo n.º 3
0
def main():
    """Configure logging and launch ``train`` from command-line options.

    Options are parsed with ``control_arg_parser()``. A log directory named
    ``<method>-<seed>-<month>-<day>-<hour>-<minute>`` is created under
    ``<log_dir>/<env>``, the arguments are saved, and ``train`` is called
    unconditionally.
    """
    opts = control_arg_parser().parse_args()
    # rank = MPI.COMM_WORLD.Get_rank()
    # print("This is rank:", rank, "Seed:", args.seed)

    # ``newenv`` is only invoked for this specific environment id.
    if opts.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(
            hist_len=opts.hist_len,
            block_high=float(opts.block_high),
            policy_name=opts.policy_name,
        )

    # if args.train is True:
    env_root = get_dir(os.path.join(opts.log_dir, opts.env))
    run_prefix = os.path.join(
        env_root, opts.method + "-" + '{}'.format(opts.seed))
    timestamp = datetime.datetime.now().strftime("%m-%d-%H-%M")
    log_dir = run_prefix + "-" + timestamp

    logger.configure(dir=log_dir)
    # logger.log("This is rank {}".format(rank))
    save_args(opts)

    train_kwargs = dict(
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        hist_len=opts.hist_len,
        nsteps=opts.nsteps,
        batch_size=opts.batch_size,
        epoch=opts.epoch,
        env_name=opts.env,
        method=opts.method,
        # Converted to a tuple before being handed to ``train``.
        net_size=tuple(opts.net_size),
        ncpu=opts.ncpu,
        policy_name=opts.policy_name,
        load_path=opts.load_path,
        use_entr=int(opts.use_entr),
        # The seed is reused as the rank value.
        rank=opts.seed,
        checkpoint=opts.checkpoint,
        filter_size=opts.filter_size,
    )
    train(opts.env, **train_kwargs)
Ejemplo n.º 4
0
def main():
    """Run ``train`` with seed 0, logging under a hard-coded root directory.

    Options are parsed with ``control_arg_parser()``; the parsed seed is
    overwritten with 0. Logs go to
    ``<root>/<env>/trpo-<month>-<day>-<hour>-<minute>-<second>``.
    """
    # args = mujoco_arg_parser().parse_args()
    opts = control_arg_parser().parse_args()
    # Whatever seed was given on the command line is discarded.
    opts.seed = 0

    # NOTE(review): machine-specific absolute path.
    log_root = get_dir("/Users/zhirong/Documents/Masterthesis-code/tmp")
    # log_path = get_dir("/home/zhi/Documents/ReinforcementLearning/tmp")
    env_root = get_dir(os.path.join(log_root, opts.env))

    run_name = datetime.datetime.now().strftime("trpo-%m-%d-%H-%M-%S")
    logger.configure(dir=os.path.join(env_root, run_name))

    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
    )
Ejemplo n.º 5
0
def main():
    """Configure logging and launch ``train_copos`` from command-line options.

    Options are parsed with ``control_arg_parser()``. A log directory named
    ``<method>-<seed>-<month>-<day>-<hour>-<minute>`` is created under
    ``<log_dir>/<env>``, the arguments are saved, and ``train_copos`` is
    called.
    """
    opts = control_arg_parser().parse_args()

    # Training

    env_root = get_dir(os.path.join(opts.log_dir, opts.env))
    run_prefix = os.path.join(
        env_root, opts.method + "-" + '{}'.format(opts.seed))
    timestamp = datetime.datetime.now().strftime("%m-%d-%H-%M")
    log_dir = run_prefix + "-" + timestamp

    logger.configure(dir=log_dir)
    save_args(opts)

    copos_kwargs = dict(
        # The CLI value is scaled by one million before being passed on.
        num_timesteps=opts.num_timesteps * 1e6,
        seed=opts.seed,
        # The seed doubles as the trial identifier.
        trial=opts.seed,
        hist_len=opts.hist_len,
        block_high=float(opts.block_high),
        nsteps=opts.nsteps,
        method=opts.method,
        hid_size=opts.hid_size,
        give_state=bool(opts.give_state),
        # The ``epoch`` option is forwarded as ``vf_iters``.
        vf_iters=opts.epoch,
    )
    train_copos(opts.env, **copos_kwargs)
Ejemplo n.º 6
0
def main():
    """Configure logging and launch ``train`` from command-line options.

    Options are parsed with ``control_arg_parser()``. A log directory named
    ``<method>-<seed>-<month>-<day>-<hour>-<minute>`` is created under
    ``<log_dir>/<env>``, the arguments are saved, and ``train`` is called.
    """
    opts = control_arg_parser().parse_args()

    env_root = get_dir(os.path.join(opts.log_dir, opts.env))
    run_prefix = os.path.join(
        env_root, opts.method + "-" + '{}'.format(opts.seed))
    timestamp = datetime.datetime.now().strftime("%m-%d-%H-%M")
    log_dir = run_prefix + "-" + timestamp

    logger.configure(dir=log_dir)
    # logger.log("This is rank {}".format(rank))
    save_args(opts)

    train(
        opts.env,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        # The seed doubles as the trial identifier.
        trial=opts.seed,
        hist_len=opts.hist_len,
    )
Ejemplo n.º 7
0
def main():
    """Configure logging and launch ``train_copos`` from command-line options.

    Options are parsed with ``control_arg_parser()``. A log directory named
    ``<method>-<seed>-<month>-<day>-<hour>-<minute>`` is created under
    ``<log_dir>/<env>``, the arguments are saved, and ``train_copos`` is
    called. ``give_state`` is forwarded exactly as parsed.
    """
    opts = control_arg_parser().parse_args()

    env_root = get_dir(os.path.join(opts.log_dir, opts.env))
    run_prefix = os.path.join(
        env_root, opts.method + "-" + '{}'.format(opts.seed))
    timestamp = datetime.datetime.now().strftime("%m-%d-%H-%M")
    log_dir = run_prefix + "-" + timestamp

    logger.configure(dir=log_dir)
    save_args(opts)

    # if args.env == 'LunarLanderContinuousPOMDP-v0':
    #     newenv(hist_len=args.hist_len, block_high=float(args.block_high), policy_name=args.policy_name)

    copos_kwargs = dict(
        # The CLI value is scaled by one million before being passed on.
        num_timesteps=opts.num_timesteps * 1e6,
        seed=opts.seed,
        # The seed doubles as the trial identifier.
        trial=opts.seed,
        hist_len=opts.hist_len,
        block_high=float(opts.block_high),
        nsteps=opts.nsteps,
        method=opts.method,
        hid_size=opts.hid_size,
        give_state=opts.give_state,
        # The ``epoch`` option is forwarded as ``vf_iters``.
        vf_iters=opts.epoch,
    )
    train_copos(opts.env, **copos_kwargs)