# Imports assumed by the entry points below; project-specific helpers
# (control_arg_parser, get_dir, save_args, train, train_copos, render, newenv,
# logger) come from this repo's own modules.
import datetime
import os
import os.path as osp


def main():
    args = control_arg_parser().parse_args()

    # Training (disabled; this variant only renders a saved policy)
    # ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    # log_dir = os.path.join(ENV_path, args.method + "-" + '{}'.format(args.seed)) + "-" + \
    #           datetime.datetime.now().strftime("%m-%d-%H-%M")
    # logger.configure(dir=log_dir)
    # save_args(args)
    # train_copos(args.env, num_timesteps=args.num_timesteps * 1e6, seed=args.seed, trial=args.seed,
    #             hist_len=args.hist_len, block_high=float(args.block_high), nsteps=args.nsteps,
    #             method=args.method, hid_size=args.hid_size, give_state=bool(args.give_state),
    #             vf_iters=args.epoch)

    # Render a trained policy; checkpoint and video paths are hard-coded to a local run
    load_path = '/Users/zhirong/Documents/ReinforcementLearning/tmp/LunarLanderContinuousPOMDP-v0/copos-guided-try-diffinput-0-07-05-13-34/checkpoints/00976.ckpt'
    video_path = '/Users/zhirong/Documents/ReinforcementLearning/tmp/LunarLanderContinuousPOMDP-v0/copos-guided-try-diffinput-0-07-05-13-34/'
    render(hid_size=args.hid_size, load_path=load_path, video_path=video_path,
           env_id=args.env, seed=0, hist_len=args.hist_len,
           block_high=args.block_high, give_state=0)
def main():
    args = control_arg_parser().parse_args()
    if args.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=args.hist_len)

    if args.train:
        ENV_path = get_dir(os.path.join(args.log_dir, args.env))
        log_dir = os.path.join(ENV_path, args.method + "-" + '{0}'.format(args.seed)) + "-" + \
                  datetime.datetime.now().strftime("%m-%d-%H-%M")
        # if MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(dir=log_dir)
        save_args(args)
        train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, nsteps=args.nsteps,
              batch_size=args.batch_size, epoch=args.epoch, method=args.method,
              net_size=args.net_size, ncpu=args.ncpu, i_trial=args.seed,
              load_path=args.load_path, use_entr=int(args.use_entr))

    if args.render:
        # The run directory sits two levels above the checkpoint file, e.g.
        # <run>/checkpoints/00976.ckpt -> <run>
        video_path = osp.split(osp.split(args.load_path)[0])[0]
        render(args.env, nsteps=args.nsteps, batch_size=args.batch_size, net_size=args.net_size,
               load_path=args.load_path, video_path=video_path, iters=args.iters)
def main():
    args = control_arg_parser().parse_args()
    # rank = MPI.COMM_WORLD.Get_rank()
    # print("This is rank:", rank, "Seed:", args.seed)
    if args.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=args.hist_len, block_high=float(args.block_high),
               policy_name=args.policy_name)

    # if args.train is True:
    ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    log_dir = os.path.join(ENV_path, args.method + "-" + '{}'.format(args.seed)) + "-" + \
              datetime.datetime.now().strftime("%m-%d-%H-%M")
    logger.configure(dir=log_dir)
    # logger.log("This is rank {}".format(rank))
    save_args(args)
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, hist_len=args.hist_len,
          nsteps=args.nsteps, batch_size=args.batch_size, epoch=args.epoch, env_name=args.env,
          method=args.method, net_size=tuple(args.net_size), ncpu=args.ncpu,
          policy_name=args.policy_name, load_path=args.load_path, use_entr=int(args.use_entr),
          rank=args.seed, checkpoint=args.checkpoint, filter_size=args.filter_size)
def main():
    # args = mujoco_arg_parser().parse_args()
    args = control_arg_parser().parse_args()
    args.seed = 0
    # NOTE: log roots are machine-specific; swap the active line for your machine
    log_path = get_dir("/Users/zhirong/Documents/Masterthesis-code/tmp")
    # log_path = get_dir("/home/zhi/Documents/ReinforcementLearning/tmp")
    ENV_path = get_dir(os.path.join(log_path, args.env))
    log_dir = os.path.join(ENV_path, datetime.datetime.now().strftime("trpo-%m-%d-%H-%M-%S"))
    logger.configure(dir=log_dir)
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
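# A possible refactor (sketch, not part of the original code): derive the log root
# from a hypothetical TMP_LOG_ROOT environment variable instead of the hard-coded,
# machine-specific paths above. The helper name and variable are illustrative only.
import os

def default_log_root():
    # An explicit override wins; otherwise keep logs under ./tmp in the working directory
    return os.environ.get("TMP_LOG_ROOT", os.path.join(os.getcwd(), "tmp"))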
def main():
    args = control_arg_parser().parse_args()

    # Training
    ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    log_dir = os.path.join(ENV_path, args.method + "-" + '{}'.format(args.seed)) + "-" + \
              datetime.datetime.now().strftime("%m-%d-%H-%M")
    logger.configure(dir=log_dir)
    save_args(args)
    train_copos(args.env, num_timesteps=args.num_timesteps * 1e6, seed=args.seed, trial=args.seed,
                hist_len=args.hist_len, block_high=float(args.block_high), nsteps=args.nsteps,
                method=args.method, hid_size=args.hid_size, give_state=bool(args.give_state),
                vf_iters=args.epoch)
def main():
    args = control_arg_parser().parse_args()
    ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    log_dir = os.path.join(ENV_path, args.method + "-" + '{}'.format(args.seed)) + "-" + \
              datetime.datetime.now().strftime("%m-%d-%H-%M")
    logger.configure(dir=log_dir)
    # logger.log("This is rank {}".format(rank))
    save_args(args)
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, trial=args.seed,
          hist_len=args.hist_len)
def main():
    args = control_arg_parser().parse_args()
    ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    log_dir = os.path.join(ENV_path, args.method + "-" + '{}'.format(args.seed)) + "-" + \
              datetime.datetime.now().strftime("%m-%d-%H-%M")
    logger.configure(dir=log_dir)
    save_args(args)
    # if args.env == 'LunarLanderContinuousPOMDP-v0':
    #     newenv(hist_len=args.hist_len, block_high=float(args.block_high), policy_name=args.policy_name)
    train_copos(args.env, num_timesteps=args.num_timesteps * 1e6, seed=args.seed, trial=args.seed,
                hist_len=args.hist_len, block_high=float(args.block_high), nsteps=args.nsteps,
                method=args.method, hid_size=args.hid_size, give_state=args.give_state,
                vf_iters=args.epoch)
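# None of the entry points above show how they are invoked; a minimal sketch of the
# standard guard, assuming each main() lives in its own runnable script:
if __name__ == '__main__':
    main()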