def main():
    """Parse CLI args, then train and/or render the requested environment.

    Side effects: may (re)register the LunarLander POMDP env, configures the
    logger output directory, and writes the parsed args to disk before training.
    """
    args = control_arg_parser().parse_args()
    # The POMDP variant must be (re)registered with the requested history
    # length before gym.make() can build it.
    if args.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=args.hist_len)
    if args.train:
        ENV_path = get_dir(os.path.join(args.log_dir, args.env))
        # Log dir encodes method, seed and a timestamp so repeated runs
        # never clobber each other.
        log_dir = (os.path.join(ENV_path, f"{args.method}-{args.seed}")
                   + "-" + datetime.datetime.now().strftime("%m-%d-%H-%M"))
        logger.configure(dir=log_dir)
        save_args(args)  # persist the exact hyper-parameters used for this run
        train(args.env,
              num_timesteps=args.num_timesteps,
              seed=args.seed,
              nsteps=args.nsteps,
              batch_size=args.batch_size,
              epoch=args.epoch,
              method=args.method,
              net_size=args.net_size,
              ncpu=args.ncpu,
              i_trial=args.seed,
              load_path=args.load_path,
              use_entr=int(args.use_entr))
    if args.render:
        # load_path points at a checkpoint file; the video goes two levels up,
        # next to the run directory.
        video_path = osp.split(osp.split(args.load_path)[0])[0]
        render(args.env,
               nsteps=args.nsteps,
               batch_size=args.batch_size,
               net_size=args.net_size,
               load_path=args.load_path,
               video_path=video_path,
               iters=args.iters)
def main():
    """Parse CLI args, set up logging, and launch training.

    Side effects: may (re)register the LunarLander POMDP env, configures the
    logger output directory, and writes the parsed args to disk before training.
    """
    args = control_arg_parser().parse_args()
    # The POMDP variant must be (re)registered with the requested history
    # length / obstacle height before gym.make() can build it.
    if args.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=args.hist_len,
               block_high=float(args.block_high),
               policy_name=args.policy_name)
    ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    # Log dir encodes method, seed and a timestamp so repeated runs never
    # clobber each other.
    log_dir = (os.path.join(ENV_path, f"{args.method}-{args.seed}")
               + "-" + datetime.datetime.now().strftime("%m-%d-%H-%M"))
    logger.configure(dir=log_dir)
    save_args(args)  # persist the exact hyper-parameters used for this run
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          hist_len=args.hist_len,
          nsteps=args.nsteps,
          batch_size=args.batch_size,
          epoch=args.epoch,
          env_name=args.env,
          method=args.method,
          net_size=tuple(args.net_size),
          ncpu=args.ncpu,
          policy_name=args.policy_name,
          load_path=args.load_path,
          use_entr=int(args.use_entr),
          rank=args.seed,  # NOTE(review): seed doubles as the rank id here — confirm intended
          checkpoint=args.checkpoint,
          filter_size=args.filter_size)
def make_control_env(env_id, seed, hist_len, block_high, version0, give_state):
    """Build a seeded, Monitor-wrapped gym environment for the control tasks.

    For 'LunarLanderContinuousPOMDP-v0' the environment is (re)registered
    first via `newenv` with the requested history length, obstacle height,
    and observability flags; other env ids are created as-is.
    """
    set_global_seeds(seed)
    if env_id == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=hist_len,
               block_high=block_high,
               version0=version0,
               give_state=give_state)
    wrapped = Monitor(gym.make(env_id), logger.get_dir(),
                      allow_early_resets=True, version0=version0)
    wrapped.seed(seed)
    return wrapped
# Smoke test: register the LunarLander POMDP env, reset it once, and print
# the observation/state spaces so their shapes can be inspected by eye.
from baselines import bench, logger
from baselines.env.envsetting import newenv
import gym

# Register the POMDP variant with a 5-step observation history and the
# obstacle placed at 5/8 of the screen height.
newenv(hist_len=5, block_high=5 / 8)
env = gym.make('LunarLanderContinuousPOMDP-v0')
env = bench.Monitor(env, logger.get_dir())

# reset() returns both the (partial) observation and the full internal state.
obs, state = env.reset()
ob_space = env.observation_space
total_space = env.total_space
print("obs:{0}, state:{1}".format(obs, state))
print("total space:", total_space, total_space.shape)
print("obs space:", ob_space, ob_space.shape)
# Render-time setup: register the POMDP env, wrap it in a normalized vector
# env, and reset it so rollouts can be replayed/recorded from a checkpoint.
import os  # NOTE(review): os/gym were not imported in this chunk — confirm not imported elsewhere

import gym
import imageio
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf

from baselines import bench, logger
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_normalize import VecNormalize
from baselines.env.envsetting import newenv

save_path = '/home/zhi/Documents/ReinforcementLearning/tmp'
hist_len = 10
block_high = 5 / 8  # obstacle placed at 5/8 of the screen height

newenv(hist_len=hist_len, block_high=block_high, policy_name='MlpPolicy')


def make_env():
    """Create the POMDP env wrapped in a Monitor that logs under save_path."""
    env = gym.make('LunarLanderContinuousPOMDP-v0')
    env = bench.Monitor(env, os.path.join(save_path, 'render-result'),
                        allow_early_resets=True)
    return env


env = DummyVecEnv([make_env])
env = VecNormalize(env)
ob = env.reset()