Example #1
import os
import os.path as osp
import datetime

from baselines import logger
from baselines.env.envsetting import newenv
# control_arg_parser, get_dir, save_args, train and render are assumed to be
# project-local helpers from the surrounding repository (not shown here).


def main():
    args = control_arg_parser().parse_args()
    if args.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=args.hist_len)
    if args.train:
        ENV_path = get_dir(os.path.join(args.log_dir, args.env))
        log_dir = os.path.join(
            ENV_path,
            "{}-{}-{}".format(args.method, args.seed,
                              datetime.datetime.now().strftime("%m-%d-%H-%M")))

        # if MPI.COMM_WORLD.Get_rank() == 0:
        logger.configure(dir=log_dir)
        save_args(args)
        train(args.env,
              num_timesteps=args.num_timesteps,
              seed=args.seed,
              nsteps=args.nsteps,
              batch_size=args.batch_size,
              epoch=args.epoch,
              method=args.method,
              net_size=args.net_size,
              ncpu=args.ncpu,
              i_trial=args.seed,
              load_path=args.load_path,
              use_entr=int(args.use_entr))
    if args.render:
        video_path = osp.split(osp.split(args.load_path)[0])[0]
        render(args.env,
               nsteps=args.nsteps,
               batch_size=args.batch_size,
               net_size=args.net_size,
               load_path=args.load_path,
               video_path=video_path,
               iters=args.iters)
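
The excerpt defines only main(); the entry-point guard below is an assumed addition, not part of the original snippet, that makes the module runnable as a script:

if __name__ == '__main__':
    main()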
Example #2
import os
import datetime

from baselines import logger
from baselines.env.envsetting import newenv
# control_arg_parser, get_dir, save_args and train are assumed to be
# project-local helpers from the surrounding repository (not shown here).


def main():
    args = control_arg_parser().parse_args()
    # rank = MPI.COMM_WORLD.Get_rank()
    # print("This is rank:", rank, "Seed:", args.seed)
    if args.env == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=args.hist_len,
               block_high=float(args.block_high),
               policy_name=args.policy_name)
    # if args.train is True:
    ENV_path = get_dir(os.path.join(args.log_dir, args.env))
    log_dir = os.path.join(
        ENV_path,
        "{}-{}-{}".format(args.method, args.seed,
                          datetime.datetime.now().strftime("%m-%d-%H-%M")))
    logger.configure(dir=log_dir)
    # logger.log("This is rank {}".format(rank))
    save_args(args)
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          hist_len=args.hist_len,
          nsteps=args.nsteps,
          batch_size=args.batch_size,
          epoch=args.epoch,
          env_name=args.env,
          method=args.method,
          net_size=tuple(args.net_size),
          ncpu=args.ncpu,
          policy_name=args.policy_name,
          load_path=args.load_path,
          use_entr=int(args.use_entr),
          rank=args.seed,
          checkpoint=args.checkpoint,
          filter_size=args.filter_size)
Example #3
import gym

from baselines import logger
from baselines.common import set_global_seeds
# Monitor is assumed to come from this fork's bench module; unlike stock
# baselines, it accepts a version0 keyword (see the call below).
from baselines.bench import Monitor
from baselines.env.envsetting import newenv


def make_control_env(env_id, seed, hist_len, block_high, version0, give_state):
    """
    Create a wrapped, monitored gym.Env for the POMDP control task.
    """
    set_global_seeds(seed)
    if env_id == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=hist_len,
               block_high=block_high,
               version0=version0,
               give_state=give_state)
    env = gym.make(env_id)
    env = Monitor(env,
                  logger.get_dir(),
                  allow_early_resets=True,
                  version0=version0)
    env.seed(seed)
    return env
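
A minimal usage sketch of make_control_env; every argument value below is an illustrative assumption rather than something taken from the original code:

env = make_control_env('LunarLanderContinuousPOMDP-v0', seed=0,
                       hist_len=10, block_high=5 / 8,
                       version0=True, give_state=False)
first_obs = env.reset()  # in this fork, reset() may return an (obs, state) pair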
Example #4
# from baselines.env.box2d.lunar_lander_pomdp import LunarLanderContinuousPOMDP
from baselines import bench, logger
from baselines.env.envsetting import newenv
import gym


newenv(hist_len=5, block_high=5/8)
env = gym.make('LunarLanderContinuousPOMDP-v0')
# env = LunarLanderContinuousPOMDP(hist_len=0)
env = bench.Monitor(env, logger.get_dir())
obs, state = env.reset()
ob_space = env.observation_space
total_space = env.total_space
print("obs:{0}, state:{1}".format(obs, state))
print("total space:", total_space, total_space.shape)
print("obs space:", ob_space, ob_space.shape)


l = 0  # episode-length counter for the rollout loop left commented out below
# while True:
#     env.render(mode="rgb_array")
#     ac = env.action_space.sample()
#     [obs,state], rwd, done, _ = env.step(ac)
#     print("obs:{0}, state:{1}".format(obs, state))
#     if done:
#         break
#     l+=1
# print("Episode Length from baselines:{}".format(l))
Example #5
import os

import gym
import pandas as pd
import imageio
import numpy as np
import joblib
import tensorflow as tf

from baselines import bench, logger
from baselines.common.vec_env.vec_normalize import VecNormalize
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.env.envsetting import newenv

save_path = '/home/zhi/Documents/ReinforcementLearning/tmp'

hist_len = 10
block_high = 5 / 8
newenv(hist_len=hist_len, block_high=block_high, policy_name='MlpPolicy')


def make_env():
    env = gym.make('LunarLanderContinuousPOMDP-v0')
    env = bench.Monitor(env,
                        os.path.join(save_path, 'render-result'),
                        allow_early_resets=True)
    return env


env = DummyVecEnv([make_env])
env = VecNormalize(env)

ob = env.reset()
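
The excerpt ends right after the initial reset. A minimal continuation sketch under the standard baselines VecEnv interface; the random actions and the step count are assumptions added purely for illustration:

for _ in range(100):
    actions = [env.action_space.sample()]  # one action per sub-environment
    ob, rewards, dones, infos = env.step(actions)
    # DummyVecEnv is expected to reset finished episodes automatically,
    # so no manual reset is needed inside the loop.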