def __init__(self, config):
    """Create the environment selected by ``config.env`` and reset bookkeeping.

    Args:
        config: Settings object. ``config.env`` selects the environment:
            ``'mountaincar'``, ``'acrobot'``, or any name containing
            ``'maze'`` (which additionally reads ``config.maze_size``).

    Raises:
        ValueError: If ``config.env`` matches none of the supported names.
    """
    self.config = config

    env_name = config.env
    if env_name == 'mountaincar':
        self.env = gym.make('MountainCar-v0')
    elif env_name == 'acrobot':
        self.env = gym.make('Acrobot-v1')
    elif 'maze' in env_name:
        self.env = maze.MazeEnv(size=config.maze_size, time=100, holes=0, num_goal=1)
    else:
        # Fail here with a clear message; otherwise self.env stays unset and
        # the observation-space lookup below dies with an AttributeError.
        raise ValueError('Unsupported environment: {!r}'.format(env_name))

    # Start with empty buffers and a zeroed counter.
    self.state_buffer = []
    self.reward_buffer = []
    self.counter = 0

    # Upper and lower bounds of the observation space, stored as tensors.
    observation_space = self.env.observation_space
    self.smax = torch.tensor(observation_space.high)
    self.smin = torch.tensor(observation_space.low)
def build_env_maze(horizon=200, size=20):
    """Build a ``maze.MazeEnv`` with ``holes=0`` and ``num_goal=1``.

    Args:
        horizon: Forwarded to ``MazeEnv`` as its ``time`` argument
            (presumably the per-episode step limit -- confirm against
            ``MazeEnv``). The default of 200 matches the value that used to
            be hard-coded, so default calls behave as before.
        size: Forwarded to ``MazeEnv`` as its ``size`` argument.

    Returns:
        The constructed ``maze.MazeEnv`` instance.
    """
    # Function-scope import: ``maze`` is only loaded when this builder runs.
    import maze

    # Honour the caller's ``horizon`` instead of ignoring it in favour of a
    # fixed time=200.
    return maze.MazeEnv(size=size, time=horizon, holes=0, num_goal=1)
def build_env(args, extra_args):
    """Construct the environment requested by ``args.env``.

    Names containing ``'Lock-v0'``, ``'diabcombolock'`` or ``'maze'`` (checked
    in that order) are built by dedicated code paths. Every other name is
    resolved with ``get_env_type`` and built through ``make_env`` /
    ``make_vec_env``.

    Args:
        args: Run options. Depending on the branch taken this reads ``env``,
            ``horizon``, ``dimension``, ``seed``, ``num_env``, ``alg``,
            ``gamestate`` and ``reward_scale``. For maze names,
            ``args.maze_size`` is set as a side effect.
        extra_args: Not used by this function.

    Returns:
        The constructed environment.
    """
    requested = args.env

    if 'Lock-v0' in requested:
        # Build combination lock environment
        # NOTE(review): imported only for its side effects, presumably
        # registering 'Lock-v0' with gym -- confirm.
        import Environments  # noqa: F401

        lock_env = gym.make('Lock-v0')
        lock_env.init(env_config={
            'horizon': args.horizon,
            'dimension': args.dimension,
            'switch': 0.1,
            'tabular': False,
        })
        return lock_env

    if 'diabcombolock' in requested:
        return build_env_homer(horizon=args.horizon, seed=args.seed)

    if 'maze' in requested:
        import maze

        # The first run of digits in the name is the maze size; it is stored
        # on ``args`` before the environment is built.
        digit_runs = re.findall(r'\d+', requested)
        args.maze_size = int(digit_runs[0])
        return maze.MazeEnv(size=args.maze_size, time=100, holes=0, num_goal=1)

    # On macOS only half of the reported CPUs are used for the default
    # environment count.
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_total = cpu_total // 2
    num_envs = args.num_env or cpu_total
    algorithm = args.alg
    rng_seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type not in {'atari', 'retro'}:
        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=1,
        )
        session_config.gpu_options.allow_growth = True
        get_session(config=session_config)

        # Dict observations are flattened for every algorithm except 'her'.
        flatten = algorithm not in {'her'}
        vec_env = make_vec_env(
            env_id,
            env_type,
            args.num_env or 1,
            rng_seed,
            reward_scale=args.reward_scale,
            flatten_dict_observations=flatten,
        )
        if env_type == 'mujoco':
            return VecNormalize(vec_env, use_tf=True)
        return vec_env

    # Atari / retro environments.
    if algorithm == 'deepq':
        return make_env(env_id, env_type, seed=rng_seed, wrapper_kwargs={'frame_stack': True})
    if algorithm == 'trpo_mpi':
        return make_env(env_id, env_type, seed=rng_seed)

    # Every other algorithm gets a vectorised environment with 4 stacked frames.
    stacked_source = make_vec_env(
        env_id,
        env_type,
        num_envs,
        rng_seed,
        gamestate=args.gamestate,
        reward_scale=args.reward_scale,
    )
    return VecFrameStack(stacked_source, 4)
def _thunk():
    """Build a ``maze.MazeEnv`` from the XML config file for ``env_id``.

    ``env_id`` is not a parameter; it is read from the enclosing scope.
    The config file opened is ``'config/' + env_id + '.xml'``.

    Returns:
        The constructed ``maze.MazeEnv`` instance.
    """
    import maze

    config_path = 'config/' + env_id + '.xml'
    # NOTE(review): the handle is passed to MazeEnv still open and is never
    # closed here; whether MazeEnv closes it is not visible from this code.
    config_file = open(config_path)
    return maze.MazeEnv(config=config_file)