def _load_env():
    """Build the environment named by ``args.env``, adding monitoring and
    MiniGrid observation wrappers as requested by the surrounding config.

    Relies on enclosing-scope names: ``args``, ``log_dir``, ``i``,
    ``keep_classes``, ``gym``, ``mgwr`` and ``Monitor``.
    """
    env = gym.make(args.env)
    env.see_through_walls = args.see_through_walls

    # The monitor writes under log_dir, so make sure it exists first.
    os.makedirs(log_dir, exist_ok=True)

    # Resuming the same model name means we append to the existing monitor file.
    append = args.load_model_name == args.model_name
    if args.train:
        env = Monitor(env, log_dir + '{}'.format(i),
                      allow_early_resets=True, append=append)

    is_minigrid = args.env.startswith('MiniGrid')
    if is_minigrid and args.fullobs:
        # Fully observable MiniGrid: expose the whole grid as a one-hot image.
        env = mgwr.FullyObsWrapper(env)
        env = mgwr.ImgObsWrapper(env)
        env = mgwr.FullyObsOneHotWrapper(env, drop_color=1,
                                         keep_classes=keep_classes,
                                         flatten=False)
        if args.algo == 'her':
            # HER needs a goal-conditioned interface on top of the env.
            env = mgwr.GoalPolicyWrapper(env)
    elif is_minigrid:
        # MiniGrid without full observability: skip FullyObsWrapper.
        env = mgwr.ImgObsWrapper(env)
        env = mgwr.FullyObsOneHotWrapper(env, drop_color=1,
                                         keep_classes=keep_classes,
                                         flatten=False)
    return env
def _thunk():
    """Construct one seeded env instance for a vectorized runner.

    Closure over: ``env_id``, ``seed``, ``rank``, ``log_dir``,
    ``allow_early_resets`` and the wrapper helpers imported by this file.
    """
    if env_id.startswith("dm"):
        # dm_control ids look like "dm.<domain>.<task>".
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)

    env.seed(seed + rank)

    # Mark true episode ends so the TimeLimit truncation isn't treated as done.
    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)

    # MiniGrid: one-hot fully observable grid; keep door/key channels only
    # for key-based tasks.
    keep_classes = ['agent', 'goal', 'wall', 'empty']
    if 'key' in env_id.lower():
        keep_classes.extend(['door', 'key'])
    is_minigrid = env_id.startswith('MiniGrid')
    if is_minigrid:
        env = mgwr.FullyObsWrapper(env)
        env = mgwr.ImgObsWrapper(env)
        env = mgwr.FullyObsOneHotWrapper(env, drop_color=1,
                                         keep_classes=keep_classes,
                                         flatten=False)

    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
                            allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif len(env.observation_space.shape) == 3 and not is_minigrid:
        # BUGFIX: MiniGrid one-hot observations are also 3-D; previously they
        # fell through here and raised NotImplementedError. Exclude them, as
        # the sibling _thunk in this file already does.
        if env_id.startswith('CarRacing'):
            env = WarpFrame(env, width=96, height=96, grayscale=True)
            env = ScaledFloatFrame(env)
        else:
            raise NotImplementedError

    # If the input has shape (W,H,C), wrap for PyTorch convolutions (C,W,H).
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3:
        env = TransposeImage(env, op=[2, 0, 1])

    return env
def _thunk():
    """Construct one seeded env instance for a vectorized runner.

    Closure over: ``env_id``, ``seed``, ``rank``, ``add_timestep``,
    ``log_dir``, ``allow_early_resets`` and the wrapper helpers imported by
    this file.
    """
    # BUGFIX: is_minigrid was previously assigned only inside the MiniGrid
    # branch, so the `elif ... and not is_minigrid` check below raised
    # UnboundLocalError for any non-MiniGrid, non-Atari pixel env.
    is_minigrid = False
    if env_id.startswith("MiniGrid"):
        env = wrap.ImgObsWrapper(gym.make(env_id))
        is_minigrid = True
    elif env_id.startswith("dm"):
        # dm_control ids look like "dm.<domain>.<task>".
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)

    env.seed(seed + rank)

    # Optionally append the timestep to flat observations of TimeLimit envs.
    obs_shape = env.observation_space.shape
    if add_timestep and len(
            obs_shape) == 1 and str(env).find('TimeLimit') > -1:
        env = AddTimestep(env)

    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
                            allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif len(env.observation_space.shape) == 3 and not is_minigrid:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")

    # If the input has shape (W,H,3), wrap for PyTorch convolutions (C,W,H).
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env, op=[2, 0, 1])

    return env
# Script fragment: builds a MiniGrid env wrapped for Tensorforce.
# NOTE(review): the trailing triple-quoted "agent creation" string is not
# closed within this chunk — it continues past the visible source.
from tensorforce.environments import Environment, OpenAIGym
from tensorforce.execution import Runner
from tensorforce.core.parameters import Exponential
from gym_minigrid import wrappers

# Pin all ops to CPU for this run.
with tf.device('/device:CPU:0'):
    # Parameters initialization
    steps_per_episode = 50
    num_episodes = 10000
    # assumes the 7x7x3 partial MiniGrid observation — TODO confirm
    obs_shape = (7, 7, 3)
    env_name = 'MiniGrid-Empty-6x6-v0'
    #env_name = 'MiniGrid-LavaGapS7-v0'
    #env_name = 'MiniGrid-DistShift1-v0'
    # Environment creation: strip the mission string, keep only the image obs.
    env = wrappers.gym.make(env_name)
    env = wrappers.ImgObsWrapper(env)
    num_actions = env.action_space.n
    # Wrap the gym env in Tensorforce's Environment interface with explicit
    # state/action specs and an episode length cap.
    env = Environment.create(environment=env, max_episode_timesteps=steps_per_episode, states=dict(type='float', shape=obs_shape), actions=dict(type='int', num_values=num_actions), visualize=False)
    # Agent creation (disabled below via a bare string literal)
    """agent = Agent.create(agent='dqn', environment=env, states=dict(type='float', shape=obs_shape), learning_rate=1e-3, memory=100000, batch_size=steps_per_episode, actions=dict(type='int', num_values=num_actions), exploration=dict(type='linear', unit='timesteps',
def make_env(i, this_seed):
    """Create one monitored env instance for slot *i* with its own seed.

    Closure over: ``spec``, ``env_name``, ``isfetch``, ``max_episode_steps``,
    ``dac``, ``log_dir`` and the wrapper helpers imported by this file.
    """
    # Previously, we directly called `gym.make(env_name)`, but running
    # `imitation.scripts.train_adversarial` within `imitation.scripts.parallel`
    # created a weird interaction between Gym and Ray -- `gym.make` would fail
    # inside this function for any of our custom environment unless those
    # environments were also `gym.register()`ed inside `make_env`. Even
    # registering the custom environment in the scope of `make_vec_env` didn't
    # work. For more discussion and hypotheses on this issue see PR #160:
    # https://github.com/HumanCompatibleAI/imitation/pull/160.
    if isfetch:
        # Fetch envs get dense rewards instead of the sparse default.
        env = spec.make(reward_type='dense')
    else:
        env = spec.make()
    # Seed each environment with a different, non-sequential seed for diversity
    # (even if caller is passing us sequentially-assigned base seeds). int() is
    # necessary to work around gym bug where it chokes on numpy int64s.
    env.seed(int(this_seed))
    # Explicit cap wins; otherwise fall back to the spec's own episode limit.
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps)
    elif spec.max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=spec.max_episode_steps)
    # If minigrid, then use some postprocessing of the env: pick which object
    # classes survive in the one-hot observation, keyed on the env name.
    keep_classes = ['goal', 'agent', 'wall', 'empty']
    drop_color = 1
    ename = env_name.lower()
    # for doorkey env
    flatten = False
    if 'doorkey' in ename:
        keep_classes.extend(['door', 'key'])
    if 'gotodoor' in ename:
        # keep color information (drop_color=0) since the task is color-keyed
        keep_classes.extend(['door'])
        drop_color = 0
    # for redblue doors
    if 'redblue' in ename:
        keep_classes.extend(['door'])
        drop_color = 0
    if 'lava' in ename or 'distshift' in ename:
        keep_classes.extend(['lava'])
        drop_color = 1
    # empty v1: presumably the last char is the version digit ("...-v1"),
    # so versions > 0 add lava — TODO confirm against registered env ids
    if 'empty' in ename and int(ename[-1]) > 0:
        keep_classes.append('lava')
        drop_color = 1
    if env_name.startswith('MiniGrid'):
        env = mgwr.FullyObsWrapper(env)
        env = mgwr.ImgObsWrapper(env)
        env = mgwr.FullyObsOneHotWrapper(env, drop_color=drop_color, keep_classes=keep_classes, flatten=flatten)
        if dac:
            env = mgwr.DACWrapper(env)
    # Use wrappers for Gym Fetch envs
    if env_name.startswith('Fetch'):
        #env = gym.wrappers.FilterObservation(env, filter_keys=['observation', 'desired_goal'])
        env = gym.wrappers.FlattenObservation(env)
    # Use Monitor to record statistics needed for Baselines algorithms logging
    # Optionally, save to disk
    log_path = None
    if log_dir is not None:
        log_subdir = os.path.join(log_dir, 'monitor')
        os.makedirs(log_subdir, exist_ok=True)
        # zero-padded per-env monitor file name, e.g. "007"
        log_path = os.path.join(log_subdir, f'{i:03d}')
    info_keywords = ()
    if isfetch:
        # record success flags so Baselines logging can report success rate
        info_keywords = ('is_success', )
    return MonitorPlus(env, log_path, allow_early_resets=True, info_keywords=info_keywords)