Ejemplo n.º 1
0
 def _load_env():
     """Build and wrap the environment named by ``args.env``.

     Relies on names from the enclosing scope: ``args``, ``log_dir``,
     ``keep_classes``, ``i``, plus ``gym``, ``Monitor``, ``mgwr`` and ``os``
     (none are defined in this snippet -- presumably closure/module level).
     """
     env = gym.make(args.env)
     env.see_through_walls = args.see_through_walls
     # The Monitor below writes into log_dir, so make sure it exists.
     os.makedirs(log_dir, exist_ok=True)
     # Resuming under the same model name means appending to the old log.
     append = args.load_model_name == args.model_name
     if args.train:
         env = Monitor(env, log_dir + '{}'.format(i), allow_early_resets=True, append=append)
     # MiniGrid-specific observation pipeline.
     if args.env.startswith('MiniGrid'):
         # Only the fully-observable variant gets the FullyObsWrapper first;
         # both variants then share ImgObs + one-hot encoding.
         if args.fullobs:
             env = mgwr.FullyObsWrapper(env)
         env = mgwr.ImgObsWrapper(env)
         env = mgwr.FullyObsOneHotWrapper(env, drop_color=1, keep_classes=keep_classes, flatten=False)
         # HER additionally needs a goal-conditioned interface (fullobs only,
         # matching the original branching).
         if args.fullobs and args.algo == 'her':
             env = mgwr.GoalPolicyWrapper(env)
     return env
Ejemplo n.º 2
0
    def _thunk():
        """Create, seed and fully wrap one environment instance.

        Closes over ``env_id``, ``seed``, ``rank``, ``log_dir`` and
        ``allow_early_resets`` from the enclosing factory function.
        """
        # DeepMind Control ids look like "dm.<domain>.<task>".
        if env_id.startswith("dm"):
            _, domain_name, task_name = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain_name, task_name=task_name)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        # Atari envs get rebuilt through make_atari (the standard preprocessing
        # factory); the plainly-made env above is discarded in that case.
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        # Mark time-limit truncations so they can be told apart from real
        # terminations downstream.
        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        # MiniGrid one-hot encoding: door/key channels only for "key" envs.
        keep_classes = ['agent', 'goal', 'wall', 'empty']
        if 'key' in env_id.lower():
            keep_classes.extend(['door', 'key'])
        if env_id.startswith('MiniGrid'):
            env = mgwr.FullyObsWrapper(env)
            env = mgwr.ImgObsWrapper(env)
            env = mgwr.FullyObsOneHotWrapper(env,
                                             drop_color=1,
                                             keep_classes=keep_classes,
                                             flatten=False)

        # Episode statistics for Baselines-style logging, one file per rank.
        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        pixel_obs = len(env.observation_space.shape) == 3
        if is_atari:
            if pixel_obs:
                env = wrap_deepmind(env)
        elif pixel_obs:
            # CarRacing is the only supported non-Atari pixel env here.
            if env_id.startswith('CarRacing'):
                env = WarpFrame(env, width=96, height=96, grayscale=True)
                env = ScaledFloatFrame(env)
            else:
                raise NotImplementedError

        # (W, H, C) image observations -> (C, W, H) for PyTorch convolutions.
        if len(env.observation_space.shape) == 3:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Ejemplo n.º 3
0
    def _thunk():
        """Create, seed and wrap one environment instance.

        Closes over ``env_id``, ``seed``, ``rank``, ``add_timestep``,
        ``log_dir`` and ``allow_early_resets`` from the enclosing factory.
        """
        # Fix: is_minigrid was previously assigned only inside the MiniGrid
        # branch but read unconditionally below, raising NameError for any
        # non-MiniGrid env with 3-D observations. Initialize it up front.
        is_minigrid = False
        if env_id.startswith("MiniGrid"):
            env = wrap.ImgObsWrapper(gym.make(env_id))
            is_minigrid = True
        elif env_id.startswith("dm"):
            # DeepMind Control ids look like "dm.<domain>.<task>".
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        # Atari envs are rebuilt through the standard preprocessing factory.
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        # For flat observations under a TimeLimit, optionally append the
        # timestep to the observation.
        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        # Episode statistics for Baselines-style logging, one file per rank.
        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3 and not is_minigrid:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Ejemplo n.º 4
0
from tensorforce.environments import Environment, OpenAIGym
from tensorforce.execution import Runner
from tensorforce.core.parameters import Exponential
from gym_minigrid import wrappers

with tf.device('/device:CPU:0'):
    # Parameters initialization
    steps_per_episode = 50
    num_episodes = 10000
    obs_shape = (7, 7, 3)
    env_name = 'MiniGrid-Empty-6x6-v0'
    #env_name = 'MiniGrid-LavaGapS7-v0'
    #env_name = 'MiniGrid-DistShift1-v0'
    # Environment creation
    env = wrappers.gym.make(env_name)
    env = wrappers.ImgObsWrapper(env)
    num_actions = env.action_space.n
    env = Environment.create(environment=env,
                             max_episode_timesteps=steps_per_episode,
                             states=dict(type='float', shape=obs_shape),
                             actions=dict(type='int', num_values=num_actions),
                             visualize=False)
    # Agent creation
    """agent = Agent.create(agent='dqn',
                         environment=env,
                         states=dict(type='float', shape=obs_shape),
                         learning_rate=1e-3,
                         memory=100000,
                         batch_size=steps_per_episode,
                         actions=dict(type='int', num_values=num_actions),
                         exploration=dict(type='linear', unit='timesteps', 
Ejemplo n.º 5
0
    def make_env(i, this_seed):
        """Instantiate, seed, wrap and monitor one environment.

        Closes over ``spec``, ``isfetch``, ``env_name``, ``max_episode_steps``,
        ``dac`` and ``log_dir`` from the enclosing scope.
        """
        # `spec.make` (not `gym.make`) avoids a Gym/Ray interaction when run
        # under `imitation.scripts.parallel`: `gym.make` would fail for custom
        # envs unless they were re-registered inside this function. See
        # https://github.com/HumanCompatibleAI/imitation/pull/160.
        env = spec.make(reward_type='dense') if isfetch else spec.make()

        # Non-sequential per-env seeds for diversity; int() works around a gym
        # bug that chokes on numpy int64 seeds.
        env.seed(int(this_seed))

        # Explicit step limit wins over the one declared in the spec.
        if max_episode_steps is not None:
            env = TimeLimit(env, max_episode_steps)
        elif spec.max_episode_steps is not None:
            env = TimeLimit(env, max_episode_steps=spec.max_episode_steps)

        # MiniGrid post-processing: pick which object classes survive the
        # one-hot encoding and whether the color channel is dropped. The
        # checks below run in order, so later matches override drop_color.
        ename = env_name.lower()
        keep_classes = ['goal', 'agent', 'wall', 'empty']
        drop_color = 1
        flatten = False
        if 'doorkey' in ename:
            keep_classes.extend(['door', 'key'])
        if 'gotodoor' in ename:
            keep_classes.extend(['door'])
            drop_color = 0
        if 'redblue' in ename:
            keep_classes.extend(['door'])
            drop_color = 0
        if 'lava' in ename or 'distshift' in ename:
            keep_classes.extend(['lava'])
            drop_color = 1
        # Empty-env versions above v0 also contain lava
        # (assumes the env name ends in its version digit -- TODO confirm).
        if 'empty' in ename and int(ename[-1]) > 0:
            keep_classes.append('lava')
            drop_color = 1

        if env_name.startswith('MiniGrid'):
            env = mgwr.FullyObsWrapper(env)
            env = mgwr.ImgObsWrapper(env)
            env = mgwr.FullyObsOneHotWrapper(env,
                                             drop_color=drop_color,
                                             keep_classes=keep_classes,
                                             flatten=flatten)
            if dac:
                env = mgwr.DACWrapper(env)

        # Gym Fetch envs expose dict observations; flatten them.
        if env_name.startswith('Fetch'):
            env = gym.wrappers.FlattenObservation(env)

        # Monitor records the episode statistics Baselines logging needs;
        # optionally persisted under <log_dir>/monitor/<iii>.
        monitor_path = None
        if log_dir is not None:
            monitor_dir = os.path.join(log_dir, 'monitor')
            os.makedirs(monitor_dir, exist_ok=True)
            monitor_path = os.path.join(monitor_dir, f'{i:03d}')
        extra_keys = ('is_success', ) if isfetch else ()
        return MonitorPlus(env,
                           monitor_path,
                           allow_early_resets=True,
                           info_keywords=extra_keys)