Example #1
0
def wrap_environment(wrapped_class, wrappers=None, **kwargs):
    """Instantiate an environment class and apply a chain of wrappers to it.

    :param wrapped_class: entry point handed to ``load`` to obtain the
        environment class.
    :param wrappers: optional iterable of ``(entry_point, kwargs)`` pairs.
        Each entry point is loaded and called with its kwargs; the object
        that call returns is then called with the current environment.
    :param kwargs: keyword arguments forwarded to the environment class.
    :return: the environment after all wrappers were applied, in order.
    """
    wrapper_specs = [] if wrappers is None else wrappers

    wrapped = load(wrapped_class)(**kwargs)
    for entry_point, factory_kwargs in wrapper_specs:
        # Two-step construction: build the wrapper factory first, then
        # apply it to the environment built so far.
        apply_wrapper = load(entry_point)(**factory_kwargs)
        wrapped = apply_wrapper(wrapped)

    return wrapped
Example #2
0
def mujoco_wrapper(entry_point, **kwargs):
    """Create the environment at ``entry_point`` with normalized actions.

    :param entry_point: entry point handed to ``load`` to obtain the
        environment class.
    :param kwargs: keyword arguments forwarded to the environment class.
    :return: the environment wrapped in ``NormalizedActionWrapper``.
    """
    environment_class = load(entry_point)
    base_env = environment_class(**kwargs)
    return NormalizedActionWrapper(base_env)
Example #3
0
        def make_discrete_task_by_id(
            env: str,
            **kwargs,
        ) -> Union[Dict[str, Any], Any]:
            """Create a task for the registered environment id ``env``.

            The entry point of the registered spec is loaded and passed to
            ``make_discrete_task_from_type``. If that raises a
            ``RuntimeError``, a warning is emitted and a temporary
            environment is created with ``gym.make`` and handed to
            ``make_task_fn`` (a name taken from the enclosing scope, which
            is not visible from here).

            Raises:
                RuntimeError: if ``env`` is not a registered env id.
            """
            # TODO: Actually instantiate the env here? or just dispatch based on the env class?
            registered_specs = env_registry.env_specs
            if env not in registered_specs:
                raise RuntimeError(
                    f"Can't create a task for env id {env}, since it isn't a registered env id."
                )
            # Load the entry-point class, and use it to determine what handler to use.
            entry_point = load(registered_specs[env].entry_point)

            try:
                return make_discrete_task_from_type(entry_point, **kwargs)
            except RuntimeError as exc:
                # Dispatching on the type failed: fall through to the
                # temporary-environment path below.
                message = f"A temporary environment will have to be created in order to make a task: {exc}"
                warnings.warn(RuntimeWarning(message))

            # IDEA: Could avoid re-creating the env between calls to this function, for
            # instance by saving a single temp env in a global variable and overwriting
            # it if `env` is of a different type.
            with gym.make(env) as temp_env:
                return make_task_fn(temp_env, **kwargs)
def universe_wrapper(entry_point, **kwargs):
    """Create the environment at ``entry_point`` and add preprocessing wrappers.

    The wrappers are applied in this order: ``MaxAndSkipEnv`` (skip=4),
    ``WarpFrame`` (dim=84, rgb=True), then ``ScaledFloatFrame``.

    :param entry_point: entry point handed to ``load`` to obtain the
        environment class.
    :param kwargs: keyword arguments forwarded to the environment class.
    :return: the fully wrapped environment.
    """
    raw_env = load(entry_point)(**kwargs)

    # Preprocessing chain, innermost wrapper first.
    skipped = MaxAndSkipEnv(raw_env, skip=4)
    warped = WarpFrame(skipped, dim=84, rgb=True)
    return ScaledFloatFrame(warped)
Example #5
0
    def of(
        cls,
        original: "EnvSpec",
        *,
        new_id: str,
        new_reward_threshold: Optional[float] = None,
        new_nondeterministic: Optional[bool] = None,
        new_max_episode_steps: Optional[int] = None,
        new_kwargs: Optional[Dict[str, Any]] = None,
        new_entry_point: Optional[Union[str, Callable[..., "gym.Env"]]] = None,
        wrappers: Optional[List[Callable[["gym.Env"], "gym.Env"]]] = None,
    ) -> "VariantEnvSpec":
        """ Returns a new env spec derived from `original`, optionally with wrappers.

        NOTE: The `new_kwargs` update a *copy* of the original kwargs, rather than
        replacing them. The `original` spec itself is left untouched.

        Args:
            original: The spec to derive from. Its `_kwargs`, `entry_point`,
                `reward_threshold`, `nondeterministic` and `max_episode_steps`
                attributes are read.
            new_id: Id of the new spec.
            new_reward_threshold: Overrides the original value when not None.
            new_nondeterministic: Overrides the original value when not None.
            new_max_episode_steps: Overrides the original value when not None.
            new_kwargs: Extra kwargs merged on top of the original kwargs.
            new_entry_point: Replaces the original entry point when given.
            wrappers: Callables applied, in order, to the env created by the
                (possibly replaced) entry point.

        Returns:
            The result of calling `cls` with the new id and the merged fields.
        """
        # Copy before updating: updating `original._kwargs` in place would silently
        # change the spec this variant is derived from. (Shallow copy: nested
        # values are still shared with the original.)
        new_spec_kwargs: Dict[str, Any] = dict(original._kwargs or {})
        new_spec_kwargs.update(new_kwargs or {})
        # Replace the entry-point if desired:
        new_spec_entry_point = new_entry_point or original.entry_point

        if new_reward_threshold is None:
            new_reward_threshold = original.reward_threshold
        if new_nondeterministic is None:
            new_nondeterministic = original.nondeterministic
        if new_max_episode_steps is None:
            new_max_episode_steps = original.max_episode_steps

        # Add wrappers if desired.
        if wrappers:
            # Get the callable that creates the env. Use the *selected* entry
            # point, so that `new_entry_point` is honored when wrappers are given.
            if callable(new_spec_entry_point):
                env_fn = new_spec_entry_point
            else:
                env_fn = load(new_spec_entry_point)
            # Snapshot the wrappers so later changes to the caller's list don't
            # affect the spec that was already created.
            env_wrappers = tuple(wrappers)

            def _new_entry_point(**kwargs):
                env = env_fn(**kwargs)
                for wrapper in env_wrappers:
                    env = wrapper(env)
                return env

            new_spec_entry_point = _new_entry_point

        return cls(
            new_id,
            base_spec=original,
            entry_point=new_spec_entry_point,
            reward_threshold=new_reward_threshold,
            nondeterministic=new_nondeterministic,
            max_episode_steps=new_max_episode_steps,
            kwargs=new_spec_kwargs,
        )
Example #6
0
def rand_wrapper(entry_point, **kwargs):
    """Create the environment at ``entry_point`` with randomization and
    action normalization.

    :param entry_point: entry point handed to ``load`` to obtain the
        environment class.
    :param kwargs: keyword arguments forwarded to the environment class.
    :return: the environment wrapped in ``RandomizedEnvWrapper`` and then in
        ``NormalizedActionWrapper`` (outermost).
    """
    base_env = load(entry_point)(**kwargs)

    # Randomization is the inner wrapper, normalization the outer one.
    randomized = RandomizedEnvWrapper(base_env)
    return NormalizedActionWrapper(randomized)
def mujoco_wrapper(entry_point, **kwargs):
    """Create the environment at ``entry_point`` with normalized actions and
    a time limit.

    Two options are consumed from ``kwargs`` before the environment class is
    called, so they never reach its constructor:

    * ``normalization_scale`` (default ``1.``) -- passed as ``scale`` to
      ``NormalizedActionWrapper``.
    * ``max_episode_steps`` (default ``200``) -- passed to ``TimeLimit``.

    :param entry_point: entry point handed to ``load`` to obtain the
        environment class.
    :param kwargs: remaining keyword arguments for the environment class.
    :return: the environment wrapped in ``NormalizedActionWrapper`` and then
        in ``TimeLimit`` (outermost).
    """
    scale = kwargs.pop('normalization_scale', 1.)
    step_limit = kwargs.pop('max_episode_steps', 200)

    base_env = load(entry_point)(**kwargs)
    normalized = NormalizedActionWrapper(base_env, scale=scale)
    return TimeLimit(normalized, max_episode_steps=step_limit)
Example #8
0
def get_env_class(
    env: Union[str, gym.Env, Type[gym.Env], Callable[[], gym.Env]]
) -> Type[gym.Env]:
    """Return the environment class that `env` refers to.

    Supported inputs, checked in this order:

    * a `functools.partial` -- when it wraps `gym.make` with a string env id
      (bound positionally or as `id=...`), the id is resolved; otherwise the
      wrapped callable `env.func` is resolved;
    * a string -- resolved with `load`;
    * a `gym.Wrapper` instance -- the type of `env.unwrapped`;
    * a `gym.Env` instance -- its type;
    * a subclass of `gym.Env` -- returned as-is.

    Raises:
        NotImplementedError: for anything else (e.g. a plain function).
    """
    if isinstance(env, partial):
        if env.func is gym.make:
            # The id may be bound positionally or by keyword; indexing
            # `env.args[0]` unconditionally raised IndexError for
            # `partial(gym.make, id="...")`.
            env_id = env.args[0] if env.args else env.keywords.get("id")
            if isinstance(env_id, str):
                return get_env_class(env_id)
        return get_env_class(env.func)
    if isinstance(env, str):
        return load(env)
    if isinstance(env, gym.Wrapper):
        # Look through the wrappers to the innermost environment.
        return type(env.unwrapped)
    if isinstance(env, gym.Env):
        return type(env)
    if inspect.isclass(env) and issubclass(env, gym.Env):
        return env
    raise NotImplementedError(
        f"Don't know how to get the class of env being used by {env}!")
Example #9
0
def _make(id_, env_kwargs=None):
    """
    Recreating the gym make function from gym/envs/registration.py
    so that it can support extra arguments for the environment.

    The kwargs registered on the spec are used as a base and ``env_kwargs``
    override them, whether the entry point is a callable or a string.

    :param id_: (str) The environment ID
    :param env_kwargs: (dict) The extra arguments for the environment
    :return: the created environment, wrapped in ``TimeLimit`` when the spec
        has a timestep limit and is not tagged ``vnc``
    """
    if env_kwargs is None:
        env_kwargs = {}

    # getting the spec from the ID we want
    spec = registry.spec(id_)

    # Keeping the checks and safe guards of the old code.
    # NOTE: the message must use ``spec.id``; the spec has no ``id_``
    # attribute, so the old message raised AttributeError instead of showing.
    assert spec._entry_point is not None, (
        'Attempting to make deprecated env {}. '
        '(HINT: is there a newer registered version of this env?)'.format(spec.id))

    # create the env, with the original kwargs, and the new ones overriding them if needed
    merged_kwargs = {**spec._kwargs, **env_kwargs}
    if callable(spec._entry_point):
        env = spec._entry_point(**merged_kwargs)
    else:
        cls = load(spec._entry_point)
        env = cls(**merged_kwargs)

    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = spec

    # Keeping the old patching system for _reset, _step and timestep limit
    if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(
            env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)
    if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
        from gym.wrappers.time_limit import TimeLimit
        env = TimeLimit(env,
                        max_episode_steps=env.spec.max_episode_steps,
                        max_episode_seconds=env.spec.max_episode_seconds)
    return env
Example #10
0
def create_test_env(env_id, n_envs=1, is_atari=False,
                    stats_path=None, seed=0,
                    log_dir='', should_render=True, hyperparams=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param is_atari: (bool) Atari envs are not supported by this version
        (the Atari wrapper is disabled); passing True raises
        NotImplementedError
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards (None disables logging)
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: frame_stack).
        An optional 'env_wrapper' entry is read through ``get_wrapper_class``
        and then removed from the dict that was passed in.
    :return: (gym.Env)
    """
    # The default is None: treat it as "no hyperparams" instead of crashing
    # on the lookups below.
    if hyperparams is None:
        hyperparams = {}

    # HACK to save logs
    if log_dir is not None:
        # Resolve once: os.makedirs('') raises FileNotFoundError, which made
        # the default log_dir='' unusable.
        abs_log_dir = os.path.abspath(log_dir)
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = abs_log_dir
        os.makedirs(abs_log_dir, exist_ok=True)
        logger.configure()

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    hyperparams.pop('env_wrapper', None)

    if is_atari:
        # The Atari env creation is disabled, so `env` was never assigned
        # here and the function failed later with UnboundLocalError.
        raise NotImplementedError(
            "Atari environments are not supported: the Atari wrapper is disabled.")
    elif n_envs > 1:
        # start_method = 'spawn' for thread safe
        env = SubprocVecEnv([make_env(env_id, i, seed, log_dir, wrapper_class=env_wrapper) for i in range(n_envs)])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        try:
            class_ = load(spec.entry_point)
        except AttributeError:
            # Backward compatibility with gym
            class_ = load(spec._entry_point)
        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        render_name = None
        use_subproc = 'renders' not in inspect.getfullargspec(class_.__init__).args
        if not use_subproc:
            render_name = 'renders'
        # Dev branch of pybullet
        # use_subproc = use_subproc and 'render' not in inspect.getfullargspec(class_.__init__).args
        # if not use_subproc and render_name is None:
        #     render_name = 'render'

        # Create the env, with the original kwargs, and the new ones overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            # Only used when `render_name` was set above (DummyVecEnv branch).
            env = class_(**{**spec._kwargs}, **{render_name: should_render})
            # NOTE(review): seeds with 0 rather than `seed` -- kept as-is,
            # confirm whether that is intended.
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env, os.path.join(log_dir, "0"), allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams.get('normalize', False):
            normalize_kwargs = hyperparams.get('normalize_kwargs', {})
            print("Loading running average")
            print("with params: {}".format(normalize_kwargs))
            env = VecNormalize(env, training=False, **normalize_kwargs)
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
Example #11
0
def mujoco_wrapper(entry_point, **kwargs):
    """Create the environment at ``entry_point`` without any extra wrapper.

    :param entry_point: entry point handed to ``load`` to obtain the
        environment class.
    :param kwargs: keyword arguments forwarded to the environment class.
    :return: the new environment instance.
    """
    environment_class = load(entry_point)
    return environment_class(**kwargs)
Example #12
0
def get_class_and_kwargs(spec_or_id):
    """Return the loaded entry point and the kwargs of an environment spec.

    :param spec_or_id: a ``registration.EnvSpec``, or a value that
        ``registration.spec`` can resolve to one.
    :return: a ``(loaded_entry_point, kwargs)`` tuple taken from the spec's
        ``_entry_point`` and ``_kwargs`` attributes.
    """
    spec = (spec_or_id if isinstance(spec_or_id, registration.EnvSpec)
            else registration.spec(spec_or_id))
    loaded_entry_point = registration.load(spec._entry_point)
    return loaded_entry_point, spec._kwargs
Example #13
0
def create_test_env(env_id,
                    n_envs=1,
                    stats_path=None,
                    seed=0,
                    log_dir=None,
                    should_render=True,
                    hyperparams=None,
                    env_params=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards (None disables logging)
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams for the env (ex: n_stack)
    :param env_params: (dict) the parameters to change in env
    :return: (gym.Env)
    :raises ValueError: if ``env_id`` is not in the gym registry
    """
    # None defaults instead of a shared mutable `{}` default / a crash on
    # `hyperparams[...]` below.
    if env_params is None:
        env_params = {}
    if hyperparams is None:
        hyperparams = {}

    # If the environment is not found, suggest the closest match
    registered_envs = set(gym.envs.registry.env_specs.keys())
    if env_id not in registered_envs:
        # get_close_matches may return an empty list; indexing it directly
        # raised IndexError and hid the ValueError below.
        matches = difflib.get_close_matches(env_id, registered_envs, n=1)
        closest_match = matches[0] if matches else "'no close match found...'"
        raise ValueError(
            '{} not found in gym registry, you maybe meant {}?'.format(
                env_id, closest_match))

    is_atari = 'NoFrameskip' in env_id

    # HACK to save logs
    if log_dir is not None:
        # Resolve once so an empty string maps to the current directory
        # instead of making os.makedirs('') fail.
        abs_log_dir = os.path.abspath(log_dir)
        os.environ["OPENAI_LOG_FORMAT"] = 'log'
        os.environ["OPENAI_LOGDIR"] = abs_log_dir
        os.makedirs(abs_log_dir, exist_ok=True)
        logger.configure()

    # Create the environment and wrap it if necessary
    if is_atari:
        print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif n_envs > 1:
        env = SubprocVecEnv([
            make_env(env_id,
                     i,
                     seed,
                     log_dir,
                     env_params=env_params,
                     params_path=stats_path) for i in range(n_envs)
        ])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        class_ = load(spec._entry_point)
        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        render_name = None
        use_subproc = 'renders' not in inspect.getfullargspec(
            class_.__init__).args
        if not use_subproc:
            render_name = 'renders'
        # Dev branch of pybullet
        # use_subproc = use_subproc and 'render' not in inspect.getfullargspec(class_.__init__).args
        # if not use_subproc and render_name is None:
        #     render_name = 'render'

        # Create the env, with the original kwargs, and the new ones overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            # Only used when `render_name` was set above (DummyVecEnv branch).
            env = class_(**{**spec._kwargs}, **{render_name: should_render})
            if len(env_params) > 0:
                env = modify_env_params(env, stats_path, **env_params)
            # NOTE(review): seeds with 0 rather than `seed` -- kept as-is,
            # confirm whether that is intended.
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env,
                              os.path.join(log_dir, "0"),
                              allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([
                make_env(env_id,
                         0,
                         seed,
                         log_dir,
                         env_params=env_params,
                         params_path=stats_path)
            ])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([
            make_env(env_id,
                     0,
                     seed,
                     log_dir,
                     env_params=env_params,
                     params_path=stats_path)
        ])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams.get('normalize', False):
            saved_normalize_kwargs = hyperparams.get('normalize_kwargs', {})
            print("Loading running average")
            print("with params: {}".format(saved_normalize_kwargs))
            # Reward normalization is always disabled at test time. Filter
            # into a new dict rather than deleting from the caller's dict.
            normalize_kwargs = {
                key: value
                for key, value in saved_normalize_kwargs.items()
                if key != 'norm_reward'
            }

            env = VecNormalize(env,
                               training=False,
                               **normalize_kwargs,
                               norm_reward=False)
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('n_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
Example #14
0
def create_test_env(env_id,
                    n_envs=1,
                    is_atari=False,
                    stats_path=None,
                    norm_reward=False,
                    seed=0,
                    log_dir='',
                    should_render=True):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param is_atari: (bool)
    :param stats_path: (str) path to folder containing saved running averaged
        (an ``obs_rms.pkl`` file) and, optionally, an ``n_stack`` file
    :param norm_reward: (bool) Whether to normalize rewards or not when using Vecnormalize
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards (None disables logging)
    :param should_render: (bool) For Pybullet env, display the GUI
    :return: (gym.Env)
    """
    # HACK to save logs
    if log_dir is not None:
        # Resolve once: os.makedirs('') raises FileNotFoundError, which made
        # the default log_dir='' unusable.
        abs_log_dir = os.path.abspath(log_dir)
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = abs_log_dir
        os.makedirs(abs_log_dir, exist_ok=True)
        logger.configure()

    # Create the environment and wrap it if necessary
    if is_atari:
        print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif n_envs > 1:
        env = SubprocVecEnv(
            [make_env(env_id, i, seed, log_dir) for i in range(n_envs)])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        class_ = load(spec._entry_point)
        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        use_subproc = 'renders' not in inspect.getfullargspec(
            class_.__init__).args

        # Create the env, with the original kwargs, and the new ones overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            env = class_(**{**spec._kwargs}, renders=should_render)
            # NOTE(review): seeds with 0 rather than `seed` -- kept as-is,
            # confirm whether that is intended.
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env,
                              os.path.join(log_dir, "0"),
                              allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([make_env(env_id, 0, seed, log_dir)])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([make_env(env_id, 0, seed, log_dir)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        # Check that the stats file exists: os.path.join() alone returns a
        # non-empty string, so the old condition was always true.
        if os.path.isfile(os.path.join(stats_path, 'obs_rms.pkl')):
            print("Loading running average")
            env = VecNormalize(env, training=False, norm_reward=norm_reward)
            env.load_running_average(stats_path)

        n_stack_file = os.path.join(stats_path, 'n_stack')
        if os.path.isfile(n_stack_file):
            with open(n_stack_file, 'r') as f:
                n_stack = int(f.read())
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env