Example #1
def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = FilterObservation(env, ['observation', 'desired_goal'])
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(), str(rank)),
                  info_keywords=('is_success', ))
    env.seed(seed)
    return env
Example #2
    def __init__(self):
        self.mean = 0
        self.std = 1
        self.dims = 52
        self.lb = -1 * np.ones(self.dims)
        self.ub = 1 * np.ones(self.dims)
        self.counter = 0
        self.env = FlattenObservation(
            FilterObservation(gym.make('FetchReach-v1'),
                              ['observation', 'desired_goal']))
        self.num_rollouts = 3
        self.render = False
        self.policy_shape = (4, 13)

        # tunable hyper-parameters in LA-MCTS
        self.Cp = 10
        self.leaf_size = 100
        self.kernel_type = "linear"
        self.gamma_type = "auto"
        self.ninits = 30

        print("===========initialization===========")
        print("mean:", self.mean)
        print("std:", self.std)
        print("dims:", self.dims)
        print("policy:", self.policy_shape)
Example #3
def make_robotics_env(env_id, seed, rank=0, allow_early_resets=True):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) the rank of the environment (for logging)
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) The robotic environment
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    keys = ['observation', 'desired_goal']
    # TODO: remove try-except once most users are running modern Gym
    try:  # for modern Gym (>=0.15.4)
        from gym.wrappers import FilterObservation, FlattenObservation
        env = FlattenObservation(FilterObservation(env, keys))
    except ImportError:  # for older gym (<=0.15.3)
        from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
        env = FlattenDictWrapper(env, keys)
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(), str(rank)),
                  info_keywords=('is_success', ),
                  allow_early_resets=allow_early_resets)
    env.seed(seed)
    return env
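A minimal usage sketch for this helper (hedged: it assumes the Baselines-style set_global_seeds, logger, and Monitor used above are importable, and that the robotics extras for FetchReach-v1 are installed):

# Hypothetical seeds/ranks for two parallel workers.
train_env = make_robotics_env('FetchReach-v1', seed=42, rank=0)
eval_env = make_robotics_env('FetchReach-v1', seed=42, rank=1,
                             allow_early_resets=False)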
Example #4
def make_env(with_monitor=False, folder_name='results'):
    env = gym.make("FetchReach-v1")
    env.env.reward_type = 'dense'
    env = FlattenObservation(FilterObservation(env, ['observation', 'desired_goal']))
    if with_monitor:
        env = gym.wrappers.Monitor(env, folder_name, force=True)
    return env 
Example #5
    def _wrap_test(self, env: gym.Env):
        env = FilterObservation(env, filter_keys=['lidar'])
        env = Flatten(env, flatten_obs=False, flatten_actions=True)
        env = NormalizeObservations(env)
        env = FixedResetMode(env, mode='grid')
        env = TimeLimit(env, max_episode_steps=self._env_config.eval_time_limit)
        env = ActionRepeat(env, n=self._env_config.action_repeat)
        return env

    def _wrap_training(self, env: gym.Env):
        env = FilterObservation(env, filter_keys=['lidar'])
        env = Flatten(env, flatten_obs=True, flatten_actions=True)
        env = NormalizeObservations(env)
        env = FixedResetMode(env, mode='random')
        env = TimeLimit(env, max_episode_steps=self._env_config.training_time_limit)
        env = ActionRepeat(env, n=self._env_config.action_repeat)
        env = GymWrapper(environment=env)
        env = wrappers.SinglePrecisionWrapper(env)
        return env
Example #7
    def test_nested_dicts_size(self, observation_space, flat_shape):
        env = FakeEnvironment(observation_space=observation_space)

        # Make sure we are testing the right environment for the test.
        observation_space = env.observation_space
        assert isinstance(observation_space, Dict)

        wrapped_env = FlattenObservation(FilterObservation(env, env.obs_keys))
        assert wrapped_env.observation_space.shape == flat_shape

        assert wrapped_env.observation_space.dtype == np.float32
Example #8
    def _wrap_test(self, env: gym.Env):
        env = FilterObservation(env, filter_keys=['lidar'])
        env = Flatten(env, flatten_obs=False, flatten_actions=True)
        env = NormalizeObservations(env)
        env = InfoToObservation(env)
        env = FixedResetMode(env, mode='grid')
        env = TimeLimit(env, max_episode_steps=self._env_config.eval_time_limit)
        gym_env = ActionRepeat(env, n=self._env_config.action_repeat)
        env = GymWrapper(environment=gym_env)
        env = wrappers.SinglePrecisionWrapper(env)
        env.gym_env = gym_env
        return env
Example #9
def create_goal_gym_env(**kwargs):
    frames = kwargs.pop('frames', 1)
    name = kwargs.pop('name')
    limit_steps = kwargs.pop('limit_steps', False)

    env = gym.make(name, **kwargs)
    env = FlattenObservation(
        FilterObservation(env, ['observation', 'desired_goal']))

    if frames > 1:
        env = wrappers.FrameStack(env, frames, False)
    if limit_steps:
        env = wrappers.LimitStepsWrapper(env)
    return env
Example #10
def main():
    as_gdads = True
    name = "pointmass"
    drop_abs_position = True

    dads_env_fn = envs_fns[name]
    conf: Conf = CONFS[name]

    dict_env = as_dict_env(dads_env_fn())
    dict_env = TimeLimit(dict_env, max_episode_steps=conf.ep_len)
    if drop_abs_position:
        dict_env = DropGoalEnvsAbsoluteLocation(dict_env)
    if as_gdads:
        flat_env = SkillWrapper(env=dict_env, skill_reset_steps=conf.ep_len // 2)
    else:
        flat_obs_content = ["observation", "desired_goal", "achieved_goal"]
        if drop_abs_position:
            flat_obs_content.remove("achieved_goal")  # Because always 0 vector
        flat_env = FlattenObservation(FilterObservation(dict_env, filter_keys=flat_obs_content))

    flat_env = TransformReward(flat_env, f=lambda r: r*conf.reward_scaling)
    flat_env = Monitor(flat_env)

    filename = f"modelsCommandSkills/{name}-gdads{as_gdads}"
    if os.path.exists(filename + ".zip"):
        sac = SAC.load(filename, env=flat_env)
        if as_gdads:
            flat_env.load(filename)
    else:
        sac = SAC("MlpPolicy", env=flat_env, verbose=1, learning_rate=conf.lr,
                  tensorboard_log=f"{filename}-tb", buffer_size=10000)
        train(model=sac, conf=conf, save_fname=filename)
        if as_gdads:
            flat_env.save(filename)

    if as_gdads:
        flat_env.set_sac(sac)
        eval_dict_env(dict_env=dict_env,
                      model=flat_env,
                      ep_len=conf.ep_len)
    show(model=sac, env=flat_env, conf=conf)
Example #11
def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    #env = FlattenDictWrapper(env, ['observation', 'desired_goal'])
    keys = ['observation', 'desired_goal']
    # TODO: remove try-except once most users are running modern Gym
    try:  # for modern Gym (>=0.15.4)
        from gym.wrappers import FilterObservation, FlattenObservation
        env = FlattenObservation(FilterObservation(env, keys))
    except ImportError:  # for older gym (<=0.15.3)
        from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
        env = FlattenDictWrapper(env, keys)
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(), str(rank)),
                  info_keywords=('is_success', ))
    env.seed(seed)
    return env
Example #12
def make_env(env_id,
             env_type,
             mpi_rank=0,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None,
             env_kwargs=None,
             logger_dir=None,
             initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    elif env_type == 'robotics':
        env = gym.make(env_id, **env_kwargs)
        env = DoneOnSuccessWrapper(env)
    else:
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        if env_type == 'robotics':
            env = FlattenObservation(
                FilterObservation(env, ['observation', 'desired_goal']))
        else:
            env = FlattenObservation(env)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example #13
    def test_nested_dicts_ravel(self, observation_space, flat_shape):
        env = FakeEnvironment(observation_space=observation_space)
        wrapped_env = FlattenObservation(FilterObservation(env, env.obs_keys))
        obs = wrapped_env.reset()
        assert obs.shape == wrapped_env.observation_space.shape
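The same shape check as a standalone sketch against a real goal-based env (an assumption: gym is installed with the robotics extras so FetchReach-v1 is available):

import gym
from gym.wrappers import FilterObservation, FlattenObservation

env = gym.make('FetchReach-v1')
wrapped_env = FlattenObservation(
    FilterObservation(env, ['observation', 'desired_goal']))

# The flattened observation should match the wrapper's reported Box shape.
obs = wrapped_env.reset()
assert obs.shape == wrapped_env.observation_space.shape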
Example #14
def make_toylab_dads_env(**kwargs):
    env = DADSCustomToyLabEnv()
    env = ObsAsOrderedDict(env)
    env = FilterObservation(env, filter_keys=["achieved_goal"])
    env = FlattenObservation(env)
    return DADSWrapper(env, **kwargs)
Example #15
def load(environment_name,
         env_id=None,
         concat_desired_goal=True,
         discount=1.0,
         max_episode_steps=None,
         sparse_reward=False,
         use_success_wrapper=True,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         wrap_with_process=False):
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that by default a ``TimeLimit`` wrapper is used to limit episode lengths
    to the default benchmarks defined by the registered environments.

    Args:
        environment_name: Name of the environment to load.
        env_id: A scalar ``Tensor`` of the environment ID of the time step.
        concat_desired_goal (bool): If True, filter the dict observation down
            to the 'observation' and 'desired_goal' keys and flatten it into a
            single vector.
        discount: Discount to use for the environment.
        max_episode_steps: If None, ``max_episode_steps`` is set to the default
            step limit defined in the environment's spec. No limit is applied
            if set to 0 or if there is no ``timestep_limit`` in the
            environment's spec.
        sparse_reward (bool): If True, the episode ends once the goal is
            achieved. Rewards are shifted by +1, changing them from -1/0 to
            0/1.
        use_success_wrapper (bool): If True, wraps the environment with the
            SuccessWrapper, which records success info after a specified
            number of timesteps.
        gym_env_wrappers: Iterable of wrapper classes to apply directly to the
            gym environment.
        alf_env_wrappers: Iterable of wrapper classes to apply to the torch
            environment.
        wrap_with_process (bool): If True, run the environment in a separate
            process.

    Returns:
        An AlfEnvironment instance.
    """
    assert (environment_name.startswith("Fetch")
            or environment_name.startswith("HandManipulate")), (
                "This suite only supports OpenAI's Fetch and ShadowHand envs!")

    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    gym_spec = gym.spec(environment_name)
    env = gym_spec.make()

    if max_episode_steps is None:
        if gym_spec.max_episode_steps is not None:
            max_episode_steps = gym_spec.max_episode_steps
        else:
            max_episode_steps = 0

    def env_ctor(env_id=None):
        return suite_gym.wrap_env(
            env,
            env_id=env_id,
            discount=discount,
            max_episode_steps=max_episode_steps,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers,
            image_channel_first=False)

    # concat robot's observation and the goal location
    if concat_desired_goal:
        keys = ["observation", "desired_goal"]
        try:  # for modern Gym (>=0.15.4)
            from gym.wrappers import FilterObservation, FlattenObservation
            env = FlattenObservation(FilterObservation(env, keys))
        except ImportError:  # for older gym (<=0.15.3)
            from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
            env = FlattenDictWrapper(env, keys)
    if use_success_wrapper:
        env = SuccessWrapper(env, max_episode_steps)
    env = ObservationClipWrapper(env)
    if sparse_reward:
        env = SparseReward(env)

    if wrap_with_process:
        process_env = process_environment.ProcessEnvironment(
            functools.partial(env_ctor))
        process_env.start()
        torch_env = alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
    else:
        torch_env = env_ctor(env_id=env_id)

    return torch_env
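A hedged usage sketch for load(), following only the signature and docstring above (SuccessWrapper, suite_gym.wrap_env and the other helpers are assumed to come from the surrounding ALF-style codebase):

# Hypothetical call; keyword values chosen per the docstring above.
torch_env = load(
    'FetchReach-v1',
    concat_desired_goal=True,  # filter + flatten ['observation', 'desired_goal']
    sparse_reward=True,        # end the episode on success, rewards become 0/1
    max_episode_steps=50)      # 0 would mean no step limit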
Example #16
    def train(self):
        """Method for training the Network"""

        for epoch in range(self.n_epochs):
            for episode in range(self.n_episodes):

                done = False
                score = 0

                episode_experience = []

                # Reset the environment to its initial state. The goal-based
                # env returns a dict observation with 'observation',
                # 'achieved_goal' and 'desired_goal' keys, which is indexed
                # directly below (FilterObservation/FlattenObservation wrap
                # environments, not individual observations).
                observation = self.env.reset()

                # Because we are not working with a continuous action space,
                # we limit ourselves to a finite number of timesteps per
                # episode; otherwise the for loop below would be replaced
                # with `while not done:`

                for _ in range(self.n_time_steps):

                    self.env.render()
                    action = self.act(observation['observation'])
                    print(action)
                    new_observation, reward, done, info = self.env.step(action)

                    score += reward

                    episode_experience.append(
                        (observation['observation'], action, reward,
                         new_observation['observation'], done))

                    self.save(np.asarray(observation['observation']), action,
                              reward, new_observation['observation'], done)

                    observation = new_observation
                    self.learn()

                    # break if we finish the environment
                    if done:
                        break

                # HER Algorithm
                for t in range(len(episode_experience)):
                    for _ in range(self.K):
                        future = np.random.randint(t, len(episode_experience))
                        goal = episode_experience[future][3]
                        state = episode_experience[t][0]
                        action = episode_experience[t][1]
                        next_state = episode_experience[t][3]
                        done = np.array_equal(next_state, goal)
                        reward = 0 if done else -1

                        self.save(state, action, reward, next_state, done)

            # save the model every 10 epochs
            # (an arbitrary interval that may change)
            if epoch % 10 == 0 and epoch > 0:
                self.save_model()
Example #17
def _process_goalenv(env: GoalEnv, **kwargs):
    env = FilterObservation(env, filter_keys=["observation"])
    env = FlattenObservation(env)
    return DADSWrapper(env, **kwargs)
Example #18
import gym
import multiprocessing
import neat
import numpy as np
import os
import pickle
import random
import time
from gym.wrappers import FlattenObservation, FilterObservation
import visualize

NUM_CORES = 1

env = gym.make('FetchReach-v1')
env.env.reward_type = 'dense'
env = FlattenObservation(FilterObservation(env, ['observation', 'desired_goal']))

print("action space: {0!r}".format(env.action_space))
print("observation space: {0!r}".format(env.observation_space))

env = gym.wrappers.Monitor(env, 'results', force=True)

class RoboGenome(neat.DefaultGenome):
    def __init__(self, key):
        super().__init__(key)
        self.discount = None

    def configure_new(self, config):
        super().configure_new(config)
        self.discount = 0.01 + 0.98 * random.random()
Example #19
def flatten_env(dict_env, drop_abs_position):
    flat_obs_content = ["observation", "desired_goal", "achieved_goal"]
    if drop_abs_position:
        flat_obs_content.remove("achieved_goal")  # Because always 0 vector
    return FlattenObservation(
        FilterObservation(dict_env, filter_keys=flat_obs_content))
Example #20
def _make_flat(*args, **kwargs):
    # Older gym versions expose FlattenDictWrapper; newer ones replace it
    # with FilterObservation + FlattenObservation. dir() only lists local
    # names inside a function, so check the module globals instead.
    if "FlattenDictWrapper" in globals():
        return FlattenDictWrapper(*args, **kwargs)
    return FlattenObservation(FilterObservation(*args, **kwargs))
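A short usage sketch for _make_flat (hedged: it assumes the surrounding module has imported whichever of FlattenDictWrapper or FilterObservation/FlattenObservation the installed gym version provides):

import gym

env = gym.make('FetchReach-v1')
# Resolves to FlattenObservation(FilterObservation(...)) on modern Gym,
# or FlattenDictWrapper(...) on older gym versions.
flat_env = _make_flat(env, ['observation', 'desired_goal'])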