def __init__(self, space, null_value=0, name="obs", force_float32=True,
              schemas=None):
     """Wrap ``space``, a gym space instance.

     A ``GymDict`` space becomes a ``Composite`` of recursively wrapped
     sub-spaces, each named ``"<name>_<key>"``.  The ``NamedTupleSchema``
     used for it is looked up in ``schemas`` under ``name`` and created and
     stored there when missing.  Any other space is kept as-is.

     ``schemas`` is the dict shared with the nested wrappers; a new empty
     dict is used when it is ``None``.

     Raises ``ValueError`` if ``schemas[name]`` already exists but is not a
     ``NamedTupleSchema`` with the same set of field names.
     """
     self._gym_space = space
     self._base_name = name
     self._null_value = null_value
     self._schemas = schemas = {} if schemas is None else schemas
     if not isinstance(space, GymDict):
         # Plain space: keep it and remember whether float64 is down-cast.
         self.space = space
         downcast = space.dtype == np.float64 and force_float32
         self._dtype = np.float32 if downcast else None
         return
     field_names = list(space.spaces.keys())
     schema = schemas.get(name)
     if schema is None:
         schema = NamedTupleSchema(name, field_names)
         schemas[name] = schema  # Stored in the shared schemas dict.
     elif not isinstance(schema, NamedTupleSchema) or (
             sorted(schema._fields) != sorted(field_names)):
         raise ValueError(f"Name clash in schemas: {name}.")
     children = []
     for key, sub_space in space.spaces.items():
         children.append(GymSpaceWrapper(
             space=sub_space,
             null_value=null_value,
             name="_".join([name, key]),
             force_float32=force_float32,
             schemas=schemas))
     self.space = Composite(children, schema)
     self._dtype = None
Example #2
0
 def __init__(self, space, null_value=0, name="obs", force_float32=True):
     """Wrap ``space``, a gym space instance.

     A ``GymDict`` space is turned into a ``Composite`` whose namedtuple
     class is looked up in this module's globals under ``name``, or created
     and stored there when absent.  Each sub-space is wrapped recursively
     under the name ``"<name>_<key>"``.  Because the namedtuple classes
     live in the module globals, distinct GymDict spaces wrapped in the
     same file need distinct ``name`` values.  Any other space is stored
     unchanged.

     Raises ``ValueError`` when ``name`` is already bound in globals to
     something other than a namedtuple class with the same field names.
     """
     self._gym_space = space
     self._base_name = name
     self._null_value = null_value
     if not isinstance(space, GymDict):
         self.space = space
         downcast = space.dtype == np.float64 and force_float32
         self._dtype = np.float32 if downcast else None
         return
     keys = list(space.spaces.keys())
     module_ns = globals()
     nt_cls = module_ns.get(name)
     if nt_cls is None:
         nt_cls = namedtuple(name, keys)
         module_ns[name] = nt_cls  # Module-level binding, needed for pickle.
     elif not is_namedtuple_class(nt_cls) or (
             sorted(nt_cls._fields) != sorted(keys)):
         raise ValueError(f"Name clash in globals: {name}.")
     wrapped = []
     for key, sub_space in space.spaces.items():
         wrapped.append(GymSpaceWrapper(
             space=sub_space,
             null_value=null_value,
             name="_".join([name, key]),
             force_float32=force_float32))
     self.space = Composite(wrapped, nt_cls)
     self._dtype = None
Example #3
0
 def __init__(self, space, null_value=0, name="obs", force_float32=True):
     """Store ``space`` and, for a ``GymDict``, build the matching Composite.

     For a ``GymDict`` the namedtuple class bound to ``name`` in the module
     globals is reused when its fields match the dict keys, created and
     bound when missing, and a ``ValueError`` is raised on any other
     pre-existing binding.  Sub-spaces are wrapped recursively with names
     of the form ``"<name>_<key>"``.
     """
     self._gym_space = space
     self._base_name = name
     self._null_value = null_value
     if isinstance(space, GymDict):
         field_names = list(space.spaces.keys())
         existing = globals().get(name)
         if existing is not None:
             compatible = is_namedtuple_class(existing) and (
                 sorted(existing._fields) == sorted(field_names))
             if not compatible:
                 raise ValueError(f"Name clash in globals: {name}.")
             nt = existing
         else:
             nt = namedtuple(name, field_names)
             # Bound at module level so pickle can locate the class.
             globals()[name] = nt
         members = []
         for key, member_space in space.spaces.items():
             member = GymSpaceWrapper(space=member_space,
                                      null_value=null_value,
                                      name="_".join([name, key]),
                                      force_float32=force_float32)
             members.append(member)
         self.space = Composite(members, nt)
         self._dtype = None
     else:
         self.space = space
         if space.dtype == np.float64 and force_float32:
             self._dtype = np.float32
         else:
             self._dtype = None
Example #4
0
 def __init__(self):
     """Set the start/end positions and build the action/observation spaces.

     The observation space is a ``Composite`` (namedtuple class ``Obs``) of
     two ``FloatBox`` spaces, each bounded by ``[0, end_pos]``.
     """
     self.end_pos = 10
     self.cur_pos = 0
     self._action_space = IntBox(low=0, high=2)
     # Two independent, identically-bounded boxes.
     components = [FloatBox(low=0, high=self.end_pos) for _ in range(2)]
     self._observation_space = Composite(components, Obs)
Example #5
0
 def __init__(self, env, task_id, variant_id, source_id, num_tasks,
              max_num_variants, num_demo_sources):
     """Wrap ``env`` and extend its observation space with three ID fields.

     The task, variant and source IDs are stored as zero-dimensional numpy
     arrays.  The new observation space is a ``Composite`` (namedtuple
     class ``EnvIDObs``) of the wrapped env's observation space followed by
     ``IntBox`` spaces for the task, variant and source IDs.
     """
     super().__init__(env)
     assert isinstance(task_id, int) and isinstance(variant_id, int)

     def as_scalar_array(value):
         # One-element array reshaped to a zero-dimensional array.
         return np.asarray([value]).reshape(())

     self._task_id_np = as_scalar_array(task_id)
     self._variant_id_np = as_scalar_array(variant_id)
     self._source_id = as_scalar_array(source_id)
     id_spaces = (
         IntBox(0, num_tasks),
         IntBox(0, max_num_variants),
         # Source 0 is the novice, hence one extra slot.
         IntBox(0, num_demo_sources + 1),
     )
     self.observation_space = Composite(
         (env.observation_space,) + id_spaces, EnvIDObs)
Example #6
0
 def __init__(self, wrapped_env, act_null_value=0, force_float32=True):
     """Store ``wrapped_env`` and build fixed action/observation spaces.

     The action space is a ``GymSpaceWrapper`` around a 3-dimensional gym
     ``Box`` in ``[-1, 1]``.  The observation space is a ``Composite``
     (namedtuple class ``OBS``) of an unbounded ``(64, 64, 3)`` box and a
     100-element box in ``[-1, 1]``.
     """
     self._wrapped_env = wrapped_env
     action_dim = 3
     gym_action_box = gym.spaces.Box(low=-1,
                                     high=1,
                                     shape=(action_dim, ),
                                     dtype=np.float32)
     self.action_space = GymSpaceWrapper(
         space=gym_action_box,
         name="act",
         null_value=act_null_value,
         force_float32=force_float32,
     )
     image_box = Box(low=-np.inf,
                     high=np.inf,
                     shape=(64, 64, 3),
                     dtype=np.float32)
     vector_box = Box(np.array([-1] * 100),
                      np.array([1] * 100),
                      dtype=np.float32)
     self.observation_space = Composite([image_box, vector_box], OBS)
     self.spaces = EnvSpaces(observation=self.observation_space,
                             action=self.action_space)
     self._dtype = None
     self.current_location = None
Example #7
0
def convert_dm_control_to_rlpyt_space(dm_control_space):
    """Recursively convert a dm_control spec into an rlpyt space.

    The input type is checked in this order, which matters because the
    ``BoundedArray`` check must run before the ``Array`` check:
       (1) BoundedArray -> ``Box`` bounded by the spec's minimum/maximum.
       (2) Array        -> ``Box`` bounded by -inf/+inf.
       (3) OrderedDict  -> ``Composite`` of the converted values.

    - Generally, dm_control observation_specs are OrderedDict with other
      spaces (e.g. Array) nested in it.
    - Generally, dm_control action_specs are of type `BoundedArray`.

    The namedtuple class for the first OrderedDict converted is cached in
    the module-level ``State`` and reused for every later OrderedDict with
    the same keys.  An OrderedDict with different keys (e.g. a nested one)
    gets its own, uncached namedtuple class so that its fields match its
    spaces.

    Raises:
        ValueError: if ``dm_control_space`` is none of the supported types.
    """
    global State
    if isinstance(dm_control_space, BoundedArray):
        rlpyt_box = Box(low=dm_control_space.minimum,
                        high=dm_control_space.maximum,
                        shape=None,
                        dtype=dm_control_space.dtype)
        # Sanity check: the shape derived from the bounds must equal the
        # spec's shape.
        assert rlpyt_box.shape == dm_control_space.shape, ((
            rlpyt_box.shape, dm_control_space.shape))
        return rlpyt_box
    if isinstance(dm_control_space, Array):
        # A BoundedArray can never reach this point; it is handled above.
        return Box(low=-float("inf"),
                   high=float("inf"),
                   shape=dm_control_space.shape,
                   dtype=dm_control_space.dtype)
    if isinstance(dm_control_space, OrderedDict):
        field_names = list(dm_control_space.keys())
        if State is None:
            State = namedtuple('State', field_names)
        if tuple(State._fields) == tuple(field_names):
            nt_cls = State
        else:
            # Different keys than the cached class: do not reuse it, or the
            # fields would not line up with the spaces.
            # NOTE(review): this class is not bound at module level, so
            # instances are presumably not picklable by reference - confirm
            # whether callers need that.
            nt_cls = namedtuple('State', field_names)
        return Composite([
            convert_dm_control_to_rlpyt_space(value)
            for value in dm_control_space.values()
        ], nt_cls)
    raise ValueError(dm_control_space)
Example #8
0
    def __init__(self):
        """Initialise counters, spaces and the image/coordinate buffers.

        The observation space is a ``Composite`` (namedtuple class ``Obs``)
        of two ``(width, width, 1)`` boxes in ``[-1, 1]`` and one
        ``(n_items, 4)`` box in ``[-10, 10]``.
        """
        self.cur_step = 0
        self.width = CANVAS_WIDTH
        # Object width/height divided by the canvas width.
        obj_size = np.array([OBJ_WIDTH, OBJ_WIDTH], dtype=np.float32)
        self.obj_wh = obj_size / self.width
        self.n_items = N_ITEMS

        self._action_space = IntBox(low=0, high=N_ACTIONS)
        canvas_shape = (self.width, self.width, 1)
        observation_parts = [
            FloatBox(low=-1, high=1, shape=canvas_shape),
            FloatBox(low=-1, high=1, shape=canvas_shape),
            FloatBox(low=-10, high=10, shape=(self.n_items, 4)),
        ]
        self._observation_space = Composite(observation_parts, Obs)

        self.target_im = np.zeros(shape=canvas_shape, dtype=np.float32)
        # Coordinates are (n_obj, 4) arrays of (x0, y0, x1, y1).
        self.target_coord = None
        self.cur_im = None
        self.cur_coord = None
        self.item = None
Example #9
0
class GymSpaceWrapper:
    """Wraps a gym space to match the rlpyt interface; most of
    the functionality is for automatically converting a GymDict (dictionary)
    space into an rlpyt Composite space (and converting between the two).  Use
    inside the initialization of the environment wrapper for a gym environment.
    """

    def __init__(self,
                 space,
                 null_value=0,
                 name="obs",
                 force_float32=True,
                 schemas=None):
        """Input ``space`` is a gym space instance.

        Input ``name`` governs naming of internal NamedTupleSchemas used to
        store Gym info.

        Input ``schemas`` is the dict in which those schemas are stored,
        keyed by name; it is shared with nested wrappers and a new dict is
        created when it is ``None``.

        Raises ``ValueError`` if ``schemas[name]`` already exists and is not
        a ``NamedTupleSchema`` with the same set of fields as ``space``.
        """
        self._gym_space = space
        self._base_name = name
        self._null_value = null_value
        if schemas is None:
            schemas = {}
        self._schemas = schemas
        if isinstance(space, GymDict):
            field_names = list(space.spaces.keys())
            nt = schemas.get(name)
            if nt is None:
                nt = NamedTupleSchema(name, field_names)
                # Stored in the shared dict, which is also handed to the
                # nested wrappers and to ``dict_to_nt`` in ``convert``.
                schemas[name] = nt
            elif not (isinstance(nt, NamedTupleSchema)
                      and sorted(nt._fields) == sorted(field_names)):
                raise ValueError(f"Name clash in schemas: {name}.")
            spaces = [
                GymSpaceWrapper(
                    space=v,
                    null_value=null_value,
                    name="_".join([name, k]),
                    force_float32=force_float32,
                    schemas=schemas,
                ) for k, v in space.spaces.items()
            ]
            self.space = Composite(spaces, nt)
            self._dtype = None
        else:
            self.space = space
            self._dtype = (np.float32 if (space.dtype == np.float64
                                          and force_float32) else None)

    def sample(self):
        """Returns a single sample in a namedtuple (for composite) or numpy
        array using the ``sample()`` method of the underlying gym
        space(s)."""
        sample = self.space.sample()
        if self.space is self._gym_space:  # Not Composite.
            # Force numpy array, might force float64->float32.
            sample = np.asarray(sample, dtype=self._dtype)
        return sample

    def null_value(self):
        """Similar to ``sample()`` but returning a null value."""
        if self.space is self._gym_space:
            # A sample is drawn only to obtain an array to overwrite.
            null = np.asarray(self.space.sample(), dtype=self._dtype)
            if self._null_value is not None:
                try:
                    null[:] = self._null_value
                except IndexError:  # e.g. scalar.
                    null.fill(self._null_value)
            else:
                null.fill(0)
        else:  # Is composite.
            null = self.space.null_value()
        return null

    def convert(self, value):
        """For dictionary space, use to convert wrapped env's dict to rlpyt
        namedtuple, i.e. inside the environment wrapper's ``step()``, for
        observation output to the rlpyt sampler (see helper function in
        file)"""
        return dict_to_nt(value, name=self._base_name, schemas=self._schemas)

    def revert(self, value):
        """For dictionary space, use to revert namedtuple action into wrapped
        env's dict, i.e. inside the environment wrappers ``step()``, for input
        to the underlying gym environment (see helper function in file)."""
        return nt_to_dict(value)

    @property
    def dtype(self):
        """The forced dtype if one is set, else the wrapped space's dtype."""
        return self._dtype or self.space.dtype

    @property
    def shape(self):
        """Shape of the wrapped space."""
        return self.space.shape

    def contains(self, x):
        """Delegates to the wrapped space's ``contains``."""
        return self.space.contains(x)

    def __repr__(self):
        return self.space.__repr__()

    def __eq__(self, other):
        """Compare the wrapped spaces.

        Another ``GymSpaceWrapper`` is unwrapped first; otherwise the wrapped
        space would be compared against the wrapper object itself and two
        wrappers of equal spaces would never compare equal.
        """
        if isinstance(other, GymSpaceWrapper):
            other = other.space
        return self.space.__eq__(other)

    @property
    def low(self):
        """``low`` of the wrapped space."""
        return self.space.low

    @property
    def high(self):
        """``high`` of the wrapped space."""
        return self.space.high

    @property
    def n(self):
        """``n`` of the wrapped space."""
        return self.space.n

    def seed(self, seed=None):
        """Seed the wrapped space, or every sub-space of a Composite (each
        with the same ``seed``), returning the result(s)."""
        if isinstance(self.space, Composite):
            return [space.seed(seed=seed) for space in self.space.spaces]
        else:
            return self.space.seed(seed=seed)
    def __init__(self, scenario_name='', scenario_cfg=None, max_steps=200, 
                 gif_freq=500, steps_per_action=4, image_dir='images/test', 
                 viewport=False):
        """Create the holodeck environment and derive the rlpyt spaces.

        When ``scenario_cfg`` is given, its ``'package_name'`` is installed
        through holodeck if it is not already among the installed packages.
        The action space mirrors the holodeck action space.  The
        observation space is built from the agent's sensors: sensors with
        three-dimensional data are counted as image channels, all others by
        their total number of elements, and sensors whose name contains
        ``'Task'`` are skipped.
        """
        # Load holodeck environment, installing its package first if needed.
        if scenario_cfg is not None:
            package = scenario_cfg['package_name']
            if package not in holodeck.installed_packages():
                holodeck.install(package)

        self._env = holodeck.make(scenario_name=scenario_name,
                                  scenario_cfg=scenario_cfg,
                                  show_viewport=viewport)

        # Translate the holodeck action space for use with rlpyt.
        holodeck_actions = self._env.action_space
        if self.is_action_continuous:
            self._action_space = FloatBox(-1, 1, holodeck_actions.shape)
        else:
            self._action_space = IntBox(holodeck_actions.get_low(),
                                        holodeck_actions.get_high(),
                                        ())

        # Tally image channels and flat entries over all non-task sensors.
        max_width = max_height = 0
        num_img = num_lin = 0
        for sensor in self._env._agent.sensors.values():
            if 'Task' in sensor.name:
                continue
            dims = sensor.sensor_data.shape
            if len(dims) != 3:
                num_lin += np.prod(dims)
                continue
            max_width = max(max_width, dims[0])
            max_height = max(max_height, dims[1])
            num_img += dims[2]

        img_only = num_img > 0 and num_lin == 0
        lin_only = num_lin > 0 and num_img == 0
        # Anything that is not exclusively one kind is treated as both.
        self.has_img = not lin_only
        self.has_lin = not img_only
        if img_only:
            self._observation_space = FloatBox(
                0, 1, (num_img, max_width, max_height))
        elif lin_only:
            self._observation_space = FloatBox(-256, 256, (num_lin,))
        else:
            img_box = FloatBox(0, 1, (num_img, max_width, max_height))
            lin_box = FloatBox(-256, 256, (num_lin,))
            self._observation_space = Composite([img_box, lin_box],
                                                HolodeckObservation)

        # Set data members
        self._max_steps = max_steps
        self._steps_per_action = steps_per_action
        self._image_dir = image_dir
        self.gif_freq = gif_freq
        self.gif_images = []
        self.rollout_count = -1
        self.curr_step = 0
class HolodeckEnv(Env):
    """Environment that exposes a holodeck scenario through rlpyt spaces.

    Observations are assembled from the agent's sensors: each sensor whose
    data is three-dimensional contributes one image plane per channel, and
    every other sensor is flattened into a single vector.  Sensors whose
    name contains ``'Task'`` are left out.
    """

    def __init__(self, scenario_name='', scenario_cfg=None, max_steps=200,
                 gif_freq=500, steps_per_action=4, image_dir='images/test',
                 viewport=False):
        """Create the holodeck environment and derive the rlpyt spaces.

            Args:
                scenario_name: forwarded to ``holodeck.make``.
                scenario_cfg: optional config forwarded to
                    ``holodeck.make``; when given, its ``'package_name'``
                    is installed if it is not already installed.
                max_steps(int): number of ``step`` calls after which
                    ``terminal`` is forced to True.
                gif_freq(int): frames are collected during rollouts whose
                    index is a multiple of this value.
                steps_per_action(int): number of holodeck steps taken with
                    the same action per ``step`` call.
                image_dir(str): directory the gifs are written to.
                viewport(bool): forwarded as ``show_viewport``.
        """
        # Load holodeck environment
        if scenario_cfg is not None and \
                scenario_cfg['package_name'] not in \
                holodeck.installed_packages():
            holodeck.install(scenario_cfg['package_name'])

        self._env = holodeck.make(scenario_name=scenario_name,
                                  scenario_cfg=scenario_cfg,
                                  show_viewport=viewport)

        # Get action space from holodeck env and store for use with rlpyt
        if self.is_action_continuous:
            self._action_space = FloatBox(-1, 1, self._env.action_space.shape)
        else:
            self._action_space = IntBox(self._env.action_space.get_low(),
                                        self._env.action_space.get_high(),
                                        ())

        # Calculate observation space with all sensor data
        max_width = 0
        max_height = 0
        num_img = 0
        num_lin = 0
        for sensor in self._env._agent.sensors.values():
            if 'Task' in sensor.name:
                continue
            shape = sensor.sensor_data.shape
            if len(shape) == 3:
                max_width = max(max_width, shape[0])
                max_height = max(max_height, shape[1])
                num_img += shape[2]
            else:
                # int() keeps the count a plain integer; np.prod returns a
                # NumPy scalar (a float for an empty shape), which is not a
                # valid shape entry.
                num_lin += int(np.prod(shape))

        if num_img > 0 and num_lin == 0:
            self.has_img = True
            self.has_lin = False
            self._observation_space = FloatBox(
                0, 1, (num_img, max_width, max_height))
        elif num_lin > 0 and num_img == 0:
            self.has_img = False
            self.has_lin = True
            self._observation_space = FloatBox(-256, 256, (num_lin,))
        else:
            self.has_img = True
            self.has_lin = True
            self._observation_space = Composite([
                FloatBox(0, 1, (num_img, max_width, max_height)),
                FloatBox(-256, 256, (num_lin,))],
                HolodeckObservation)

        # Set data members
        self._max_steps = max_steps
        self._image_dir = image_dir
        self._steps_per_action = steps_per_action
        self.curr_step = 0
        self.gif_freq = gif_freq
        self.rollout_count = -1
        self.gif_images = []

    @property
    def horizon(self):
        """Maximum number of steps per rollout."""
        return self._max_steps

    @property
    def img_size(self):
        """Shape of the image observation, or None without images."""
        img_null = self._get_img_null()
        if img_null is not None:
            return img_null.shape
        else:
            return None

    @property
    def lin_size(self):
        """Length of the flat observation, or None without flat data."""
        lin_null = self._get_lin_null()
        if lin_null is not None:
            return lin_null.shape[0]
        else:
            return None

    @property
    def action_size(self):
        """First dimension of a continuous action space, else the value of
        ``get_high()`` of the discrete action space."""
        return self._env.action_space.shape[0] \
            if self.is_action_continuous \
            else self._env.action_space.get_high()

    @property
    def is_action_continuous(self):
        """True if the holodeck action space is a ContinuousActionSpace."""
        return isinstance(self._env.action_space,
                          holodeck.spaces.ContinuousActionSpace)

    def reset(self):
        ''' Resets env and returns initial state

            If frames were collected during the previous rollout, they are
            written out as a gif first.

            Returns:
                The observation built by ``_get_state_rep``
        '''
        sensor_dict = self._env.reset()
        self.curr_step = 0

        self.rollout_count += 1
        if self.gif_images:
            print('Making gif...')
            img_file = 'holodeck{}.gif'.format(self.rollout_count)
            img_path = os.path.join(self._image_dir, img_file)
            self._make_gif(self.gif_images, img_path)
            self.gif_images = []

        return self._get_state_rep(sensor_dict)

    def step(self, action):
        ''' Passes action to env and returns next state, reward, and terminal

            The action is applied ``steps_per_action`` times; rewards are
            summed and the terminal flag of the last sub-step is used.

            Args:
                action: Action in the action space; a continuous action is
                    multiplied by the holodeck action space's high values

            Returns:
                (EnvStep:named_tuple_array)
        '''
        reward = 0

        if self.is_action_continuous:
            # Scale into a new array; an in-place ``*=`` would modify the
            # array owned by the caller.
            action = action * np.array(self._env.action_space.get_high())

        for _ in range(self._steps_per_action):
            sensor_dict, temp_reward, terminal, _ = self._env.step(action)
            reward += temp_reward

        if self.rollout_count % self.gif_freq == 0 and self.has_img:
            self.gif_images.append(self.get_img(sensor_dict))

        state_rep = self._get_state_rep(sensor_dict)

        self.curr_step += 1
        if self.curr_step >= self._max_steps:
            terminal = True

        return EnvStep(state_rep, np.array(reward), terminal, None)

    def get_img(self, sensor_dict):
        """Returns the ``'RGBCamera'`` entry, or None without images."""
        return sensor_dict['RGBCamera'] if self.has_img else None

    def _get_state_rep(self, sensor_dict):
        ''' Holodeck returns a dictionary of sensors.
            The agent requires arrays matching the observation space.

            Args:
                sensor_dict(dict(nparray)): A dictionary of array
                representations of available sensors

            Returns:
                HolodeckObservation(img, lin) when both kinds of data are
                present, otherwise the image array or the flat array alone
        '''

        if self._env.num_agents > 1:  # Only include main agent observations
            sensor_dict = sensor_dict[self._env._agent.name]  # TODO get main agent without accessing protected member

        img = self._get_img_null()
        lin = self._get_lin_null()

        img = img.astype(np.float32) if img is not None else None
        lin = lin.astype(np.float32) if lin is not None else None

        curr_img = 0
        curr_lin = 0
        for name, value in sensor_dict.items():
            if 'Task' in name:  # Do not include tasks in observation
                continue
            if len(value.shape) == 3:
                width = value.shape[0]
                height = value.shape[1]
                for c in range(value.shape[2]):
                    # Index both axes in one subscript: chaining
                    # ``[:width][:height]`` would slice the first axis twice.
                    img[curr_img, :width, :height] = value[:, :, c] / 255
                    curr_img += 1
            else:
                sensor_flat = value.flatten()
                lin[curr_lin:curr_lin + sensor_flat.shape[0]] = sensor_flat
                curr_lin += sensor_flat.shape[0]

        if self.has_img and self.has_lin:
            return HolodeckObservation(img, lin)
        elif self.has_img:
            return img
        else:
            return lin

    def _make_gif(self, rollout, filename):
        """Writes the frames in ``rollout`` to ``filename``, each cast to
        uint8."""
        with imageio.get_writer(filename, mode='I', duration=1 / 30) as writer:
            for x in rollout:
                writer.append_data((x).astype(np.uint8))

    def _get_img_null(self):
        """Null value of the image part of the observation space, or None
        without images."""
        if self.has_img and self.has_lin:
            return self._observation_space.spaces[0].null_value()
        elif self.has_img:
            return self._observation_space.null_value()
        else:
            return None

    def _get_lin_null(self):
        """Null value of the flat part of the observation space, or None
        without flat data."""
        if self.has_img and self.has_lin:
            return self._observation_space.spaces[1].null_value()
        elif self.has_lin:
            return self._observation_space.null_value()
        else:
            return None
Example #12
0
class GymSpaceWrapper:
    """Wraps a gym space to interface from dictionaries to namedtuples."""

    def __init__(self, space, null_value=0, name="obs", force_float32=True):
        """Store ``space`` and, for a ``GymDict``, build a ``Composite``.

        For a ``GymDict`` the namedtuple class is looked up in this module's
        globals under ``name`` and created and bound there when missing;
        sub-spaces are wrapped recursively as ``"<name>_<key>"``.

        Raises ``ValueError`` if ``name`` is already bound in globals to
        anything other than a namedtuple class with the same fields.
        """
        self._gym_space = space
        self._base_name = name
        self._null_value = null_value
        if isinstance(space, GymDict):
            field_names = list(space.spaces.keys())
            nt = globals().get(name)
            if nt is None:
                nt = namedtuple(name, field_names)
                globals()[name] = nt  # Put at module level for pickle.
            elif not (is_namedtuple_class(nt)
                      and sorted(nt._fields) == sorted(field_names)):
                raise ValueError(f"Name clash in globals: {name}.")
            spaces = [
                GymSpaceWrapper(space=v,
                                null_value=null_value,
                                name="_".join([name, k]),
                                force_float32=force_float32)
                for k, v in space.spaces.items()
            ]
            self.space = Composite(spaces, nt)
            self._dtype = None
        else:
            self.space = space
            self._dtype = np.float32 if (space.dtype == np.float64
                                         and force_float32) else None

    def sample(self):
        """Return a sample of the wrapped space; for a non-composite space
        it is converted to a numpy array of the forced dtype, if any."""
        sample = self.space.sample()
        if self.space is self._gym_space:  # Not Composite.
            # Force numpy array, might force float64->float32.
            sample = np.asarray(sample, dtype=self._dtype)
        return sample

    def null_value(self):
        """Like ``sample()`` but filled with the null value (0 if the
        configured null value is None)."""
        if self.space is self._gym_space:
            # A sample is drawn only to obtain an array to overwrite.
            null = np.asarray(self.space.sample(), dtype=self._dtype)
            if self._null_value is not None:
                try:
                    null[:] = self._null_value
                except IndexError:  # e.g. scalar.
                    null.fill(self._null_value)
            else:
                null.fill(0)
        else:  # Is composite.
            null = self.space.null_value()
        return null

    def convert(self, value):
        """Convert wrapped env's observation from dict to namedtuple."""
        return dict_to_nt(value, name=self._base_name)

    def revert(self, value):
        """Revert namedtuple action into wrapped env's dict."""
        return nt_to_dict(value)

    @property
    def dtype(self):
        """The forced dtype if one is set, else the wrapped space's dtype."""
        return self._dtype or self.space.dtype

    @property
    def shape(self):
        """Shape of the wrapped space."""
        return self.space.shape

    def contains(self, x):
        """Delegates to the wrapped space's ``contains``."""
        return self.space.contains(x)

    def __repr__(self):
        return self.space.__repr__()

    def __eq__(self, other):
        """Compare the wrapped spaces.

        Another ``GymSpaceWrapper`` is unwrapped first; otherwise the wrapped
        space would be compared against the wrapper object itself and two
        wrappers of equal spaces would never compare equal.
        """
        if isinstance(other, GymSpaceWrapper):
            other = other.space
        return self.space.__eq__(other)

    @property
    def low(self):
        """``low`` of the wrapped space."""
        return self.space.low

    @property
    def high(self):
        """``high`` of the wrapped space."""
        return self.space.high

    @property
    def n(self):
        """``n`` of the wrapped space."""
        return self.space.n