Example #1
    def gen_obs_space(self, encoder_dict=None):
        state = self.state_function()
        if isinstance(state, dict):
            ob_space_dict = OrderedDict()

            for space_name in state.keys():
                if encoder_dict is not None and "im" in space_name:
                    if "im" in encoder_dict.keys():
                        shape = encoder_dict[space_name].get_output_shape_at(
                            0)[1:]
                    else:
                        shape = state[space_name].shape
                elif encoder_dict is not None and space_name == "forces":
                    if "forces" in encoder_dict.keys():
                        shape = (
                            encoder_dict[space_name].flow[-1].output_dim, )
                    else:
                        shape = state["forces"].shape
                else:
                    shape = state[space_name].shape
                high = np.inf * np.ones(shape)
                low = -high
                ob_space_dict[space_name] = spaces.Box(low,
                                                       high,
                                                       dtype=np.float32)
            self.observation_space = Dict(ob_space_dict)
        else:
            high = np.inf * np.ones(state.shape)
            low = -high
            self.observation_space = spaces.Box(low, high, dtype=np.float32)
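The pattern above builds one unbounded Box per entry of the state dict and wraps them in a Dict space. A minimal, self-contained sketch of the same idea (assuming gym and numpy; the key names and shapes are hypothetical):

import numpy as np
from collections import OrderedDict
from gym import spaces

# Hypothetical state shapes; in the example above they come from state_function().
state_shapes = OrderedDict([("im", (64, 64, 3)), ("forces", (6,))])

ob_space_dict = OrderedDict()
for name, shape in state_shapes.items():
    high = np.inf * np.ones(shape)
    ob_space_dict[name] = spaces.Box(-high, high, dtype=np.float32)

observation_space = spaces.Dict(ob_space_dict)
sample = observation_space.sample()  # dict of numpy arrays, one per key
assert sample["forces"].shape == (6,)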
Example #2
 def __init__(self,
              action_scale=1,
              frame_skip=5,
              reward_type='vel_distance',
              indicator_threshold=.1,
              fixed_goal=5,
              fix_goal=False,
              max_speed=6):
     self.quick_init(locals())
     MultitaskEnv.__init__(self)
     self.action_scale = action_scale
     MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
     bounds = self.model.actuator_ctrlrange.copy()
     low = bounds[:, 0]
     high = bounds[:, 1]
     self.action_space = Box(low=low, high=high)
     self.reward_type = reward_type
     self.indicator_threshold=indicator_threshold
     self.fixed_goal = fixed_goal
     self.fix_goal = fix_goal
     self._state_goal = None
     self.goal_space = Box(np.array(-1*max_speed), np.array(max_speed))
     obs_size = self._get_env_obs().shape[0]
     high = np.inf * np.ones(obs_size)
     low = -high
     self.obs_space = Box(low, high)
     self.achieved_goal_space = Box(self.obs_space.low[8], self.obs_space.high[8])
     self.observation_space = Dict([
         ('observation', self.obs_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.achieved_goal_space),
         ('state_observation', self.obs_space),
         ('state_desired_goal', self.goal_space),
         ('state_achieved_goal', self.achieved_goal_space),
     ])
     self.reset()
Example #3
 def __init__(self, config):
     self.observation_space = config.get(
         "space", Tuple([Discrete(2),
                         Dict({"a": Box(-1.0, 1.0, (2, ))})]))
     self.action_space = self.observation_space
     self.flattened_action_space = flatten_space(self.action_space)
     self.episode_len = config.get("episode_len", 100)
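The flatten_space used above may come from the surrounding framework; gym itself ships similar helpers in gym.spaces.utils that collapse a nested space into one flat Box. A small sketch of gym's version (note it may differ from the framework-specific flatten_space in the example):

from gym.spaces import Box, Dict, Discrete, Tuple
from gym.spaces.utils import flatten, flatten_space

space = Tuple([Discrete(2), Dict({"a": Box(-1.0, 1.0, (2,))})])

flat_space = flatten_space(space)           # one flat Box: 2 one-hot dims + 2 Box dims
flat_sample = flatten(space, space.sample())

print(flat_space.shape, flat_sample.shape)  # both (4,)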
Example #4
    def __init__(self, env):
        super().__init__(env)

        self._num_action_parts = 1
        self._action_params_offset = 0
        if not self.has_avatar:
            self._num_action_parts += 1
            self._action_params_offset = 1

        self._action_splits = np.zeros(self._num_action_parts)

        self._total_position_params = 0
        if not self.has_avatar:
            self._action_splits[0] = self.width * self.height
            self._total_position_params += self.width * self.height

        self._action_logit_offsets = {}

        total_action_params = 0
        for i, action_name in enumerate(self.env.action_names):
            self._action_logit_offsets[
                action_name] = total_action_params + self._total_position_params
            total_action_params += self.num_action_ids[action_name]

        self._action_splits[self._action_params_offset] = total_action_params

        self._total_actions = int(np.sum(self._action_splits))

        self.action_space = MultiDiscrete(self._action_splits)
        self.observation_space = Dict({
            'obs':
            self.observation_space,
            'mask':
            Box(0, 1, shape=(self._total_actions, )),
        })
Example #5
    def _augment_observation_space(self) -> None:
        """
        Update the observation space with augmented dimensions
        """
        for agent, space in self.observation_spaces.items():
            if self._use_dict_obs_space:
                spaces = {
                    'original_obs': space,
                    'random_noise': Box(low=np.full(self._latent_parameter_dim, -np.inf),
                                        high=np.full(self._latent_parameter_dim, np.inf),
                                        dtype=np.float32)
                }
                # This 'Dict' is gym.spaces.Dict
                new_space = Dict(spaces)
            else:
                if not isinstance(space, Box):
                    raise TypeError("Cannot handle spaces that are not box now")
                if len(space.shape) > 1:
                    raise TypeError("Cannot handle non-flat spaces now")
                original_low = space.low
                original_high = space.high
                low = np.concatenate([original_low, np.full(self._latent_parameter_dim, -np.inf)])
                high = np.concatenate([original_high, np.full(self._latent_parameter_dim, np.inf)])
                new_space = Box(low=low, high=high, dtype=np.float32)

            self.observation_spaces[agent] = new_space
Example #6
    def __init__(self, env, keys_self, keys_copy=[]):
        super().__init__(env)
        self.keys_self = sorted(keys_self)
        self.keys_copy = sorted(keys_copy)
        self.n_agents = self.metadata['n_actors']
        new_spaces = {}
        for k, v in self.observation_space.spaces.items():
            # If obs is a self obs, then we only want to include other agents obs,
            # as we will pass the self obs separately.
            assert len(v.shape) > 1, f'Obs {k} has shape {v.shape}'
            if 'mask' in k:
                if k in self.keys_self:
                    new_spaces[k] = Box(low=v.low[:, 1:], high=v.high[:, 1:], dtype=v.dtype)
                else:
                    new_spaces[k] = v
            elif k in self.keys_self:
                assert v.shape[0] == self.n_agents, (
                    f"For self obs, obs dim 0 must equal number of agents. {k} has shape {v.shape}")
                obs_shape = (v.shape[0], self.n_agents - 1, v.shape[1])
                lows = np.tile(v.low, self.n_agents - 1).reshape(obs_shape)
                highs = np.tile(v.high, self.n_agents - 1).reshape(obs_shape)
                new_spaces[k] = Box(low=lows, high=highs, dtype=v.dtype)
            elif k in self.keys_copy:
                new_spaces[k] = deepcopy(v)
            else:
                obs_shape = (v.shape[0], self.n_agents, v.shape[1])
                lows = np.tile(v.low, self.n_agents).reshape(obs_shape).transpose((1, 0, 2))
                highs = np.tile(v.high, self.n_agents).reshape(obs_shape).transpose((1, 0, 2))
                new_spaces[k] = Box(low=lows, high=highs, dtype=v.dtype)

        for k in self.keys_self:
            new_spaces[k + '_self'] = self.observation_space.spaces[k]

        self.observation_space = Dict(new_spaces)
Example #7
    def __init__(self, **kwargs):
        """
        kwargs
        ------
        apple_num : int
            number of apples in a map. Default is 1
        eat_apple : float
            reward given when apple is eaten. Default is 1.0
        hit_wall : float
            punishment (or reward) given when hitting a wall. Default is 0
        """
        #Turn left 45°, Move forward, Turn right 45°
        self.action_space = Discrete(3)
        self._done = False
        self.viewer = None
        self.engine = None

        kwargs.setdefault('apple_num', 1)
        kwargs.setdefault('eat_apple', 1.0)
        kwargs.setdefault('hit_wall', 0)
        self._options = kwargs

        self.max_step = 1000
        self.cur_step = 0
        self.image_size = (720, 720)
        self.seed()

        # 3 Continuous Inputs from both eyes
        self.observation_space = Dict({
            'Right':
            Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8),
            'Left':
            Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8)
        })
Example #8
    def __init__(self, x_dim=5, y_dim=5, **kwargs):

        # From https://github.com/openai/gym/blob/master/gym/envs/robotics/fetch/pick_and_place.py
        initial_qpos = {
            'robot0:slide0': 0.405,
            'robot0:slide1': 0.48,
            'robot0:slide2': 0.0,
            'object0:joint': [1.3, 0.75, 0.4, 1., 0., 0., 0.],
        }
        fetch_env.FetchEnv.__init__(self,
                                    MODEL_XML_PATH,
                                    has_object=True,
                                    block_gripper=False,
                                    n_substeps=20,
                                    gripper_extra_height=0.2,
                                    target_in_the_air=False,
                                    target_offset=0.0,
                                    obj_range=0.10,
                                    target_range=0.10,
                                    distance_threshold=0.05,
                                    initial_qpos=initial_qpos,
                                    reward_type="sparse")

        self.x_states = x_dim
        self.y_states = y_dim
        self._old_action_space = self.action_space
        self._old_observation_space = self.observation_space
        self.action_space = Discrete(5)  #Right, Up, Left, Down, Grab
        self.observation_space = Dict(
            dict(desired_goal=Discrete(x_dim * y_dim),
                 achieved_goal=Discrete(x_dim * y_dim),
                 observation=Dict(
                     dict(position=Discrete(x_dim * y_dim),
                          target_position=Discrete(x_dim * y_dim + 1)))))
        self.action_handlers = [
            self._move_function((0., 1., 0., -1.),
                                no_move_if=lambda s: s % self.x_states == 0),
            self._move_function((1., 0., 0., -1.),
                                no_move_if=lambda s: s < self.x_states),
            self._move_function((0., -1., 0., -1.),
                                no_move_if=lambda s:
                                (s + 1) % self.x_states == 0),
            self._move_function((-1., 0., 0., -1.),
                                no_move_if=lambda s: s / self.x_states >
                                (self.y_states - 1))
        ]
        self.action_handlers.append(self._grab)
Example #9
 def __init__(self,
              frame_skip=30,
              action_scale=10,
              keep_vel_in_obs=True,
              use_safety_box=False,
              fix_goal=False,
              fixed_goal=(0.05, 0.6, 0.15),
              reward_type='hand_distance',
              indicator_threshold=.05,
              goal_low=None,
              goal_high=None,
              ):
     self.quick_init(locals())
     MultitaskEnv.__init__(self)
     self.action_scale = action_scale
     MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
     bounds = self.model.actuator_ctrlrange.copy()
     low = bounds[:, 0]
     high = bounds[:, 1]
     self.action_space = Box(low=low, high=high)
     if goal_low is None:
         goal_low = np.array([-0.1, 0.5, 0.02])
     else:
         goal_low = np.array(goal_low)
     if goal_high is None:
         goal_high = np.array([0.1, 0.7, 0.2])
     else:
         goal_high = np.array(goal_high)
     self.safety_box = Box(
         goal_low,
         goal_high
     )
     self.keep_vel_in_obs = keep_vel_in_obs
     self.goal_space = Box(goal_low, goal_high)
     obs_size = self._get_env_obs().shape[0]
     high = np.inf * np.ones(obs_size)
     low = -high
     self.obs_space = Box(low, high)
     self.achieved_goal_space = Box(
         -np.inf * np.ones(3),
         np.inf * np.ones(3)
     )
     self.observation_space = Dict([
         ('observation', self.obs_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.achieved_goal_space),
         ('state_observation', self.obs_space),
         ('state_desired_goal', self.goal_space),
         ('state_achieved_goal', self.achieved_goal_space),
     ])
     self.fix_goal = fix_goal
     self.fixed_goal = np.array(fixed_goal)
     self.use_safety_box=use_safety_box
     self.prev_qpos = self.init_angles.copy()
     self.reward_type = reward_type
     self.indicator_threshold = indicator_threshold
     goal = self.sample_goal()
     self._state_goal = goal['state_desired_goal']
     self.reset()
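Several of these environments expose the same data under observation / desired_goal / achieved_goal keys (plus state_* aliases) so that goal-conditioned algorithms such as HER can recompute rewards from the achieved and desired goals alone. A hedged sketch of how such a Dict observation is typically consumed (the threshold and the sparse reward below are illustrative, not the reward of the environment above):

import numpy as np
from gym.spaces import Box, Dict

goal_space = Box(-np.inf * np.ones(3), np.inf * np.ones(3), dtype=np.float32)
obs_space = Dict([
    ('observation', Box(-np.inf * np.ones(10), np.inf * np.ones(10), dtype=np.float32)),
    ('desired_goal', goal_space),
    ('achieved_goal', goal_space),
])

obs = obs_space.sample()
# Sparse reward: 0 when the achieved goal is within the threshold, -1 otherwise.
distance = np.linalg.norm(obs['achieved_goal'] - obs['desired_goal'])
reward = -float(distance > 0.05)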
Example #10
    def __init__(
        self,
        wrapped_env,
        vae,
        pixel_cnn=None,
        vae_input_key_prefix='image',
        sample_from_true_prior=False,
        decode_goals=False,
        decode_goals_on_reset=True,
        render_goals=False,
        render_rollouts=False,
        reward_params=None,
        goal_sampling_mode="vae_prior",
        imsize=84,
        obs_size=None,
        norm_order=2,
        epsilon=20,
        presampled_goals=None,
    ):
        if reward_params is None:
            reward_params = dict()
        super().__init__(
            wrapped_env,
            vae,
            vae_input_key_prefix,
            sample_from_true_prior,
            decode_goals,
            decode_goals_on_reset,
            render_goals,
            render_rollouts,
            reward_params,
            goal_sampling_mode,
            imsize,
            obs_size,
            norm_order,
            epsilon,
            presampled_goals,
            )

        if type(pixel_cnn) is str:
            self.pixel_cnn = load_local_or_remote_file(pixel_cnn)
        self.representation_size = self.vae.representation_size
        self.imsize = self.vae.imsize
        print("Location: BiGAN WRAPPER")

        latent_space = Box(
            -10 * np.ones(obs_size or self.representation_size),
            10 * np.ones(obs_size or self.representation_size),
            dtype=np.float32,
        )

        spaces = self.wrapped_env.observation_space.spaces
        spaces['observation'] = latent_space
        spaces['desired_goal'] = latent_space
        spaces['achieved_goal'] = latent_space
        spaces['latent_observation'] = latent_space
        spaces['latent_desired_goal'] = latent_space
        spaces['latent_achieved_goal'] = latent_space
        self.observation_space = Dict(spaces)
Example #11
    def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
    ):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        SawyerXYZEnv.__init__(
            self,
            self.model_name,
            hand_low=hand_low,
            hand_high=hand_high,
            **sawyer_xyz_kwargs
        )
        MultitaskEnv.__init__(self)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        self._state_goal = None
        self.fixed_hand_z = fixed_hand_z

        self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
        self.state_space = Box(
            np.concatenate((hand_low, [min_angle])),
            np.concatenate((hand_high, [max_angle])),
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.state_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.state_space),
        ])
        self.action_reward_scale = action_reward_scale
        self.target_pos_scale = target_pos_scale
        self.target_angle_scale = target_angle_scale
        self.reset_free = reset_free
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        self.reset()
Example #12
 def __init__(self):
     self.counter = 0
     self.observation_space = Dict({
         'test1d': Box(low=-1e3, high=1e3, dtype=np.float32, shape=[5]),
         'test2d': Box(low=-1e3, high=1e3, dtype=np.float32, shape=[5]),
         'test3d': Box(low=-1e3, high=1e3, dtype=np.float32, shape=[5])
     })
     self.action_space = self.observation_space
Example #13
 def __init__(self):
     self.action_space = Discrete(max_action_size)
     self.observation_space = Dict({
         "state":
         REAL_OBSERVATION_SPACE,
         "action_mask":
         Box(low=0, high=1, shape=(max_action_size, ))
     })
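In this "state" plus "action_mask" layout the mask marks which of the discrete actions are currently legal, and a typical consumer suppresses the logits of masked-out actions before picking one. A minimal sketch of that masking step (numpy only; the sizes and names are illustrative, not taken from the example above):

import numpy as np

max_action_size = 6                                     # illustrative
logits = np.random.randn(max_action_size)               # raw policy outputs
action_mask = np.array([1, 1, 0, 1, 0, 0], dtype=np.float32)

# Illegal actions get -inf so argmax (or a softmax sample) can never pick them.
masked_logits = np.where(action_mask > 0, logits, -np.inf)
action = int(np.argmax(masked_logits))
assert action_mask[action] == 1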
Example #14
    def __init__(self,
                 puck_low=None,
                 puck_high=None,
                 reward_type='hand_and_puck_distance',
                 indicator_threshold=0.06,
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
                 goal_low=None,
                 goal_high=None,
                 hide_goal_markers=False,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high))
        goal_low = np.array(goal_low)
        goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
        self.hand_and_puck_space = Box(
            np.hstack((self.hand_low, puck_low)),
            np.hstack((self.hand_high, puck_high)),
        )
        self.hand_space = Box(self.hand_low, self.hand_high)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = self.get_puck_pos()[2]
Example #15
    def __init__(
            self,
            wrapped_env,
            encoder: Encoder,
            encoder_input_prefix,
            key_prefix='encoder',
            reward_mode='encoder_distance',
    ):
        """

        :param wrapped_env:
        :param encoder:
        :param encoder_input_prefix:
        :param key_prefix:
        :param reward_mode:
         - 'encoder_distance': L1 distance in the encoder's embedding space
         - 'vectorized_encoder_distance': vectorized L1 distance in the
             encoder's embedding space, i.e. the negative absolute difference
             per dimension
         - 'env': use the wrapped env's reward
        """
        super().__init__(wrapped_env)
        if reward_mode not in {
            self.ENCODER_DISTANCE_REWARD,
            self.VECTORIZED_ENCODER_DISTANCE_REWARD,
            self.ENV_REWARD,
        }:
            raise ValueError(reward_mode)
        self._encoder = encoder
        self._encoder_input_obs_key = '{}_observation'.format(
            encoder_input_prefix)
        self._encoder_input_desired_goal_key = '{}_desired_goal'.format(
            encoder_input_prefix
        )
        self._encoder_input_achieved_goal_key = '{}_achieved_goal'.format(
            encoder_input_prefix
        )
        self._reward_mode = reward_mode
        spaces = self.wrapped_env.observation_space.spaces
        latent_space = Box(
            encoder.min_embedding,
            encoder.max_embedding,
            dtype=np.float32,
        )
        self._embedding_size = encoder.min_embedding.size
        self._obs_key = '{}_observation'.format(key_prefix)
        self._desired_goal_key = '{}_desired_goal'.format(key_prefix)
        self._achieved_goal_key = '{}_achieved_goal'.format(key_prefix)
        self._distance_name = '{}_distance'.format(key_prefix)

        self._key_prefix = key_prefix
        self._desired_goal = {
            self._desired_goal_key: np.zeros_like(latent_space.sample())
        }
        spaces[self._obs_key] = latent_space
        spaces[self._desired_goal_key] = latent_space
        spaces[self._achieved_goal_key] = latent_space
        self.observation_space = Dict(spaces)
        self._goal_sampling_mode = 'env'
Example #16
    def __init__(
        self,
        wrapped_env,
        vae,
        vae_input_key_prefix='image',
        sample_from_true_prior=False,
        decode_goals=False,
        decode_goals_on_reset=True,
        render_goals=False,
        render_rollouts=False,
        reward_params=None,
        goal_sampling_mode="vae_prior",
        num_goals_to_presample=0,
        presampled_goals_path=None,
        imsize=48,
        obs_size=None,
        norm_order=2,
        epsilon=20,
        presampled_goals=None,
    ):
        if reward_params is None:
            reward_params = dict()
        super().__init__(
            wrapped_env,
            vae,
            vae_input_key_prefix,
            sample_from_true_prior,
            decode_goals,
            decode_goals_on_reset,
            render_goals,
            render_rollouts,
            reward_params,
            goal_sampling_mode,
            presampled_goals_path,
            num_goals_to_presample,
            imsize,
            obs_size,
            norm_order,
            epsilon,
            presampled_goals,
        )

        self.representation_size = self.vae.representation_size

        latent_space = Box(
            -10 * np.ones(obs_size or self.representation_size),
            10 * np.ones(obs_size or self.representation_size),
            dtype=np.float32,
        )

        spaces = self.wrapped_env.observation_space.spaces
        spaces['observation'] = latent_space
        spaces['desired_goal'] = latent_space
        spaces['achieved_goal'] = latent_space
        spaces['latent_observation'] = latent_space
        spaces['latent_desired_goal'] = latent_space
        spaces['latent_achieved_goal'] = latent_space
        self.observation_space = Dict(spaces)
Example #17
    def __init__(
        self,
        frame_skip=30,
        goal_low=(-.1, .5, .06, 0),
        goal_high=(.1, .6, .06, .5),
        action_reward_scale=0,
        pos_action_scale=1 / 100,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.3, 0),
        target_pos_scale=0.25,
        target_angle_scale=1,
        max_x_pos=.1,
        max_y_pos=.7,
    ):

        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(
            np.array(goal_low),
            np.array(goal_high),
        )
        self._state_goal = None

        self.action_space = Box(np.array([-1, -1, -1, -1]),
                                np.array([1, 1, 1, 1]))
        max_angle = 1.5708
        self.state_space = Box(
            np.array([-1, -1, -1, -max_angle]),
            np.array([1, 1, 1, max_angle]),
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.state_space),
            ('achieved_goal', self.state_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.state_space),
            ('state_achieved_goal', self.state_space),
        ])
        self._pos_action_scale = pos_action_scale
        self.target_pos_scale = target_pos_scale
        self.action_reward_scale = action_reward_scale
        self.target_angle_scale = target_angle_scale

        self.max_x_pos = max_x_pos
        self.max_y_pos = max_y_pos
        self.min_y_pos = .5

        self.reset()
        self.reset_mocap_welds()
Example #18
    def __init__(self,
                 obj_low=None,
                 obj_high=None,
                 reward_type='hand_and_obj_distance',
                 indicator_threshold=0.06,
                 obj_init_pos=(0, 0.65, 0.03),
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
                 goal_low=None,
                 goal_high=None,
                 hide_goal_markers=False,
                 hide_arm=False,
                 num_objects=1,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        self.hide_arm = hide_arm
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        self.num_objects = num_objects
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, np.tile(obj_low,
                                                         num_objects)))
        if goal_high is None:
            goal_high = np.hstack(
                (self.hand_high, np.tile(obj_high, num_objects)))

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.obj_init_pos = np.array(obj_init_pos)

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, np.tile(obj_low, num_objects))),
            np.hstack((self.hand_high, np.tile(obj_high, num_objects))),
        )
        self.observation_space = Dict([
            ('observation', self.hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
        ])
Example #19
    def __init__(self,
                 env_name=None,
                 mode=None,
                 act_dim=None,
                 reward_type='threshold',
                 img_dim=32,
                 camera_id=0,
                 path_length=200,
                 threshold=0.8,
                 gpu_id=0,
                 **kwargs):

        self.dm_env = suite.load(env_name, mode)
        self.task = self.dm_env._task
        self.camera_id = camera_id
        self.physics = self.dm_env.physics
        self.max_steps = path_length
        self.threshold = threshold
        self.reward_type = reward_type
        self.device = torch.device(
            "cuda:" + str(gpu_id) if torch.cuda.is_available() else "cpu")

        # actions always between -1.0 and 1.0
        self.action_space = Box(
            high=1.0,
            low=-1.0,
            shape=self.dm_env.action_spec().shape if act_dim is None else
            (act_dim, ))

        # observations are, in principle, unbounded
        state_space = Box(high=float("inf"), low=-float("inf"), shape=(128, ))

        goal_space = Box(high=float("inf"), low=-float("inf"), shape=(128, ))

        self.observation_space = Dict([
            ('observation', state_space),
            ('desired_goal', goal_space),
            ('achieved_goal', goal_space),
            #('state_observation', state_space),
            #('state_desired_goal', goal_space),
            #('state_achieved_goal', goal_space),
        ])

        self.render_kwargs = dict(width=img_dim,
                                  height=img_dim,
                                  camera_id=self.camera_id)

        self.model1 = VAE(img_dim=img_dim,
                          image_channels=3,
                          z_dim=128,
                          device=self.device)
        self.model2 = VAE(img_dim=img_dim,
                          image_channels=3,
                          z_dim=128,
                          device=self.device)

        self.model1 = self.model1.to(self.device)
        self.model2 = self.model2.to(self.device)
Example #20
    def __init__(self, config: Config):
        spaces = {
            "pointgoal":
            Box(
                low=np.finfo(np.float32).min,
                high=np.finfo(np.float32).max,
                shape=(2, ),
                dtype=np.float32,
            )
        }

        if config.INPUT_TYPE in ["depth", "rgbd"]:
            spaces["depth"] = Box(
                low=0,
                high=1,
                shape=(config.RESOLUTION, config.RESOLUTION, 1),
                dtype=np.float32,
            )

        if config.INPUT_TYPE in ["rgb", "rgbd"]:
            spaces["rgb"] = Box(
                low=0,
                high=255,
                shape=(config.RESOLUTION, config.RESOLUTION, 3),
                dtype=np.uint8,
            )
        observation_spaces = Dict(spaces)

        action_spaces = Discrete(4)

        self.device = torch.device("cuda:{}".format(config.PTH_GPU_ID))
        self.hidden_size = config.HIDDEN_SIZE

        random.seed(config.RANDOM_SEED)
        torch.random.manual_seed(config.RANDOM_SEED)
        torch.backends.cudnn.deterministic = True

        self.actor_critic = Policy(
            observation_space=observation_spaces,
            action_space=action_spaces,
            hidden_size=self.hidden_size,
        )
        self.actor_critic.to(self.device)

        if config.MODEL_PATH:
            ckpt = torch.load(config.MODEL_PATH, map_location=self.device)
            #  Filter only actor_critic weights
            self.actor_critic.load_state_dict({
                k.replace("actor_critic.", ""): v
                for k, v in ckpt["state_dict"].items() if "actor_critic" in k
            })

        else:
            habitat.logger.error("Model checkpoint wasn't loaded, evaluating "
                                 "a random model.")

        self.test_recurrent_hidden_states = None
        self.not_done_masks = None
Example #21
class AvailActionsTestEnv(MultiAgentEnv):
    num_actions = 10
    action_space = Discrete(num_actions)
    observation_space = Dict({
        "obs":
        Dict({
            "test": Dict({
                "a": Discrete(2),
                "b": MultiDiscrete([2, 3, 4])
            }),
            "state": MultiDiscrete([2, 2, 2])
        }),
        "action_mask":
        Box(0, 1, (num_actions, )),
    })

    def __init__(self, env_config):
        self.state = None
        self.avail = env_config["avail_action"]
        self.action_mask = np.array([0] * 10)
        self.action_mask[env_config["avail_action"]] = 1

    def reset(self):
        self.state = 0
        return {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask
            }
        }

    def step(self, action_dict):
        if self.state > 0:
            assert action_dict["agent_1"] == self.avail, \
                "Failed to obey available actions mask!"
        self.state += 1
        rewards = {"agent_1": 1}
        obs = {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask
            }
        }
        dones = {"__all__": self.state > 20}
        return obs, rewards, dones, {}
Example #22
def test_seed_Dict():
    test_space = Dict(
        {
            "a": Box(low=0, high=1, shape=(3, 3)),
            "b": Dict(
                {
                    "b_1": Box(low=-100, high=100, shape=(2,)),
                    "b_2": Box(low=-1, high=1, shape=(2,)),
                }
            ),
            "c": Discrete(5),
        }
    )

    seed_dict = {
        "a": 0,
        "b": {
            "b_1": 1,
            "b_2": 2,
        },
        "c": 3,
    }

    test_space.seed(seed_dict)

    # "Unpack" the dict sub-spaces into individual spaces
    a = Box(low=0, high=1, shape=(3, 3))
    a.seed(0)
    b_1 = Box(low=-100, high=100, shape=(2,))
    b_1.seed(1)
    b_2 = Box(low=-1, high=1, shape=(2,))
    b_2.seed(2)
    c = Discrete(5)
    c.seed(3)

    for i in range(10):
        test_s = test_space.sample()
        a_s = a.sample()
        assert (test_s["a"] == a_s).all()
        b_1_s = b_1.sample()
        assert (test_s["b"]["b_1"] == b_1_s).all()
        b_2_s = b_2.sample()
        assert (test_s["b"]["b_2"] == b_2_s).all()
        c_s = c.sample()
        assert test_s["c"] == c_s
Example #23
 def observation_space(self):
     return Dict({
         "x": self._x_space,
         "y": self._y_space,
         "theta": self._theta_space,
         "item": self._item_space,
         "command": self._command_space,
         "completed_tasks": self._task_completed_space
     })
Example #24
 def set_image_obsSpace(self):
     if self.camera_name == 'robotview_zoomed':
         self.observation_space = Dict([
             ('img_observation',
              Box(0,
                  1, (3 * (48 * 64) + self.action_space.shape[0], ),
                  dtype=np.float32)),  #We append robot config to the image
             ('state_observation', self.hand_env_space),
         ])
Example #25
 def __init__(self, env):
     super().__init__(env)
     self.n_agents = self.metadata['n_actors']
     lows = np.split(self.action_space.low, self.n_agents)
     highs = np.split(self.action_space.high, self.n_agents)
     self.action_space = Dict({
         'action_movement': Tuple([Box(low=low, high=high, dtype=self.action_space.dtype)
                                   for low, high in zip(lows, highs)])
     })
Example #26
    def __init__(self,
                 rules_file,
                 leaf_threshold=16,
                 max_cuts_per_dimension=5,
                 max_actions_per_episode=5000,
                 max_depth=100,
                 partition_mode=None,
                 reward_shape="linear",
                 depth_weight=1.0,
                 dump_dir=None):

        self.reward_shape = {
            "linear": lambda x: x,
            "log": lambda x: np.log(x),
        }[reward_shape]

        assert partition_mode in [None, "simple", "efficuts", "cutsplit"]
        self.partition_enabled = partition_mode == "simple"
        if partition_mode in ["efficuts", "cutsplit"]:
            self.force_partition = partition_mode
        else:
            self.force_partition = False

        self.dump_dir = dump_dir and os.path.expanduser(dump_dir)
        if self.dump_dir:
            os.makedirs(self.dump_dir, exist_ok=True)
        self.best_time = float("inf")
        self.best_space = float("inf")

        self.depth_weight = depth_weight
        self.rules_file = rules_file
        self.rules = load_rules_from_file(rules_file)
        self.leaf_threshold = leaf_threshold
        self.max_actions_per_episode = max_actions_per_episode
        self.max_depth = max_depth
        self.num_actions = None
        self.tree = None
        self.node_map = None
        self.child_map = None
        self.max_cuts_per_dimension = max_cuts_per_dimension
        if self.partition_enabled:
            self.num_part_levels = NUM_PART_LEVELS
        else:
            self.num_part_levels = 0
        self.action_space = Tuple([
            Discrete(NUM_DIMENSIONS),
            Discrete(max_cuts_per_dimension + self.num_part_levels)
        ])
        self.observation_space = Dict({
            "real_obs":
            Box(0, 1, (278, ), dtype=np.float32),
            "action_mask":
            Box(0,
                1, (NUM_DIMENSIONS + max_cuts_per_dimension +
                    self.num_part_levels, ),
                dtype=np.float32),
        })
Example #27
 def __init__(
     self, image_size=48, **kwargs
 ):
     PointmassEnv.__init__(self, **kwargs)
     self.image_size = image_size
     self.observation_space = Dict({
         **self.observation_space, "image_observation": Box(
             np.zeros([image_size, image_size, 3]),
             np.ones([image_size, image_size, 3]))})
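The dict-unpacking above (**self.observation_space) relies on gym's Dict behaving like a mapping; the underlying sub-spaces are also always available via .spaces, so an equivalent and more portable sketch looks like this (the base space and image_size are made up for illustration):

import numpy as np
from gym.spaces import Box, Dict

image_size = 48
base_space = Dict({"state_observation": Box(-np.inf, np.inf, shape=(4,), dtype=np.float32)})

augmented_space = Dict({
    **base_space.spaces,  # keep the existing sub-spaces
    "image_observation": Box(
        np.zeros([image_size, image_size, 3]),
        np.ones([image_size, image_size, 3]),
    ),
})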
Example #28
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,
            obj_init_pos=(0, 0.6, 0.02),
            fix_goal=True,
            fixed_goal=(1.571),
            # target angle for the door
            goal_low=None,
            goal_high=None,
            hide_goal_markers=False,
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.max_path_length = 150

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.obj_init_pos = np.array(obj_init_pos)

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
        self.observation_space = Dict([
            ('observation', self.hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
        ])

        self.reset()
Example #29
    def test_convert_element_to_space_type(self):
        """Test if space converter works for all elements/space permutations"""
        box_space = Box(low=-1, high=1, shape=(2, ))
        discrete_space = Discrete(2)
        multi_discrete_space = MultiDiscrete([2, 2])
        multi_binary_space = MultiBinary(2)
        tuple_space = Tuple((box_space, discrete_space))
        dict_space = Dict({
            "box":
            box_space,
            "discrete":
            discrete_space,
            "multi_discrete":
            multi_discrete_space,
            "multi_binary":
            multi_binary_space,
            "dict_space":
            Dict({
                "box2": box_space,
                "discrete2": discrete_space,
            }),
            "tuple_space":
            tuple_space,
        })

        box_space_unconverted = box_space.sample().astype(np.float64)
        multi_discrete_unconverted = multi_discrete_space.sample().astype(
            np.int32)
        multi_binary_unconverted = multi_binary_space.sample().astype(np.int32)
        tuple_unconverted = (box_space_unconverted, float(0))
        modified_element = {
            "box": box_space_uncoverted,
            "discrete": float(0),
            "multi_discrete": multi_discrete_unconverted,
            "multi_binary": multi_binary_unconverted,
            "tuple_space": tuple_unconverted,
            "dict_space": {
                "box2": box_space_uncoverted,
                "discrete2": float(0),
            },
        }
        element_with_correct_types = convert_element_to_space_type(
            modified_element, dict_space.sample())
        assert dict_space.contains(element_with_correct_types)
Example #30
def test_compatibility():
    # create all testable spaces
    spaces = [
        Box(-1.0, 1.0, (20, ), np.float32),
        Box(-1.0, 1.0, (40, ), np.float32),
        Box(-1.0, 1.0, (20, 20), np.float32),
        Box(-1.0, 1.0, (20, 24), np.float32),
        Dict({
            'A': Box(-1.0, 1.0, (20, ), np.float32),
            'B': Box(-1.0, 1.0, (20, ), np.float32)
        }),
        Dict({
            'A': Box(-1.0, 1.0, (20, ), np.float32),
            'B': Box(-1.0, 1.0, (40, ), np.float32)
        }),
        Dict({
            'A': Box(-1.0, 1.0, (40, ), np.float32),
            'B': Box(-1.0, 1.0, (20, ), np.float32)
        }),
        Tuple([
            Box(-1.0, 1.0, (20, ), np.float32),
            Box(-1.0, 1.0, (20, ), np.float32)
        ]),
        Tuple([
            Box(-1.0, 1.0, (40, ), np.float32),
            Box(-1.0, 1.0, (20, ), np.float32)
        ]),
        Tuple([
            Box(-1.0, 1.0, (20, ), np.float32),
            Box(-1.0, 1.0, (40, ), np.float32)
        ]),
    ]

    # iterate and compare all combinations
    spaces_range = range(len(spaces))
    for x in spaces_range:
        space1 = spaces[x]
        for y in spaces_range:
            # create copy of Space with random bounds
            space2 = randomize_space_bounds(spaces[y])

            expected_compatible = x == y
            actual_compatible = space1.compatible(space2)
            assert expected_compatible == actual_compatible