Example no. 1
 def observation_space(self):
     return Box(low=-np.ones(self.params['obs_len']), high=np.ones(self.params['obs_len']))
Example no. 2
 def observation_space(self):
     return Box(low=-np.inf, high=np.inf, shape=(7, ))
Example no. 3
 def _mod_hfield(self, hfield):
     # clear a flat patch for the robot to start off from
     return terrain.clear_patch(hfield, Box(np.array([-1.0, -1.0]), np.array([-0.5, -0.5])))
Example no. 4
 def action_space(self):
     return Box(low=np.array([-1, -1, -1]), high=np.array([1, 1, 1]))
Example no. 5
 def action_space(self):
     """allowed value range for actions"""
     return Box(low=-self.delta_l, high=self.delta_l, shape=(3, ))
Example no. 6
class SawyerReachTorqueEnv(MujocoEnv, Serializable, MultitaskEnv):
    """Implements a torque-controlled Sawyer environment"""
    def __init__(
        self,
        frame_skip=10,
        action_scale=10,
        xyz_obs=False,
        keep_vel_in_obs=True,
        use_safety_box=False,
        fix_goal=False,
        fixed_goal=(0.05, 0.6, 0.15),
        reward_type='hand_distance',
        ctrl_cost_coef=0.0,
        indicator_threshold=.05,
        goal_low=None,
        goal_high=None,
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        self.action_scale = action_scale
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = Box(low=low, high=high)
        if goal_low is None:
            goal_low = np.array([-0.1, 0.5, 0.02])
        else:
            goal_low = np.array(goal_low)
        if goal_high is None:
            goal_high = np.array([0.1, 0.7, 0.2])
        else:
            goal_high = np.array(goal_high)
        self.safety_box = Box(goal_low, goal_high)
        self.xyz_obs = xyz_obs
        self.keep_vel_in_obs = keep_vel_in_obs
        self.ctrl_cost_coef = ctrl_cost_coef
        self.goal_space = Box(goal_low, goal_high)
        obs_size = self._get_env_obs().shape[0]
        high = np.inf * np.ones(obs_size)
        low = -high
        self.obs_space = Box(low, high)
        self.achieved_goal_space = Box(-np.inf * np.ones(3),
                                       np.inf * np.ones(3))
        self._observation_space_dict = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.achieved_goal_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.achieved_goal_space),
        ])

        # obs space: (state observation, desired goal xyz)
        self.observation_space = Box(np.concatenate([low, goal_low]),
                                     np.concatenate([high, goal_high]))
        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.use_safety_box = use_safety_box
        self.prev_qpos = self.init_angles.copy()
        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold
        goal = self.sample_goal()
        self._state_goal = goal['state_desired_goal']
        self.reset()

    @property
    def model_name(self):
        return 'sawyer_reach_torque.xml'

    def reset_to_prev_qpos(self):
        angles = self.data.qpos.copy()
        velocities = self.data.qvel.copy()
        angles[:] = self.prev_qpos.copy()
        velocities[:] = 0
        self.set_state(angles.flatten(), velocities.flatten())
        self.set_goal_xyz(self._state_goal)

    def is_outside_box(self):
        pos = self.get_endeff_pos()
        return not self.safety_box.contains(pos)

    def set_to_qpos(self, qpos):
        angles = self.data.qpos.copy()
        velocities = self.data.qvel.copy()
        angles[:] = qpos
        velocities[:] = 0
        self.set_state(angles.flatten(), velocities.flatten())
        self.set_goal_xyz(self._state_goal)

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = 1.0

        # 3rd person view
        cam_dist = 0.3
        rotation_angle = 270
        cam_pos = np.array([0, 1.0, 0.5, cam_dist, -45, rotation_angle])

        for i in range(3):
            self.viewer.cam.lookat[i] = cam_pos[i]
        self.viewer.cam.distance = cam_pos[3]
        self.viewer.cam.elevation = cam_pos[4]
        self.viewer.cam.azimuth = cam_pos[5]
        self.viewer.cam.trackbodyid = -1

    def step(self, action):
        action = action * self.action_scale
        self.do_simulation(action, self.frame_skip)
        if self.use_safety_box:
            if self.is_outside_box():
                self.reset_to_prev_qpos()
            else:
                self.prev_qpos = self.data.qpos.copy()
        obs_dict = self._get_obs_dict()
        info = self._get_info()
        reward = self.compute_reward(action, obs_dict)
        obs = self._convert_obs_dict_to_obs(obs_dict)
        done = False
        return obs, reward, done, info

    def done(self, obs):
        if obs.ndim == 2:
            return np.zeros(obs.shape[0], dtype=bool)
        else:
            return False

    def reward(self, obs, action, obs_next):
        if obs_next.ndim == 2 and action.ndim == 2:
            hand_pos = obs_next[:, 0:3]
            goals = obs_next[:, -3:]
            distance = np.linalg.norm(hand_pos - goals, axis=1)
            ctrl_cost = self.ctrl_cost_coef * np.sum(action**2, axis=1)
            if self.reward_type == 'hand_distance':
                r = -distance
            elif self.reward_type == 'hand_success':
                r = -(distance < self.indicator_threshold).astype(float)
            else:
                raise NotImplementedError("Invalid/no reward type.")
            return r - ctrl_cost
        else:
            return self.reward(np.array([obs]), np.array([action]),
                               np.array([obs_next]))[0]

    def _get_env_obs(self):
        if self.xyz_obs:
            if self.keep_vel_in_obs:
                return np.concatenate([
                    self.get_endeff_pos(),  # end-effector xyz position
                    self.get_endeff_rot(),  # end-effector rotation (quaternion)
                    self.get_endeff_vel(),  # end-effector xyz velocity
                ])
            else:
                return np.concatenate([
                    self.get_endeff_pos(),
                    self.get_endeff_rot(),
                ])
        else:
            if self.keep_vel_in_obs:
                return np.concatenate([
                    self.get_endeff_pos(),
                    self.sim.data.qpos.flat,
                    self.sim.data.qvel.flat,
                ])
            else:
                return np.concatenate([
                    self.get_endeff_pos(),
                    self.sim.data.qpos.flat,
                ])

    def _get_obs_dict(self):
        ee_pos = self.get_endeff_pos()
        state_obs = self._get_env_obs()
        return dict(
            observation=state_obs,
            desired_goal=self._state_goal,
            achieved_goal=ee_pos,
            state_observation=state_obs,
            state_desired_goal=self._state_goal,
            state_achieved_goal=ee_pos,
        )

    def _convert_obs_dict_to_obs(self, obs_dict):
        return np.concatenate(
            [obs_dict['observation'], obs_dict['desired_goal']])

    def _get_obs(self):
        return self._convert_obs_dict_to_obs(self._get_obs_dict())

    def _get_info(self):
        hand_distance = np.linalg.norm(self._state_goal -
                                       self.get_endeff_pos())
        return dict(
            hand_distance=hand_distance,
            hand_success=float(hand_distance < self.indicator_threshold),
        )

    def get_endeff_pos(self):
        return self.data.body_xpos[self.endeff_id].copy()

    def get_endeff_rot(self):
        return self.data.body_xquat[self.endeff_id].copy()

    def get_endeff_vel(self):
        return self.data.body_xvelp[self.endeff_id].copy()

    def reset(self, reset_args=None):
        angles = self.data.qpos.copy()
        velocities = self.data.qvel.copy()
        angles[:] = self.init_angles
        velocities[:] = 0
        self.set_state(angles.flatten(), velocities.flatten())
        goal = self.sample_goal()
        self._state_goal = goal['state_desired_goal']
        self.set_goal_xyz(self._state_goal)
        self.sim.forward()
        self.prev_qpos = self.data.qpos.copy()
        return self._get_obs()

    def set_goal_xyz(self, pos):
        qpos = self.data.qpos.flat.copy()
        qvel = self.data.qvel.flat.copy()
        qpos[7:10] = pos.copy()
        qvel[7:10] = [0, 0, 0]
        self.set_state(qpos, qvel)

    @property
    def init_angles(self):
        return [
            1.02866769e+00, -6.95207647e-01, 4.22932911e-01, 1.76670458e+00,
            -5.69637604e-01, 6.24117280e-01, 3.53404635e+00, 1.07586388e-02,
            6.62018003e-01, 2.09936716e-02, 1.00000000e+00, 3.76632959e-14,
            1.36837913e-11, 1.56567415e-23
        ]

    @property
    def endeff_id(self):
        return self.model.body_names.index('leftclaw')

    @property
    def goal_id(self):
        return self.model.body_names.index('goal')

    def get_diagnostics(self, paths, prefix=''):
        statistics = OrderedDict()
        for stat_name in [
                'hand_distance',
                'hand_success',
        ]:
            stat = get_stat_in_paths(paths, 'env_infos', stat_name)
            statistics.update(
                create_stats_ordered_dict(
                    '%s%s' % (prefix, stat_name),
                    stat,
                    always_show_all_stats=True,
                ))
            statistics.update(
                create_stats_ordered_dict(
                    'Final %s%s' % (prefix, stat_name),
                    [s[-1] for s in stat],
                    always_show_all_stats=True,
                ))
        return statistics

    """
    Multitask functions
    """

    @property
    def goal_dim(self) -> int:
        return 3

    def get_goal(self):
        return {
            'desired_goal': self._state_goal,
            'state_desired_goal': self._state_goal,
        }

    def sample_goals(self, batch_size):
        if self.fix_goal:
            goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0)
        else:
            goals = np.random.uniform(
                self.goal_space.low,
                self.goal_space.high,
                size=(batch_size, self.goal_space.low.size),
            )
        return {
            'desired_goal': goals,
            'state_desired_goal': goals,
        }

    def compute_rewards(self, actions, obs):
        achieved_goals = obs['achieved_goal']
        desired_goals = obs['desired_goal']
        hand_pos = achieved_goals
        goals = desired_goals
        ctrl_cost = self.ctrl_cost_coef * np.sum(np.abs(actions), axis=1)

        distances = np.linalg.norm(hand_pos - goals, axis=1)
        if self.reward_type == 'hand_distance':
            r = -distances
        elif self.reward_type == 'hand_success':
            r = -(distances < self.indicator_threshold).astype(float)
        else:
            raise NotImplementedError("Invalid/no reward type.")
        return r - ctrl_cost

    def set_to_goal(self, goal):
        raise NotImplementedError()

    def get_env_state(self):
        joint_state = self.sim.get_state()
        goal = self._state_goal.copy()
        return joint_state, goal

    def set_env_state(self, state):
        state, goal = state
        self.sim.set_state(state)
        self.sim.forward()
        self._state_goal = goal

    def log_diagnostics(self, paths):
        diagnostics = self.get_diagnostics(paths)
        logger.record_tabular('HandDistanceMean',
                              diagnostics['hand_distance Mean'])
        logger.record_tabular('FinalHandDistanceMean',
                              diagnostics['Final hand_distance Mean'])
        logger.record_tabular('FinalHandSuccessMean',
                              diagnostics['Final hand_success Mean'])
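A minimal usage sketch for the environment above (an illustration added here, not part of the original examples): it assumes SawyerReachTorqueEnv and its MuJoCo model 'sawyer_reach_torque.xml' are importable, and simply rolls out random torque actions sampled from action_space.

 # Hypothetical rollout; requires MuJoCo plus the MujocoEnv/MultitaskEnv dependencies.
 env = SawyerReachTorqueEnv(fix_goal=True, fixed_goal=(0.05, 0.6, 0.15))
 obs = env.reset()  # concatenation of the state observation and the goal xyz
 for _ in range(100):
     action = env.action_space.sample()          # random torques within the actuator ctrl range
     obs, reward, done, info = env.step(action)  # -hand_distance under the default reward_type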
Example no. 7
 def observation_space(self):
     high = np.full((26, ), 1e20)
     low = -high
     return Box(low, high)
Example no. 8
 def action_space(self):
     return Box(low=-0.3, high=0.3, shape=(15, ))
Example no. 9
 def observation_space(self):
     return Box(low=-np.ones(4), high=np.ones(4))
Example no. 10
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,
            obj_init_pos=(0, 0.6, 0.02),
            fix_goal=True,
            fixed_goal=(0, 0.85, 0.02, 0, 0.85,
                        0.02),  #3D placing goal, for hand and object
            height_target=0.1,
            goal_low=None,
            goal_high=None,
            hide_goal_markers=False,
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.max_path_length = 150

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.obj_init_pos = np.array(obj_init_pos)

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.height_target = height_target

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )

        self.observation_space = Box(
            np.hstack((self.hand_and_obj_space.low,
                       np.array(GRIPPER_STATE_LIMIT[0]))),
            np.hstack((self.hand_and_obj_space.high,
                       np.array(GRIPPER_STATE_LIMIT[1]))),
        )

        self.goal_space = Box(goal_low, goal_high)

        self._observation_space_dict = Dict([
            ('observation', self.observation_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.observation_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
        ])
        self.observation_space = Box(
            np.concatenate(
                [self.observation_space.low, self.hand_and_obj_space.low]),
            np.concatenate(
                [self.observation_space.high, self.hand_and_obj_space.high]))

        self.reset()
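The constructor above builds the flat observation space by concatenating the state bounds (hand, object, gripper) with the hand-and-object goal bounds. A standalone sketch of that pattern, using placeholder limits that are assumptions rather than the values supplied by SawyerXYZEnv:

 import numpy as np
 from gym.spaces import Box

 # Placeholder workspace and gripper limits, purely for illustration.
 hand_low, hand_high = np.array([-0.5, 0.4, 0.05]), np.array([0.5, 1.0, 0.5])
 obj_low, obj_high = hand_low.copy(), hand_high.copy()
 gripper_low, gripper_high = np.array([-1.0]), np.array([1.0])

 hand_and_obj_space = Box(np.hstack((hand_low, obj_low)),
                          np.hstack((hand_high, obj_high)))
 state_space = Box(np.hstack((hand_and_obj_space.low, gripper_low)),
                   np.hstack((hand_and_obj_space.high, gripper_high)))
 # Flat observation bounds = state bounds followed by the goal bounds,
 # mirroring the two observation_space assignments in the constructor above.
 observation_space = Box(np.concatenate([state_space.low, hand_and_obj_space.low]),
                         np.concatenate([state_space.high, hand_and_obj_space.high]))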
Example no. 11
 def observation_space(self):
     return Box(low=-1e6, high=1e6, shape=(self.usedDim * self.window, ))
Example no. 12
 def observation_space(self):
     return Box(self.low, self.high)
Example no. 13
 def observation_space(self):
     return Box(low=-10 * np.ones(2), high=10 * np.ones(2))
Example no. 14
 def observation_space(self):
     return Box(np.array([-1, -1, 0]), np.array([1000, 100, np.inf]))
Example no. 15
 def observation_space(self):
     obs_limit = np.inf * np.ones(self.obs_dim)
     return Box(-obs_limit, obs_limit)
Example no. 16
 def action_space(self):
     return Box(low=-np.ones(2), high=np.ones(2))
Example no. 17
 def action_space(self):
     # return Box(low=-np.ones(2), high=np.ones(2))
     return Box(low=np.array([0.0, -1]), high=np.array([1.0, 1]))
 def action_space(self):
     return Box(low=5.0 * self.mu1, high=5.0 * self.mu2, shape=(1, ))
Example no. 20
 def action_space(self):
     return Box(low=-1, high=1, shape=(self.n_size * self.n_size, ))
Example no. 21
 def observation_space(self):
     return Box(low=np.array([0, -self.tensioner.max_displacement, -self.tensioner.max_displacement, -self.tensioner.max_displacement]),
         high=np.array([len(self.trajectory) + 1, self.tensioner.max_displacement, self.tensioner.max_displacement, self.tensioner.max_displacement]))
Example no. 22
 def observation_space(self):
     '''
     Define the shape of input vector to the neural network.
     '''
     return Box(low=-np.inf, high=np.inf, shape=(5, ))
Example no. 23
 def observation_space(self):
     """allowed value range for states"""
     return Box(low=-np.inf, high=np.inf, shape=(10, ))
Example no. 24
    def __init__(self,
                 init_puck_low=[0.0, 0.6],
                 init_puck_high=[0.0, 0.6],
                 reward_type='hand_and_puck_distance',
                 indicator_threshold=0.06,
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
                 puck_goal_low=[-0.3, 0.55],
                 puck_goal_high=[-0.1, 0.75],
                 hide_goal_markers=False,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if init_puck_low is None:
            init_puck_low = self.hand_low[:2]
        if init_puck_high is None:
            init_puck_high = self.hand_high[:2]
        init_puck_low = np.array(init_puck_low)
        init_puck_high = np.array(init_puck_high)

        self.puck_low = init_puck_low
        self.puck_high = init_puck_high
        """ Set goal sampling range """
        if puck_goal_low is None:
            goal_low = np.hstack((self.hand_low, self.hand_low[:2]))
        else:
            goal_low = np.hstack((self.hand_low, puck_goal_low))
        if puck_goal_high is None:
            goal_high = np.hstack((self.hand_high, self.hand_high[:2]))
        else:
            goal_high = np.hstack((self.hand_high, puck_goal_high))
        self.hand_and_puck_goal_space = Box(goal_low, goal_high)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
        self.hand_and_puck_space = Box(
            np.hstack((self.hand_low, self.hand_low[0:2])),
            np.hstack((self.hand_high, self.hand_high[0:2])),
        )

        self.hand_space = Box(self.hand_low, self.hand_high)
        self._observation_space_dict = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = self.get_puck_pos()[2]
        self.observation_space = Box(
            np.concatenate([
                self.hand_and_puck_space.low, self.hand_and_puck_goal_space.low
            ]),
            np.concatenate([
                self.hand_and_puck_space.high,
                self.hand_and_puck_goal_space.high
            ]))
 def action_space(self):
     return Box(low=-0.1, high=0.1, shape=(2,))
 def action_space(self):
     return Box(low=-1, high=1, shape=(3, ))
Example no. 27
 def latent_space(self):
     return Box(low=-np.inf, high=np.inf, shape=(1, ))
Example no. 28
 def action_space(self):
     return Box(low=self.act_limits_array[:, 0],
                high=self.act_limits_array[:, 1])
Example no. 29
def test_box():
    space = Box(low=-1, high=1, shape=(2, 2))
    np.testing.assert_array_equal(space.flatten([[1, 2], [3, 4]]), [1, 2, 3, 4])
    np.testing.assert_array_equal(space.flatten_n([[[1, 2], [3, 4]]]), [[1, 2, 3, 4]])
    np.testing.assert_array_equal(space.unflatten([1, 2, 3, 4]), [[1, 2], [3, 4]])
    np.testing.assert_array_equal(space.unflatten_n([[1, 2, 3, 4]]), [[[1, 2], [3, 4]]])
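In an rllab/garage-style Box, flatten and unflatten amount to numpy reshapes, so the assertions above can be reproduced with plain arrays (an illustrative sketch of the behaviour, not the library's implementation):

 import numpy as np

 x = np.array([[1, 2], [3, 4]])
 flat = x.reshape(-1)                                  # what space.flatten(x) yields
 np.testing.assert_array_equal(flat, [1, 2, 3, 4])
 np.testing.assert_array_equal(flat.reshape(2, 2), x)  # what space.unflatten(flat) yields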
Example no. 30
 def observation_space(self):
     return Box(low=-1,
                high=1,
                shape=(self.params['grid_n'], self.params['grid_m'], 3))