Beispiel #1
0
def render_pose(pose: np.ndarray,
                viewer,
                label="",
                size=0.2,
                unique_id=None,
                unique_label=False):

    if viewer is None:
        return

    rgba = np.r_[np.random.uniform(0.2, 1.0, 3), 1.]
    extra_kwargs = dict()
    if unique_label:
        unique_id = hash(label) % np.iinfo(np.int32).max
    if unique_id is not None:
        extra_kwargs['dataid'] = unique_id
        with viewer._gui_lock:
            existing = [
                i for i, m in enumerate(viewer._markers)
                if m.get('dataid') == unique_id
            ]
            if len(existing) == 1:
                rgba = viewer._markers[existing[0]]['rgba']
            for i in existing[::-1]:
                del viewer._markers[i]

    pos = pose[:3]
    if pose.size == 6:
        quat = rotations.euler2quat(pose[3:])
    elif pose.size == 7:
        quat = pose[3:]
    else:
        viewer.add_marker(pos=pos,
                          label=label,
                          type=mj_const.GEOM_SPHERE,
                          size=np.ones(3) * 0.01,
                          rgba=rgba,
                          specular=0.,
                          **extra_kwargs)
        return
    for i in range(3):
        rgba = np.zeros(4)
        rgba[[i, 3]] = 1.
        tf = [
            np.r_[0., np.pi / 2, 0.], np.r_[np.pi / 2, np.pi / 1, 0.],
            np.r_[0., 0., 0.]
        ][i]
        rot = rotations.quat_mul(quat, rotations.euler2quat(tf))
        viewer.add_marker(pos=pos,
                          mat=rotations.quat2mat(rot).flatten(),
                          label=(label if i == 0 else ""),
                          type=mj_const.GEOM_ARROW,
                          size=np.r_[0.01, 0.01, size],
                          rgba=rgba,
                          specular=0.,
                          **extra_kwargs)
Beispiel #2
0
    def set_initial_pose(self):
        self.data.set_joint_qpos('yumi_joint_1_l', 1.)
        self.data.set_joint_qpos('yumi_joint_1_r', -1.)
        self.data.set_joint_qpos('yumi_joint_2_l', 0.1)
        self.data.set_joint_qpos('yumi_joint_2_r', 0.1)
        ''' randomize goal
        goal_id = self.model.body_name2id('goal')
        pos = self.model.body_pos[goal_id]
        pos[0:2] = [0.5, 0]
        pos[0:2] += np.random.uniform(-0.05, 0.05, size=(2,))
        self.model.body_pos[goal_id] = pos
        '''

        self.sim.forward()
        target_start, target_end = self.model.get_joint_qpos_addr('target')
        target_qpos = self.data.qpos[target_start:target_end]

        target_quat = target_qpos[3:]

        if self.task == 0:
            # rotate y=-90 deg
            quat = rotations.euler2quat(np.array([0, -np.pi / 2, 0]))
            z_idx = 0
        elif self.task == 1:
            # rotate x=90 deg
            quat = rotations.euler2quat(np.array([np.pi / 2, 0, 0]))
            z_idx = 1
        elif self.task == 2:
            # do nothing
            quat = rotations.euler2quat(np.array([0, 0, 0]))
            z_idx = 2
        elif self.task == 3:
            # rotate z=-90 deg
            quat = rotations.euler2quat(np.array([0, 0, -np.pi / 2]))
            z_idx = 2
        else:
            raise Exception('Additional tasks not implemented.')

        target_id = self.model.geom_name2id('target')
        target_qpos[0] = 0.5 + np.random.uniform(-0.01, 0.01)
        target_qpos[1] = np.random.uniform(0.14, 0.15)
        height = self.model.geom_size[target_id][z_idx]
        target_qpos[2] = 0.051 + height

        goal_id = self.model.body_name2id('goal')
        body_pos = self.model.body_pos[goal_id]
        body_quat = self.model.body_quat[goal_id]
        body_pos[2] = target_qpos[2]
        body_quat[:] = quat

        perturbation = np.zeros(3)
        perturbation[z_idx] = np.random.uniform(-0.2, 0.2)
        euler = rotations.quat2euler(quat)
        euler = rotations.subtract_euler(euler, perturbation)
        target_qpos[3:] = rotations.euler2quat(euler)
Beispiel #3
0
    def _goal_distance(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != 'ignore':
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Special case: We want to ignore the Z component of the rotation.
                # This code here assumes Euler angles with xyz convention. We first transform
                # to euler, then set the Z component to be equal between the two, and finally
                # transform back into quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                euler_a[2] = euler_b[2]
                quat_a = rotations.euler2quat(euler_a)

            # Subtract quaternions and extract angle between them.
            quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot
    def _goal_distance(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != 'ignore':
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Special case: We want to ignore the Z component of the rotation.
                # This code here assumes Euler angles with xyz convention. We first transform
                # to euler, then set the Z component to be equal between the two, and finally
                # transform back into quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                euler_a[2] = euler_b[2]
                quat_a = rotations.euler2quat(euler_a)

            # Subtract quaternions and extract angle between them.
            quat_diff = rotations.quat_mul(quat_a,
                                           rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot
Beispiel #5
0
    def __init__(self,
                 target_position,
                 target_rotation,
                 skip_frame=20,
                 model_path='rubik_cube_shadow_wo_target.xml',
                 initial_robot_pos_dict=DEFAULT_INITIAL_QPOS,
                 reward_type='sparse',
                 randomize_initial_position=True,
                 randomize_initial_rotation=True,
                 distance_threshold=0.01,
                 rotation_threshold=0.1,
                 target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02),
                                                 (0.0, 0.06)])):
        """Initializes a new ManipulateEnv environment.

        Args:
            model_path (string): path to the environment XML file
            initial_robot_pos_dict (dict): a dictionary of joint names and values that define the initial configuration
            target_position (string): the type of target porisiton:
                - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily
                - fixed: target position is set to the initial position of the target
                - random: target position is fully randomized according to target_postion_range
            target_rotation (string): the type of target rotation:
                - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily
                - fixed: target rotation is set to the initial rotation of the object
                - xyz: fully randomized target rotation around the X, Y and Z axis
                - z: fully randomized target rotation around the Z axis
                - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y
            target_position_range (np.array of shape (3,2)): range of the target_position randomization
            reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
            randomize_initial_position (boolean): whether or not to randomize the initial position of the object
            randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object
            distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved
            rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved
        """
        self.target_position = target_position
        self.target_rotation = target_rotation
        self.target_position_range = target_position_range
        self.reward_type = reward_type
        self.randomize_initial_position = randomize_initial_position
        self.randomize_initial_rotation = randomize_initial_rotation
        self.distance_threshold = distance_threshold
        self.rotation_threshold = rotation_threshold
        self.parallel_quats = [
            rotations.euler2quat(r)
            for r in rotations.get_parallel_rotations()
        ]

        assert self.reward_type in ['dense', 'sparse']
        assert self.target_position in ['ignore', 'fixed', 'random']
        assert self.target_rotation in [
            'ignore', 'fixed', 'xyz', 'z', 'parallel', 'rotFace'
        ]

        ShadowGoalBaseEnv.__init__(self, model_path, initial_robot_pos_dict,
                                   skip_frame)
Beispiel #6
0
    def __init__(
        self, model_path, target_position, target_rotation,
        target_position_range, reward_type, initial_qpos={},
        randomize_initial_position=True, randomize_initial_rotation=True,
        distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
        ignore_z_target_rotation=False,
    ):
        """Initializes a new Hand manipulation environment.

        Args:
            model_path (string): path to the environments XML file
            target_position (string): the type of target position:
                - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily
                - fixed: target position is set to the initial position of the object
                - random: target position is fully randomized according to target_position_range
            target_rotation (string): the type of target rotation:
                - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily
                - fixed: target rotation is set to the initial rotation of the object
                - xyz: fully randomized target rotation around the X, Y and Z axis
                - z: fully randomized target rotation around the Z axis
                - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y
            ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored
            target_position_range (np.array of shape (3, 2)): range of the target_position randomization
            reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
            initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
            randomize_initial_position (boolean): whether or not to randomize the initial position of the object
            randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object
            distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved
            rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved
            n_substeps (int): number of substeps the simulation runs on every call to step
            relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state
        """
        self.target_position = target_position
        self.target_rotation = target_rotation
        self.target_position_range = target_position_range
        self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()]
        self.randomize_initial_rotation = randomize_initial_rotation
        self.randomize_initial_position = randomize_initial_position
        self.distance_threshold = distance_threshold
        self.rotation_threshold = rotation_threshold
        self.reward_type = reward_type
        self.ignore_z_target_rotation = ignore_z_target_rotation

        assert self.target_position in ['ignore', 'fixed', 'random']
        assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel']

        hand_env.HandEnv.__init__(
            self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos,
            relative_control=relative_control)
        utils.EzPickle.__init__(self)
Beispiel #7
0
    def __init__(
        self, model_path, target_position, target_rotation,
        target_position_range, reward_type, initial_qpos=None,
        randomize_initial_position=True, randomize_initial_rotation=True,
        distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
        ignore_z_target_rotation=False,
    ):
        """Initializes a new Hand manipulation environment.

        Args:
            model_path (string): path to the environments XML file
            target_position (string): the type of target position:
                - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily
                - fixed: target position is set to the initial position of the object
                - random: target position is fully randomized according to target_position_range
            target_rotation (string): the type of target rotation:
                - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily
                - fixed: target rotation is set to the initial rotation of the object
                - xyz: fully randomized target rotation around the X, Y and Z axis
                - z: fully randomized target rotation around the Z axis
                - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y
            ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored
            target_position_range (np.array of shape (3, 2)): range of the target_position randomization
            reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
            initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
            randomize_initial_position (boolean): whether or not to randomize the initial position of the object
            randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object
            distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved
            rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved
            n_substeps (int): number of substeps the simulation runs on every call to step
            relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state
        """
        self.target_position = target_position
        self.target_rotation = target_rotation
        self.target_position_range = target_position_range
        self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()]
        self.randomize_initial_rotation = randomize_initial_rotation
        self.randomize_initial_position = randomize_initial_position
        self.distance_threshold = distance_threshold
        self.rotation_threshold = rotation_threshold
        self.reward_type = reward_type
        self.ignore_z_target_rotation = ignore_z_target_rotation

        assert self.target_position in ['ignore', 'fixed', 'random']
        assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel']
        initial_qpos = initial_qpos or {}

        hand_env.HandEnv.__init__(
            self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos,
            relative_control=relative_control)
    def _sample_goal(self):
        # Select a goal for the object position.
        if self.target_position == 'random':
            assert self.target_position_range.shape == (3, 2)
            offset = self.np_random.uniform(self.target_position_range[:, 0], self.target_position_range[:, 1])
            assert offset.shape == (3,)
            target_pos = self.sim.data.get_site_xpos('rubik:cube_center') + offset
        elif self.target_position in ['ignore', 'fixed']:
            # target_pos = self.sim.data.get_site_xpos('rubik:cube_center')
            target_pos = np.array([1, 0.87, 0.2])
        else:
            raise error.Error('Unknown target_position option "{}".'.format(self.target_position))
        assert target_pos.shape == (3,)

        # Select a goal for the object rotation
        if self.target_rotation == 'z':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation == 'parallel':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            target_quat = quat_from_angle_and_axis(angle, axis)
            parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
            target_quat = rotations.quat_mul(target_quat, parallel_quat)
        elif self.target_rotation == 'xyz':
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = self.np_random.uniform(-1., 1., size=3)
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation in ['ignore', 'fixed']:
            target_quat = self.sim.data.get_joint_qpos('rubik:free_joint_0_0_0')[-4:]
        elif self.target_rotation in ['rotFace']:
            target_quats = [rotations.euler2quat(r) for r in np.array([[0, 0, np.pi/2.],
                                                                       [0, 0, -np.pi/2.],
                                                                       [np.pi / 2., 0, 0],
                                                                       [-np.pi / 2., 0, 0],
                                                                       [0, np.pi/2., 0],
                                                                       [0, -np.pi/2., 0]])]
            parallel_quat = target_quats[self.np_random.randint(len(target_quats))]
            target_quat = self.sim.data.get_joint_qpos('rubik:free_joint_0_0_0')[-4:]
            target_quat = rotations.quat_mul(target_quat, parallel_quat)
        else:
            raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
        assert target_quat.shape == (4,)

        target_quat /= np.linalg.norm(target_quat)
        goal = np.concatenate([target_pos, target_quat])
        return goal
Beispiel #9
0
 def camera_randomization(self):
     cam_pos = [[0.99, 0.5, 1.0], [0.5, 0.0, 1.99]]  #1m x 1m
     cam_ori = [[0.0, 1.57, 1.57], [0, 0, 0]]
     cam_fovy = [60, 60]
     for i in range(len(cam_pos)):
         for j in range(len(cam_pos[0])):
             cam_pos[i][j] += randrange(-15, 15) / 1000
     ori_dim = len(cam_ori[0])
     for i in range(len(cam_ori)):
         for j in range(ori_dim):
             cam_ori[i][j] += randrange(-17, 17) / 1000
     for i in range(len(cam_fovy)):
         cam_fovy[i] += randrange(-100, 100) / 100
     self.model.cam_pos[:, :] = np.array(cam_pos)
     self.model.cam_quat[:, :] = np.array(euler2quat(cam_ori))
     self.model.cam_fovy[:] = np.array(cam_fovy)
Beispiel #10
0
    def _goal_distance_single(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_rot = np.zeros_like(goal_b[..., 0])
        d_pos = np.linalg.norm(goal_a[..., :3] - goal_b[..., :3], axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
            if self.ignore_z_target_rotation:
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                euler_a[2] = euler_b[2]
                quat_a = rotations.euler2quat(euler_a)
            quat_diff = rotations.quat_mul(quat_a,
                                           rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        return d_pos, d_rot
Beispiel #11
0
    def step(cls, viewer):

        q1 = rotations.euler2quat(np.r_[0.01, 0.01, 0.05])
        q0 = cls.ob1_pose[3:].copy()
        q_common = rotations.quat_mul(q0, q1)

        cls.ob1_pose[3:] = q_common.copy()
        render_pose(cls.ob1_pose, viewer, unique_id=cls.unique_ids[0].item())

        cls.ob2_pose[3:] = q_common.copy()
        render_pose(cls.ob2_pose, viewer, unique_id=cls.unique_ids[1].item())

        if cls.i < 37:
            cls.tf = get_tf(cls.ob1_pose, cls.ob2_pose)  # 2_to_1
            cls.i += 1
        else:
            ob1_pose_back = apply_tf(cls.tf, cls.ob2_pose)
            ob1_pose_back[2] -= 0.0
            render_pose(apply_tf(cls.tf, cls.ob2_pose),
                        viewer,
                        unique_id=cls.unique_ids[2].item())
Beispiel #12
0
def rotation_goal_distance(goal_a, goal_b, ignore_z_target_rotation = False):
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7
    d_rot = np.zeros_like(goal_b[..., 0])
    quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

    if ignore_z_target_rotation:
        # Special case: We want to ignore the Z component of the rotation.
        # This code here assumes Euler angles with xyz convention. We first transform
        # to euler, then set the Z component to be equal between the two, and finally
        # transform back into quaternions.
        euler_a = rotations.quat2euler(quat_a)
        euler_b = rotations.quat2euler(quat_b)
        euler_a[2] = euler_b[2]
        quat_a = rotations.euler2quat(euler_a)

    # Subtract quaternions and extract angle between them.
    quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
    angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
    d_rot = angle_diff

    return d_rot
Beispiel #13
0
    def predict(self, obs, **kwargs):

        u = np.zeros(self._env.action_space.shape)
        new_goal = obs['desired_goal']
        if self._goal is None or np.any(self._goal != new_goal):
            self._reset(new_goal)
            # TFDebugger.reset()
        # TFDebugger.step(self._raw_env.viewer)

        curr_grp_poses = {
            a:
            np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                  rotations.mat2quat(
                      self._raw_env.sim.data.
                      get_site_xmat(f'gripper_{a}_center')), ]
            for a in ('l', 'r')
        }

        pos_errors = []
        for arm in ('l', 'r'):
            if arm == 'l':
                solver = self._ikp_solver_l
                jac_solver = self._jac_solver_l
                joint_lims = self._raw_env.arm_l_joint_lims

                achieved_pos = obs['observation'][32:35]
                obj_l_rel_pos = obs['observation'][44:47]
                obj_pos = obj_l_rel_pos + achieved_pos

                target_pose = np.r_[obj_pos, np.pi - 0.9, 0.01, 0.01]
                target_pose = np.r_[target_pose[:3],
                                    rotations.euler2quat(target_pose[3:])]

                if self._phase == 3:
                    target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                        'target0:left').copy()
                    q = rotations.mat2quat(
                        self._raw_env.sim.data.get_site_xmat('target0:left'))
                    target_pose[3:] = rotations.quat_mul(target_pose[3:], q)

                prev_err = self._prev_err_l
                curr_q = obs['observation'][:7]
                u_masked = u[:7]
            elif arm == 'r':
                solver = self._ikp_solver_r
                jac_solver = self._jac_solver_r
                joint_lims = self._raw_env.arm_r_joint_lims

                achieved_pos = obs['observation'][35:38]
                obj_r_rel_pos = obs['observation'][47:50]
                obj_pos = obj_r_rel_pos + achieved_pos

                target_pose = np.r_[obj_pos, -np.pi + 0.9, 0.01, np.pi]
                target_pose = np.r_[target_pose[:3],
                                    rotations.euler2quat(target_pose[3:])]

                if self._phase == 3:
                    target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                        'target0:right').copy()
                    q = rotations.mat2quat(
                        self._raw_env.sim.data.get_site_xmat('target0:right'))
                    target_pose[3:] = rotations.quat_mul(q, target_pose[3:])

                prev_err = self._prev_err_r
                curr_q = obs['observation'][16:23]
                u_masked = u[8:15]
            else:
                continue

            if self._phase == 0:
                u[7] = -1.0
                u[15] = -1.0
                target_pose[2] += 0.1
            elif self._phase == 1:
                u[7] = -1.0
                u[15] = -1.0
                target_pose[2] -= 0.002
            elif self._phase == 2:
                u[7] = -1 + self._phase_steps / 3.
                u[15] = -1 + self._phase_steps / 3.
            elif self._phase == 3:
                u[7] = 1.0
                u[15] = 1.0

            if self._raw_env.viewer is not None:
                tf.render_pose(target_pose.copy(), self._raw_env.viewer)

            curr_pose = curr_grp_poses[arm].copy()
            err_pose = curr_pose - target_pose
            err_pos = np.linalg.norm(err_pose[:3])
            pos_errors.append(err_pos)

            if self.use_mocap_ctrl:
                if self._phase < 1:
                    controller_k = 3.0
                else:
                    controller_k = 2.5
                err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
                target_q = self._raw_env.mocap_ik(-err_pose, arm)
            else:
                controller_k = 0.1
                err_rot = 0.0
                target_pose[:3] -= self._base_offset
                if self.use_qp_solver:
                    if not self.check_joint_limits:
                        joint_lims = None
                    curr_pose[:3] -= self._base_offset
                    target_q = self._position_ik_qp(curr_pose, target_pose,
                                                    curr_q, jac_solver,
                                                    joint_lims)
                else:
                    target_q = self._position_ik(target_pose, curr_q, solver)

            if target_q is not None:
                err_q = curr_q - target_q
                u_masked[:] = self._controller(err_q, prev_err, controller_k)

        self._phase_steps += 1
        if self._phase == 0 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.05:
            self._phase = 1
            self._phase_steps = 0
        elif self._phase == 1 and np.all(
                np.array(pos_errors) < 0.007) and err_rot < 0.05:
            self._phase = 2
            self._phase_steps = 0
        elif self._phase == 2:
            if self._phase_steps > 6:
                self._phase = 3
                self._phase_steps = 0
                self._locked_l_to_r_tf = tf.get_tf(curr_grp_poses['r'],
                                                   curr_grp_poses['l'])

        u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
        return u
Beispiel #14
0
    def __init__(self,
                 model_path,
                 reward_type,
                 initial_qpos=None,
                 relative_control=False,
                 has_object=False,
                 randomize_initial_arm_pos=False,
                 randomize_initial_object_pos=True,
                 ignore_rotation_ctrl=False,
                 distance_threshold=0.05,
                 rotation_threshold=0.1,
                 n_substeps=20,
                 ignore_target_rotation=False,
                 success_on_grasp_only=False,
                 grasp_state=None,
                 grasp_state_reset_p=0.0,
                 target_in_the_air_p=0.5,
                 object_id='original',
                 object_cage=False,
                 cage_opacity=0.1,
                 weld_fingers=False):

        self.target_in_the_air_p = target_in_the_air_p
        self.has_object = has_object
        self.ignore_target_rotation = ignore_target_rotation
        self.randomize_initial_arm_pos = randomize_initial_arm_pos
        self.randomize_initial_object_pos = randomize_initial_object_pos
        self.ignore_rotation_ctrl = ignore_rotation_ctrl
        self.distance_threshold = distance_threshold
        self.rotation_threshold = rotation_threshold
        self.reward_type = reward_type
        self.success_on_grasp_only = success_on_grasp_only
        self.object_id = object_id
        self.forearm_bounds = (np.r_[0.65, 0.3, 0.42], np.r_[1.75, 1.2, 1.0])
        self.table_safe_bounds = (np.r_[1.10, 0.43], np.r_[1.49, 1.05])
        self._initial_arm_mocap_pose = np.r_[1.05, 0.75, 0.65,
                                             rotations.euler2quat(np.r_[0.,
                                                                        1.59,
                                                                        1.57])]

        if isinstance(grasp_state, bool) and grasp_state:
            suffix = "" if object_id == 'original' else f'_{object_id}'
            p = os.path.join(os.path.dirname(__file__),
                             f'../assets/states/grasp_state{suffix}.pkl')
            if not os.path.exists(p):
                raise IOError('File {} does not exist'.format(p))
            grasp_state = pickle.load(open(p, 'rb'))

        if grasp_state is not None and grasp_state_reset_p <= 0.0:
            raise ValueError(
                'grasp_state_reset_p must be greater than zero if grasp_state is specified!'
            )

        self.grasp_state = grasp_state
        self.grasp_state_reset_p = grasp_state_reset_p

        if ignore_rotation_ctrl and not ignore_target_rotation:
            raise ValueError(
                'Target rotation must be ignored if arm cannot rotate! Set ignore_target_rotation=True'
            )

        if success_on_grasp_only:
            if reward_type != 'sparse':
                raise ValueError(
                    'Parameter success_on_grasp_only requires sparse rewards!')
            if not has_object:
                raise ValueError(
                    'Parameter success_on_grasp_only requires object to be grasped!'
                )

        default_qpos = dict()
        xml_format = None
        if self.has_object:
            default_qpos['object:joint'] = [1.25, 0.53, 0.4, 1., 0., 0., 0.]
            xml_format = dict(
                object_geom=
                '<geom name="{name}" {props} material="material:object" condim="4"'
                'friction="1 0.95 0.01" solimp="0.99 0.99 0.01" solref="0.01 1"/>',
                target_geom=
                '<geom name="{name}" {props} material="material:target" condim="4" group="2"'
                'contype="0" conaffinity="0"/>',
            )
            obj = dict(OBJECTS[object_id])
            if 'mass' not in obj.keys():
                obj['mass'] = 0.2
            mesh_parts = obj.get('mesh_parts')
            if mesh_parts is not None and isinstance(mesh_parts, int):
                del obj['mesh_parts']
                object_geom, target_geom = '', ''
                if isinstance(obj['mass'], Sequence):
                    masses = list(obj['mass'])
                    assert len(masses) == mesh_parts
                    del obj['mass']
                else:
                    masses = [obj['mass']] * mesh_parts
                for i in range(mesh_parts):
                    obj_part = dict(obj)
                    obj_part['mesh'] += f'_part{i}'
                    obj_part['mass'] = masses[i]
                    props = " ".join(
                        [f'{k}="{v}"' for k, v in obj_part.items()])
                    object_geom += xml_format['object_geom'].format(
                        name=f'object_part{i}', props=props)
                    target_geom += xml_format['target_geom'].format(
                        name=f'target_part{i}', props=props)
                xml_format = dict(object_geom=object_geom,
                                  target_geom=target_geom)
            else:
                props = " ".join([f'{k}="{v}"' for k, v in obj.items()])
                xml_format = dict(
                    object_geom=xml_format['object_geom'].format(name='object',
                                                                 props=props),
                    target_geom=xml_format['target_geom'].format(name='target',
                                                                 props=props),
                )
            if object_cage:
                rgba = f"1 0 0 {cage_opacity}"
                xml_format['cage'] = '''
                <geom pos="0 0.55 0.4" quat="0.924 0.3826 0 0" size="1 0.75 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/>
                <geom pos="0 -0.55 0.4" quat="0.924 -0.3826 0 0" size="1 0.75 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/>
                <geom pos="-0.45 0 0.4" quat="0.924 0 0.3826 0" size="0.75 1 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/>
                <geom pos="0.45 0 0.4" quat="0.924 0 -0.3826 0" size="0.75 1 0.01" type="box" mass="200" rgba="{}" solimp="0.99 0.99 0.01" solref="0.01 1"/>
                '''.format(*[rgba] * 4)
            else:
                xml_format['cage'] = ''

        if weld_fingers:
            mocap_tp = '''
            <body mocap="true" name="{}:mocap" pos="0 0 0">
                <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.7" size="0.005 0.005 0.005" type="box"/>
                <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="1 0.005 0.005" type="box"/>
                <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 1 0.001" type="box"/>
                <geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0.1" size="0.005 0.005 1" type="box"/>
            </body>
            '''
            weld_tp = '<weld body1="{}:mocap" body2="{}" solimp="0.9 0.95 0.001" solref="0.02 1"/>'
            fingertip_names = [
                x.replace('robot0:', '') for x in FINGERTIP_BODY_NAMES
            ]
            xml_format['finger_mocaps'] = '\n'.join(
                [mocap_tp.format(n) for n in fingertip_names])
            xml_format['finger_welds'] = '\n'.join([
                weld_tp.format(m, f)
                for m, f in zip(fingertip_names, FINGERTIP_BODY_NAMES)
            ])
        else:
            xml_format['finger_welds'] = ''
            xml_format['finger_mocaps'] = ''

        initial_qpos = initial_qpos or default_qpos
        hand_env.HandEnv.__init__(self,
                                  model_path,
                                  n_substeps=n_substeps,
                                  initial_qpos=initial_qpos,
                                  relative_control=relative_control,
                                  arm_control=True,
                                  xml_format=xml_format)
        utils.EzPickle.__init__(self)
Beispiel #15
0
def goToGoal(env, lastObs):

    #goal = self.sampleGoal()
    goal = lastObs['desired_goal']

    #objectPosition
    objectPos = lastObs['observation'][3:6]
    gripperPos = lastObs['observation'][:3]
    gripperState = lastObs['observation'][9:11]
    objectRot = lastObs['observation'][12:15]  # Euler angles
    object_rel_pos = lastObs['observation'][6:9]

    #print("relative position ", object_rel_pos)
    #print("Goal position ", goal)
    #print("gripper Position ", gripperPos)
    #print("Object Position ", objectPos)
    #print("Object rotation ", objectRot)
    #print("Gripper state  ", gripperState)

    episodeAcs = []
    episodeObs = []
    episodeInfo = []

    object_oriented_goal = object_rel_pos.copy()
    object_oriented_goal[2] += 0.03

    print("Max episode steps ", env._max_episode_steps)

    timeStep = 0

    episodeObs.append(lastObs)

    while np.linalg.norm(object_oriented_goal
                         ) >= 0.0025 and timeStep <= env._max_episode_steps:
        env.render()
        action = [0, 0, 0, 0] + [1., 0., 1., 0.]

        object_oriented_goal = object_rel_pos.copy()
        object_oriented_goal[2] += 0.03
        object_rot_goal = rotations.euler2quat([0, 0, objectRot[1]])

        for i in range(len(object_oriented_goal)):
            action[i] = object_oriented_goal[i] * 6

        action[4:] = object_rot_goal
        action[3] = 0.05

        obsDataNew, reward, done, info = env.step(action)
        timeStep += 1

        episodeAcs.append(action)
        episodeInfo.append(info)
        episodeObs.append(obsDataNew)

        # note: don't need to reobserve object orientation, no online correction
        objectPos = obsDataNew['observation'][3:6]
        gripperPos = obsDataNew['observation'][:3]
        gripperState = obsDataNew['observation'][9:11]
        object_rel_pos = obsDataNew['observation'][6:9]

    while np.linalg.norm(
            object_rel_pos) >= 0.0025 and timeStep <= env._max_episode_steps:
        env.render()
        action = [0, 0, 0, 0] + [1., 0., 1., 0.]

        object_rot_goal = rotations.euler2quat([0, 0, objectRot[1]])
        for i in range(len(object_rel_pos)):
            action[i] = object_rel_pos[i] * 6

        action[4:] = object_rot_goal
        action[3] = -0.005

        obsDataNew, reward, done, info = env.step(action)
        timeStep += 1

        episodeAcs.append(action)
        episodeInfo.append(info)
        episodeObs.append(obsDataNew)

        objectPos = obsDataNew['observation'][3:6]
        gripperPos = obsDataNew['observation'][:3]
        gripperState = obsDataNew['observation'][9:11]
        object_rel_pos = obsDataNew['observation'][6:9]

    while np.linalg.norm(
            goal - objectPos) >= 0.001 and timeStep <= env._max_episode_steps:
        env.render()
        action = [0, 0, 0, 0] + [1., 0., 1., 0.]

        object_rot_goal = rotations.euler2quat([0, 0, objectRot[1]])
        for i in range(len(goal - objectPos)):
            action[i] = (goal - objectPos)[i] * 6

        action[4:] = object_rot_goal
        action[3] = -0.005

        obsDataNew, reward, done, info = env.step(action)
        timeStep += 1

        episodeAcs.append(action)
        episodeInfo.append(info)
        episodeObs.append(obsDataNew)

        objectPos = obsDataNew['observation'][3:6]
        gripperPos = obsDataNew['observation'][:3]
        gripperState = obsDataNew['observation'][9:11]
        object_rel_pos = obsDataNew['observation'][6:9]

    while True:
        env.render()
        action = [0, 0, 0, 0] + [1., 0., 1., 0.]

        action[3] = -0.005

        obsDataNew, reward, done, info = env.step(action)
        timeStep += 1

        episodeAcs.append(action)
        episodeInfo.append(info)
        episodeObs.append(obsDataNew)

        objectPos = obsDataNew['observation'][3:6]
        gripperPos = obsDataNew['observation'][:3]
        gripperState = obsDataNew['observation'][9:11]
        object_rel_pos = obsDataNew['observation'][6:9]

        if timeStep >= env._max_episode_steps: break

    #print("Toatal timesteps taken ", timeStep)

    actions.append(episodeAcs)
    observations.append(episodeObs)
    infos.append(episodeInfo)
Beispiel #16
0
def randomize_ur10_xml(var_mass=0.0, var_damp=0.0, var_fr=0.0, var_grav_x_y=0.0, var_grav_z=0.0, var_body_pos=0.00,
                       var_body_rot=0.0, worker_id=1):

    # parameters:
    #
    # var_mass        : mass variance relative to actual mass
    #
    # var_damp        : damping variance relative to the standard value
    # var_fr          : friction variance relative to the standard value
    #
    # var_grav_x_y    : gravity variance in x- and y-direction (absolute)
    # var_grav_z      : gravity variance in z-direction (absolute)
    #
    # var_body_pos    : variance of body position in m per direction
    # var_body_rot    : variance of body rotation in degree per axis
    # ______________________________________________________________________
    #
    # return          :  offset of car_body to consider for goal position
    #
    # ______________________________________________________________________


    main_xml_temp = os.path.join(*[ur_path, "ur10_assembly_setup_rand_temp_{}.xml".format(worker_id)])
    robot_body_rel = os.path.join("ur10_heg", "ur10_body_heg_rand_temp_{}.xml".format(worker_id))
    car_body_rel = os.path.join("ur10_heg", "car_body_rand_temp_{}.xml".format(worker_id))
    defaults_rel = os.path.join("ur10_heg", "ur10_default_rand_temp_{}.xml".format(worker_id))
    robot_body_xml_temp = os.path.join(ur_path, robot_body_rel)
    car_body_xml_temp = os.path.join(ur_path, car_body_rel)
    defaults_xml_temp = os.path.join(ur_path, defaults_rel)

    # load robot body xml and randomize body masses
    tree = ET.parse(robot_body_xml)
    inertials = []
    for elem in tree.iter():
        if elem.tag == 'inertial':      # find inertial elements in xml
            inertials.append(elem)

    for inertial in inertials:          # change masses
        inertial.attrib['mass'] = str((np.random.uniform(-var_mass, +var_mass) + 1) * float(inertial.attrib['mass']))

    tree.write(robot_body_xml_temp)     # save new xml in temp file to be loaded in gym env

    # load defaults xml and randomize joint damping and surface friction
    #damping_arm_rand = damping_arm * (1 + np.random.uniform(-var_damp, +var_damp))
    #damping_wrist_rand = damping_wrist * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_pan_rand = damping_pan * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_lift_rand = damping_lift * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_elbow_rand = damping_elbow * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_wrist1_rand = damping_wrist1 * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_wrist2_rand = damping_wrist2 * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_wrist3_rand = damping_wrist3 * (1 + np.random.uniform(-var_damp, +var_damp))

    friction_body_rand = friction_body * (1 + np.random.uniform(-var_fr, +var_fr, (3,)))
    friction_heg_rand = friction_heg * (1 + np.random.uniform(-var_fr, +var_fr, (3,)))

    friction_body_string = "{} {} {}".format(*friction_body_rand)
    friction_heg_string = "{} {} {}".format(*friction_heg_rand)

    tree = ET.parse(defaults_xml)

    for default in tree.findall('default'):
        if default.attrib['class'] == 'ur10:pan':
            default.findall('joint')[0].attrib['damping'] = str(damping_pan_rand)
        if default.attrib['class'] == 'ur10:lift':
            default.findall('joint')[0].attrib['damping'] = str(damping_lift_rand)
        if default.attrib['class'] == 'ur10:elbow':
            default.findall('joint')[0].attrib['damping'] = str(damping_elbow_rand)
        if default.attrib['class'] == 'ur10:wrist1':
            default.findall('joint')[0].attrib['damping'] = str(damping_wrist1_rand)
        if default.attrib['class'] == 'ur10:wrist2':
            default.findall('joint')[0].attrib['damping'] = str(damping_wrist2_rand)
        if default.attrib['class'] == 'ur10:wrist3':
            default.findall('joint')[0].attrib['damping'] = str(damping_wrist3_rand)
        if default.attrib['class'] == 'body':
            default.findall('geom')[0].attrib['friction'] = friction_body_string
        if default.attrib['class'] == 'heg':
            default.findall('geom')[0].attrib['friction'] = friction_heg_string

    tree.write(defaults_xml_temp)

    # load main xml and randomize gravity, also adapt includes to worker_id
    grav_x = np.random.uniform(-var_grav_x_y, var_grav_x_y)
    grav_y = np.random.uniform(-var_grav_x_y, var_grav_x_y)
    grav_z = np.random.uniform(-var_grav_z, var_grav_z) - 9.81
    grav_string = "{} {} {}".format(grav_x, grav_y, grav_z)

    tree = ET.parse(main_xml)
    tree.findall("option")[0].attrib['gravity'] = grav_string

    for include in tree.findall("default")[0].iter("include"):
        include.attrib['file'] = defaults_rel
    world_temp_files = [car_body_rel, robot_body_rel]
    for i, include in enumerate(tree.findall("worldbody")[0].iter("include")):
        include.attrib['file'] = world_temp_files[i]

    tree.write(main_xml_temp)

    # load car body xml and change position (offset is returned by this function)
    body_euler = normalize_rad(rotations.quat2euler(body_quat))
    body_pos_offset = np.random.uniform(-var_body_pos, var_body_pos, (3,))
    body_euler_offset = np.random.uniform(-var_body_rot/180*np.pi, var_body_rot/180*np.pi, (3,))

    body_pos_rand = body_pos + body_pos_offset
    body_euler_rand = body_euler + body_euler_offset
    body_quat_rand = rotations.euler2quat(body_euler_rand)

    body_pos_rand_string = "{} {} {}".format(*body_pos_rand)
    body_quat_rand_string = "{} {} {} {}".format(*body_quat_rand)

    tree = ET.parse(car_body_xml)
    for body in tree.findall('body'):
        if body.attrib['name'] == 'body_link':
            body.attrib['pos'] = body_pos_rand_string
            body.attrib['quat'] = body_quat_rand_string
    tree.write(car_body_xml_temp)

    offset = {
        'body_pos_offset': body_pos_offset,
        'body_euler_offset': body_euler_offset
    }

    return offset
                                  total_numsteps)
                writer.add_scalar('loss/policy', policy_loss, total_numsteps)
                writer.add_scalar('loss/entropy_loss', ent_loss,
                                  total_numsteps)
                writer.add_scalar('entropy_temprature/alpha', alpha,
                                  total_numsteps)
                updates += 1

        ##############
        if args.pos_control:
            # print("current pos: ",current_pos)
            next_a += current_pos
        elif args.ik_control:
            # print("orig ", next_a)
            # print("euler ", next_a[3:-1])
            quat = euler2quat(next_a[3:-1])
            ee_pos = np.concatenate((next_a[:3], quat))
            joint_pos = c.IK(ee_pos)
            if joint_pos == 0:
                print("no solution")
                #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Implement get_current_joint_pos()
                #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Implement fps()
                current_joint_pos = np.zeros(8)
                next_a = current_joint_pos
            else:
                print("got solution")
                next_a = joint_pos + [next_a[-1]]
            # print("joint_pos ", joint_pos)
            # print("next_a: ", next_a)
        for _ in range(args.action_repeat):
            next_state, reward, done, _ = env.step(next_a)  # Step
Beispiel #18
0
    def predict(self, obs, **kwargs):

        u = np.zeros(self._env.action_space.shape)
        new_goal = obs['desired_goal']
        if self._goal is None or np.any(self._goal != new_goal):
            self.reset(new_goal)

        obj_radius = self._object_size[0]
        obj_achieved_alt = obs['achieved_goal'].item()
        obj_achieved_pose = obs['observation'][44:51]
        if np.all(obj_achieved_pose[3:] == 0):
            obj_achieved_pose[3] = 1.0
        # tf.render_pose(obj_achieved_pose, self._raw_env.viewer, label="O")

        grp_xrot = 0.9 + obj_achieved_alt * 2.0

        curr_grp_poses = {
            a:
            np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                  rotations.mat2quat(
                      self._raw_env.sim.data.
                      get_site_xmat(f'gripper_{a}_center')), ]
            for a in ('l', 'r')
        }

        pos_errors = []
        for arm in ('l', 'r'):

            if arm == 'l':
                transf = np.r_[0., obj_radius, 0., 1., 0., 0., 0.]
                if self._phase == 3:
                    transf[1] *= 0.9
                    transf[2] = 0.05
                pose = tf.apply_tf(transf, obj_achieved_pose)
                # tf.render_pose(pose, self._raw_env.viewer, label="L")

                grp_target_pos = pose[:3]
                grp_target_rot = np.r_[np.pi - grp_xrot, 0.01, 0.01]
                target_pose = np.r_[grp_target_pos,
                                    rotations.euler2quat(grp_target_rot)]

                prev_err = self._prev_err_l
                curr_q = obs['observation'][:7]
                u_masked = u[:7]

            elif arm == 'r':
                transf = np.r_[0., -obj_radius, 0., 1., 0., 0., 0.]
                if self._phase == 3:
                    transf[1] *= 0.9
                    transf[2] = 0.05
                pose = tf.apply_tf(transf, obj_achieved_pose)
                # tf.render_pose(pose, self._raw_env.viewer, label="R")

                grp_target_pos = pose[:3]
                grp_target_rot = np.r_[-np.pi + grp_xrot, 0.01, np.pi]
                target_pose = np.r_[grp_target_pos,
                                    rotations.euler2quat(grp_target_rot)]

                prev_err = self._prev_err_r
                curr_q = obs['observation'][16:23]
                u_masked = u[8:15]
            else:
                continue

            u[7] = -1.0
            u[15] = -1.0

            if self._phase == 0:
                target_pose[2] += 0.1
                target_pose[1] += 0.05 * np.sign(target_pose[1])
            elif self._phase == 1:
                target_pose[2] += 0.01
                target_pose[1] += 0.05 * np.sign(target_pose[1])
            elif self._phase == 2:
                target_pose[2] += 0.01
            elif self._phase == 3:
                target_pose[2] += 0.00

            if self._raw_env.viewer is not None:
                tf.render_pose(target_pose.copy(), self._raw_env.viewer)

            curr_pose = curr_grp_poses[arm].copy()
            err_pose = curr_pose - target_pose
            err_pos = np.linalg.norm(err_pose[:3])
            pos_errors.append(err_pos)

            controller_k = 2.0
            err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
            target_q = self._raw_env.mocap_ik(-err_pose, arm)

            err_q = curr_q - target_q
            u_masked[:] = self._controller(err_q, prev_err, controller_k)

        self._phase_steps += 1
        if self._phase == 0 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.1:
            self._phase = 1
            self._phase_steps = 0
        elif self._phase == 1 and np.all(
                np.array(pos_errors) < 0.03) and err_rot < 0.1:
            self._phase = 2
            self._phase_steps = 0
        elif self._phase == 2:
            if self._phase_steps > 30:
                self._phase = 3
                self._phase_steps = 0

        u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
        return u