예제 #1
0
    def _reset_sim(self):
        self.sim.set_state(self.initial_state)

        # Randomize start position of object.
        if self.has_object:
            object_xpos = self.initial_gripper_xpos[:2]
            while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1:
                object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2)

            object_xpos = np.array([0.7, -0.5]) # + self.np_random.uniform(-0.02, 0.07, size=2)
            object_xpos[0] += self.np_random.uniform(-0.07, 0.4)
            object_xpos[1] += self.np_random.uniform(-0.25, 0.2)
            object_qpos = self.sim.data.get_joint_qpos('object0:joint')
            # object_qpos1 = self.sim.data.get_joint_qpos('object1:joint')
            assert object_qpos.shape == (7,)
            if self.debug_print:
                print("object_xpos0: ", object_xpos)
            # print("object1 pos: ", object_qpos1)
            object_qpos[:2] = object_xpos
            object_qpos[2] = 0.5
            # object_qpos[0] += 0.3
            # object_qpos[1] -= 0.1
            if self.debug_print:
                print("set_joint_qpos object_qpos: ", object_qpos)
            self.sim.data.set_joint_qpos('object0:joint', object_qpos)
            # print("get_body_xquat: ", self.sim.data.get_body_xquat('r_gripper_palm_link'))

        # set random gripper position
        # for i in range(3):
        #     gripper_target[i] += self.np_random.uniform(-0.2, 0.2)
        # print("gripper target random: ", gripper_target)
 

        # set random husky initial position
        base_ctrl = [0.0, 0.0]
        base_ctrl[0] += self.np_random.uniform(-1.0, -0.6) # position
        base_ctrl[1] += self.np_random.uniform(-0.2, 0.2) # rotation
        gripper_control = self.np_random.uniform(-1.0, 1.0)
        gripper_control = self.gripper_format_action(gripper_control)

        gripper_target = np.array([0.5, -0.3, 0.6])
        gripper_target[0] += base_ctrl[0]
        gripper_rotation = np.array([0, 0.707, 0.707, 0]) #(0, 0, -90)
        # for i in range(3):
        gripper_target[0] += self.np_random.uniform(-0.0, 0.1) # x
        gripper_target[1] += self.np_random.uniform(-0.1, 0.1) # y
        gripper_target[2] += self.np_random.uniform(-0.1, 0.1) # z
        self.sim.data.set_mocap_pos('gripper_r:mocap', gripper_target)
        self.sim.data.set_mocap_quat('gripper_r:mocap', gripper_rotation)

        action = np.concatenate([gripper_target, gripper_rotation, base_ctrl, gripper_control])
        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action) # base control + gripper control
        # utils.mocap_set_action(self.sim, action) # arm control in cartesion (x, y, z)

        for _ in range(15):
            self.sim.step()

        self.sim.forward()
        return True
    def _set_action(self, action):

        assert action.shape == (6,)
        action = action.copy()  # ensure that we don't change the action outside of this scope

        deviation = sum(abs(self.sim.data.qpos - self.sim_ctrl_q))
        # print(deviation )
        if  deviation > 0.35:  # reset control to current position if deviation too high
            self.set_force_for_q(self.sim.data.qpos)
            #print('deviation compensated')

        if self.ctrl_type == "joint":
            action *= 0.05  # limit maximum change in position
            # Apply action #scalarsto simulation.
            utils.ctrl_set_action(self.sim, action)
        elif self.ctrl_type == "cartesian":
            dx = action.reshape(6, )

            max_limit = self.dx_max
            # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction
            x_now = numpy.concatenate((self.sim.data.get_body_xpos("gripper_dummy_heg"), self.sim.data.get_body_xquat("gripper_dummy_heg")))
            x_then = x_now[:3] + dx[:3]*max_limit

            #diff_now = numpy.array(x_now - self.init_x).reshape(7,)
            diff_then = numpy.array(x_then[:3] - self.init_x[:3])

            barriers_min = numpy.array([-0.4, -0.8,   -0.4])
            barriers_max = numpy.array([0.4,  0.8, 0.4])

            #for i in range(3):
            #    if (barriers_min[i] < diff_then[i] < barriers_max[i]):
            #    dx[i] = dx[i] * max_limit
            #    elif barriers_min[i] > diff_then[i]:
            #        dx[i] = + max_limit
            #    elif barriers_max[i] < diff_then[i]:
            #        dx[i] = - max_limit
            for i in range(6):
                dx[i] = dx[i] * max_limit

            if self.corrective:
                # bias in direction of assembly
                bias_dir = -self.last_obs[:6]
                # print(bias_dir)
                for i in range(3,6):
                    if bias_dir[i] > 0.5:
                        print(i, bias_dir[i])
                    bias_dir[i] = bias_dir[i] # slower rotations
                bias_dir /= numpy.linalg.norm(bias_dir)
                # print(bias_dir)
                dx += bias_dir * max_limit * 0.5
                dx.reshape(6, 1)

            rot_mat = self.sim.data.get_body_xmat('gripper_dummy_heg')
            dx_ = numpy.concatenate([rot_mat.dot(dx[:3]), rot_mat.dot(dx[3:])])  ## transform to right coordinate system
            #dx_[2]+= 1
            dq = self.get_dq(dx_)
            dq += numpy.random.normal(self.dq_mean_si, self.dq_std_si, 6)
            q = self.sim_ctrl_q + dq
        
            self.set_force_for_q(q)
    def _set_action(self, action):
        assert action.shape == (self.n_actions, )  # 6 mobile base
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        print("_set_action:", action)
        pos_ctrl, base_ctrl, gripper_ctrl = action[:3], action[3:-1], action[
            -1]
        # pos_ctrl, gripper_ctrl = action[:3], action[3:]

        pos_ctrl *= 0.03  # limit maximum change in position
        base_ctrl *= 0.01
        # rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        rot_ctrl = [0, 0.707, 0.707, 0]  #(0 0 0)
        # rot_ctrl = [0.707, 0.0, 0.0, -0.707] # (0 0 -90)
        # rot_ctrl = np.array([0.5, -0.5, 0.5, -0.5]) #(-90, 90, 0)
        # rot_ctrl = np.array([0.5, 0.5, 0.5, -0.5]) #(90, 0, 90) gripper down
        # rot_ctrl = np.array([0.707, 0.0, 0.0, -0.707]) #(0, 0, -90)
        # gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        if self.gripper_close:
            gripper_ctrl = -1.0
        else:
            gripper_ctrl = 1.0
        gripper_ctrl = self.gripper_format_action(gripper_ctrl)
        # assert gripper_ctrl.shape == (2,)
        assert gripper_ctrl.shape == (self.gripper_actual_dof, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl])
        # action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim,
                              action)  # base control + gripper control
        utils.mocap_set_action(self.sim,
                               action)  # arm control in cartesion (x, y, z)
예제 #4
0
    def _set_action(self, ac):
        current_grid_cell = self._get_obs()['observation']
        pos_ctrl, gripper_ctrl = np.zeros(3), np.zeros(1)
        if ac == 0:
            if current_grid_cell[0] >= 1:
                pos_ctrl[0] = -self.cell_size
        elif ac == 1:
            if current_grid_cell[0] <= self.grid_size - 1:
                pos_ctrl[0] = self.cell_size
        elif ac == 2:
            if current_grid_cell[1] >= 1:
                pos_ctrl[1] = -self.cell_size
        elif ac == 3:
            if current_grid_cell[1] <= self.grid_size - 1:
                pos_ctrl[1] = self.cell_size
        else:
            raise Exception('Invalid action')

        rot_ctrl = [1.0, 0.0, 1.0, 0.0]
        # Force the gripper to be at table height
        gripper_pos = self.sim.data.get_site_xpos('robot0:grip')
        pos_ctrl[2] = self.height_offset - gripper_pos[2]
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        # Apply action in the simulator using IK
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
예제 #5
0
    def _set_action(self, action):

        assert action.shape == (6, )
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope

        deviation = sum(abs(self.sim.data.qpos - self.sim.data.ctrl))
        # print(deviation )
        if deviation > 0.35:  # reset control to current position if deviation too high
            self.sim.data.ctrl[:] = self.sim.data.qpos + self.get_dq(
                [0, 0, 0.005, 0, 0, 0])
            print('deviation compensated')

        if self.ctrl_type == "joint":
            action *= 0.05  # limit maximum change in position
            # Apply action #scalarsto simulation.
            utils.ctrl_set_action(self.sim, action)
        elif self.ctrl_type == "cartesian":
            dx = action.reshape(6, )

            max_limit = 0.0001 * 10
            # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction
            x_now = numpy.concatenate(
                (self.sim.data.get_body_xpos("gripper_dummy_heg"),
                 self.sim.data.get_body_xquat("gripper_dummy_heg")))
            x_then = x_now[:3] + dx[:3] * max_limit

            #diff_now = numpy.array(x_now - self.init_x).reshape(7,)
            diff_then = numpy.array(x_then[:3] - self.init_x[:3])

            barriers_min = numpy.array([-0.1, -0.05, -0.1])
            barriers_max = numpy.array([0.1, 0.2, 0.1])

            for i in range(3):
                if (barriers_min[i] < diff_then[i] < barriers_max[i]):
                    dx[i] = dx[i] * max_limit
                elif barriers_min[i] > diff_then[i]:
                    dx[i] = +max_limit
                elif barriers_max[i] < diff_then[i]:
                    dx[i] = -max_limit
            for i in range(3, 6):
                dx[i] = dx[i] * max_limit

            if self.corrective:
                # bias in direction of assembly
                bias_dir = -self.last_obs[:6]
                # print(bias_dir)
                for i in range(3, 6):
                    if bias_dir[i] > 0.5:
                        print(i, bias_dir[i])
                    bias_dir[i] = bias_dir[i]  # slower rotations
                bias_dir /= np.linalg.norm(bias_dir)
                # print(bias_dir)
                dx += bias_dir * max_limit * 0.5
                dx.reshape(6, 1)

            dq = self.get_dq(dx)
            # print(sum(abs(sim.data.qpos-sim.data.ctrl)))
            for i in range(6):
                self.sim.data.ctrl[i] += dq[i]
예제 #6
0
    def _set_action(self, action):
        assert action.shape == (4, )
        action = action.copy()
        pos_discrete_ctrl, gripper_discrete_ctrl = action[:3], action[3]
        assert np.all(pos_discrete_ctrl >= 0)
        scale = 0.04
        # Convert discrete to continuous control
        # 0 - nothing
        # 1 - (n_bins - 1) / 2 - +scale
        # (n_bins+1) / 2 - (n_bins - 1) - -scale
        pos_ctrl = np.zeros_like(pos_discrete_ctrl, dtype=np.float32)
        for ac in range(3):
            if pos_discrete_ctrl[ac] == 0:
                pos_ctrl[ac] = 0.
            elif pos_discrete_ctrl[ac] > 0 and pos_discrete_ctrl[ac] <= (
                    self.n_bins - 1) / 2:
                pos_ctrl[ac] = scale * pos_discrete_ctrl[ac]
            else:
                pos_ctrl[ac] = -scale * \
                    (pos_discrete_ctrl[ac] - (self.n_bins - 1)/2)

                # Fixed rotation of the end effector, expressed as a quaternion
        rot_ctrl = [1., 0., 1., 0.]
        # 0 means close the gripper = -1, 1 means open the gripper = +1
        gripper_ctrl = 2 * gripper_discrete_ctrl - 1
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
예제 #7
0
    def step(self, action: np.ndarray):
        action = np.clip(action, self.action_space.low, self.action_space.high)
        if self._control_method == 'torque_control':
            self.forward_dynamics(action)
        elif self._control_method == 'position_control':
            assert action.shape == (4, )
            action = action.copy()
            pos_ctrl, gripper_ctrl = action[:3], action[3]
            pos_ctrl *= 0.1  # limit the action
            rot_ctrl = np.array([0., 1., 1., 0.])
            gripper_ctrl = -50 if gripper_ctrl < 0 else 50
            gripper_ctrl = np.array([gripper_ctrl, -gripper_ctrl])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            ctrl_set_action(self.sim, action)  # For gripper
            mocap_set_action(self.sim,
                             action)  # For pos control of the end effector
            self.sim.step()

        obs = self.get_current_obs()
        next_obs = obs['observation']
        achieved_goal = obs['achieved_goal']
        goal = obs['desired_goal']
        gripper_pos = obs['gripper_pos']
        reward = self._compute_reward(achieved_goal, goal, gripper_pos)
        collided = self._is_collided()
        if collided:
            reward -= 200
        done = (self._goal_distance(achieved_goal, goal) <
                self._distance_threshold) or collided

        return Step(next_obs, reward, done)
예제 #8
0
    def _reset_sim(self):
        self.sim.set_state(self.initial_state)

        action = np.array([0, 0, 0, 1, 0, 1, 0, 1, 1])
        for _ in range(10):
            utils.ctrl_set_action(self.sim, action)
            self.sim.step()

        if self.random_obj:
            self.sim.model.geom_type[-1] = np.random.choice(2) + 5

            min_color_diff = 0
            while min_color_diff < 0.1:
                rgba = np.random.uniform(0, 0.5, size=3)
                if self.train_random:
                    rgba[1] = 1
                    size = np.random.uniform(0.02, 0.025, size=3)
                elif self.test_random:
                    rgba[0] = 1
                    size = np.random.uniform(0.025, 0.03, size=3)
                else:
                    rgba *= 2
                    size = np.random.uniform(0.02, 0.03, size=3)
                color_diff = np.abs(self.sim.model.geom_rgba.copy()[:-1, :3] -
                                    rgba)
                min_color_diff = min(np.sum(color_diff, axis=1))
            self.sim.model.geom_rgba[-1][:3] = rgba

            self.sim.model.geom_size[-1] = size

        # Randomize start position of object.
        if self.goal_type == "fixed":
            offset = np.array([0.04387432, -0.02397519])
        else:
            if self.limit_dir:
                if self.train_random:
                    offset = np.array([
                        self.np_random.uniform(-self.obj_range, 0),
                        self.np_random.uniform(-self.obj_range, self.obj_range)
                    ])
                elif self.test_random:
                    offset = np.array([
                        self.np_random.uniform(0, self.obj_range),
                        self.np_random.uniform(-self.obj_range, self.obj_range)
                    ])
            else:
                offset = self.np_random.uniform(-self.obj_range,
                                                self.obj_range,
                                                size=2)
            norm = np.linalg.norm(offset, axis=-1)
            if norm < 0.1:
                offset = offset / norm * 0.1
        object_xpos = self.initial_gripper_xpos[:2] + offset
        object_qpos = self.sim.data.get_joint_qpos('object0:joint')
        assert object_qpos.shape == (7, )
        object_qpos[:2] = object_xpos
        self.sim.data.set_joint_qpos('object0:joint', object_qpos)

        self.sim.forward()
        return True
예제 #9
0
    def _set_action(self, action):
        assert action.shape == (3,)
        self.counter += 1
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([-1, -1])
        assert gripper_ctrl.shape == (2,)

        if self.counter <= 1:
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            action[2] = 0.56402952

            for _ in range(20):
                utils.mocap_set_action_abs(self.sim, action)
                self.sim.step()
        else:
            pos_ctrl *= 0.05  # limit maximum change in position
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            utils.mocap_set_action(self.sim, action)

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)

        if self.counter >= 3:
        # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:
            action = np.array([0,0,-0.05,1,0,1,0,1,1])
            utils.mocap_set_action(self.sim, action)
            for _ in range(5):
                utils.ctrl_set_action(self.sim, action)
                self.sim.step()
예제 #10
0
    def _set_action(self, action):
        assert action.shape == (4, )
        if self.mode == "controller":
            self.viewer.joystick_callback(0)
            action = self.viewer.action.copy()  # get action from user
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        if self.mode == "controller":
            self.actions.append(action.reshape(1,
                                               4).copy())  # hardcoded reshape
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
    def _set_action(self, action):
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        if not self.rotational_control_z:
            assert action.shape == (4, )
            pos_ctrl, gripper_ctrl = action[:3], action[3]
            rot_ctrl = [
                1., 0., 1., 0.
            ]  # fixed rotation of the end effector, expressed as a quaternion
        else:
            assert action.shape == (5, )
            pos_ctrl, rot_z, gripper_ctrl = action[:3], action[3], action[4]
            rot_ctrl = Rotation.from_euler('xyz', [180, -90, rot_z * 90],
                                           degrees=True).as_quat()

        pos_ctrl *= 0.05  # limit maximum change in position
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        # gripper finger control (symmetric)
        utils.ctrl_set_action(self.sim, action)
        # gripper xyz position control
        utils.mocap_set_action(self.sim, action)
예제 #12
0
    def _reset_sim(self):
        self.sim.set_state(self.initial_state)
        # Randomize start position of object.
        if self.has_object:
            for i in range(self.num_objs):
                obj_id = self.sim.model.geom_name2id('object%i' % i)
                object_size = self.sim.model.geom_size[obj_id][0]

                object_xpos = self.initial_agent_position[:2] / 2

                if self.deterministic:
                    n = self.np_random.uniform(self.gRange - 0.05, 0, size=1)[0]
                    while n > self.goal.flatten()[0]:
                        n = self.np_random.uniform(self.gRange - 0.05, 0, size=1)[0]
                    object_xpos = np.array([np.array([n, 0])])
                elif self.fixed_obj:
                    object_xpos = np.array([np.array([-0.1, 0])])
                else:
                    while (np.linalg.norm(object_xpos - self.initial_agent_position[:2]) < min(0.2, self.gRange / 4)):
                        object_xpos = self.np_random.uniform(-self.target_range, self.target_range, size=2)

                object_qpos = self.sim.data.get_joint_qpos('object%d:joint' % i)
                assert object_qpos.shape == (7,)
                object_qpos[:2] = object_xpos
                self.sim.data.set_joint_qpos('object%d:joint' % i, object_qpos)

        action = np.array([0, 0])
        utils.ctrl_set_action(self.sim, action)
        self.sim.forward()
        return True
예제 #13
0
    def _set_action(self, action):
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]
        stiffness_ctrl = action[4] if action.shape == (5, ) else 0.0

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        stiffness_ctrl *= 50.0 if action.shape == (5, ) else 0.0
        #        self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 0] += stiffness_ctrl
        #        self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:r_gripper_finger_joint'), 0] += stiffness_ctrl
        #        self.sim.model.actuator_biasprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 1] += -stiffness_ctrl
        #        self.sim.model.actuator_biasprm[self.sim.model.actuator_name2id('robot0:r_gripper_finger_joint'), 1] += -stiffness_ctrl
        if action.shape == (5, ):
            stiffness_ctrl += self.prev_stiffness
            stiffness_ctrl = np.max(
                [np.min([stiffness_ctrl, self.psv_prev_stiffness]), 0.0])
            self.prev_stiffness = stiffness_ctrl
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate(
            [pos_ctrl, rot_ctrl, gripper_ctrl, [self.prev_stiffness]])
        #        print("Gripper pose:{}, stiffness:{}".format(gripper_ctrl, self.prev_stiffness))
        #        self.prev_stiffness = self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 0]

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action, self.stiffness_on)
        utils.mocap_set_action(self.sim, action)
    def _set_action(self, action):
        assert action.shape == (4,)
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [0., 1., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion {Vertical}
        #rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion {Horizontal}
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)


        # Apply action to simulation

        # Determine the closest cloth node to the gripper
        closest, dist_closest = self.find_closest_indice(self.grip_pos)
        # Only allow gripping if in proximity
        if dist_closest<=0.001:
            utils.grasp(self.sim, gripper_ctrl, closest)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)

        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
예제 #15
0
파일: slide_env.py 프로젝트: Jekyll1021/gym
    def _set_action(self, action):
        assert action.shape == (3, )
        self.counter += 1
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl = action[:3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([0, 0])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        # utils.ctrl_set_action(self.sim, action)

        utils.mocap_set_action(self.sim, action)

        if self.counter >= 2:
            for _ in range(10):
                self.sim.step()
                pos_ctrl = np.array([0.0, 0.02, 0.0])
                gripper_ctrl = np.array([0, 0])
                action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
                utils.ctrl_set_action(self.sim, action)
                utils.mocap_set_action(self.sim, action)
                self.sim.step()
    def _set_action(self, action):
        if action.shape != (4, ):
            try:
                action = np.reshape(action, (4, ))
            except:
                print('Action with shape ', action.shape,
                      ' could not be reshaped to (4,)')
        assert action.shape == (4, )
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2, )
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
    def _sample_goal(self):
        self.current_state = copy.deepcopy(self.sim.get_state())
        action = np.random.uniform(-1,1,4)
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        # pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        for i in range(1000):
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)

        self.sample_goal_state = copy.deepcopy(self.sim.get_state())

        self.sim.set_state(self.current_state)
        goal = self.render(mode='rgb_array')
        # if self.has_object:
        #     goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
        #     goal += self.target_offset
        #     goal[2] = self.height_offset
        #     if self.target_in_the_air and self.np_random.uniform() < 0.5:
        #         goal[2] += self.np_random.uniform(0, 0.45)
        # else:
        #     goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-0.15, 0.15, size=3)
        return goal.copy()
예제 #18
0
    def _set_action(self, action):
        if self._use_real_robot:
            assert action.shape == (self.n_actions,) # 6 mobile base
            action = action.copy()  # ensure that we don't change the action outside of this scope
            if self.debug_print:
                print("_set_action:", action)

            pos_ctrl, gripper_ctrl = action[:3], action[3:]

            pos_ctrl *= 0.03  # limit maximum change in position
            rot_ctrl = [0.5, 0.5, -0.5, -0.5] #(0 0 0)

            if self.gripper_close:
                gripper_ctrl = 1.0
            else:
                gripper_ctrl = -1.0
            gripper_ctrl = self.gripper_format_action(gripper_ctrl)
            assert gripper_ctrl.shape == (self.gripper_actual_dof,)
            if self.block_gripper:
                gripper_ctrl = np.zeros_like(gripper_ctrl)
            # action = np.concatenate([pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl])
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

            ee_pose = self.sia_7f_arm_robot.arm_get_ee_pose()

            arm_action = pos_ctrl
            print("arm_action: ", arm_action)

            # Applay action to real robot
            self.sia_7f_arm_robot.arm_set_ee_pose_relative(arm_action)
            if self.gripper_close:
                self.sia_7f_arm_robot.gripper_close()
            else:
                self.sia_7f_arm_robot.gripper_open()

        else:    
            assert action.shape == (self.n_actions,) # 6 mobile base
            action = action.copy()  
            if self.debug_print:
                print("_set_action:", action)
            pos_ctrl, gripper_ctrl = action[:3], action[3:]

            pos_ctrl *= 0.03  # limit maximum change in position
            rot_ctrl = [0.5, 0.5, -0.5, -0.5] #(0 0 0)

            if self.gripper_close:
                gripper_ctrl = 1.0
            else:
                gripper_ctrl = -1.0
            gripper_ctrl = self.gripper_format_action(gripper_ctrl)
            # assert gripper_ctrl.shape == (2,)
            assert gripper_ctrl.shape == (self.gripper_actual_dof,)
            if self.block_gripper:
                gripper_ctrl = np.zeros_like(gripper_ctrl)
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            
            # Apply action to simulation.
            utils.ctrl_set_action(self.sim, action) # base control + gripper control
            utils.mocap_set_action(self.sim, action) # arm control in cartesion (x, y, z)
예제 #19
0
    def _set_action(self, action):
        if not self.discrete:
            # Continuous
            assert action.shape == (4,)
            action = action.copy()  # ensure that we don't change the action outside of this scope
            pos_ctrl, gripper_ctrl = action[:3], action[3]

            pos_ctrl *= 0.05  # limit maximum change in position
            # fixed rotation of the end effector, expressed as a quaternion
            rot_ctrl = [1., 0., 1., 0.]
            gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
            assert gripper_ctrl.shape == (2,)
            if self.block_gripper:
                gripper_ctrl = np.zeros_like(gripper_ctrl)
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        else:
            # Discrete
            assert action.shape == (4,)
            action = action.copy()
            pos_discrete_ctrl, gripper_discrete_ctrl = action[:3], action[3]
            assert np.all(pos_discrete_ctrl >= 0)
            scale = 0.04
            # Convert discrete to continuous control
            # 0 - nothing
            # 1 - (n_bins - 1) / 2 - +scale
            # (n_bins+1) / 2 - (n_bins - 1) - -scale
            pos_ctrl = np.zeros_like(pos_discrete_ctrl, dtype=np.float32)
            for ac in range(3):
                if pos_discrete_ctrl[ac] == 0:
                    pos_ctrl[ac] = 0.
                elif pos_discrete_ctrl[ac] > 0 and pos_discrete_ctrl[ac] <= (self.n_bins - 1) / 2:
                    pos_ctrl[ac] = scale * pos_discrete_ctrl[ac]
                else:
                    pos_ctrl[ac] = -scale * \
                        (pos_discrete_ctrl[ac] - (self.n_bins - 1)/2)

            # Fixed rotation of the end effector, expressed as a quaternion
            rot_ctrl = [1., 0., 1., 0.]
            # -1 means close the gripper, 0 means do nothing, 1 means open the gripper
            gripper_ctrl = np.array(
                [gripper_discrete_ctrl, gripper_discrete_ctrl])
            if self.block_gripper:
                gripper_ctrl = np.zeros_like(gripper_ctrl)
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Force gripper to be at table height
        obs = self._get_obs()
        gripper_pos = obs['observation'][0:3]
        pos_ctrl[2] = self.height_offset - gripper_pos[2]
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        # Apply action to simulation
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
 def _set_action(self, action):
     assert action.shape == (4, )
     action = action.copy()
     pos_ctrl, gripper_ctrl = action[:3], action[3]
     rot_ctrl = [1., 0., 1., 0.]
     gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
     assert gripper_ctrl.shape == (2, )
     if self.block_gripper:
         gripper_ctrl = np.zeros_like(gripper_ctrl)
     action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
     r_utils.ctrl_set_action(self.sim, action)
     r_utils.mocap_set_action(self.sim, action)
예제 #21
0
파일: block_env.py 프로젝트: tgfred/flow_rl
    def _set_action(self, action):
        assert action.shape == (self.n_actions,)

        self.action = action

        # energy correction
        h = self.sim.data.get_site_xpos('tar')[2]
        if h < 0.8:
            self.energy_corrected = False
        # each time the target crosses the z == 0.8 line its energy gets corrected
        if not self.energy_corrected:
            if h > 0.8:
                self.sim.data.set_joint_qvel('tar:z', self.sqrt_2_g * np.sqrt(self.initial_h - h))
                self.give_reflection_reward = True
                self.energy_corrected = True

        if not self.no_movement:
            a = np.zeros([8], dtype=np.float32)
            a[0] = 0.015 * action[0]
            a[1] = 0.015 * action[1]
            euler = rotations.mat2euler(self.sim.data.get_site_xmat('robot0:grip'))
            a_4 = euler[0]
            a_5 = euler[1]

            # adjusting z position to a plane (otherwise the arm has a downwards drift)
            if self.sim.data.get_site_xpos('robot0:grip')[2] < 0.705:
                a[2] = 0.665 - self.sim.data.get_site_xpos('robot0:grip')[2]
            else:
                a[2] = 0.665 - self.sim.data.get_site_xpos('robot0:grip')[2]
            a[4] = 0.015 * action[2]
            a[5] = 0.015 * action[3]

            # Restricting rotation action:

            if a_4 > 0.3 and action[2] > 0.:
                a[4] = 0.
            if a_4 < -0.3 and action[2] < 0.:
                a[4] = 0.

            if a_5 > 0.3 and action[3] > 0.:
                a[5] = 0.
            if a_5 < -0.3 and action[3] < 0.:
                a[5] = 0.

            # Apply action to simulation.

            utils.ctrl_set_action(self.sim, a)
            utils.mocap_set_action(self.sim, a)

        else:
            pass
예제 #22
0
    def _set_action(self, action):

        assert action.shape == (6, )
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope

        deviation = sum(abs(self.sim.data.qpos - self.sim.data.ctrl))
        # print(deviation )
        if deviation > 0.35:  # reset control to current position if deviation too high
            self.sim.data.ctrl[:] = self.sim.data.qpos
            +self.get_dq([0, 0, 0.005, 0, 0, 0])
            #print('deviation compensated')

        if self.ctrl_type == "joint":
            action *= 0.05  # limit maximum change in position
            # Apply action #scalarsto simulation.
            utils.ctrl_set_action(self.sim, action)
        elif self.ctrl_type == "cartesian":
            dx = action.reshape(6, )

            max_limit = self.dx_max
            # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction
            x_now = numpy.concatenate(
                (self.sim.data.get_body_xpos("gripper_dummy_heg"),
                 self.sim.data.get_body_xquat("gripper_dummy_heg")))
            x_then = x_now[:3] + dx[:3] * max_limit

            #diff_now = numpy.array(x_now - self.init_x).reshape(7,)
            diff_then = numpy.array(x_then[:3] - self.init_x[:3])

            barriers_min = numpy.array([-0.4, -0.8, -0.4])
            barriers_max = numpy.array([0.4, 0.8, 0.4])

            #for i in range(3):
            #    if (barriers_min[i] < diff_then[i] < barriers_max[i]):
            #    dx[i] = dx[i] * max_limit
            #    elif barriers_min[i] > diff_then[i]:
            #        dx[i] = + max_limit
            #    elif barriers_max[i] < diff_then[i]:
            #        dx[i] = - max_limit
            for i in range(6):
                dx[i] = dx[i] * max_limit

            #rot_mat = self.sim.data.get_body_xmat('gripper_dummy_heg')
            #dx_ = np.concatenate([rot_mat.dot(dx[:3]), rot_mat.dot(dx[3:])])
            #dx_[2]+= 1
            dq = self.get_dq(dx)
            for i in range(6):
                self.sim.data.ctrl[i] += dq[i]
예제 #23
0
    def _set_action(self, action):
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1., 0., 1., 0.
        ]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
예제 #24
0
    def _set_action(self, action):
        assert action.shape == (4,)
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
예제 #25
0
    def _set_action(self, action):
        assert action.shape == (self.n_actions, )

        self.action = action

        if not self.no_movement:
            while action.shape < (4, ):
                action = np.append(action, [0.])
            if self.sim.data.get_site_xpos(
                    'robot0:grip')[0] < 1.3 and action[0] < 0.:
                action[0] = 0.
            if self.sim.data.get_site_xpos(
                    'robot0:grip')[0] > 1.8 and action[0] > 0.:
                action[0] = 0.

            if self.sim.data.get_site_xpos(
                    'robot0:grip')[1] < 0.4 and action[1] < 0.:
                action[1] = 0.
            if self.sim.data.get_site_xpos(
                    'robot0:grip')[1] > 1.1 and action[1] > 0.:
                action[1] = 0.

            if self.sim.data.get_site_xpos(
                    'robot0:grip')[2] < 0.47 and action[2] < 0.:
                action[2] = 0.
            if self.sim.data.get_site_xpos(
                    'robot0:grip')[2] > 0.87 and action[2] > 0.:
                action[2] = 0.

                action = action.copy(
                )  # ensure that we don't change the action outside of this scope
                pos_ctrl, gripper_ctrl = action[:3], action[3]

                pos_ctrl *= 0.05  # limit maximum change in position
                # rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
                rot_ctrl = [1., 0., 0., 0.]
                gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
                assert gripper_ctrl.shape == (2, )
                if self.block_gripper:
                    gripper_ctrl = np.zeros_like(gripper_ctrl)
                # add matrix to one
                action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

                # Apply action to simulation.
                utils.ctrl_set_action(self.sim, action)
                utils.mocap_set_action(self.sim, action)
        else:
            pass
예제 #26
0
    def _set_action(self, action):
        assert action.shape == (6, )
        action = action.copy(
        )  # ensure that we don't change the action outside of this scope

        if self.ctrl_type == "joint":
            action *= 0.05  # limit maximum change in position
            # Apply action to simulation.
            utils.ctrl_set_action(self.sim, action)
        elif self.ctrl_type == "cartesian":
            dx = action.reshape(6, )

            max_limit = 0.00005
            # limitation of operation space, we only allow small rotations adjustments in x and z directions, moving in y direction
            x_now = numpy.concatenate(
                (self.sim.data.get_body_xpos("gripper_dummy_heg"),
                 self.sim.data.get_body_xquat("gripper_dummy_heg")))
            x_then = x_now[:3] + dx[:3] * max_limit

            #diff_now = numpy.array(x_now - self.init_x).reshape(7,)
            diff_then = numpy.array(x_then[:3] - self.init_x[:3])

            barriers_min = numpy.array([-0.1, -0.05, -0.1])
            barriers_max = numpy.array([0.1, 0.2, 0.1])

            for i in range(3):
                if (barriers_min[i] < diff_then[i] < barriers_max[i]):
                    dx[i] = dx[i] * max_limit
                elif barriers_min[i] > diff_then[i]:
                    dx[i] = +max_limit
                elif barriers_max[i] < diff_then[i]:
                    dx[i] = -max_limit

            for i in range(3, 6):
                dx[i] = dx[i] * max_limit * 0.01  #slower rotations

            # dx[1] += 0.5*max_limit #bias in direction of assembly
            dx.reshape(6, 1)

            jacp = self.sim.data.get_body_jacp(
                name="gripper_dummy_heg").reshape(3, 6)
            jacr = self.sim.data.get_body_jacr(
                name="gripper_dummy_heg").reshape(3, 6)
            jac = numpy.vstack((jacp, jacr))
            dq = numpy.linalg.lstsq(jac, dx)[0].reshape(6, )
            # print(sum(abs(sim.data.qpos-sim.data.ctrl)))
            for i in range(6):
                self.sim.data.ctrl[i] += dq[i]
예제 #27
0
파일: fetch_env.py 프로젝트: jiapei100/gym
    def _set_action(self, action):
        assert action.shape == (4,)
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
예제 #28
0
    def _set_action(self, action):
        # TODO: set action, n_action (= number of actuators) and action control
        assert action.shape == (4,)
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]

        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [0., 0., 0., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
예제 #29
0
    def _set_action(self, action):
        assert action.shape == (self.n_actions, )  # 7

        self.action = action

        if not self.no_movement:
            while action.shape < (4, ):
                action = np.append(action, [0.])
            if self.sim.data.get_site_xpos(
                    'gripperpalm')[0] < 1.3 and action[0] < 0.:
                action[0] = 0.
            if self.sim.data.get_site_xpos(
                    'gripperpalm')[0] > 1.8 and action[0] > 0.:
                action[0] = 0.

            if self.sim.data.get_site_xpos(
                    'gripperpalm')[1] < 0.4 and action[1] < 0.:
                action[1] = 0.
            if self.sim.data.get_site_xpos(
                    'gripperpalm')[1] > 1.1 and action[1] > 0.:
                action[1] = 0.

            if self.sim.data.get_site_xpos(
                    'gripperpalm')[2] < 0.47 and action[2] < 0.:
                action[2] = 0.
            if self.sim.data.get_site_xpos(
                    'gripperpalm')[2] > 0.87 and action[2] > 0.:
                action[2] = 0.

            action = action.copy(
            )  # ensure that we don't change the action outside of this scope
            pos_ctrl, gripper_ctrl = action[:3], action[3]

            pos_ctrl *= 0.05  # limit maximum change in position
            rot_ctrl = [1., 0., 0., 0.]
            gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
            assert gripper_ctrl.shape == (2, )
            if self.block_gripper:
                gripper_ctrl = np.zeros_like(gripper_ctrl)
            action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
            print("action:", action)
            # print("action size:", action.size())
            # Apply action to simulation.
            utils.ctrl_set_action(self.sim, action)
            utils.mocap_set_action(self.sim, action)
        else:
            pass
예제 #30
0
    def _set_action(self, action):
        assert action.shape == (4,)
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]
        # print("------>",action)
        #self.sim.data.set_joint_qpos('robot0:wrist_roll_joint', 0.)
        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])

        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
    def _set_action(self, action):
        assert action.shape == (3, )
        action = (
            action.copy()
        )  # ensure that we don't change the action outside of this scope

        action *= 0.05  # limit maximum change in position
        rot_ctrl = [
            1.0,
            0.0,
            1.0,
            0.0,
        ]  # fixed rotation of the end effector, expressed as a quaternion
        action = np.concatenate([action, rot_ctrl, np.zeros(2)])

        # Apply action to simulation.
        robot_utils.ctrl_set_action(self.sim, action)
        robot_utils.mocap_set_action(self.sim, action)