def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # initial position of end-effector
        self.ee_initial_pos = self._eef_xpos

        # create trajectory
        self.trajectory = self.get_trajectory()
        
        # initialize trajectory step
        self.initial_traj_step = 0  # np.random.default_rng().uniform(low=0, high=self.num_waypoints - 1)
        self.traj_step = self.initial_traj_step  # step at which to evaluate trajectory. Must be in interval [0, num_waypoints - 1]
        
        # set first trajectory point
        self.traj_pt = self.trajectory.eval(self.traj_step)
        self.traj_pt_vel = self.trajectory.deriv(self.traj_step)

        # give controller access to robot (and its measurements)
        if self.robots[0].controller.name == "HMFC":
            self.robots[0].controller.set_robot(self.robots[0])

        # initialize controller's trajectory
        self.robots[0].controller.traj_pos = self.traj_pt
        self.robots[0].controller.traj_ori = T.quat2axisangle(self.goal_quat)

        # get initial joint positions for robot
        init_qpos = self._get_initial_qpos()

        # override initial robot joint positions
        self.robots[0].set_robot_joint_positions(init_qpos)

        # update controller with new initial joints
        self.robots[0].controller.update_initial_joints(init_qpos)

        # initialize data collection
        if self.save_data:
            # simulation data
            self.data_ee_pos = np.zeros((self.horizon, 2))
            self.data_ee_goal_pos = np.zeros((self.horizon, 2))
            self.data_ee_force = np.zeros(self.horizon)
            self.data_ee_force_mean = np.zeros(self.horizon)
            self.data_ee_goal_force = np.zeros(self.horizon)
            self.data_ee_z_pos = np.zeros(self.horizon)
            self.data_desired_torque = np.zeros((self.horizon, self.robots[0].dof))
            self.data_external_torque = np.zeros((self.horizon, self.robots[0].dof))
            self.data_compensation_torque = np.zeros((self.horizon, self.robots[0].dof))
            self.data_time = np.zeros(self.horizon)
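
# The reset above assumes self.get_trajectory() returns an object exposing eval(step) and
# deriv(step) over the (possibly fractional) waypoint index range [0, num_waypoints - 1].
# A minimal sketch of such an interface, using a straight-line Cartesian path; the class
# name and parameterization are illustrative assumptions, not the environment's actual
# implementation:
import numpy as np

class LinearTrajectory:
    def __init__(self, start, end, num_waypoints):
        self.start = np.asarray(start, dtype=float)
        self.end = np.asarray(end, dtype=float)
        self.num_waypoints = num_waypoints

    def eval(self, step):
        # Cartesian position at waypoint index `step`
        t = step / (self.num_waypoints - 1)
        return (1 - t) * self.start + t * self.end

    def deriv(self, step):
        # constant velocity (per waypoint index) for a straight-line path
        return (self.end - self.start) / (self.num_waypoints - 1)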
Example #2
    def forward(self, prediction, truth):
        """
        Computes position distance + orientation distance (squared) between prediction and truth,
            unless the loss is configured as position-only (self.mode == "position"), in which case only
            the position distance is computed. Assumes prediction and truth have the same shape.

        Args:
            prediction (torch.Tensor): Prediction of estimated pose (*, 7) of form (x,y,z,i,j,k,w)
                Note: Assumes UNNORMALIZED quat part
            truth (torch.Tensor): Ground truth state of pose, of shape (*, 7) of form (x,y,z,i,j,k,w)
                Note: Assumes NORMALIZED quat part with POSITIVE w value

        Returns:
            distance (torch.Tensor): (Summed) distances across all inputs. Shape = (1,)
        """

        # First, split tensor into separate parts
        predict_pos, predict_ori = torch.split(prediction, (3, 4), dim=-1)
        true_pos, true_ori = torch.split(truth, (3, 4), dim=-1)

        # Normalize the predicted ori
        quat_mag = torch.sqrt(
            torch.sum(predict_ori.pow(2), dim=-1, keepdim=True))  # Shape (*,1)
        predict_ori = predict_ori / quat_mag

        # Compute position loss based on distance metric specified
        if self.distance_metric == "l2":
            # Compute pos error (cartesian distance)
            summed_pos_sq = torch.sum((predict_pos - true_pos).pow(2),
                                      dim=-1,
                                      keepdim=False)  # Shape (*)
            pos_dist = torch.sum(torch.sqrt(summed_pos_sq + self.epsilon))
        elif self.distance_metric == "l1" or self.distance_metric == "linf":
            # Compute absolute values first
            dist_pos_abs = torch.abs(predict_pos - true_pos)
            if self.distance_metric == "l1":
                pos_dist = torch.sum(dist_pos_abs)
            else:  # linf case
                pos_dist = torch.sum(torch.max(dist_pos_abs, dim=-1)[0])
        else:  # combined case
            # Compute l2 loss
            summed_pos_sq = torch.sum((predict_pos - true_pos).pow(2),
                                      dim=-1,
                                      keepdim=False)  # Shape (*)
            l2_pos_dist = torch.sum(torch.sqrt(summed_pos_sq + self.epsilon))
            # Compute l1 and linf losses
            dist_pos_abs = torch.abs(predict_pos - true_pos)
            l1_pos_dist = torch.sum(dist_pos_abs)
            linf_pos_dist = torch.sum(torch.max(dist_pos_abs, dim=-1)[0])
            # Combine distances
            pos_dist = l2_pos_dist + l1_pos_dist + linf_pos_dist

        # Additionally compute orientation error if requested
        if self.mode == "val":
            # We directly compute the summed absolute angle error in radians
            # Note: we don't care about torch-compatible computations since we're not backpropping in this case
            # Convert torch tensors to numpy arrays
            predict_ori = predict_ori.view(-1, 4).cpu().detach().numpy()
            true_ori = true_ori.view(-1, 4).cpu().detach().numpy()
            # Create var to hold summed orientation error
            summed_angle_err = 0
            # Loop over all quats and compute angle error
            for i in range(predict_ori.shape[0]):
                _, angle = quat2axisangle(
                    quat_distance(predict_ori[i], true_ori[i]))
                # Confine the error to be between +/- pi
                if angle < -np.pi:
                    angle += 2 * np.pi
                elif angle > np.pi:
                    angle -= 2 * np.pi
                summed_angle_err += abs(angle)
            # Immediately return the position and orientation errors
            return pos_dist.cpu().detach().numpy(), summed_angle_err
        elif self.mode == "pose":
            # Compute ori error (quat distance), roughly d(q1,q2) = 1 - <q1,q2>^2 where <> is the inner product
            # see https://math.stackexchange.com/questions/90081/quaternion-distance
            inner_product = torch.sum(predict_ori * true_ori,
                                      dim=-1,
                                      keepdim=False)  # Shape (*)
            ori_dist_squared = torch.sum(1 - inner_product.pow(2))
            # We also add in penalty if w component is negative
            # see https://dspace.mit.edu/bitstream/handle/1721.1/119699/1078151125-MIT.pdf?sequence=1
            w_penalty = torch.clamp(-predict_ori[..., -1], min=0)
            ori_dist_squared += torch.sum(w_penalty)
        else:  # case "position"
            # No orientation loss used
            ori_dist_squared = 0

        # Return summed dist
        return self.scale_factor * (pos_dist + self.alpha * ori_dist_squared)
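
# A hedged usage sketch for the loss above. The constructor name and arguments are
# assumptions inferred from the attributes referenced in forward() (distance_metric,
# epsilon, mode, scale_factor, alpha); the real class may differ.
import torch

# loss_fn = PoseLoss(distance_metric="l2", epsilon=1e-8, mode="pose",
#                    scale_factor=1.0, alpha=0.1)
prediction = torch.randn(32, 7, requires_grad=True)  # unnormalized quat part
truth = torch.randn(32, 7)
truth[:, 3:] = torch.nn.functional.normalize(truth[:, 3:], dim=-1)          # normalize quat part
truth[:, 3:] = torch.where(truth[:, 6:7] < 0, -truth[:, 3:], truth[:, 3:])  # enforce positive w
# loss = loss_fn(prediction, truth)
# loss.backward()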
Example #3
def input2action(device, robot, active_arm="right", env_configuration=None):
    """
    Converts an input from an active device into a valid action sequence that can be fed into an env.step() call

    If a reset is triggered from the device, immediately returns None. Else, returns the appropriate action

    Args:
        device (Device): A device from which user inputs can be converted into actions. Can be either a Spacemouse or
            Keyboard device class

        robot (Robot): Which robot we're controlling

        active_arm (str): Only applicable for multi-armed setups (e.g.: multi-arm environments or bimanual robots).
            Allows inputs to be converted correctly if the control type (e.g.: IK) is dependent on arm choice.
            Choices are {right, left}

        env_configuration (str or None): Only applicable for multi-armed environments. Allows inputs to be converted
            correctly if the control type (e.g.: IK) is dependent on the environment setup. Options are:
            {bimanual, single-arm-parallel, single-arm-opposed}

    Returns:
        2-tuple:

            - (None or np.array): Action interpreted from @device, including any gripper action(s). None if we get a
                reset signal from the device
            - (None or int): 1 if the gripper should close, -1 if it should open. None if we get a reset signal from
                the device

    """
    state = device.get_controller_state()
    # Note: Devices output rotation with x and z flipped to account for robots starting with gripper facing down
    #       Also note that the outputted rotation is an absolute rotation, while outputted dpos is delta pos
    #       Raw delta rotations from neutral user input is captured in raw_drotation (roll, pitch, yaw)
    dpos, rotation, raw_drotation, grasp, reset = (
        state["dpos"],
        state["rotation"],
        state["raw_drotation"],
        state["grasp"],
        state["reset"],
    )

    # If we're resetting, immediately return None
    if reset:
        return None, None

    # Get controller reference
    controller = robot.controller if not isinstance(
        robot, Bimanual) else robot.controller[active_arm]
    gripper_dof = robot.gripper.dof if not isinstance(
        robot, Bimanual) else robot.gripper[active_arm].dof

    # First process the raw drotation
    drotation = raw_drotation[[1, 0, 2]]
    if controller.name == 'IK_POSE':
        # If this is panda, want to swap x and y axis
        if isinstance(robot.robot_model, Panda):
            drotation = drotation[[1, 0, 2]]
        else:
            # Flip x
            drotation[0] = -drotation[0]
        # Scale rotation for teleoperation (tuned for IK)
        drotation *= 10
        dpos *= 5
        # relative rotation of desired from current eef orientation
        # map to quat
        drotation = T.mat2quat(T.euler2mat(drotation))

        # If we're using a non-forward facing configuration, need to adjust relative position / orientation
        if env_configuration == "single-arm-opposed":
            # Swap x and y for pos and flip x,y signs for ori
            dpos = dpos[[1, 0, 2]]
            drotation[0] = -drotation[0]
            drotation[1] = -drotation[1]
            if active_arm == "left":
                # x pos needs to be flipped
                dpos[0] = -dpos[0]
            else:
                # y pos needs to be flipped
                dpos[1] = -dpos[1]

        # Lastly, map to axis angle form
        drotation = T.quat2axisangle(drotation)

    elif controller.name == 'OSC_POSE':
        # Flip z
        drotation[2] = -drotation[2]
        # Scale rotation for teleoperation (tuned for OSC) -- gains tuned for each device
        drotation = drotation * 1.5 if isinstance(device,
                                                  Keyboard) else drotation * 50
        dpos = dpos * 75 if isinstance(device, Keyboard) else dpos * 125
    else:
        # No other controllers currently supported
        print(
            "Error: Unsupported controller specified -- Robot must have either an IK or OSC-based controller!"
        )

    # map 0 to -1 (open) and map 1 to 1 (closed)
    grasp = 1 if grasp else -1

    # Create action based on action space of individual robot
    action = np.concatenate([dpos, drotation, [grasp] * gripper_dof])

    # Return the action and grasp
    return action, grasp
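
# Typical usage: a teleoperation loop that converts device state into env actions every
# step. This sketch follows how robosuite's demo scripts drive an environment; the exact
# device and environment setup shown here are assumptions.
#
#   device.start_control()
#   obs = env.reset()
#   while True:
#       action, grasp = input2action(device=device, robot=env.robots[0],
#                                    active_arm="right",
#                                    env_configuration="single-arm-opposed")
#       if action is None:  # device signaled a reset
#           break
#       obs, reward, done, info = env.step(action)
#       env.render()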
    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982  # the z-coordinate of the table surface

        object_axang = T.quat2axisangle(object_quat)
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)
        # print('object axang to use ' + str(object_axang_touse))

        if self.gripper_reoriented == 0:
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        # gripper_quat_inv = T.quat_inverse(gripper_quat)
        # changing_wf = T.quat_multiply(gripper_quat_inv,self.gripper_init_quat)
        # changing_wf_axang = T.quat2axisangle(changing_wf)

        # print('changing wf axis ' +str(changing_wf_axang))

        if not self.object_below_hand or self.gripper_reoriented < 5:
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            # print(self.nut_p)
            if not self.object_below_hand:
                action = 20 * (object_pos[:2] - gripper_pos[:2])
            else:
                action = [0, 0]

            action = 20 * (object_pos[:2] - gripper_pos[:2])

            # frac = 0.2 # Rate @ which to rotate gripper about z.
            # ang_goal = frac*self.nut_p # Nut p is the nut's random intial pertubation about z.

            # if self.gripper_reoriented == 0:
            #     self.gripper_init_quat = gripper_quat
            # if self.gripper_reoriented < 5: # Gripper should be aligned with nut after 5 action steps
            #     action_angle= [0,0,ang_goal]

            #     #print('object ' + str(object_axang))
            #     #print('gripper ' + str(gripper_axang))

            #     init_inv = T.quat_inverse(self.gripper_init_quat)
            #     init_inv_mat = T.quat2mat(init_inv)

            #     rel = T.quat_multiply(gripper_quat,init_inv)
            #     rel_axang = T.quat2axisangle(rel)
            #     #print('rel_axang ' +str(rel_axang))

            #     rel_axang_WF = np.matmul(init_inv_mat,rel_axang)

            #     #print('rel_axang_WF ' +str(rel_axang_WF))

            #     if take_action:
            #         self.gripper_reoriented+=1
            # else: # After 5 action steps, don't rotate gripper any more
            #     action_angle=[0,0,0]

            action_angle = 0.2 * (object_axang_touse - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]
            #action_angle = [0,0,0]
            #print('action angle ' +str(action_angle))

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01:
                if take_action:
                    self.object_below_hand = True
                    #self.gripper_reoriented = 5

        elif not self.object_in_hand:  # Close gripper
            action = [0, 0, -1, 0, 0, 0, -1]
            if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]
                if take_action:
                    self.object_in_hand = True

        else:  # Move gripper up and toward goal position
            action = [0, 0, 1, 0, 0, 0, 1]
            if object_pos[2] - z_table > 0.1:
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225:
                    action = [0, 0, 0, 0, 0, 0,
                              -1]  # Drop nut once it's close enough to the peg

        action = np.clip(action, -1, 1)
        return action
    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982  # the z-coordinate of the table surface

        object_axang = T.quat2axisangle(object_quat)
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)

        if self.gripper_reoriented == 0:
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        # reorient the gripper to match the nut faces and move above the nut
        if not self.object_below_hand or self.gripper_reoriented < 5:
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            if not self.object_below_hand:
                action = 20 * (object_pos[:2] - gripper_pos[:2])
            else:
                action = [0, 0]

            action = 20 * (object_pos[:2] - gripper_pos[:2])

            action_angle = 0.2 * (object_axang_touse - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01:
                if take_action:
                    self.object_below_hand = True
        # close the gripper and pick the nut
        elif not self.object_in_hand:  # Close gripper
            action = [0, 0, -1, 0, 0, 0, -1]
            if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]
                if take_action:
                    self.object_in_hand = True

        else:  # Move gripper up and toward goal position
            action = [0, 0, 1, 0, 0, 0, 1]
            if object_pos[2] - z_table > 0.1:
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225:
                    action = [0, 0, 0, 0, 0, 0,
                              -1]  # Drop nut once it's close enough to the peg

        action = np.clip(action, -1, 1)
        return action
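
    # A hedged rollout sketch for the scripted controller above. It assumes a goal-style
    # environment whose observations carry the 'observation', 'desired_goal', and
    # 'achieved_goal' keys read at the top of controller_action; the names here are
    # illustrative, not the original API.
    #
    #   obs = env.reset()
    #   for _ in range(horizon):
    #       action = policy.controller_action(obs, take_action=True)
    #       obs, reward, done, info = env.step(action)
    #       if done:
    #           break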
    def __init__(self, *args, **kwargs):
        options = {}
        # Absolute pose control
        controller_name = 'OSC_POSE'
        options["controller_configs"] = suite.load_controller_config(
            default_controller=controller_name)

        self.cameraName = "frontview"
        skip_frame = 2
        self.peg_env = suite.make(
            "TwoArmPegInHole",
            robots=["IIWA", "IIWA"],
            **options,
            has_renderer=False,
            ignore_done=True,
            use_camera_obs=True,
            use_object_obs=True,
            camera_names=self.cameraName,
            camera_heights=512,
            camera_widths=512,
        )

        # Tolerances on position and rotation of peg relative to hole
        posTol = 0.005
        rotTol = 0.05

        observation = self.peg_env.reset()

        # observation["peg_to_hole"] is the vector FROM the hole TO the peg
        peg_pos0 = observation["hole_pos"] + observation["peg_to_hole"]
        self.peg_pos0 = peg_pos0
        self.hole_pos0 = observation["hole_pos"]

        # Positions of robots 0 and 1 rel peg and hole, in peg and hole frames.  Constant forever.
        pRob0RelPeg = np.matmul(
            T.quat2mat(T.quat_inverse(observation["peg_quat"])),
            observation["robot0_eef_pos"] - (peg_pos0))
        pRob1RelHole = np.matmul(
            T.quat2mat(T.quat_inverse(observation["hole_quat"])),
            observation["robot1_eef_pos"] - observation["hole_pos"])
        qRob0RelPeg = T.quat_multiply(
            T.quat_inverse(observation["robot0_eef_quat"]),
            observation["peg_quat"])
        qRob1RelHole = T.quat_multiply(
            T.quat_inverse(observation["robot1_eef_quat"]),
            observation["hole_quat"])

        # Store geometric constants
        model = self.peg_env.model.get_model()
        pegDim = model.geom_size[15]
        rPeg = pegDim[0]
        lPeg = pegDim[2]
        distSlack = 2 * lPeg

        # Set up optimization problem.
        # Define 3 keyframes: peg higher than hole, peg centered above hole with hole facing up, and peg in hole.
        # One constraint for each keyframe, and one constraint for unit quaternions
        nonlinear_constraint_1 = NonlinearConstraint(self.cons_1,
                                                     distSlack,
                                                     np.inf,
                                                     jac='2-point',
                                                     hess=BFGS())
        nonlinear_constraint_2 = NonlinearConstraint(
            self.cons_2,
            np.array([-posTol, -posTol, distSlack, -rotTol]),
            np.array([posTol, posTol, np.inf, rotTol]),
            jac='2-point',
            hess=BFGS())
        nonlinear_constraint_3 = NonlinearConstraint(
            self.cons_3,
            np.array([-posTol, -posTol, distSlack]),
            np.array([posTol, posTol, np.inf]),
            jac='2-point',
            hess=BFGS())
        nonlinear_constraint_4 = NonlinearConstraint(self.cons_unit_quat,
                                                     np.array([1, 1, 1, 1]),
                                                     np.array([1, 1, 1, 1]),
                                                     jac='2-point',
                                                     hess=BFGS())

        # Initial guess for optimizer
        x0 = np.tile(
            np.hstack((peg_pos0, observation["hole_pos"],
                       observation["peg_quat"], observation["hole_quat"])), 3)
        # Cut the quats out of the last chunk (keyframe 3 reuses the keyframe 2 orientations)
        x0 = x0[0:34]

        # Solve optimization problem
        res = minimize(self.traj_obj,
                       x0,
                       method='trust-constr',
                       jac='2-point',
                       hess=BFGS(),
                       constraints=[
                           nonlinear_constraint_1, nonlinear_constraint_2,
                           nonlinear_constraint_3, nonlinear_constraint_4
                       ],
                       options={'verbose': 1})
        # 'xtol': 1e-6,
        x = res.x
        # Extract optimization results
        # x = [p_peg_1, p_hole_1, q_peg_1, q_hole_1, p_peg_2, ... q_peg_3, q_hole_3]
        ind_offset_1 = 14
        ind_offset_2 = 28

        p_peg_1 = x[0:3]
        p_hole_1 = x[3:6]
        p_peg_2 = x[ind_offset_1 + 0:ind_offset_1 + 3]
        p_hole_2 = x[ind_offset_1 + 3:ind_offset_1 + 6]
        p_peg_3 = x[ind_offset_2 + 0:ind_offset_2 + 3]
        p_hole_3 = x[ind_offset_2 + 3:ind_offset_2 + 6]

        q_peg_1 = x[6:10]
        q_hole_1 = x[10:14]
        q_peg_2 = x[ind_offset_1 + 6:ind_offset_1 + 10]
        q_hole_2 = x[ind_offset_1 + 10:ind_offset_1 + 14]

        # Use the same orientations as in pose 2
        q_peg_3 = q_peg_2
        q_hole_3 = q_hole_2

        # Robot rel world = peg rel world + robot rel peg
        # Robot rel peg in world frame = (q world frame rel peg frame) * (robot rel peg in peg frame)
        q_rob0_1 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_1)))
        q_rob1_1 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_1)))
        q_rob0_2 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_2)))
        q_rob1_2 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_2)))
        q_rob0_3 = T.quat_inverse(
            T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_3)))
        q_rob1_3 = T.quat_inverse(
            T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_3)))

        self.p_rob0_1 = p_peg_1 + np.matmul(T.quat2mat(q_peg_1), pRob0RelPeg)
        self.p_rob1_1 = p_hole_1 + np.matmul(T.quat2mat(q_hole_1),
                                             pRob1RelHole)
        self.p_rob0_2 = p_peg_2 + np.matmul(T.quat2mat(q_peg_2), pRob0RelPeg)
        self.p_rob1_2 = p_hole_2 + np.matmul(T.quat2mat(q_hole_2),
                                             pRob1RelHole)
        self.p_rob0_3 = p_peg_3 + np.matmul(T.quat2mat(q_peg_3), pRob0RelPeg)
        self.p_rob1_3 = p_hole_3 + np.matmul(T.quat2mat(q_hole_3),
                                             pRob1RelHole)

        self.axang_rob0_1 = T.quat2axisangle(q_rob0_1)
        self.axang_rob1_1 = T.quat2axisangle(q_rob1_1)
        self.axang_rob0_2 = T.quat2axisangle(q_rob0_2)
        self.axang_rob1_2 = T.quat2axisangle(q_rob1_2)
        self.axang_rob0_3 = T.quat2axisangle(q_rob0_3)
        self.axang_rob1_3 = T.quat2axisangle(q_rob1_3)

        self.max_episode_steps = 200
        # Gains for rotation and position error compensation rates
        self.kpp = 4
        self.kpr = 0.1

        # Initial pose Information
        self.rob0quat0 = observation["robot0_eef_quat"]
        self.rob1quat0 = observation["robot1_eef_quat"]
        self.rob0pos0 = observation["robot0_eef_pos"]
        self.rob1pos0 = observation["robot1_eef_pos"]
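
    # self.cons_unit_quat is referenced in the constraints above but not shown. Given
    # identical lower and upper bounds of [1, 1, 1, 1], it plausibly returns the norms of
    # the four quaternions in the decision vector (q_peg_1, q_hole_1, q_peg_2, q_hole_2).
    # A hypothetical sketch consistent with the x layout used above, not necessarily the
    # author's actual implementation:
    def cons_unit_quat(self, x):
        quats = [x[6:10], x[10:14], x[20:24], x[24:28]]
        return np.array([np.linalg.norm(q) for q in quats])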
Example #7
    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            object_placements = self.placement_initializer.sample()

            # Loop through all objects and reset their positions
            for obj_pos, _, obj in object_placements.values():
                self.sim.data.set_joint_qpos(
                    obj.joints[0],
                    np.concatenate(
                        [np.array(obj_pos),
                         np.array([0.5, 0.5, -0.5, -0.5])]))
                self.sim.forward()  # update sim states

        # indicates whether the probe has been in contact with the torso
        self.has_touched_torso = False

        # initial position of end-effector
        self.ee_initial_pos = self._eef_xpos

        # create trajectory
        self.trajectory = self.get_trajectory()

        # initialize trajectory step
        self.initial_traj_step = np.random.default_rng().uniform(
            low=0, high=self.num_waypoints - 1)
        self.traj_step = self.initial_traj_step  # step at which to evaluate trajectory. Must be in interval [0, num_waypoints - 1]

        # set first trajectory point
        self.traj_pt = self.trajectory.eval(self.traj_step)
        self.traj_pt_vel = self.trajectory.deriv(self.traj_step)

        # give controller access to robot (and its measurements)
        if self.robots[0].controller.name == "HMFC":
            self.robots[0].controller.set_robot(self.robots[0])

        # initialize controller's trajectory
        self.robots[0].controller.traj_pos = self.traj_pt
        self.robots[0].controller.traj_ori = T.quat2axisangle(self.goal_quat)

        # get initial joint positions for robot
        init_qpos = self._get_initial_qpos()

        # override initial robot joint positions
        self.robots[0].set_robot_joint_positions(init_qpos)

        # update controller with new initial joints
        self.robots[0].controller.update_initial_joints(init_qpos)

        # initialize previous contact force measurement
        self.prev_z_contact_force = 0

        # initialize derivative of contact force
        self.der_z_contact_force = 0

        # initialize running mean of velocity
        self.vel_running_mean = np.linalg.norm(self.robots[0]._hand_vel)

        # initialize running mean of contact force
        self.z_contact_force_running_mean = self.sim.data.cfrc_ext[
            self.probe_id][-1]

        # initialize data collection
        if self.save_data:
            # simulation data
            self.data_ee_pos = np.zeros((self.horizon, 3))
            self.data_ee_goal_pos = np.zeros((self.horizon, 3))
            self.data_ee_ori_diff = np.zeros(self.horizon)
            self.data_ee_vel = np.zeros((self.horizon, 3))
            self.data_ee_goal_vel = np.zeros(self.horizon)
            self.data_ee_running_mean_vel = np.zeros(self.horizon)
            self.data_ee_quat = np.zeros((self.horizon, 4))  # (x,y,z,w)
            self.data_ee_goal_quat = np.zeros((self.horizon, 4))  # (x,y,z,w)
            self.data_ee_diff_quat = np.zeros(self.horizon)
            self.data_ee_z_contact_force = np.zeros(self.horizon)
            self.data_ee_z_goal_contact_force = np.zeros(self.horizon)
            self.data_ee_z_running_mean_contact_force = np.zeros(self.horizon)
            self.data_ee_z_derivative_contact_force = np.zeros(self.horizon)
            self.data_ee_z_goal_derivative_contact_force = np.zeros(self.horizon)
            self.data_is_contact = np.zeros(self.horizon)
            self.data_q_pos = np.zeros((self.horizon, self.robots[0].dof))
            self.data_q_torques = np.zeros((self.horizon, self.robots[0].dof))
            self.data_time = np.zeros(self.horizon)

            # reward data
            self.data_pos_reward = np.zeros(self.horizon)
            self.data_ori_reward = np.zeros(self.horizon)
            self.data_vel_reward = np.zeros(self.horizon)
            self.data_force_reward = np.zeros(self.horizon)
            self.data_der_force_reward = np.zeros(self.horizon)

            # policy/controller data
            self.data_action = np.zeros((self.horizon, self.robots[0].action_dim))
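
        # The arrays above are typically filled once per control step and written out at the
        # end of the episode. A hedged sketch of that pattern (the hook name, indexing, and
        # file path are assumptions, not part of the original class):
        #
        #   def _post_action(self, action):
        #       ret = super()._post_action(action)
        #       t = self.timestep - 1
        #       self.data_ee_pos[t] = self._eef_xpos
        #       self.data_time[t] = self.sim.data.time
        #       if self.timestep >= self.horizon:
        #           np.savez("sim_data.npz", ee_pos=self.data_ee_pos, time=self.data_time)
        #       return ret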