def _get_state_obs(self):
        '''
        Compute the observation at the current state.
        '''

        # Return superclass observation.
        if self.observe_joints:
            obs = super(PegInsertionEnv, self)._get_state_obs()
        else:
            obs = np.zeros(0)

        # Return superclass observation stacked with the ft observation.
        if not self.initialized:
            ft_obs = np.zeros(6)
        else:
            # Compute F/T sensor data
            ft_obs = self.sim.data.sensordata

            obs = obs / self.obs_scaling

        if self.use_ft_sensor:
            obs = np.concatenate([obs, ft_obs])

        # End effector position
        pos, rot = forwardKinSite(self.sim,
                                  ['peg_tip', 'hole_base', 'hole_top'])

        if self.use_rel_pos_err:
            pos_obs = pos[1] - pos[0]
            quat_peg_tip = mat2Quat(rot[0])
            quat_hole_base = mat2Quat(rot[1])
            rot_obs = subQuat(quat_hole_base, quat_peg_tip).copy()
            hole_top_obs = pos[2] - pos[0]
        else:
            # TODO: we probably also want the EE position in the world
            pos_obs = pos[1].copy()
            rot_obs = mat2Quat(rot[1])
            hole_top_obs = pos[2]

        # End effector velocity
        peg_tip_id = self.model.site_name2id('peg_tip')
        jacp, jacr = forwardKinJacobianSite(self.sim,
                                            peg_tip_id,
                                            recompute=False)
        peg_tip_lin_vel = jacp.dot(self.sim.data.qvel)
        peg_tip_rot_vel = jacr.dot(self.sim.data.qvel)

        # Transform into end effector frame
        if self.in_peg_frame:
            pos_obs = rot[0].T.dot(pos_obs)
            hole_top_obs = rot[0].T.dot(hole_top_obs)
            peg_tip_lin_vel = rot[0].T.dot(peg_tip_lin_vel)
            peg_tip_rot_vel = rot[0].T.dot(peg_tip_rot_vel)

        obs = np.concatenate([
            obs, pos_obs, rot_obs, peg_tip_lin_vel, peg_tip_rot_vel,
            hole_top_obs
        ])

        return obs
    def get_torque(self):
        '''
        Update the impedance control setpoint and compute the torque.
        '''
        # Compute the pose difference.
        pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False)
        quat = mat2Quat(mat)
        dx = self.pos_set - pos
        dr = subQuat(self.quat_set, quat)
        dframe = np.concatenate((dx,dr))

        # Compute generalized forces from a virtual external force.
        jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False)
        J = np.vstack((jpos, jrot))
        external_force = J.T.dot(self.stiffness*dframe) # virtual force on the end effector

        # Cancel other dynamics and add virtual damping using inverse dynamics.
        acc_des = -self.damping*self.sim.data.qvel
        self.sim.data.qacc[:] = acc_des
        mujoco_py.functions.mj_inverse(self.model, self.sim.data)
        id_torque = self.sim.data.qfrc_inverse[:]
        
        generalized_force = id_torque + external_force
        if self.controlled_joints is None:
            torque = generalized_force
        else:
            torque = generalized_force[self.controlled_joints]
        
        return torque
    def _get_reward(self, state, action):
        '''
        Compute single step reward.
        '''
        # compute position and rotation error
        pos, rot = forwardKinSite(self.sim, ['peg_tip', 'hole_base'], recompute=False)
        pos_err = pos[0] - pos[1]
        dist = np.sqrt(pos_err.dot(pos_err))
        peg_quat = mat2Quat(rot[0])
        hole_quat = mat2Quat(rot[1])
        rot_err = subQuat(peg_quat, hole_quat)

        pose_err = self.sim.data.qpos - self.good_states[0]
        
        peg_tip_id = self.model.site_name2id('peg_tip')
        jacp, jacv = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False)
        peg_tip_vel = jacp.dot(self.data.qvel[:])
        # print("reward_dist: {}".format(dist))
        
        # quadratic cost on the error and action
        # rotate the cost terms to align with the hole
        Q_pos = rotate_cost_by_matrix(self.Q_pos, rot[1].T)
        # Q_vel = rotate_cost_by_matrix(self.Q_vel,rot[1].T)
        Q_rot = self.Q_rot

        reward_info = dict()
        reward = 0.

        # reward_info['quaternion_reward'] = -rot_err.dot(Q_rot).dot(rot_err)
        if self.quadratic_rot_cost:
            reward_info['quadratic_orientation_reward'] = -rot_err.dot(Q_rot).dot(rot_err)
            reward += reward_info['quadratic_orientation_reward']

        if self.quadratic_cost:
            reward_info['quadratic_position_reward'] = -pos_err.dot(Q_pos).dot(pos_err)
            reward += reward_info['quadratic_position_reward']

        if self.linear_cost:
            reward_info['linear_position_reward'] = -np.sqrt(pos_err.dot(Q_pos).dot(pos_err))
            reward += reward_info['linear_position_reward']

        if self.logarithmic_cost:
            rew_scale = 2
            eps = 10.0**(-rew_scale)
            zero_crossing = 0.05
            reward_info['logarithmic_position_reward'] = -np.log10(np.sqrt(pos_err.dot(Q_pos).dot(pos_err))/zero_crossing*(1-eps) + eps)
            reward += reward_info['logarithmic_position_reward']

        if self.sparse_cost:
            reward_info['sparse_position_reward'] = 10.0 if np.sqrt(pos_err.dot(pos_err)) < 1e-2 else 0
            reward += reward_info['sparse_position_reward']

        if self.regularize_pose:
            reward_info['pose_regularizer_reward'] = -pose_err.dot(self.Q_pose_reg).dot(pose_err)
            reward += reward_info['pose_regularizer_reward']
        
        # reward_info['velocity_reward'] = -np.sqrt(peg_tip_vel.dot(Q_vel).dot(peg_tip_vel)) 
        # reward += reward_info['velocity_reward']

        return reward, reward_info
    def get_torque(self):
        '''
        Update the impedance control setpoint and compute the torque.
        '''
        # Compute the pose difference.
        pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False)
        quat = mat2Quat(mat)
        dx = self.pos_set - pos
        dr = subQuat(self.quat_set, quat) # Original
        dframe = np.concatenate((dx,dr))

        # Compute generalized forces from a virtual external force.
        jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False)
        J = np.vstack((jpos[:,self.sim_qvel_idx], jrot[:,self.sim_qvel_idx]))
        cartesian_acc_des = self.stiffness*dframe - self.damping*J.dot(self.sim.data.qvel[self.sim_qvel_idx])
        impedance_acc_des = J.T.dot(np.linalg.solve(J.dot(J.T) + 1e-6*np.eye(6), cartesian_acc_des))

        # Add stiffness and damping in the null space of the the Jacobian
        projection_matrix = J.T.dot(np.linalg.solve(J.dot(J.T), J))
        projection_matrix = np.eye(projection_matrix.shape[0]) - projection_matrix
        null_space_control = -self.null_space_damping*self.sim.data.qvel[self.sim_qvel_idx]
        null_space_control += -self.null_space_stiffness*(self.sim.data.qpos[self.sim_qpos_idx] - self.nominal_qpos)
        impedance_acc_des += projection_matrix.dot(null_space_control)

        # Compute torques via inverse dynamics.
        acc_des = np.zeros(self.sim.model.nv)
        acc_des[self.sim_qvel_idx] = impedance_acc_des
        self.sim.data.qacc[:] = acc_des
        mujoco_py.functions.mj_inverse(self.model, self.sim.data)
        id_torque = self.sim.data.qfrc_inverse[self.sim_actuators_idx].copy()
        
        return id_torque
Beispiel #5
0
    def _get_state_obs(self):
        '''
        Compute the observation at the current state.
        '''
        if not self.initialized:
            # 14 positions and 13 velocities
            obs = np.zeros(27)
            obs[10] = 1.
        else:
            # Return superclass observation.
            obs = super(PushingEnv, self)._get_state_obs()

        if not self.initialized:
            ee_lin_vel_obs = np.zeros(3)
            ee_rot_vel_obs = np.zeros(3)
        else:
            peg_tip_id = self.model.site_name2id('peg_tip')
            jacp, jacr = forwardKinJacobianSite(self.sim,
                                                peg_tip_id,
                                                recompute=False)
            ee_lin_vel_obs = jacp.dot(self.sim.data.qvel)
            ee_rot_vel_obs = jacr.dot(self.sim.data.qvel)

        obs = np.concatenate([ee_lin_vel_obs, ee_rot_vel_obs])

        # Return superclass observation stacked with the ft observation.
        if not self.initialized:
            ft_obs = np.zeros(6)
        else:
            # Compute F/T sensor data
            ft_obs = self.sim.data.sensordata
            obs = obs / self.obs_scaling

        if self.use_ft_sensor:
            obs = np.concatenate([obs, ft_obs])

        return obs