def _get_site_pose(self, site_name, no_rot=False): if no_rot: quat = np.zeros(4) else: # this is very inefficient, avoid computation when possible quat = rotations.mat2quat(self.sim.data.get_site_xmat(site_name)) return np.r_[self.sim.data.get_site_xpos(site_name), quat]
def get_target_quat(self, pad=True) -> np.ndarray:
    """
    Get target rotation in quaternion for all objects.

    :param pad: forwarded to ``_get_target_obs`` (pads the per-object array).
    :return: normalized quaternions for each target body.
    """
    # NOTE(review): the original mixed `rotation.quat_normalize` with
    # `rotations.mat2quat`.  `quat_normalize` lives in the robogym-style
    # `rotation` utility module (which this block already uses, alongside
    # `self.mj_sim`), so use that module consistently for both calls.
    return self._get_target_obs(
        lambda n: rotation.quat_normalize(
            rotation.mat2quat(self.mj_sim.data.get_body_xmat(n))),
        pad=pad,
    )
def _get_achieved_goal(self):
    """
    Cube center position (3), cube center rotation (4)
    """
    # Pose of the cube-center site: xyz position plus orientation quaternion.
    center_pos = self.sim.data.get_site_xpos('rubik:cube_center')
    center_quat = rotations.mat2quat(
        self.sim.data.get_site_xmat('rubik:cube_center'))
    goal = np.r_[center_pos, center_quat]
    assert goal.shape == (7,)
    return goal
def _set_action(self, action):
    """Apply a 4-DOF action: [dx, dy, dz, d_wrist_angle].

    The position delta drives the mocap body; the wrist-angle delta is turned
    into an absolute mocap quaternion.  From the second call onward
    (``self.counter >= 2``) a scripted follow-up motion is executed.
    """
    assert action.shape == (4, )
    self.counter += 1
    action = action.copy()  # ensure that we don't change the action outside of this scope
    pos_ctrl = action[:3]
    # rotation
    # Current wrist angle: rotate the local +Y axis into the world frame and
    # measure its angle in the XY plane, normalised into (-pi, pi].
    grip_mat = rotations.quat2mat(self.sim.data.mocap_quat)
    grip_v = np.squeeze(np.matmul(grip_mat, np.array([0, 1, 0])))
    grip_radius = (math.atan2(grip_v[0], grip_v[1]) + 2 * math.pi) % (2 * math.pi)
    if grip_radius > math.pi:
        grip_radius = (grip_radius - 2 * math.pi)
    angle_ctrl = grip_radius + action[3]  # absolute target wrist angle
    # Rotation by angle_ctrl about X, composed with a fixed axis permutation.
    rot_mat = np.array([[1, 0, 0],
                        [0, math.cos(angle_ctrl), -math.sin(angle_ctrl)],
                        [0, math.sin(angle_ctrl), math.cos(angle_ctrl)]])
    axis_mat = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]])
    rot_ctrl = rotations.mat2quat(np.matmul(axis_mat, rot_mat))
    pos_ctrl *= 0.05  # limit maximum change in position
    gripper_ctrl = np.array([0, 0])
    assert gripper_ctrl.shape == (2, )
    if self.block_gripper:
        gripper_ctrl = np.zeros_like(gripper_ctrl)
    action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
    # Apply action to simulation.
    # utils.ctrl_set_action(self.sim, action)
    utils.mocap_set_action(self.sim, action)
    if self.counter >= 2:
        # Let the sim settle for a few steps, then nudge the mocap target
        # +2 cm along Y while keeping the wrist rotation just computed.
        # NOTE(review): nesting reconstructed from a collapsed source —
        # confirm that only `self.sim.step()` belongs inside the loop.
        for i in range(10):
            self.sim.step()
        pos_ctrl = np.array([0.0, 0.02, 0.0])
        gripper_ctrl = np.array([0, 0])
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
        self.sim.step()
def _get_obs(self):
    """
    Collect end-effector observations: position, orientation quaternion,
    and (time-scaled) linear and angular velocities.  A natural place to
    also grab RGB / depth images later.
    """
    site = 'robot0:grip'
    dt = self.sim.nsubsteps * self.sim.model.opt.timestep
    pos = self.sim.data.get_site_xpos(site)
    quat = rotations.mat2quat(self.sim.data.get_site_xmat(site))
    # Linear and angular velocity, scaled by dt to remove time dependency.
    velp = self.sim.data.get_site_xvelp(site) * dt
    velr = self.sim.data.get_site_xvelr(site) * dt
    observation = np.r_[pos, quat, velp, velr]
    return {
        'observation': observation.copy(),
        'achieved_goal': pos.copy(),
        'desired_goal': self.goal.copy()
    }
def predict(self, obs, **kwargs):
    """Scripted two-arm pick controller: compute a joint-space action ``u``.

    Runs one IK + P-control step per arm, then advances a 4-phase state
    machine: 0 = approach from above, 1 = descend, 2 = close grippers,
    3 = move to target poses.  Gripper commands live at u[7] (left) and
    u[15] (right); arm commands at u[:7] and u[8:15].
    """
    u = np.zeros(self._env.action_space.shape)
    new_goal = obs['desired_goal']
    # Re-initialise internal state whenever the desired goal changes.
    if self._goal is None or np.any(self._goal != new_goal):
        self._reset(new_goal)
    # TFDebugger.reset()
    # TFDebugger.step(self._raw_env.viewer)
    # Current pose (pos + quat) of each gripper-center site.
    curr_grp_poses = {
        a: np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                 rotations.mat2quat(
                     self._raw_env.sim.data.get_site_xmat(f'gripper_{a}_center')), ]
        for a in ('l', 'r')
    }
    pos_errors = []
    for arm in ('l', 'r'):
        if arm == 'l':
            solver = self._ikp_solver_l
            jac_solver = self._jac_solver_l
            joint_lims = self._raw_env.arm_l_joint_lims
            # NOTE(review): observation slices assume a fixed layout —
            # [32:35] achieved pos, [44:47] object-relative pos, [:7] left
            # arm joints.  Confirm against the environment's observation spec.
            achieved_pos = obs['observation'][32:35]
            obj_l_rel_pos = obs['observation'][44:47]
            obj_pos = obj_l_rel_pos + achieved_pos
            # Fixed approach orientation (euler), converted to a quaternion.
            target_pose = np.r_[obj_pos, np.pi - 0.9, 0.01, 0.01]
            target_pose = np.r_[target_pose[:3],
                                rotations.euler2quat(target_pose[3:])]
            if self._phase == 3:
                # Phase 3: track the left target site instead of the object.
                target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                    'target0:left').copy()
                q = rotations.mat2quat(
                    self._raw_env.sim.data.get_site_xmat('target0:left'))
                target_pose[3:] = rotations.quat_mul(target_pose[3:], q)
            prev_err = self._prev_err_l
            curr_q = obs['observation'][:7]
            u_masked = u[:7]  # view into u: writes propagate back
        elif arm == 'r':
            solver = self._ikp_solver_r
            jac_solver = self._jac_solver_r
            joint_lims = self._raw_env.arm_r_joint_lims
            achieved_pos = obs['observation'][35:38]
            obj_r_rel_pos = obs['observation'][47:50]
            obj_pos = obj_r_rel_pos + achieved_pos
            target_pose = np.r_[obj_pos, -np.pi + 0.9, 0.01, np.pi]
            target_pose = np.r_[target_pose[:3],
                                rotations.euler2quat(target_pose[3:])]
            if self._phase == 3:
                target_pose[:3] = self._raw_env.sim.data.get_site_xpos(
                    'target0:right').copy()
                q = rotations.mat2quat(
                    self._raw_env.sim.data.get_site_xmat('target0:right'))
                # NOTE(review): multiplication order differs from the left
                # arm (q * pose vs pose * q) — presumably intentional mirror;
                # confirm.
                target_pose[3:] = rotations.quat_mul(q, target_pose[3:])
            prev_err = self._prev_err_r
            curr_q = obs['observation'][16:23]
            u_masked = u[8:15]
        else:
            continue
        # Phase-dependent gripper command and target-pose offsets.
        if self._phase == 0:
            u[7] = -1.0
            u[15] = -1.0
            target_pose[2] += 0.1  # hover above the object
        elif self._phase == 1:
            u[7] = -1.0
            u[15] = -1.0
            target_pose[2] -= 0.002  # creep downward
        elif self._phase == 2:
            # Gradually close the grippers over ~6 steps.
            u[7] = -1 + self._phase_steps / 3.
            u[15] = -1 + self._phase_steps / 3.
        elif self._phase == 3:
            u[7] = 1.0
            u[15] = 1.0
        if self._raw_env.viewer is not None:
            tf.render_pose(target_pose.copy(), self._raw_env.viewer)
        curr_pose = curr_grp_poses[arm].copy()
        err_pose = curr_pose - target_pose
        err_pos = np.linalg.norm(err_pose[:3])
        pos_errors.append(err_pos)
        if self.use_mocap_ctrl:
            # Mocap-based IK: stiffer gains during the initial approach.
            if self._phase < 1:
                controller_k = 3.0
            else:
                controller_k = 2.5
            err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
            target_q = self._raw_env.mocap_ik(-err_pose, arm)
        else:
            controller_k = 0.1
            err_rot = 0.0
            target_pose[:3] -= self._base_offset
            if self.use_qp_solver:
                if not self.check_joint_limits:
                    joint_lims = None
                curr_pose[:3] -= self._base_offset
                target_q = self._position_ik_qp(curr_pose, target_pose,
                                                curr_q, jac_solver, joint_lims)
            else:
                target_q = self._position_ik(target_pose, curr_q, solver)
        if target_q is not None:
            # P-control in joint space toward the IK solution.
            err_q = curr_q - target_q
            u_masked[:] = self._controller(err_q, prev_err, controller_k)
    self._phase_steps += 1
    # Phase transitions.  NOTE(review): err_rot holds the LAST arm's rotation
    # error only (right arm) — presumably good enough in practice; confirm.
    if self._phase == 0 and np.all(
            np.array(pos_errors) < 0.03) and err_rot < 0.05:
        self._phase = 1
        self._phase_steps = 0
    elif self._phase == 1 and np.all(
            np.array(pos_errors) < 0.007) and err_rot < 0.05:
        self._phase = 2
        self._phase_steps = 0
    elif self._phase == 2:
        if self._phase_steps > 6:
            self._phase = 3
            self._phase_steps = 0
            # Freeze the relative transform between grippers for phase 3.
            self._locked_l_to_r_tf = tf.get_tf(curr_grp_poses['r'],
                                               curr_grp_poses['l'])
    u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
    return u
def predict(self, obs, **kwargs):
    """Scripted two-arm grasp controller using mocap IK only.

    Places each gripper on opposite sides of the object (offset by the
    object radius) and advances a 4-phase state machine: 0 = approach,
    1 = align, 2 = dwell/settle, 3 = squeeze inward and lift offsets.
    Gripper commands are u[7]/u[15]; arm commands u[:7]/u[8:15].
    """
    u = np.zeros(self._env.action_space.shape)
    new_goal = obs['desired_goal']
    # Re-initialise internal state when the desired goal changes.
    if self._goal is None or np.any(self._goal != new_goal):
        self.reset(new_goal)
    obj_radius = self._object_size[0]
    obj_achieved_alt = obs['achieved_goal'].item()
    # Object pose (pos + quat) from a fixed observation slice.
    # NOTE(review): slice [44:51] assumes a fixed observation layout; confirm.
    obj_achieved_pose = obs['observation'][44:51]
    if np.all(obj_achieved_pose[3:] == 0):
        # Guard against an all-zero (invalid) quaternion: use identity.
        obj_achieved_pose[3] = 1.0
    # tf.render_pose(obj_achieved_pose, self._raw_env.viewer, label="O")
    # Wrist pitch scales with the achieved altitude scalar.
    grp_xrot = 0.9 + obj_achieved_alt * 2.0
    # Current pose (pos + quat) of each gripper-center site.
    curr_grp_poses = {
        a: np.r_[self._raw_env.sim.data.get_site_xpos(f'gripper_{a}_center'),
                 rotations.mat2quat(
                     self._raw_env.sim.data.get_site_xmat(f'gripper_{a}_center')), ]
        for a in ('l', 'r')
    }
    pos_errors = []
    for arm in ('l', 'r'):
        if arm == 'l':
            # Offset the grasp point by +radius along the object's Y axis.
            transf = np.r_[0., obj_radius, 0., 1., 0., 0., 0.]
            if self._phase == 3:
                # Squeeze slightly inward and lift the contact point.
                transf[1] *= 0.9
                transf[2] = 0.05
            pose = tf.apply_tf(transf, obj_achieved_pose)
            # tf.render_pose(pose, self._raw_env.viewer, label="L")
            grp_target_pos = pose[:3]
            grp_target_rot = np.r_[np.pi - grp_xrot, 0.01, 0.01]
            target_pose = np.r_[grp_target_pos,
                                rotations.euler2quat(grp_target_rot)]
            prev_err = self._prev_err_l
            curr_q = obs['observation'][:7]
            u_masked = u[:7]  # view into u: writes propagate back
        elif arm == 'r':
            # Mirror of the left arm: -radius offset, mirrored orientation.
            transf = np.r_[0., -obj_radius, 0., 1., 0., 0., 0.]
            if self._phase == 3:
                transf[1] *= 0.9
                transf[2] = 0.05
            pose = tf.apply_tf(transf, obj_achieved_pose)
            # tf.render_pose(pose, self._raw_env.viewer, label="R")
            grp_target_pos = pose[:3]
            grp_target_rot = np.r_[-np.pi + grp_xrot, 0.01, np.pi]
            target_pose = np.r_[grp_target_pos,
                                rotations.euler2quat(grp_target_rot)]
            prev_err = self._prev_err_r
            curr_q = obs['observation'][16:23]
            u_masked = u[8:15]
        else:
            continue
        # Grippers are commanded open throughout (grasping is done by
        # squeezing the arms together in phase 3, not by the fingers).
        u[7] = -1.0
        u[15] = -1.0
        # Phase-dependent target-pose offsets.
        if self._phase == 0:
            target_pose[2] += 0.1
            target_pose[1] += 0.05 * np.sign(target_pose[1])
        elif self._phase == 1:
            target_pose[2] += 0.01
            target_pose[1] += 0.05 * np.sign(target_pose[1])
        elif self._phase == 2:
            target_pose[2] += 0.01
        elif self._phase == 3:
            target_pose[2] += 0.00
        if self._raw_env.viewer is not None:
            tf.render_pose(target_pose.copy(), self._raw_env.viewer)
        curr_pose = curr_grp_poses[arm].copy()
        err_pose = curr_pose - target_pose
        err_pos = np.linalg.norm(err_pose[:3])
        pos_errors.append(err_pos)
        controller_k = 2.0
        err_rot = tf.quat_angle_diff(curr_pose[3:], target_pose[3:])
        # Mocap IK gives the joint target; P-control drives toward it.
        target_q = self._raw_env.mocap_ik(-err_pose, arm)
        err_q = curr_q - target_q
        u_masked[:] = self._controller(err_q, prev_err, controller_k)
    self._phase_steps += 1
    # Phase transitions.  NOTE(review): err_rot holds the LAST arm's
    # rotation error only (right arm) — presumably acceptable; confirm.
    if self._phase == 0 and np.all(
            np.array(pos_errors) < 0.03) and err_rot < 0.1:
        self._phase = 1
        self._phase_steps = 0
    elif self._phase == 1 and np.all(
            np.array(pos_errors) < 0.03) and err_rot < 0.1:
        self._phase = 2
        self._phase_steps = 0
    elif self._phase == 2:
        if self._phase_steps > 30:
            self._phase = 3
            self._phase_steps = 0
    u = np.clip(u, self._env.action_space.low, self._env.action_space.high)
    return u
def _get_achieved_goal(self):
    """
    Cube center position (3), cube center rotation (4), layer angle (1)
    total 8.
    """
    # Cube center position and rotation (quaternion).
    cube_qpos = self.sim.data.get_site_xpos('rubik:cube_center')
    cube_rot = rotations.mat2quat(
        self.sim.data.get_site_xmat('rubik:cube_center'))
    # Cube ball joint angles.
    # NOTE(review): shape (8,) is asserted, yet entries are fed to quat2euler
    # below — presumably an object array of 8 cubelet quaternions; confirm
    # against shadow_get_obs.
    cube_joint_pos, cube_joint_vel = shadow_get_obs(self.sim, name='rubik')
    assert cube_joint_pos.shape == (8, )
    # Each layer is defined by its four cubelets (indices into
    # cube_joint_pos, first index = cubelet whose angle is reported) and the
    # euler axis about which that layer rotates.
    if self.rot_layer in ["up", "up'"]:
        angle = self._layer_angle(cube_joint_pos, (7, 5, 6, 4), axis=2)
    elif self.rot_layer in ["right", "right'"]:
        angle = self._layer_angle(cube_joint_pos, (7, 3, 6, 2), axis=1)
    elif self.rot_layer in ["front", "front'"]:
        angle = self._layer_angle(cube_joint_pos, (5, 1, 7, 3), axis=0)
    else:
        # Unknown layer name: report a neutral angle instead of crashing
        # (the original left `angle` unbound on this path).
        angle = [0]
    cube_qpos = np.concatenate([cube_qpos, cube_rot, angle])
    assert cube_qpos.shape == (8, )
    return cube_qpos

def _layer_angle(self, cube_joint_pos, indices, axis):
    """Return ``[angle]`` of the layer formed by the four cubelets in
    ``indices``, or ``[0]`` when the layer is not cleanly aligned.

    :param cube_joint_pos: per-cubelet rotations (see caller's note).
    :param indices: the four cubelet indices of the layer; the first one's
        euler angle is the value reported.
    :param axis: euler axis (0=x, 1=y, 2=z) of the layer's rotation.
    """
    eulers = [rotations.quat2euler(np.round(cube_joint_pos[i], 2))
              for i in indices]
    # Wrap angles below -3 rad up by ~2*pi so all four cubelets are compared
    # on the same branch.
    for e in eulers:
        e[e < -3] += 6.28
    # Group per euler axis: [x-angles, y-angles, z-angles] of the 4 cubelets.
    per_axis = [[e[k] for e in eulers] for k in range(3)]
    # The layer angle is only well defined when the four cubelets agree
    # (within 0.1 rad) on every axis.
    aligned = all(np.max(a) - np.min(a) < 0.1 for a in per_axis)
    return [per_axis[axis][0]] if aligned else [0]
def _get_obs(self, obs_arg=None):
    """Build the observation dict: gripper state, optional object state,
    and the gripper-to-rim vector.

    :param obs_arg: unused; kept for interface compatibility.
    :return: dict with 'observation', 'achieved_goal', 'desired_goal',
        'gripper_to_rim'.
    """
    grip_pos = self.sim.data.get_site_xpos('robot0:grip')
    dt = self.sim.nsubsteps * self.sim.model.opt.timestep
    grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
    robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
    # FIX: default gripper_to_rim up front — the original only assigned it
    # inside the has_object branch, raising NameError at the return dict
    # whenever has_object was False.
    gripper_to_rim = np.zeros(3,)
    if self.has_object:
        object_pos = self.sim.data.get_site_xpos('object0')
        # rotations
        object_rot = rotations.mat2quat(
            self.sim.data.get_site_xmat('object0'))
        # velocities (scaled by dt to remove time dependency)
        object_velp = self.sim.data.get_site_xvelp('object0') * dt
        object_velr = self.sim.data.get_site_xvelr('object0') * dt
        # gripper state
        object_rel_pos = object_pos - grip_pos
        object_velp -= grip_velp
        # Every step the object position might change, so the translated rim
        # point has to be recomputed from the chosen rim offset each time.
        if self.rim_pts is not None:
            self.translated_rim_pt = self.chosen_rim_pt + object_pos
            gripper_to_rim = self.translated_rim_pt - grip_pos
    else:
        object_pos = object_rot = object_velp = object_velr = \
            object_rel_pos = np.zeros(0)
    gripper_state = robot_qpos[-2:]
    # change to a scalar if the gripper is made symmetric
    gripper_vel = robot_qvel[-2:] * dt
    # Build the observation vector piecewise, honouring the include_* flags.
    obs_parts = []
    if self.relative_obs:
        obs_parts.extend([grip_pos - object_pos.ravel(),
                          object_pos.ravel() - self._init_object_pos])
    else:
        obs_parts.extend([grip_pos, object_pos.ravel(),
                          object_rel_pos.ravel()])
    obs_parts.append(gripper_state)
    if self.include_rotation:  # 3
        obs_parts.append(object_rot.ravel())
    if self.include_velocity:
        obs_parts.append(object_velp.ravel())
        if self.include_rotation:
            obs_parts.append(object_velr.ravel())
        obs_parts.extend([grip_velp, gripper_vel])
    if self.include_obj_size:  # 3
        obj_size_site_id = self.sim.model.site_name2id('object0:size')
        obj_size = self.sim.model.site_size[obj_size_site_id]
        obs_parts.append(obj_size)
    obs = np.concatenate(obs_parts)
    return {
        'observation': obs.copy(),
        'achieved_goal': self.observed_achieved_goal,
        'desired_goal': self.observed_goal,
        'gripper_to_rim': gripper_to_rim.copy(),
    }
def _set_action(self, action):
    """Apply a 4-DOF action [dx, dy, dz, d_wrist_angle], then run a scripted
    grasp-and-lift sequence from the second call onward.

    The scripted part closes the gripper, steps the sim, lifts 20 cm, and
    writes debug renders to disk.
    """
    assert action.shape == (4, )
    self.counter += 1
    action = action.copy()  # ensure that we don't change the action outside of this scope
    pos_ctrl = action[:3]
    # Current wrist angle: rotate the local +Y axis into the world frame and
    # measure its angle in the XY plane, normalised into (-pi, pi].
    grip_mat = rotations.quat2mat(self.sim.data.mocap_quat)
    grip_v = np.squeeze(np.matmul(grip_mat, np.array([0, 1, 0])))
    grip_radius = (math.atan2(grip_v[0], grip_v[1]) + 2 * math.pi) % (2 * math.pi)
    if grip_radius > math.pi:
        grip_radius = (grip_radius - 2 * math.pi)
    angle_ctrl = grip_radius + action[3]  # absolute target wrist angle
    # Rotation by angle_ctrl about X, composed with a fixed axis permutation.
    rot_mat = np.array([[1, 0, 0],
                        [0, math.cos(angle_ctrl), -math.sin(angle_ctrl)],
                        [0, math.sin(angle_ctrl), math.cos(angle_ctrl)]])
    axis_mat = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]])
    rot_ctrl = rotations.mat2quat(np.matmul(axis_mat, rot_mat))
    pos_ctrl *= 0.05  # limit maximum change in position
    gripper_ctrl = np.array([1, 1])  # open gripper during the approach
    assert gripper_ctrl.shape == (2, )
    if self.block_gripper:
        gripper_ctrl = np.zeros_like(gripper_ctrl)
    action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
    # Apply action to simulation.
    # utils.ctrl_set_action(self.sim, action)
    utils.mocap_set_action(self.sim, action)
    if self.counter >= 2:
        # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025:
        self.sim.step()
        # HACK(review): debug renders go to a hard-coded absolute path —
        # make this configurable or remove before wider use.
        img = self.sim.render(width=500, height=500,
                              camera_name="external_camera_1")
        cv2.imwrite("/checkpoint/jdong1021/grasp_sample1.png", img)
        # Close the gripper in place (zero position delta, -1 finger cmd).
        pos_ctrl = np.array([0.0, 0.0, 0.0])
        gripper_ctrl = np.array([-1, -1])
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
        # logging
        # grip_pos = self.sim.data.get_site_xpos('robot0:grip')
        # obs_pos = self.sim.data.get_site_xpos('object0')
        # offset = obs_pos - grip_pos
        # print(offset, np.linalg.norm(offset, axis = -1))
        # Keep re-applying the closing command while the sim settles.
        # NOTE(review): nesting reconstructed from a collapsed source —
        # confirm both statements belong inside the loop.
        for _ in range(20):
            utils.ctrl_set_action(self.sim, action)
            self.sim.step()
        img = self.sim.render(width=500, height=500,
                              camera_name="external_camera_1")
        cv2.imwrite("/checkpoint/jdong1021/grasp_sample2.png", img)
        # Lift 20 cm with the gripper held closed.
        pos_ctrl = np.array([0.0, 0.0, 0.2])
        gripper_ctrl = np.array([-1, -1])
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)
def mat_to_pose(mat: np.ndarray) -> np.ndarray:
    """Convert homogeneous transform(s) ``(..., 4, 4)`` into pose vector(s)
    ``(..., 7)`` laid out as [position (3), quaternion (4)]."""
    position = mat[..., :3, 3]
    orientation = rotations.mat2quat(mat[..., :3, :3])
    return np.concatenate([position, orientation], axis=-1)