def get_pose(self): offset_mat = np.eye(4) q_wxyz = np.concatenate((self.offset_ori[3:], self.offset_ori[:3])) offset_mat[:3, :3] = T.quat2mat(q_wxyz) offset_mat[:3, -1] = self.offset_pos if self.camera_link_name != "worldbody": pos_body_in_world = self.mujoco_env.sim.data.get_body_xpos( self.camera_link_name) rot_body_in_world = self.mujoco_env.sim.data.get_body_xmat( self.camera_link_name).reshape((3, 3)) pose_body_in_world = T.make_pose(pos_body_in_world, rot_body_in_world) total_pose = np.array(pose_body_in_world).dot(np.array(offset_mat)) position = total_pose[:3, -1] rot = total_pose[:3, :3] wxyz = T.mat2quat(rot) xyzw = np.concatenate((wxyz[1:], wxyz[:1])) else: position = np.array(self.offset_pos) xyzw = self.offset_ori return np.concatenate((position, xyzw))
def reset(self, deterministic=False): """ Sets initial pose of arm and grippers. Overrides robot joint configuration if we're using a deterministic reset (e.g.: hard reset from xml file) Args: deterministic (bool): If true, will not randomize initializations within the sim Raises: ValueError: [Invalid noise type] """ init_qpos = np.array(self.init_qpos) if not deterministic: # Determine noise if self.initialization_noise["type"] == "gaussian": noise = np.random.randn(len(self.init_qpos)) * self.initialization_noise["magnitude"] elif self.initialization_noise["type"] == "uniform": noise = np.random.uniform(-1.0, 1.0, len(self.init_qpos)) * self.initialization_noise["magnitude"] else: raise ValueError("Error: Invalid noise type specified. Options are 'gaussian' or 'uniform'.") init_qpos += noise # Set initial position in sim self.sim.data.qpos[self._ref_joint_pos_indexes] = init_qpos # Load controllers self._load_controller() # Update base pos / ori references self.base_pos = self.sim.data.get_body_xpos(self.robot_model.robot_base) self.base_ori = T.mat2quat(self.sim.data.get_body_xmat(self.robot_model.robot_base).reshape((3, 3)))
def point_down(env): current_pos = env._right_hand_pos target_rot_X = T.rotation_matrix(0, np.array([1, 0, 0]), point=current_pos) target_rot_Y = T.rotation_matrix(math.pi, np.array([0, 1, 0]), point=current_pos) target_rot_Z = T.rotation_matrix(math.pi, np.array([0, 0, 1]), point=current_pos) target_rot = np.matmul(target_rot_Z, np.matmul(target_rot_Y, target_rot_X)) target_quat = T.mat2quat(target_rot) done_task = False while not done_task: current_pos = env._right_hand_pos current_quat = env._right_hand_quat dquat = T.quat_slerp(current_quat, target_quat, 0.01) dquat = T.quat_multiply(dquat, T.quat_inverse(current_quat)) if np.abs(dquat[3] - 1.0) < 1e-4: done_task = True grasp = -1 dpos = np.zeros(3) action = np.concatenate([dpos, dquat, [grasp]]) obs, reward, done, info = env.step(action) if RENDER: env.render()
def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # Reset all object positions using initializer sampler if we're not directly loading from an xml if not self.deterministic_reset: # Sample from the placement initializer for all objects object_placements = self.placement_initializer.sample() # Loop through all objects and reset their positions for obj_pos, obj_quat, obj in object_placements.values(): # If prehensile, set the object normally if self.prehensile: self.sim.data.set_joint_qpos( obj.joints[0], np.concatenate([np.array(obj_pos), np.array(obj_quat)])) # Else, set the object in the hand of the robot and loop a few steps to guarantee the robot is grasping # the object initially else: eef_rot_quat = T.mat2quat( T.euler2mat( [np.pi - T.mat2euler(self._eef0_xmat)[2], 0, 0])) obj_quat = T.quat_multiply(obj_quat, eef_rot_quat) for j in range(100): # Set object in hand self.sim.data.set_joint_qpos( obj.joints[0], np.concatenate( [self._eef0_xpos, np.array(obj_quat)])) # Close gripper (action = 1) and prevent arm from moving if self.env_configuration == 'bimanual': # Execute no-op action with gravity compensation torques = np.concatenate([ self.robots[0].controller["right"]. torque_compensation, self.robots[0]. controller["left"].torque_compensation ]) self.sim.data.ctrl[self.robots[ 0]._ref_joint_actuator_indexes] = torques # Execute gripper action self.robots[0].grip_action( gripper=self.robots[0].gripper["right"], gripper_action=[1]) else: # Execute no-op action with gravity compensation self.sim.data.ctrl[self.robots[0]._ref_joint_actuator_indexes] =\ self.robots[0].controller.torque_compensation self.sim.data.ctrl[self.robots[1]._ref_joint_actuator_indexes] = \ self.robots[1].controller.torque_compensation # Execute gripper action self.robots[0].grip_action( gripper=self.robots[0].gripper, gripper_action=[1]) # Take forward step self.sim.step()
def rotate_camera(self, point, axis, angle): """ Rotate the camera view about a direction (in the camera frame). Args: point (None or 3-array): (x,y,z) cartesian coordinates about which to rotate camera in camera frame. If None, assumes the point is the current location of the camera axis (3-array): (ax,ay,az) axis about which to rotate camera in camera frame angle (float): how much to rotate about that direction Returns: 2-tuple: pos: (x,y,z) updated camera position quat: (x,y,z,w) updated camera quaternion orientation """ # current camera rotation + pos camera_pos = np.array(self.env.sim.data.get_mocap_pos(self.mover_body_name)) camera_rot = T.quat2mat(T.convert_quat(self.env.sim.data.get_mocap_quat(self.mover_body_name), to="xyzw")) # rotate by angle and direction to get new camera rotation rad = np.pi * angle / 180.0 R = T.rotation_matrix(rad, axis, point=point) camera_pose = np.zeros((4, 4)) camera_pose[:3, :3] = camera_rot camera_pose[:3, 3] = camera_pos camera_pose = camera_pose @ R # Update camera pose pos, quat = camera_pose[:3, 3], T.mat2quat(camera_pose[:3, :3]) self.set_camera_pose(pos=pos, quat=quat) return pos, quat
def move_orient(self, env, q1): q0 = env.observation_spec()['eef_quat'] x_target = env.observation_spec()['eef_pos'] fraction_size = 50 for i in range(fraction_size): q_target = T.quat_slerp(q0, q1, fraction=(i + 1) / fraction_size) steps = 0 lamda = 0.01 current = env._right_hand_orn target_rotation = T.quat2mat(q_target) drotation = current.T.dot(target_rotation) while (np.linalg.norm(T.mat2euler(drotation)) > 0.01): current = env._right_hand_orn target_rotation = T.quat2mat(q_target) drotation = current.T.dot(target_rotation) dquat = T.mat2quat(drotation) x_current = env.observation_spec()['eef_pos'] d_pos = np.clip((x_target - x_current) * lamda, -0.05, 0.05) d_pos = [0, 0, 0] action = np.concatenate((d_pos, dquat, [self.grasp])) env.step(action) env.render() steps += 1 if (steps > 20): break return
def collect_human_trajectory(env, device): """ Use the device (keyboard or SpaceNav 3D mouse) to collect a demonstration. The rollout trajectory is saved to files in npz format. Modify the DataCollectionWrapper wrapper to add new fields or change data formats. Args: env: environment to control device (instance of Device class): to receive controls from the device """ obs = env.reset() # rotate the gripper so we can see it easily env.set_robot_joint_positions([0, -1.18, 0.00, 2.18, 0.00, 0.57, 1.5708]) env.viewer.set_camera(camera_id=2) env.render() # episode terminates on a spacenav reset input or if task is completed reset = False task_completion_hold_count = -1 # counter to collect 10 timesteps after reaching goal device.start_control() while not reset: state = device.get_controller_state() dpos, rotation, grasp, reset = ( state["dpos"], state["rotation"], state["grasp"], state["reset"], ) # convert into a suitable end effector action for the environment current = env._right_hand_orn drotation = current.T.dot( rotation) # relative rotation of desired from current dquat = T.mat2quat(drotation) grasp = grasp - 1. # map 0 to -1 (open) and 1 to 0 (closed halfway) action = np.concatenate([dpos, dquat, [grasp]]) obs, reward, done, info = env.step(action) env.render() if task_completion_hold_count == 0: break # state machine to check for having a success for 10 consecutive timesteps if env._check_success(): if task_completion_hold_count > 0: task_completion_hold_count -= 1 # latched state, decrement count else: task_completion_hold_count = 10 # reset count on first success timestep else: task_completion_hold_count = -1 # null the counter if there's no success # cleanup for end of data collection episodes env.close()
def reset_goal(self): """ Resets the goal to the current pose of the robot """ self.reference_target_pos = self.ee_pos self.reference_target_orn = T.mat2quat(self.ee_ori_mat) # Sync pybullet state as well self.sync_state()
def joint_positions_for_eef_command(self, dpos, rotation, update_targets=False): """ This function runs inverse kinematics to back out target joint positions from the provided end effector command. Args: dpos (np.array): a 3 dimensional array corresponding to the desired change in x, y, and z end effector position. rotation (np.array): a rotation matrix of shape (3, 3) corresponding to the desired rotation from the current orientation of the end effector. update_targets (bool): whether to update ik target pos / ori attributes or not Returns: list: A list of size @num_joints corresponding to the target joint angles. """ # Calculate the rotation # This equals: inv base offset * eef * offset accounting for deviation between mujoco eef and pybullet eef rotation = self.base_orn_offset_inv @ self.ee_ori_mat @ rotation @ self.rotation_offset[:3, :3] # Determine targets based on whether we're using interpolator(s) or not if self.interpolator_pos or self.interpolator_ori: targets = (self.ee_pos + dpos + self.ik_robot_target_pos_offset, T.mat2quat(rotation)) else: targets = (self.ik_robot_target_pos + dpos, T.mat2quat(rotation)) # convert from target pose in base frame to target pose in bullet world frame world_targets = self.bullet_base_pose_to_world_pose(targets) # Update targets if required if update_targets: # Scale and increment target position self.ik_robot_target_pos += dpos # Convert the desired rotation into the target orientation quaternion self.ik_robot_target_orn = T.mat2quat(rotation) # Converge to IK solution arm_joint_pos = None for bullet_i in range(self.converge_steps): arm_joint_pos = self.inverse_kinematics(world_targets[0], world_targets[1]) self.sync_ik_robot(arm_joint_pos, sync_last=True) return arm_joint_pos
def _eef1_xquat(self): """ End Effector 1 orientation as a (x,y,z,w) quaternion Note that this draws the orientation from the "ee" site, NOT the gripper site, since the gripper orientations are inconsistent! Returns: np.array: (x,y,z,w) quaternion for EEF1 """ return mat2quat(self._eef1_xmat)
def get_sim2real_posquat(env): sim_eef_pose = deepcopy(env.robots[0].pose_in_base_from_name('gripper0_eef')) angle = np.deg2rad(-90) direction_axis = [0, 0, 1] rotation_matrix = transform_utils.rotation_matrix(angle, direction_axis) sim_pose_rotated = np.dot(rotation_matrix, sim_eef_pose) sim_eef_pos_rotated = deepcopy(sim_pose_rotated)[:3, 3] sim_eef_quat_rotated = deepcopy(transform_utils.mat2quat(sim_pose_rotated)) return sim_eef_pos_rotated, sim_eef_quat_rotated
def joint_positions_for_eef_command(self, right, left): """ This function runs inverse kinematics to back out target joint positions from the provided end effector command. Same arguments as @get_control. Returns: A list of size @num_joints corresponding to the target joint angles. """ dpos_right = right["dpos"] dpos_left = left["dpos"] self.target_pos_right = self.ik_robot_target_pos_right + np.array( [0, 0, 0.913]) self.target_pos_left = self.ik_robot_target_pos_left + np.array( [0, 0, 0.913]) self.ik_robot_target_pos_right += dpos_right self.ik_robot_target_pos_left += dpos_left rotation_right = right["rotation"] rotation_left = left["rotation"] self.ik_robot_target_orn_right = T.mat2quat(rotation_right) self.ik_robot_target_orn_left = T.mat2quat(rotation_left) # convert from target pose in base frame to target pose in bullet world frame world_targets_right = self.bullet_base_pose_to_world_pose( (self.ik_robot_target_pos_right, self.ik_robot_target_orn_right)) world_targets_left = self.bullet_base_pose_to_world_pose( (self.ik_robot_target_pos_left, self.ik_robot_target_orn_left)) # Empirically, more iterations aren't needed, and it's faster for _ in range(5): arm_joint_pos = self.inverse_kinematics( world_targets_right[0], world_targets_right[1], world_targets_left[0], world_targets_left[1], rest_poses=self.robot_jpos_getter(), ) self.sync_ik_robot(arm_joint_pos, sync_last=True) return arm_joint_pos
def set_base_ori(self, rot): """ Rotates robot by rotation @rot from its original orientation. Args: rot (3-array): (r,p,y) euler angles specifying the orientation for the robot base """ # xml quat assumes w,x,y,z so we need to convert to this format from outputted x,y,z,w format from fcn rot = mat2quat(euler2mat(rot))[[3, 0, 1, 2]] self._elements["root_body"].set("quat", array_to_string(rot))
def _hand_quat(self): """ Returns: dict: each arm-specific entry specifies the eef quaternion in base frame of robot. """ vals = {} orns = self._hand_orn for arm in self.arms: vals[arm] = T.mat2quat(orns[arm]) return vals
def get_real2sim_posquat(pos, quat): real_eef_pose = transform_utils.pose2mat((pos,quat)) angle = np.deg2rad(90) direction_axis = [0, 0, 1] rotation_matrix = transform_utils.rotation_matrix(angle, direction_axis) real_pose_rotated = np.dot(rotation_matrix, real_eef_pose) real_eef_pos_rotated = deepcopy(real_pose_rotated)[:3, 3] real_eef_quat_rotated = deepcopy(transform_utils.mat2quat(real_pose_rotated)) return real_eef_pos_rotated, real_eef_quat_rotated
def move(self, pos, rotation): # First align the gripper angle q1 = T.mat2quat(rotation) self.move_orient(self.env, q1) # Now move to x-y coordinate given by the pos self.move_xy(self.env, pos[:2], rotation) # Now move to x-y-z target position self.move_xyz(self.env, pos, rotation)
def set_base_ori(self, rot): """ Rotates robot by rotation @rot from its original orientation. Args: rot (3-array): (r,p,y) euler angles specifying the orientation for the robot base """ node = self.worldbody.find("./body[@name='{}']".format(self._root_)) # xml quat assumes w,x,y,z so we need to convert to this format from outputted x,y,z,w format from fcn rot = mat2quat(euler2mat(rot))[[3, 0, 1, 2]] node.set("quat", array_to_string(rot))
def move_z(self, env, z_target, target_rotation): # target_rotation = np.array([[-1., 0., 0.], [0., 1., 0.], [0., 0., -1.]]) x_current = env.observation_spec()['eef_pos'] x_target = np.copy(x_current) x_target[2] = z_target steps = 0 lamda = 0.1 while (np.linalg.norm(x_target - x_current) > 0.00001): if (np.linalg.norm(x_target - x_current) < 0.01): lamda = lamda * 1.01 else: lamda = 0.1 x_current = env.observation_spec()['eef_pos'] current = env._right_hand_orn drotation = current.T.dot(target_rotation) dquat = T.mat2quat(drotation) d_pos = (x_target - x_current) * lamda action = np.concatenate((d_pos, dquat, [self.grasp])) env.step(action) env.render() for i in range(4): # Now do action for zero xyz change so that bot stabilizes current = env._right_hand_orn drotation = current.T.dot(target_rotation) dquat = T.mat2quat(drotation) action = np.concatenate(([0, 0, 0], dquat, [self.grasp])) env.step(action) env.render() steps += 1 if (steps > 500): break return
def _get_orientation_geom(self, name): """ Gets the position and quaternion for a geom """ pos = self.env.sim.data.geom_xpos[self.env.sim.model.geom_name2id(name)] R = self.env.sim.data.geom_xmat[self.env.sim.model.geom_name2id(name)].reshape(3, 3) quat_xyzw = mat2quat(R) quat = np.array([quat_xyzw[3], quat_xyzw[0], quat_xyzw[1], quat_xyzw[2]]) return pos, quat
def get_interpolated_goal(self): """ Provides the next step in interpolation given the remaining steps. NOTE: If this interpolator is for orientation, it is assumed to be receiving either euler angles or quaternions Returns: np.array: Next position in the interpolated trajectory """ # Grab start position x = np.array(self.start) # Calculate the desired next step based on remaining interpolation steps if self.ori_interpolate is not None: # This is an orientation interpolation, so we interpolate linearly around a sphere instead goal = np.array(self.goal) if self.ori_interpolate == "euler": # this is assumed to be euler angles (x,y,z), so we need to first map to quat x = T.mat2quat(T.euler2mat(x)) goal = T.mat2quat(T.euler2mat(self.goal)) # Interpolate to the next sequence x_current = T.quat_slerp(x, goal, fraction=(self.step + 1) / self.total_steps) if self.ori_interpolate == "euler": # Map back to euler x_current = T.mat2euler(T.quat2mat(x_current)) else: # This is a normal interpolation dx = (self.goal - x) / (self.total_steps - self.step) x_current = x + dx # Increment step if there's still steps remaining based on ramp ratio if self.step < self.total_steps - 1: self.step += 1 # Return the new interpolated step return x_current
def _randomize_rotation(self, name): """ Helper function to randomize orientation of a specific camera Args: name (str): Name of the camera to randomize for """ # sample a small, random axis-angle delta rotation random_axis, random_angle = trans.random_axis_angle(angle_limit=self.rotation_perturbation_size, random_state=self.random_state) random_delta_rot = trans.quat2mat(trans.axisangle2quat(random_axis * random_angle)) # compute new rotation and set it base_rot = trans.quat2mat(trans.convert_quat(self._defaults[name]['quat'], to='xyzw')) new_rot = random_delta_rot.T.dot(base_rot) new_quat = trans.convert_quat(trans.mat2quat(new_rot), to='wxyz') self.set_quat( name, new_quat, )
def joint_positions_for_eef_command(self, dpos, rotation): """ This function runs inverse kinematics to back out target joint positions from the provided end effector command. Same arguments as @get_control. Returns: A list of size @num_joints corresponding to the target joint angles. """ self.ik_robot_target_pos += dpos * self.user_sensitivity # this rotation accounts for offsetting the rotation between the final joint and # the hand link, since the pybullet eef frame is the final joint # of robot arm, whereas the mujoco eef frame is the hand link, which is rotated # from the final joint by the 'quat' value for 'right_hand' specified in panda/robot.xml rotation = rotation.dot( T.rotation_matrix(angle=-np.pi/4, direction=[0., 0., 1.], point=None)[ :3, :3 ] ) self.ik_robot_target_orn = T.mat2quat(rotation) # convert from target pose in base frame to target pose in bullet world frame world_targets = self.bullet_base_pose_to_world_pose( (self.ik_robot_target_pos, self.ik_robot_target_orn) ) # Set default rest pose as a neutral down-position over the center of the table rest_poses = [0, np.pi/6, 0.00, -(np.pi - 2*np.pi/6), 0.00, (np.pi - np.pi/6), np.pi/4] for bullet_i in range(100): arm_joint_pos = self.inverse_kinematics( world_targets[0], world_targets[1], rest_poses=rest_poses ) self.sync_ik_robot(arm_joint_pos, sync_last=True) return arm_joint_pos
def rotate_camera(env, direction, angle, camera_id): """ Rotate the camera view about a direction (in the camera frame). :param direction: a 3-dim numpy array for where to move camera in camera frame :param angle: a float for how much to rotate about that direction :param camera_id: which camera to modify """ # current camera rotation camera_rot = T.quat2mat( T.convert_quat(env.sim.data.get_mocap_quat("cameramover"), to='xyzw')) # rotate by angle and direction to get new camera rotation rad = np.pi * angle / 180.0 R = T.rotation_matrix(rad, direction, point=None) camera_rot = camera_rot.dot(R[:3, :3]) # set new rotation env.sim.data.set_mocap_quat( "cameramover", T.convert_quat(T.mat2quat(camera_rot), to='wxyz')) env.sim.forward()
def rotate_camera(env, direction, angle, cam_body_id): """ Rotate the camera view about a direction (in the camera frame). Args: direction (np.array): 3-array for where to move camera in camera frame angle (float): how much to rotate about that direction cam_body_id (int): id corresponding to parent body of camera element """ # current camera rotation camera_pos = np.array(env.sim.model.body_pos[cam_body_id]) camera_rot = T.quat2mat(T.convert_quat(env.sim.model.body_quat[cam_body_id], to='xyzw')) # rotate by angle and direction to get new camera rotation rad = np.pi * angle / 180.0 R = T.rotation_matrix(rad, direction, point=None) camera_rot = camera_rot.dot(R[:3, :3]) # set new rotation env.sim.model.body_quat[cam_body_id] = T.convert_quat(T.mat2quat(camera_rot), to='wxyz') env.sim.forward()
def joint_positions_for_eef_command(self, dpos, rotation): """ This function runs inverse kinematics to back out target joint positions from the provided end effector command. Same arguments as @get_control. Returns: A list of size @num_joints corresponding to the target joint angles. """ self.ik_robot_target_pos += dpos * self.user_sensitivity # this rotation accounts for rotating the end effector by 90 degrees # from its rest configuration. The corresponding line in most demo # scripts is: # `env.set_robot_joint_positions([0, -1.18, 0.00, 2.18, 0.00, 0.57, 1.5708])` rotation = rotation.dot( T.rotation_matrix(angle=-np.pi / 2, direction=[0., 0., 1.], point=None)[ :3, :3 ] ) self.ik_robot_target_orn = T.mat2quat(rotation) # convert from target pose in base frame to target pose in bullet world frame world_targets = self.bullet_base_pose_to_world_pose( (self.ik_robot_target_pos, self.ik_robot_target_orn) ) rest_poses = [0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161] for bullet_i in range(100): arm_joint_pos = self.inverse_kinematics( world_targets[0], world_targets[1], rest_poses=rest_poses ) self.sync_ik_robot(arm_joint_pos, sync_last=True) return arm_joint_pos
def eef_pos_orn_2_joint_pos(self, eef_pos, orientation): self.ik_robot_target_pos = eef_pos orientation = orientation.dot( T.rotation_matrix(angle=-np.pi/4, direction=[0., 0., 1.], point=None)[ :3, :3 ] ) self.ik_robot_target_orn = T.mat2quat(orientation) # convert from target pose in base frame to target pose in bullet world frame world_targets = self.bullet_base_pose_to_world_pose( (self.ik_robot_target_pos, self.ik_robot_target_orn) ) # Set default rest pose as a neutral down-position over the center of the table rest_poses = [0, np.pi/6, 0.00, -(np.pi - 2*np.pi/6), 0.00, (np.pi - np.pi/6), np.pi/4] for bullet_i in range(100): arm_joint_pos = self.inverse_kinematics( world_targets[0], world_targets[1], rest_poses=rest_poses ) self.sync_ik_robot(arm_joint_pos, sync_last=True) return arm_joint_pos
def _step(self, target_qpos): o = None delta_xyz = (target_qpos[:3] - self._eef_pos) / self._hp.substeps for i in range(self._hp.substeps): current_rot = self._env._right_hand_orn pitch, roll, yaw = 0, 0, target_qpos[3] drot1 = rotation_matrix(angle=-pitch, direction=[1., 0, 0], point=None)[:3, :3] drot2 = rotation_matrix(angle=roll, direction=[0, 1., 0], point=None)[:3, :3] drot3 = rotation_matrix(angle=yaw, direction=[0, 0, 1.], point=None)[:3, :3] desired_rot = start_rot.dot(drot1.dot(drot2.dot(drot3))) drotation = current_rot.T.dot(desired_rot) dquat = mat2quat(drotation) o = self._env.step( np.concatenate((delta_xyz, dquat, [target_qpos[-1]])))[0] self._previous_target_qpos = target_qpos return self._proc_obs(o)
def _hand_quat(self): """ Returns: np.array: (x,y,z,w) eef quaternion in base frame of robot. """ return T.mat2quat(self._hand_orn)
def control(self, action, policy_step=False): """ Actuate the robot with the passed joint velocities and gripper control. Args: action (np.array): The control to apply to the robot. The first @self.robot_model.dof dimensions should be the desired normalized joint velocities and if the robot has a gripper, the next @self.gripper.dof dimensions should be actuation controls for the gripper. policy_step (bool): Whether a new policy step (action) is being taken Raises: AssertionError: [Invalid action dimension] """ # clip actions into valid range assert len( action ) == self.action_dim, "environment got invalid action dimension -- expected {}, got {}".format( self.action_dim, len(action)) gripper_action = None if self.has_gripper: gripper_action = action[ self.controller. control_dim:] # all indexes past controller dimension indexes arm_action = action[:self.controller.control_dim] else: arm_action = action # Update the controller goal if this is a new policy step if policy_step: self.num_policy_steps = 0 self.controller.set_goal(arm_action) else: self.num_policy_steps += 1 # Now run the controller for a step torques = self.controller.run_controller() # Clip the torques low, high = self.torque_limits self.torques = np.clip(torques, low, high) # Get gripper action, if applicable if self.has_gripper: self.grip_action(gripper=self.gripper, gripper_action=gripper_action) # Apply joint torque control self.sim.data.ctrl[self._ref_joint_actuator_indexes] = self.torques # If this is a policy step, also update buffers holding recent values of interest if policy_step: # Update proprioceptive values self.recent_qpos.push(self._joint_positions) self.recent_actions.push(action) self.recent_torques.push(self.torques) self.recent_ee_forcetorques.push( np.concatenate((self.ee_force, self.ee_torque))) self.recent_ee_pose.push( np.concatenate((self.controller.ee_pos, T.mat2quat(self.controller.ee_ori_mat)))) self.recent_ee_vel.push( np.concatenate( (self.controller.ee_pos_vel, self.controller.ee_ori_vel))) # Estimation of eef acceleration (averaged derivative of recent velocities) self.recent_ee_vel_buffer.push( np.concatenate( (self.controller.ee_pos_vel, self.controller.ee_ori_vel))) diffs = np.vstack([ self.recent_ee_acc.current, self.control_freq * np.diff(self.recent_ee_vel_buffer.buf, axis=0) ]) ee_acc = np.array([ np.convolve(col, np.ones(10) / 10.0, mode="valid")[0] for col in diffs.transpose() ]) self.recent_ee_acc.push(ee_acc)
has_offscreen_renderer=False, # not needed since not using pixel obs has_renderer=True, # make sure we can render to the screen reward_shaping=False, # use dense rewards control_freq=30, # control should happen fast enough so that simulation looks smooth ) world.mode = 'human' world.reset() world.viewer.set_camera(camera_id=2) world.render() for i in range(5000): dpos, rotation, grasp = [0, 0, -0.01], T.rotation_matrix(angle=0, direction=[1., 0., 0.])[:3, :3], 0 current = world._right_hand_orn drotation = current.T.dot(rotation) # relative rotation of desired from current dquat = T.mat2quat(drotation) grasp = grasp - 1 print(current) print(drotation) print(dquat) # print(grasp) action = np.concatenate([dpos, dquat, [grasp]]) world.step(action) world.render() pass