def _post_action(self, action): """ Run any necessary visualization after running the action Args: action (np.array): Action being passed during this timestep Returns: 3-tuple: - (float) reward from the environment - (bool) whether the current episode is completed or not - (dict) empty dict to be filled with information by subclassed method """ reward, done, info = super()._post_action(action) chassis_body_id = self.sim.model.body_name2id( self.robots[0].robot_model.robot_base) body_pos_z = self.sim.data.body_xpos[chassis_body_id][2] quat = np.array([ self.sim.data.qpos[x] for x in self.robots[0]._ref_chassis_pos_indexes ])[3:] mat = quat2mat(quat) euler = mat2euler(mat) roll, pitch = euler[:2] if abs(roll > 0.785) or abs(pitch > 0.785) or body_pos_z < 0.20: done = True reward = -10 return reward, done, info
def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # Reset all object positions using initializer sampler if we're not directly loading from an xml if not self.deterministic_reset: # Sample from the placement initializer for all objects object_placements = self.placement_initializer.sample() # Loop through all objects and reset their positions for obj_pos, obj_quat, obj in object_placements.values(): # If prehensile, set the object normally if self.prehensile: self.sim.data.set_joint_qpos( obj.joints[0], np.concatenate([np.array(obj_pos), np.array(obj_quat)])) # Else, set the object in the hand of the robot and loop a few steps to guarantee the robot is grasping # the object initially else: eef_rot_quat = T.mat2quat( T.euler2mat( [np.pi - T.mat2euler(self._eef0_xmat)[2], 0, 0])) obj_quat = T.quat_multiply(obj_quat, eef_rot_quat) for j in range(100): # Set object in hand self.sim.data.set_joint_qpos( obj.joints[0], np.concatenate( [self._eef0_xpos, np.array(obj_quat)])) # Close gripper (action = 1) and prevent arm from moving if self.env_configuration == 'bimanual': # Execute no-op action with gravity compensation torques = np.concatenate([ self.robots[0].controller["right"]. torque_compensation, self.robots[0]. controller["left"].torque_compensation ]) self.sim.data.ctrl[self.robots[ 0]._ref_joint_actuator_indexes] = torques # Execute gripper action self.robots[0].grip_action( gripper=self.robots[0].gripper["right"], gripper_action=[1]) else: # Execute no-op action with gravity compensation self.sim.data.ctrl[self.robots[0]._ref_joint_actuator_indexes] =\ self.robots[0].controller.torque_compensation self.sim.data.ctrl[self.robots[1]._ref_joint_actuator_indexes] = \ self.robots[1].controller.torque_compensation # Execute gripper action self.robots[0].grip_action( gripper=self.robots[0].gripper, gripper_action=[1]) # Take forward step self.sim.step()
def _get_initial_qpos(self): """ Calculates the initial joint position for the robot based on the initial desired pose (self.traj_pt, self.goal_quat). Args: Returns: (np.array): n joint positions """ pos = self._convert_robosuite_to_toolbox_xpos(self.traj_pt) ori_euler = mat2euler(quat2mat(self.goal_quat)) # desired pose T = SE3(pos) * SE3.RPY(ori_euler) # find initial joint positions if self.robots[0].name == "UR5e": robot = rtb.models.DH.UR5() sol = robot.ikine_min(T, q0=self.robots[0].init_qpos) # flip last joint around (pi) sol.q[-1] -= np.pi return sol.q elif self.robots[0].name == "Panda": robot = rtb.models.DH.Panda() sol = robot.ikine_min(T, q0=self.robots[0].init_qpos) return sol.q
def baseline_grasp(env, noise=False): target_pos, target_angle = env._gen_grasp_gt() #x,y,z,a target_angle = T.mat2euler(target_angle)[-1] if noise: cube_length = env.object.size[0] / 2 gripper_noise = np.random.uniform(low=0.95, high=1.05) lateral_noise = np.random.uniform(low=-0.01, high=0.01) longitude_noise = np.random.uniform(low=cube_length * -1.5, high=cube_length) angle_noise = np.random.uniform(low=-0.01, high=0.01) else: gripper_noise = 1 lateral_noise = 0 longitude_noise = 0 angle_noise = 0 target_pos[2] += GRIPPER_LENGTH * gripper_noise target_pos[0] += np.sin(target_angle) * lateral_noise target_pos[1] += np.cos(target_angle) * lateral_noise target_pos[0] += np.cos(target_angle) * longitude_noise target_pos[1] += np.sin(target_angle) * longitude_noise target_angle += angle_noise return target_pos, target_angle
def move_orient(self, env, q1): q0 = env.observation_spec()['eef_quat'] x_target = env.observation_spec()['eef_pos'] fraction_size = 50 for i in range(fraction_size): q_target = T.quat_slerp(q0, q1, fraction=(i + 1) / fraction_size) steps = 0 lamda = 0.01 current = env._right_hand_orn target_rotation = T.quat2mat(q_target) drotation = current.T.dot(target_rotation) while (np.linalg.norm(T.mat2euler(drotation)) > 0.01): current = env._right_hand_orn target_rotation = T.quat2mat(q_target) drotation = current.T.dot(target_rotation) dquat = T.mat2quat(drotation) x_current = env.observation_spec()['eef_pos'] d_pos = np.clip((x_target - x_current) * lamda, -0.05, 0.05) d_pos = [0, 0, 0] action = np.concatenate((d_pos, dquat, [self.grasp])) env.step(action) env.render() steps += 1 if (steps > 20): break return
def reward(self, action=None): """ Reward function for the task. Sparse un-normalized reward: - a discrete reward of 2.25 is provided if the cube is lifted Un-normalized summed components if using reward shaping: - Reaching: in [0, 1], to encourage the arm to reach the cube - Grasping: in {0, 0.25}, non-zero if arm is grasping the cube - Lifting: in {0, 1}, non-zero if arm has lifted the cube The sparse reward only consists of the lifting component. Note that the final reward is normalized and scaled by reward_scale / 2.25 as well so that the max score is equal to reward_scale Args: action (np array): [NOT USED] Returns: float: reward value """ chassis_body_id = self.sim.model.body_name2id( self.robots[0].robot_model.robot_base) body_pos = self.sim.data.body_xpos[chassis_body_id] quat = np.array([ self.sim.data.qpos[x] for x in self.robots[0]._ref_chassis_pos_indexes ])[3:] mat = quat2mat(quat) euler = mat2euler(mat) pitch = euler[1] ctrl_norm = np.linalg.norm(self.sim.data.ctrl[ self.robots[0]._ref_joint_torq_actuator_indexes]) reward = -1*((10*(body_pos[2]-0.37))**2)-0.03*(ctrl_norm**2)-3*abs(pitch) \ - 0.2*((10*body_pos[0])**2+(10*body_pos[1])**2) # Scale reward if requested if self.reward_scale is not None: reward *= self.reward_scale return reward
def get_interpolated_goal(self): """ Provides the next step in interpolation given the remaining steps. NOTE: If this interpolator is for orientation, it is assumed to be receiving either euler angles or quaternions Returns: np.array: Next position in the interpolated trajectory """ # Grab start position x = np.array(self.start) # Calculate the desired next step based on remaining interpolation steps if self.ori_interpolate is not None: # This is an orientation interpolation, so we interpolate linearly around a sphere instead goal = np.array(self.goal) if self.ori_interpolate == "euler": # this is assumed to be euler angles (x,y,z), so we need to first map to quat x = T.mat2quat(T.euler2mat(x)) goal = T.mat2quat(T.euler2mat(self.goal)) # Interpolate to the next sequence x_current = T.quat_slerp(x, goal, fraction=(self.step + 1) / self.total_steps) if self.ori_interpolate == "euler": # Map back to euler x_current = T.mat2euler(T.quat2mat(x_current)) else: # This is a normal interpolation dx = (self.goal - x) / (self.total_steps - self.step) x_current = x + dx # Increment step if there's still steps remaining based on ramp ratio if self.step < self.total_steps - 1: self.step += 1 # Return the new interpolated step return x_current
def compute_joint_velocities_for_endpoint_velocity( self, endpoint_velocity_in_base_frame, joint_angles_array): ### ### Compute the Jacobian inverse at the current set of joint angles ### # Create a KDL array of the joint angles joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array( joint_angles_array) # Compute the Jacobian at the current joint angles jacobian = self.compute_jacobian(joint_angles_kdl_array) # Compute the pseudo-inverse jacobian_inverse = np.linalg.pinv(jacobian) ### ### Then, use the Jacobian inverse to compute the required joint velocities ### # Multiply the Jacobian inverse by the Cartesian velocity joint_velocities = jacobian_inverse * np.concatenate([ endpoint_velocity_in_base_frame[:3, -1].reshape(3, 1), T.mat2euler(endpoint_velocity_in_base_frame[:3, :3], axes='sxyz').reshape(3, 1) ]) # Return the velocities return joint_velocities
def set_goal_orientation(delta, current_orientation, orientation_limit=None, set_ori=None): """ Calculates and returns the desired goal orientation, clipping the result accordingly to @orientation_limits. @delta and @current_orientation must be specified if a relative goal is requested, else @set_ori must be an orientation matrix specified to define a global orientation Args: delta (np.array): Desired relative change in orientation, in axis-angle form [ax, ay, az] current_orientation (np.array): Current orientation, in rotation matrix form orientation_limit (None or np.array): 2d array defining the (min, max) limits of permissible orientation goal commands set_ori (None or np.array): If set, will ignore @delta and set the goal orientation to this value Returns: np.array: calculated goal orientation in absolute coordinates Raises: ValueError: [Invalid orientation_limit shape] """ # directly set orientation if set_ori is not None: goal_orientation = set_ori # otherwise use delta to set goal orientation else: # convert axis-angle value to rotation matrix quat_error = trans.axisangle2quat(delta) rotation_mat_error = trans.quat2mat(quat_error) goal_orientation = np.dot(rotation_mat_error, current_orientation) # check for orientation limits if np.array(orientation_limit).any(): if orientation_limit.shape != (2, 3): raise ValueError("Orientation limit should be shaped (2,3) " "but is instead: {}".format( orientation_limit.shape)) # Convert to euler angles for clipping euler = trans.mat2euler(goal_orientation) # Clip euler angles according to specified limits limited = False for idx in range(3): if orientation_limit[0][idx] < orientation_limit[1][ idx]: # Normal angle sector meaning if orientation_limit[0][idx] < euler[idx] < orientation_limit[ 1][idx]: continue else: limited = True dist_to_lower = euler[idx] - orientation_limit[0][idx] if dist_to_lower > np.pi: dist_to_lower -= 2 * np.pi elif dist_to_lower < -np.pi: dist_to_lower += 2 * np.pi dist_to_higher = euler[idx] - orientation_limit[1][idx] if dist_to_lower > np.pi: dist_to_higher -= 2 * np.pi elif dist_to_lower < -np.pi: dist_to_higher += 2 * np.pi if dist_to_lower < dist_to_higher: euler[idx] = orientation_limit[0][idx] else: euler[idx] = orientation_limit[1][idx] else: # Inverted angle sector meaning if orientation_limit[0][idx] < euler[idx] or euler[ idx] < orientation_limit[1][idx]: continue else: limited = True dist_to_lower = euler[idx] - orientation_limit[0][idx] if dist_to_lower > np.pi: dist_to_lower -= 2 * np.pi elif dist_to_lower < -np.pi: dist_to_lower += 2 * np.pi dist_to_higher = euler[idx] - orientation_limit[1][idx] if dist_to_lower > np.pi: dist_to_higher -= 2 * np.pi elif dist_to_lower < -np.pi: dist_to_higher += 2 * np.pi if dist_to_lower < dist_to_higher: euler[idx] = orientation_limit[0][idx] else: euler[idx] = orientation_limit[1][idx] if limited: goal_orientation = trans.euler2mat( np.array([euler[0], euler[1], euler[2]])) return goal_orientation
def test_linear_interpolator(): for controller_name in ["IK_POSE", "OSC_POSE"]: for traj in ["pos", "ori"]: # Define counter to increment timesteps and torques for each trajectory timesteps = [0, 0] summed_abs_delta_torques = [np.zeros(7), np.zeros(7)] for interpolator in [None, "linear"]: # Define numpy seed so we guarantee consistent starting pos / ori for each trajectory np.random.seed(3) # Define controller path to load controller_path = os.path.join( os.path.dirname(__file__), '../../robosuite', 'controllers/config/{}.json'.format( controller_name.lower())) with open(controller_path) as f: controller_config = json.load(f) controller_config["interpolation"] = interpolator controller_config["ramp_ratio"] = 1.0 # Now, create a test env for testing the controller on env = suite.make( "Lift", robots="Sawyer", has_renderer=args. render, # by default, don't use on-screen renderer for visual validation has_offscreen_renderer=False, use_camera_obs=False, horizon=10000, control_freq=20, controller_configs=controller_config) # Reset the environment env.reset() # Hardcode the starting position for sawyer init_qpos = [ -0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628 ] env.robots[0].set_robot_joint_positions(init_qpos) env.robots[0].controller.update_initial_joints(init_qpos) env.robots[0].controller.reset_goal() # Notify user a new trajectory is beginning print( "\nTesting controller {} with trajectory {} and interpolator={}..." .format(controller_name, traj, interpolator)) # If rendering, set controller to front view to get best angle for viewing robot movements if args.render: env.viewer.set_camera(camera_id=0) # Keep track of state of robot eef (pos, ori (euler)) and torques current_torques = np.zeros(7) initial_state = [ env.robots[0]._hand_pos, T.mat2euler(env.robots[0]._hand_orn) ] dstate = [ env.robots[0]._hand_pos - initial_state[0], T.mat2euler(env.robots[0]._hand_orn) - initial_state[1] ] # Define the uniform trajectory action if traj == "pos": pos_act = pos_action_ik if controller_name == "IK_POSE" else pos_action_osc rot_act = np.zeros(3) else: pos_act = np.zeros(3) rot_act = rot_action_ik if controller_name == "IK_POSE" else rot_action_osc # Compose the action action = np.concatenate([pos_act, rot_act, [0]]) # Determine which trajectory we're executing k = 0 if traj == "pos" else 1 j = 0 if not interpolator else 1 # Run trajectory until the threshold condition is met while abs(dstate[k][indexes[k]]) < abs(thresholds[k]): _, summed_torques, current_torques = step( env, action, current_torques) if args.render: env.render() # Update torques, timestep count, and state summed_abs_delta_torques[j] += summed_torques timesteps[j] += 1 dstate = [ env.robots[0]._hand_pos - initial_state[0], T.mat2euler(env.robots[0]._hand_orn) - initial_state[1] ] # When finished, print out the timestep results print( "Completed trajectory. Total summed absolute delta torques: {}" .format(summed_abs_delta_torques[j])) # Shut down this env before starting the next test env.close() # Tests completed! print() print("-" * 80) print("All linear interpolator testing completed.\n")
def test_linear_interpolator(): for controller_name in ["EE_POS_ORI", "EE_IK"]: for traj in ["pos", "ori"]: # Define counter to increment timesteps for each trajectory timesteps = [0, 0] for interpolator in [None, "linear"]: # Define numpy seed so we guarantee consistent starting pos / ori for each trajectory np.random.seed(3) # Define controller path to load controller_path = os.path.join( os.path.dirname(__file__), '../../robosuite', 'controllers/config/{}.json'.format( controller_name.lower())) with open(controller_path) as f: controller_config = json.load(f) controller_config["interpolation"] = interpolator controller_config["ramp_ratio"] = 1.0 # Now, create a test env for testing the controller on env = suite.make( "Lift", robots="Sawyer", has_renderer=args. render, # by default, don't use on-screen renderer for visual validation has_offscreen_renderer=False, use_camera_obs=False, horizon=10000, control_freq=20, controller_configs=controller_config) # Reset the environment env.reset() # Notify user a new trajectory is beginning print( "\nTesting controller {} with trajectory {} and interpolator={}..." .format(controller_name, traj, interpolator)) # If rendering, set controller to front view to get best angle for viewing robot movements if args.render: env.viewer.set_camera(camera_id=0) # Keep track of state of robot eef (pos, ori (euler)) initial_state = [ env.robots[0]._right_hand_pos, T.mat2euler(env.robots[0]._right_hand_orn) ] dstate = [ env.robots[0]._right_hand_pos - initial_state[0], T.mat2euler(env.robots[0]._right_hand_orn) - initial_state[1] ] # Define the uniform trajectory action if traj == "pos": pos_act = pos_action_ik if controller_name.lower( ) == "ee_ik" else pos_action_osc rot_act = T.mat2quat(T.euler2mat( np.zeros(3))) if controller_name.lower( ) == "ee_ik" else np.zeros(3) else: pos_act = np.zeros(3) rot_act = rot_action_ik if controller_name.lower( ) == "ee_ik" else rot_action_osc # Compose the action action = np.concatenate([pos_act, rot_act, [0]]) # Determine which trajectory we're executing k = 0 if traj == "pos" else 1 j = 0 if not interpolator else 1 # Run trajectory until the threshold condition is met while abs(dstate[k][indexes[k]]) < abs(thresholds[k]): env.step(action) if args.render: env.render() # Update timestep count and state timesteps[j] += 1 dstate = [ env.robots[0]._right_hand_pos - initial_state[0], T.mat2euler(env.robots[0]._right_hand_orn) - initial_state[1] ] # When finished, print out the timestep results print("Completed trajectory. Took {} timesteps total.".format( timesteps[j])) # Shut down this env before starting the next test env.close() # Assert that the interpolated path is slower than the non-interpolated one assert timesteps[1] > min_ratio * timesteps[0], "Error: Interpolated trajectory time should be longer " \ "than non-interpolated!" # Tests completed! print() print("-" * 80) print("All linear interpolator testing completed.\n")
import numpy as np import matplotlib.pyplot as plt import robosuite.utils.transform_utils as T target_pos = np.array([0.66, 0.16, 0.387]) target_quat = np.array( [-9.99048222e-01, 4.36193835e-02, 6.11740587e-17, 2.67091712e-18]) target_angle = T.mat2euler(T.quat2mat(target_quat)) print(target_angle) pos_traj = np.load('pos_traj.npy') quat_traj = np.concatenate( [np.load('quat_traj.npy'), np.load('quat_traj2.npy')], axis=0) # plt.plot(range(len(pos_traj)), pos_traj[:,2], label='z') # plt.plot(range(len(pos_traj)), [target_pos[2]] * len(pos_traj), label='target') # plt.show() angle_traj = np.array([T.mat2euler(T.quat2mat(q)) for q in quat_traj]) print(angle_traj[-1]) plt.plot(range(len(angle_traj)), angle_traj[:, 2], label='actual') plt.plot(range(len(angle_traj)), [target_angle[2]] * len(angle_traj), label='target') plt.xlabel('t') plt.ylabel('Z-orientation (rad)') plt.legend() plt.show()
point=target_pos) target_rot_Z = T.rotation_matrix(math.pi + target_angle, np.array([0, 0, 1]), point=target_pos) target_rot = np.matmul(target_rot_Z, np.matmul(target_rot_Y, target_rot_X)) target_quat = T.mat2quat(target_rot) dquat = T.quat_slerp(current_quat, target_quat, 0.01) dquat = T.quat_multiply(dquat, T.quat_inverse(current_quat)) action = np.concatenate([dpos, dquat, [grasp]]) obs, reward, done, info = env.step(action) pos_traj.append(current_pos - table_top_center) angle_traj.append(T.mat2euler(T.quat2mat(current_quat))) # env.render() time = 0 done_task = False while not done_task: time += 1 if time > 2000: break current_pos = env._right_hand_pos dpos = target_pos + table_top_center - current_pos if np.max(np.abs(dpos)) < 1e-2: done_task = True
obs = env._get_observation() depth = obs['depth'] depth = cv2.flip(depth, 0) W_2, H_2 = int(depth.shape[0] / 2), int(depth.shape[0] / 2) crop = 64 start_H = W_2-crop end_H = W_2+crop start_W = H_2-crop end_W = H_2+crop roi = depth[start_H:end_H, start_W:end_W] target_pos, target_angle = env._gen_grasp_gt() #x,y,z,a target_angle = T.mat2euler(target_angle)[-1] gripper_noise = np.random.uniform(low=0.95, high=1.05) target_pos[2] += GRIPPER_LENGTH * gripper_noise lateral_noise = np.random.uniform(low=-0.01, high=0.01) target_pos[0] += np.sin(target_angle) * lateral_noise target_pos[1] += np.cos(target_angle) * lateral_noise cube_length = env.object.size[0] / 2 longitude_noise = np.random.uniform(low=cube_length*-1.5, high=cube_length) target_pos[0] += np.cos(target_angle) * longitude_noise target_pos[1] += np.sin(target_angle) * longitude_noise angle_noise = np.random.uniform(low=-0.01, high=0.01)