def _set_action(self, action): assert action.shape == (4,) action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (4, ) action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (4, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] # rotation grip_mat = rotations.quat2mat(self.sim.data.mocap_quat) grip_v = np.squeeze(np.matmul(grip_mat, np.array([0, 1, 0]))) grip_radius = (math.atan2(grip_v[0], grip_v[1]) + 2 * math.pi) % (2 * math.pi) if grip_radius > math.pi: grip_radius = (grip_radius - 2 * math.pi) angle_ctrl = grip_radius + action[3] rot_mat = np.array([[1, 0, 0], [0, math.cos(angle_ctrl), -math.sin(angle_ctrl)], [0, math.sin(angle_ctrl), math.cos(angle_ctrl)]]) axis_mat = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]]) rot_ctrl = rotations.mat2quat(np.matmul(axis_mat, rot_mat)) pos_ctrl *= 0.05 # limit maximum change in position gripper_ctrl = np.array([0, 0]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 2: for i in range(10): self.sim.step() pos_ctrl = np.array([0.0, 0.02, 0.0]) gripper_ctrl = np.array([0, 0]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.sim.step()
def step(self, action): self.num_step += 1 ## parsing of primitive actions delta_x, delta_y = action # cap deltas to be between -1, 1 delta_x = max(-1, min(1, delta_x)) delta_y = max(-1, min(1, delta_y)) x, y = self.get_mocap_state() x += delta_x * 0.05 y += delta_y * 0.05 out_of_bound = (x < 0.3 or x > 0.8) or (y < 0.0 or y > 0.6) if not out_of_bound: delta_pos = np.array([delta_x * 0.05, delta_y * 0.05, 0.0]) delta_quat = np.array([0.0, 0.0, 1.0, 0.]) delta = np.concatenate((delta_pos, delta_quat)) mocap_set_action(self.sim, delta) self.do_simulation() ob = self._get_obs() total_reward, success = self.calc_reward(ob, True) ## getting state info = {"done": None} if success: done = True info["done"] = "goal reached" elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False info['absolute_ob'] = self.copy_obs(ob) return ob, total_reward, done, info
def step(self, action): self.num_step += 1 ## parsing of primitive actions delta_x, delta_y, delta_z = action # cap the motions delta_x = max(-1, min(1, delta_x)) delta_y = max(-1, min(1, delta_y)) delta_z = max(-1, min(1, delta_z)) x, y, z = self.get_mocap_state() new_x = max(0.3, min(0.8, x + delta_x * 0.05)) new_y = max(0.0, min(0.6, y + delta_y * 0.05)) new_z = max(0.11, min(0.25, z + delta_z * 0.05)) delta_pos = np.array([new_x - x, new_y - y, new_z - z]) delta_quat = np.array([0.0, 0.0, 1.0, 0.]) delta = np.concatenate((delta_pos, delta_quat)) mocap_set_action(self.sim, delta) self.do_simulation() ob = self._get_obs() total_reward = self.calc_reward(ob) ## getting state info = {"done": None} if total_reward == 0: done = True info["done"] = "goal reached" elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False info['absolute_ob'] = ob.copy() return ob, total_reward, done, info
def _set_action(self, action): action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, rot_ctrl, gripper_ctrl = action[:3], action[3], action[4] #pos_ctrl[1:]=np.array([0, 0]) pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl *= 90. #rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion axis = np.array([1, 0, 0]) rot_ctrl = quat_from_angle_and_axis(np.pi * rot_ctrl / 180, axis) rot_ctrl = rotations.quat_mul(np.array([1., 0., 1., 0.]), rot_ctrl) gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (3,) self.counter += 1 action = action.copy() # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([-1, -1]) assert gripper_ctrl.shape == (2,) if self.counter <= 1: action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) action[2] = 0.56402952 for _ in range(20): utils.mocap_set_action_abs(self.sim, action) self.sim.step() else: pos_ctrl *= 0.05 # limit maximum change in position action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action(self.sim, action) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 3: # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025: action = np.array([0,0,-0.05,1,0,1,0,1,1]) utils.mocap_set_action(self.sim, action) for _ in range(5): utils.ctrl_set_action(self.sim, action) self.sim.step()
def _set_action(self, action): if (not action.shape == (8,)) and (not action.shape == (4,)): assert False action = action.copy() # ensure that we don't change the action outside of this scope if action.shape == (4,): mocap_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl, rot_ctrl = mocap_ctrl, [1., 0., 1., 0.] elif action.shape == (8,): mocap_ctrl, gripper_ctrl = action[:7], action[7] pos_ctrl, rot_ctrl = mocap_ctrl[:3], mocap_ctrl[3:7] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = rot_ctrl gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2,) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): ''' Sets the action to apply to the simulation ''' assert action.shape == (self.n_actions, ) # Ensure the action doesn't change outside of this scope action = action.copy() pos_ctrl, gripper_ctrl = action[:3], action[3] # Limit the maximum change in position pos_ctrl *= 20 # Fix roation of the hand, expressed as a quaterion rot_ctrl = action[4:] gripper_ctrl = np.array([gripper_ctrl]) assert gripper_ctrl.shape == (1, ) if self.block_gripper: gripper_ctrl = [0.0] action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply the action to the simulation utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (3, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([0, 0]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) if self.counter <= 1: action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) if self.use_close_loop: action[2] = 0.41644691 for _ in range(20): utils.mocap_set_action_abs(self.sim, action) self.sim.step() else: pos_ctrl *= 0.05 # limit maximum change in position action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action(self.sim, action) if self.counter >= 3: for _ in range(10): self.sim.step() pos_ctrl = np.array([0.0, 0.02, 0.0]) gripper_ctrl = np.array([0, 0]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.sim.step()
def _set_action(self, action): assert action.shape == (4, ) action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] pos_ctrl *= 0.5 # limit maximum change in position rot_ctrl = [ 0., 0, 1, 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) gripper_pos = self.sim.data.get_site_xpos('robot0:grip') z_move = gripper_pos[2] + action[2] if z_move < 0.15: action[2] = 0.15 - gripper_pos[2] # Apply action to simulation. utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (3, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([1, 1]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 2: # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025: img = self.sim.render(width=500, height=500, camera_name="external_camera_1") self.sim.step() pos_ctrl = np.array([0.0, 0.0, 0.0]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) # logging # grip_pos = self.sim.data.get_site_xpos('robot0:grip') # obs_pos = self.sim.data.get_site_xpos('object0') # offset = obs_pos - grip_pos # print(offset, np.linalg.norm(offset, axis = -1)) for _ in range(20): utils.ctrl_set_action(self.sim, action) self.sim.step() img = self.sim.render(width=500, height=500, camera_name="external_camera_1") for _ in range(10): pos_ctrl = np.array([0.0, 0.02, 0.0]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) self.sim.step()
def _set_action(self, action): assert action.shape == (3, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion gripper_ctrl = np.array([1, 1]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) if self.counter <= 1: action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) if self.use_close_loop: action[2] = 0.46470766 for _ in range(20): utils.mocap_set_action_abs(self.sim, action) self.sim.step() else: pos_ctrl *= 0.05 # limit maximum change in position action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.mocap_set_action(self.sim, action) if self.counter >= 3: # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025: self.sim.step() pos_ctrl = np.array([0.0, 0.0, 0.0]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) # logging # grip_pos = self.sim.data.get_site_xpos('robot0:grip') # obs_pos = self.sim.data.get_site_xpos('object0') # offset = obs_pos - grip_pos # print(offset, np.linalg.norm(offset, axis = -1)) for _ in range(20): utils.ctrl_set_action(self.sim, action) self.sim.step() pos_ctrl = np.array([0.0, 0.0, 0.2]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def set_action(self, action: np.ndarray) -> None: utils.mocap_set_action(self.mj_sim, action)
def _set_action(self, action): """ Define the Action: x,y,z relative position of the grippe + open/close gripper """ assert action.shape == (4,) # define the action space = x,y,z,open/close action = action.copy() eff_pos_control, gripper_control = action[:3], action[3] # 0.05 open | 0 close #print("gripper_control = {}".format(action[3])) #eff_pos_control *= 0.05 # limit the maximum change in position #print('eff_pos_control = {}'.format(eff_pos_control)) #eff_rot_control = [0., 0., 0., 1] # in this case, we fix the rotation of the end-effector eff_rot_control = [1., 0., 1., 0.] gripper_control = np.array([gripper_control, -gripper_control]) # l_finger(-1,0) r_finger(0,1) assert gripper_control.shape == (2,) # if the gripper is blocked, open the gripper if self.block_gripper: gripper_control = np.zeros_like(gripper_control) action = np.concatenate([eff_pos_control, eff_rot_control, gripper_control]) self.action_done_flag = False if self.mode == "robot": action = action.copy() action = action.astype(np.float32) #global counter #counter = counter + 1 print('Set Action displacement = {}'.format(action[:3])) self.action_publisher.publish(action) self.action_done_flag = rospy.wait_for_message('/pyrobot/action_done', Bool).data if self.action_done_flag == True: rospy.loginfo('[STATUS] Action Done') else: rospy.loginfo('[STATUS] Action not feasbile and passed') """ while(self.action_done_flag != True): n = n + 1 print('n = {}'.format(n)) rospy.loginfo('action done = {}'.format(self.action_done_flag)) #self.action_done_flag = rospy.wait_for_message('/pyrobot/action_done', Bool).data # DEBUG: if self.action_done_flag == True: pass else: rospy.loginfo('Waiting Action to be Done ~') end_time = timer() time_passed = end_time - start_time print('time_passed = {}'.format(time_passed)) if time_passed >= 5.: self.action_done_flag = True rospy.loginfo('Time Out and Pass ~~') """ global counter counter = counter + 1 print('counter = {}'.format(counter)) # DEBUG: #rospy.loginfo('[DEBUG] Action Published') #rospy.loginfo(action) elif self.mode == "sim": # Apply actions to simulation # DEBUG: #print('action = {}'.format(action)) #utils.ctrl_set_action(self.sim, action) print('[Set Action] action = {}'.format(action)) utils.mocap_set_action(self.sim, action) """
def _set_action(self, action): if self._use_real_robot: assert action.shape == (self.n_actions, ) # 6 mobile base action = action.copy( ) # ensure that we don't change the action outside of this scope if self.debug_print: print("_set_action:", action) pos_ctrl, base_ctrl, gripper_ctrl = action[:3], action[ 3:-1], action[-1] pos_ctrl *= 0.05 # limit maximum change in position base_ctrl *= 0.01 rot_ctrl = [ 0, 0.707, 0.707, 0 ] # fixed rotation of the end effector, expressed as a quaternion if self.gripper_close: gripper_ctrl = -1.0 else: gripper_ctrl = 1.0 if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) # action = np.concatenate([pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl]) ee_pose = self.husky_ur5_robot.arm_get_ee_pose(self.use_arm) # arm_action = [ee_pose.pose.position.x + pos_ctrl[0], # ee_pose.pose.position.y + pos_ctrl[1], # ee_pose.pose.position.z + pos_ctrl[2], # ee_pose.pose.orientation.w, # ee_pose.pose.orientation.x, # ee_pose.pose.orientation.y, # ee_pose.pose.orientation.z] arm_action = pos_ctrl print("arm_action: ", arm_action) # Applay action to real robot # self.husky_ur5_robot.arm_set_ee_pose_relative(pos_ctrl) self.husky_ur5_robot.arm_set_ee_pose_relative(arm_action) self.husky_ur5_robot.base_velocity_cmd(base_ctrl) # self.husky_ur5_robot.base_go_to_relative(base_ctrl) if self.gripper_close: self.husky_ur5_robot.gripper_close(self.use_arm) else: self.husky_ur5_robot.gripper_open(self.use_arm) else: assert action.shape == (self.n_actions, ) # 6 mobile base action = action.copy( ) # ensure that we don't change the action outside of this scope if self.debug_print: print("_set_action:", action) pos_ctrl, base_ctrl, gripper_ctrl = action[:3], action[ 3:-1], action[-1] # pos_ctrl, gripper_ctrl = action[:3], action[3:] pos_ctrl *= 0.03 # limit maximum change in position base_ctrl *= 0.01 # rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion rot_ctrl = [0, 0.707, 0.707, 0] #(0 0 0) if self.gripper_close: gripper_ctrl = -1.0 else: gripper_ctrl = 1.0 gripper_ctrl = self.gripper_format_action(gripper_ctrl) # assert gripper_ctrl.shape == (2,) assert gripper_ctrl.shape == (self.gripper_actual_dof, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate( [pos_ctrl, rot_ctrl, base_ctrl, gripper_ctrl]) # action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. utils.ctrl_set_action(self.sim, action) # base control + gripper control utils.mocap_set_action( self.sim, action) # arm control in cartesion (x, y, z) # Applay action to real robot gripper_cmd = 0 if self.gripper_close: gripper_cmd = np.array([-1.0]) else: gripper_cmd = np.array([1.0]) action_ros = np.concatenate( [pos_ctrl, rot_ctrl, base_ctrl, gripper_cmd])
def _set_action(self, action): action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl, gripper_ctrl = action[:3], action[3] stiffness_ctrl = 0.0 psv_stiffness_ctrl = 0.0 par_stiffness_ctrl = 0.0 pos_ctrl *= 0.05 # limit maximum change in position rot_ctrl = [ 1., 0., 1., 0. ] # fixed rotation of the end effector, expressed as a quaternion if action.shape[0] > 4: if action.shape[0] > 5: if action.shape[0] > 6: par_stiffness_ctrl = 10.0 * action[6] self.par_prev_stiffness += par_stiffness_ctrl self.par_prev_stiffness = np.max( [np.min([self.par_prev_stiffness, 50.0]), 0.0]) # self.psv_prev_stiffness -= self.par_prev_stiffness self.sim.model.jnt_stiffness[self.sim.model.joint_name2id( 'robot0:r_gripper_finger_joint' )] = self.par_prev_stiffness self.sim.model.jnt_stiffness[self.sim.model.joint_name2id( 'robot0:l_gripper_finger_joint' )] = self.par_prev_stiffness psv_stiffness_ctrl = (self.max_stiffness / 5.0) * action[5] self.psv_prev_stiffness += psv_stiffness_ctrl self.psv_prev_stiffness = np.max([ np.min([ self.psv_prev_stiffness, self.max_stiffness - self.par_prev_stiffness ]), self.max_stiffness / 25.0 ]) self.sim.model.actuator_gainprm[ self.sim.model. actuator_name2id('robot0:l_gripper_finger_joint'), 0] = self.psv_prev_stiffness self.sim.model.actuator_gainprm[ self.sim.model. actuator_name2id('robot0:r_gripper_finger_joint'), 0] = self.psv_prev_stiffness self.sim.model.actuator_biasprm[ self.sim.model. actuator_name2id('robot0:l_gripper_finger_joint'), 1] = -self.psv_prev_stiffness self.sim.model.actuator_biasprm[ self.sim.model. actuator_name2id('robot0:r_gripper_finger_joint'), 1] = -self.psv_prev_stiffness stiffness_ctrl = (self.max_stiffness / 5.0) * action[4] self.prev_stiffness += stiffness_ctrl self.prev_stiffness = np.max([ np.min([ self.prev_stiffness, self.par_prev_stiffness + self.psv_prev_stiffness ]), self.par_prev_stiffness ]) # print("Series:{}, Parallel:{}, Active:{}".format(self.psv_prev_stiffness,self.par_prev_stiffness, self.prev_stiffness)) gripper_ctrl = 1.0 * np.array([gripper_ctrl, gripper_ctrl]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) prob = 0.1 if self.pert_type == 'delay' else -0.1 if np.random.random() > prob: action = np.concatenate( [pos_ctrl, rot_ctrl, gripper_ctrl, [self.prev_stiffness]]) self.previous_input = action[7:] else: try: action = np.concatenate( [pos_ctrl, rot_ctrl, self.previous_input]) except: action = np.concatenate([pos_ctrl, rot_ctrl, np.zeros(3)]) # print("Gripper pose:{}, stiffness:{}".format(gripper_ctrl, self.prev_stiffness)) # self.prev_stiffness = self.sim.model.actuator_gainprm[self.sim.model.actuator_name2id('robot0:l_gripper_finger_joint'), 0] # Apply action to simulation. utils.ctrl_set_action(self.sim, action, self.stiffness_on) utils.mocap_set_action(self.sim, action)
def _set_action(self, action): assert action.shape == (4, ) self.counter += 1 action = action.copy( ) # ensure that we don't change the action outside of this scope pos_ctrl = action[:3] grip_mat = rotations.quat2mat(self.sim.data.mocap_quat) grip_v = np.squeeze(np.matmul(grip_mat, np.array([0, 1, 0]))) grip_radius = (math.atan2(grip_v[0], grip_v[1]) + 2 * math.pi) % (2 * math.pi) if grip_radius > math.pi: grip_radius = (grip_radius - 2 * math.pi) angle_ctrl = grip_radius + action[3] rot_mat = np.array([[1, 0, 0], [0, math.cos(angle_ctrl), -math.sin(angle_ctrl)], [0, math.sin(angle_ctrl), math.cos(angle_ctrl)]]) axis_mat = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]]) rot_ctrl = rotations.mat2quat(np.matmul(axis_mat, rot_mat)) pos_ctrl *= 0.05 # limit maximum change in position gripper_ctrl = np.array([1, 1]) assert gripper_ctrl.shape == (2, ) if self.block_gripper: gripper_ctrl = np.zeros_like(gripper_ctrl) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) # Apply action to simulation. # utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) if self.counter >= 2: # if np.linalg.norm(pos_ctrl, axis=-1) < 0.025: self.sim.step() img = self.sim.render(width=500, height=500, camera_name="external_camera_1") cv2.imwrite("/checkpoint/jdong1021/grasp_sample1.png", img) pos_ctrl = np.array([0.0, 0.0, 0.0]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action) # logging # grip_pos = self.sim.data.get_site_xpos('robot0:grip') # obs_pos = self.sim.data.get_site_xpos('object0') # offset = obs_pos - grip_pos # print(offset, np.linalg.norm(offset, axis = -1)) for _ in range(20): utils.ctrl_set_action(self.sim, action) self.sim.step() img = self.sim.render(width=500, height=500, camera_name="external_camera_1") cv2.imwrite("/checkpoint/jdong1021/grasp_sample2.png", img) pos_ctrl = np.array([0.0, 0.0, 0.2]) gripper_ctrl = np.array([-1, -1]) action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) utils.ctrl_set_action(self.sim, action) utils.mocap_set_action(self.sim, action)
def step(self, action): self.num_step += 1 ## parsing of primitive actions delta_x, delta_y, delta_z, gripper = action # cap the motions delta_x = max(-1, min(1, delta_x)) delta_y = max(-1, min(1, delta_y)) delta_z = max(-1, min(1, delta_z)) x, y, z = self.get_mocap_state() x += delta_x * 0.05 y += delta_y * 0.05 z += delta_z * 0.05 out_of_bound = (x < 0.3 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < 0.10 or z > 0.25) if not out_of_bound: delta_pos = np.array( [delta_x * 0.05, delta_y * 0.05, delta_z * 0.05]) delta_quat = np.array([0.0, 0.0, 1.0, 0.]) delta = np.concatenate((delta_pos, delta_quat)) mocap_set_action(self.sim, delta) self.close_gripper(gripper) self.do_simulation() # else: # print("out of bound as x:%.4f, y:%.4f, z:%.4f"%(x,y,z)) ob = self._get_obs() total_reward = self.calc_reward(ob) box_loc = ob[self.space_dim:2 * self.space_dim] ## getting state info = {"done": None} if total_reward == 0: done = True info["done"] = "goal reached" elif (self.num_step > self.max_num_steps): done = True info["done"] = "max_steps_reached" else: done = False # check block loc obj_pos = ob[self.space_dim:2 * self.space_dim] block_out_of_bound = False if obj_pos.size == 2: x, y = obj_pos if (x < 0.3 or x > 0.8) or (y < 0.0 or y > 0.6): block_out_of_bound = True else: x, y, z = obj_pos if (x < 0.3 or x > 0.8) or (y < 0.0 or y > 0.6) or (z < -0.1 or z > 0.25): block_out_of_bound = True if block_out_of_bound: done = True info['done'] = 'unstable simulation' total_reward -= (self.max_num_steps - self.num_step) + 2 info['absolute_ob'] = ob.copy() return ob, total_reward, done, info