def reset(self):
    """
    Reset the robot to its initial position and restart the controllers.

    1) Change gravity to 0 so that the arm doesn't fall
    2) Turn controllers off
    3) Pause simulation
    4) Delete the previous target object if a randomly chosen object is set to True
    5) Reset simulation
    6) Set model pose to the desired one
    7) Unpause simulation
    8) Turn on controllers
    9) Restore gravity
    10) Get observations and return the current state
    11) Check all systems work
    12) Spawn a new target
    13) Pause simulation
    14) Write the initial position into a YAML file
    15) Create YAML files for contact forces in order to get the average over 2 contacts
    16) Create a YAML file for collision to make sure a collision is registered despite the high noise in the topic
    17) Unpause simulation because in the next step the system must be running, otherwise no data is seen by the subscribers
    18) Publish the episode reward, set the accumulated reward back to 0 and increment the episode number
    19) Return state
    """
    self.gazebo.change_gravity(0, 0, 0)
    self.controllers_object.turn_off_controllers()
    self.gazebo.pauseSim()
    self.gazebo.resetSim()

    # Initial joint positions; wrist_3 is fixed to 90 degrees here
    # (it could alternatively be randomised between -90 and 90 degrees).
    init_position = [1.5, -1.2, 1.4, -1.87, -1.57, 1.57]
    self.pickbot_joint_publisher_object.set_joints(init_position)
    self.set_target_object(random_object=self._random_object, random_position=self._random_position)
    self.gazebo.unpauseSim()
    self.controllers_object.turn_on_controllers()
    self.gazebo.change_gravity(0, 0, -9.81)
    self._check_all_systems_ready()
    # self.randomly_spawn_object()

    last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
    with open('last_position.yml', 'w') as yaml_file:
        yaml.dump(last_position, yaml_file, default_flow_style=False)
    with open('contact_1_force.yml', 'w') as yaml_file:
        yaml.dump(0.0, yaml_file, default_flow_style=False)
    with open('contact_2_force.yml', 'w') as yaml_file:
        yaml.dump(0.0, yaml_file, default_flow_style=False)
    with open('collision.yml', 'w') as yaml_file:
        yaml.dump(False, yaml_file, default_flow_style=False)

    observation = self.get_obs()

    # get the maximum distance to the object to calculate the reward
    self.max_distance, _ = U.get_distance_gripper_to_object(self.object_height)
    self.min_distance = self.max_distance
    self.gazebo.pauseSim()
    state = U.get_state(observation)
    self._update_episode()
    self.gazebo.unpauseSim()
    return state
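# --- Illustrative sketch (not part of the environment code above) ---
# Step 18 of the docstring is handled by self._update_episode(), whose body is
# not shown here. Based on that description (publish the accumulated episode
# reward, reset the accumulator, advance the episode counter), a minimal
# standalone helper could look like the following; reward_pub and the episode
# counters are hypothetical attribute names.
from std_msgs.msg import Float64


def update_episode(env):
    """Publish the accumulated episode reward and start a new episode."""
    msg = Float64()
    msg.data = env.accumulated_episode_reward
    env.reward_pub.publish(msg)         # hypothetical Float64 publisher
    env.accumulated_episode_reward = 0
    env.episode_steps = 0               # hypothetical per-episode step counter
    env.episode_num += 1                # hypothetical episode counter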
def step(self, action):
    """
    Given the action selected by the learning algorithm, perform the corresponding movement of the robot.

    return: the state of the robot, the corresponding reward for the step and whether it is done (terminal state)

    1) Read the last published joint positions
    2) Define the new joint positions according to the chosen action
    3) Write the joint positions into a YAML file to save the last published joints for the next step
    4) Unpause, move to that position for a defined time, pause
    5) Get observations and pause the simulation
    6) Convert observations into a state
    7) Unpause the simulation, check if it is done, calculate done_reward and pause the simulation again
    8) Calculate the reward based on the observation and done_reward
    9) Unpause so that topics can be received in the next step
    10) Return state, reward, done
    """
    self.old_obs = self.get_obs()
    print("====================================================================")
    # print("action: {}".format(action))

    # 1) read last_position from the previous observation
    last_position = self.old_obs[:6]
    # with open("last_position.yml", 'r') as stream:
    #     try:
    #         last_position = yaml.load(stream, Loader=yaml.Loader)
    #     except yaml.YAMLError as exc:
    #         print(exc)

    # 2) get the new joint positions according to the chosen action
    if self._joint_increment_value is None:
        next_action_position = action
    else:
        next_action_position = self.get_action_to_position(
            np.clip(action, -self._joint_increment_value, self._joint_increment_value),
            last_position)
    print("next action position: {}".format(np.around(next_action_position, decimals=3)))

    # 3) write last_position into the YAML file
    # with open('last_position.yml', 'w') as yaml_file:
    #     yaml.dump(next_action_position, yaml_file, default_flow_style=False)

    # 4) unpause, move to the position for a certain time
    self.gazebo.unpauseSim()
    self.pickbot_joint_publisher_object.move_joints(next_action_position)
    # time.sleep(self.running_step)

    # Busy waiting until all the joints reach the next_action_position (first the third joints are reversed)
    start_ros_time = rospy.Time.now()
    while True:
        # Check collision:
        invalid_collision = self.get_collisions()
        if invalid_collision:
            print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
            observation = self.get_obs()
            print("joints after reset collision: {}".format(observation[:6]))
            # calculate the reward immediately
            distance_error = observation[6:9] - observation[13:16]
            orientation_error = quaternion_multiply(observation[9:13], quaternion_conjugate(observation[16:]))
            rewardDist = UMath.rmseFunc(distance_error)
            rewardOrientation = 2 * np.arccos(abs(orientation_error[0]))
            reward = UMath.computeReward(rewardDist, rewardOrientation, invalid_collision)
            print("Reward this step after colliding {}".format(reward))
            self.accumulated_episode_reward += reward
            return U.get_state(observation), reward, True, {}

        elapsed_time = rospy.Time.now() - start_ros_time
        if np.isclose(next_action_position, self.joints_state.position, rtol=0.0, atol=0.01).all():
            break
        elif elapsed_time > rospy.Duration(2):  # time out
            print("TIME OUT, joints haven't reached their positions")
            break

    # 5) Get observations and pause the simulation
    observation = self.get_obs()
    print("Observation in the step: {}".format(np.around(observation[:6], decimals=3)))
    print("Joints in the step: {}".format(np.around(self.joints_state.position, decimals=3)))
    # if observation[0] < self.min_distance:
    #     self.min_distance = observation[0]
    self.gazebo.pauseSim()

    # 6) Convert observations into a state
    state = U.get_state(observation)
    # U.get_obj_orient()

    # 7) Unpause the simulation, check if it is done, calculate done_reward
    self.gazebo.unpauseSim()
    done, done_reward, invalid_collision = self.is_done(observation, last_position)
    self.gazebo.pauseSim()

    # 8) Calculate the reward based on the observation and done_reward and update the accumulated episode reward
    # reward = self.compute_reward(observation, done_reward, invalid_contact)
    # reward = UMath.compute_reward_orient(observation, done_reward, invalid_contact)
    distance_error = observation[6:9] - observation[13:16]
    orientation_error = quaternion_multiply(observation[9:13], quaternion_conjugate(observation[16:]))
    rewardDist = UMath.rmseFunc(distance_error)
    rewardOrientation = 2 * np.arccos(abs(orientation_error[0]))
    reward = UMath.computeReward(rewardDist, rewardOrientation, invalid_collision) + done_reward
    print("Reward this step {}".format(reward))
    self.accumulated_episode_reward += reward

    # 9) Unpause so that topics can be received in the next step
    self.gazebo.unpauseSim()
    self.episode_steps += 1

    # 10) Return state, reward, done
    return state, reward, done, {}
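# --- Illustrative sketch (not part of the environment code above) ---
# The reward in step() combines an RMSE over the position error with an
# orientation error derived from the quaternion difference
# q_err = q_current * conj(q_target); the rotation angle between the two frames
# is 2 * arccos(|w|), where w is the scalar part of q_err. Which index holds w
# depends on the quaternion convention: tf.transformations uses [x, y, z, w],
# while the code above reads element 0, which matches a [w, x, y, z] ordering.
# A self-contained version of the same math, assuming tf.transformations and
# its [x, y, z, w] ordering, might look like this:
import numpy as np
from tf.transformations import quaternion_multiply, quaternion_conjugate


def position_rmse(current_pos, target_pos):
    """Root-mean-square error between two 3D positions."""
    err = np.asarray(current_pos, dtype=float) - np.asarray(target_pos, dtype=float)
    return float(np.sqrt(np.mean(err ** 2)))


def quaternion_angle_error(q_current, q_target):
    """Rotation angle in radians between two [x, y, z, w] quaternions."""
    q_err = quaternion_multiply(q_current, quaternion_conjugate(q_target))
    w = min(abs(q_err[3]), 1.0)  # clamp against numerical overshoot
    return 2.0 * np.arccos(w)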
def reset(self):
    """
    Reset the robot to its initial position and restart the controllers.

    1) Change gravity to 0 so that the arm doesn't fall
    2) Turn controllers off
    3) Pause simulation
    4) Delete the previous target object if a randomly chosen object is set to True
    5) Reset simulation
    6) Set model pose to the desired one
    7) Unpause simulation
    8) Turn on controllers
    9) Restore gravity
    10) Get observations and return the current state
    11) Check all systems work
    12) Spawn a new target
    13) Pause simulation
    14) Write the initial position into a YAML file
    15) Create YAML files for contact forces in order to get the average over 2 contacts
    16) Create a YAML file for collision to make sure a collision is registered despite the high noise in the topic
    17) Unpause simulation because in the next step the system must be running, otherwise no data is seen by the subscribers
    18) Publish the episode reward, set the accumulated reward back to 0 and increment the episode number
    19) Return state
    """
    # print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Reset %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
    self.gazebo.change_gravity(0, 0, 0)
    self.controllers_object.turn_off_controllers()

    # turn off the gripper
    # U.turn_off_gripper()

    self.gazebo.resetSim()
    self.gazebo.pauseSim()
    self.gazebo.resetSim()
    time.sleep(0.1)

    # turn on the gripper
    # U.turn_on_gripper()

    if self._load_init_pos:
        # load a sample from a previous training result
        sample_ep = random.choice(self.init_samples)
        print("Joints from samples: {}".format(sample_ep[0:6]))
        # self.pickbot_joint_publisher_object.set_joints(sample_ep[0:6])
        self.set_target_object(sample_ep[-6:])
    else:
        self.pickbot_joint_publisher_object.set_joints()
        vg_geo = U.get_link_state("vacuum_gripper_link")
        to_geo = U.get_link_state("target")
        orientation_error = quaternion_multiply(vg_geo[3:], quaternion_conjugate(to_geo[3:]))
        # print("Orientation error {}".format(orientation_error))

        box_pos = U.get_random_door_handle_pos() if self._random_position else self.object_initial_position
        U.change_object_position(self.object_name, box_pos)
        # The code above is hard-coded for the door handle, modify later.
        # TODO: modify reset w.r.t. the object type as in the reach env

    self.gazebo.unpauseSim()
    self.controllers_object.turn_on_controllers()
    self.gazebo.change_gravity(0, 0, -9.81)
    self._check_all_systems_ready()

    # last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
    # last_position = [0, 0, 0, 0, 0, 0]
    # with open('last_position.yml', 'w') as yaml_file:
    #     yaml.dump(last_position, yaml_file, default_flow_style=False)
    # with open('contact_1_force.yml', 'w') as yaml_file:
    #     yaml.dump(0.0, yaml_file, default_flow_style=False)
    # with open('contact_2_force.yml', 'w') as yaml_file:
    #     yaml.dump(0.0, yaml_file, default_flow_style=False)
    with open('collision.yml', 'w') as yaml_file:
        yaml.dump(False, yaml_file, default_flow_style=False)

    observation = self.get_obs()
    # print("current joints {}".format(observation[:6]))

    # get the maximum distance to the object to calculate the reward
    # self.max_distance, _ = U.get_distance_gripper_to_object()
    # self.min_distance = self.max_distance
    self.gazebo.pauseSim()
    state = U.get_state(observation)
    self._update_episode()
    self.gazebo.unpauseSim()
    return state
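# --- Illustrative sketch (not part of the environment code above) ---
# When self._random_position is set, reset() asks U.get_random_door_handle_pos()
# for a new target pose and moves the object there with U.change_object_position().
# The sampler's implementation is not shown; a standalone sketch with purely
# hypothetical workspace bounds could look like this:
import random
from geometry_msgs.msg import Pose


def sample_target_pose():
    """Sample a target pose inside an assumed reachable workspace."""
    pose = Pose()
    pose.position.x = random.uniform(0.5, 0.8)    # hypothetical x range (m)
    pose.position.y = random.uniform(-0.3, 0.3)   # hypothetical y range (m)
    pose.position.z = 1.05                        # hypothetical table height (m)
    pose.orientation.w = 1.0                      # identity orientation
    return pose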
def step(self, action):
    """
    Given the action selected by the learning algorithm, perform the corresponding movement of the robot.

    return: the state of the robot, the corresponding reward for the step and whether it is done (terminal state)

    1) Read the last published joint positions
    2) Define the new joint positions according to the chosen action
    3) Write the joint positions into a YAML file to save the last published joints for the next step
    4) Unpause, move to that position for a defined time, pause
    5) Get observations and pause the simulation
    6) Convert observations into a state
    7) Unpause the simulation, check if it is done, calculate done_reward and pause the simulation again
    8) Calculate the reward based on the observation and done_reward
    9) Unpause so that topics can be received in the next step
    10) Return state, reward, done
    """
    # print("action: {}".format(action))

    # 1) read last_position out of the YAML file
    with open("last_position.yml", 'r') as stream:
        try:
            last_position = yaml.load(stream, Loader=yaml.Loader)
        except yaml.YAMLError as exc:
            print(exc)

    # The value from the YAML file is superseded by the live observation.
    old_observation = self.get_obs()
    last_position = old_observation[1:7]

    # 2) get the new joint positions according to the chosen action
    if self._joint_increment is None:
        next_action_position = action
    else:
        next_action_position = self.get_action_to_position(
            np.clip(action, -self._joint_increment, self._joint_increment),
            last_position)

    # 3) write last_position into the YAML file
    with open('last_position.yml', 'w') as yaml_file:
        yaml.dump(next_action_position, yaml_file, default_flow_style=False)

    # 4) unpause, move to the position for a certain time
    self.gazebo.unpauseSim()
    self.pickbot_joint_pubisher_object.move_joints(next_action_position)

    # Busy waiting until all the joints reach the next_action_position (first the third joints are reversed)
    start_ros_time = rospy.Time.now()
    while True:
        # Check collision:
        invalid_collision = self.get_collisions()
        if invalid_collision:
            print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
            observation = self.get_obs()
            reward = UMath.compute_reward(observation, -200, True)
            observation = self.get_obs()
            print("Test Joint: {}".format(np.around(observation[1:7], decimals=3)))
            return U.get_state(observation), reward, True, {}

        elapsed_time = rospy.Time.now() - start_ros_time
        if np.isclose(next_action_position, self.joints_state.position, rtol=0.0, atol=0.01).all():
            break
        elif elapsed_time > rospy.Duration(2):  # time out
            print("TIME OUT, have not reached destination")
            break

    # time.sleep(self.running_step)
    """
    # Execute the action as long as the current position is not yet close to the
    # target position, there is no invalid collision and the time spent in the
    # while loop stays below the limit, to avoid being stuck touching the object
    # and not being able to reach the desired position.
    time1 = time.time()
    while np.linalg.norm(np.asarray(self.joints_state.position) - np.asarray(next_action_position)) > 0.1 \
            and self.get_collisions() == False and time.time() - time1 < 0.1:
        rospy.loginfo("Not yet reached target position and no collision")
    """

    # 5) Get observations, update the minimum distance, and pause the simulation
    observation = self.get_obs()
    if observation[0] < self.min_distance:
        self.min_distance = observation[0]
    self.gazebo.pauseSim()

    # 6) Convert observations into a state
    state = U.get_state(observation)

    # 7) Unpause the simulation, check if it is done, calculate done_reward
    self.gazebo.unpauseSim()
    done, done_reward, invalid_contact = self.is_done(observation)
    self.gazebo.pauseSim()

    # 8) Calculate the reward based on the observation and done_reward and update the accumulated episode reward
    reward = UMath.compute_reward(observation, done_reward, invalid_contact)
    self.accumulated_episode_reward += reward

    # 9) Unpause so that topics can be received in the next step
    self.gazebo.unpauseSim()
    self.episode_steps += 1

    # 10) Return state, reward, done
    return state, reward, done, {}
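# --- Illustrative sketch (not part of the environment code above) ---
# Both step() variants above map an increment action onto the last joint
# positions via self.get_action_to_position(), whose body is not shown. A
# minimal standalone version of that idea, with hypothetical joint limits,
# could look like this:
import numpy as np


def increments_to_joint_targets(increments, last_position, lower=-3.14, upper=3.14):
    """Add clipped joint increments to the previous joint positions."""
    target = np.asarray(last_position, dtype=float) + np.asarray(increments, dtype=float)
    # Keep the commanded positions inside the (assumed) joint limits.
    return np.clip(target, lower, upper).tolist()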
def reset(self):
    """
    Reset the robot to its initial position and restart the controllers.

    1) Change gravity to 0 so that the arm doesn't fall
    2) Turn controllers off
    3) Pause simulation
    4) Reset simulation
    5) Set model pose to the desired one
    6) Unpause simulation
    7) Turn on controllers
    8) Restore gravity
    9) Get observations and return the current state
    10) Check all systems work
    11) Pause simulation
    12) Write the initial position into a YAML file
    13) Create YAML files for contact forces in order to get the average over 2 contacts
    14) Create a YAML file for collision to make sure a collision is registered despite the high noise in the topic
    15) Unpause simulation because in the next step the system must be running, otherwise no data is seen by the subscribers
    16) Publish the episode reward, set the accumulated reward back to 0 and increment the episode number
    17) Return state
    """
    ###### TEST
    obs = self.get_obs()
    print("Before RESET Joint: {}".format(np.around(obs[1:7], decimals=3)))
    ###### END of TEST

    self.gazebo.change_gravity(0, 0, 0)
    self.controllers_object.turn_off_controllers()
    self.gazebo.resetSim()
    self.gazebo.pauseSim()

    ##### TEST #########
    # idx = 0
    # sys_exit = False
    # correction_ids = []
    # reset_target_pos = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    # current_joint_pos = obs[1:7]
    #
    # for joint_pos in obs[1:7]:
    #     if np.abs(joint_pos - math.pi) < 0.1:
    #         sys_exit = True
    #         correction_ids.append(idx)
    #     idx += 1
    #
    # if sys_exit:
    #     for i in correction_ids:
    #         print("i:{}".format(i))
    #         reset_target_pos[i] = 2.0 if current_joint_pos[i] > 0 else -2.0
    #
    #     self.pickbot_joint_pubisher_object.set_joints(reset_target_pos)
    #     self.pickbot_joint_pubisher_object.set_joints([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
    #### END of TEST #####

    self.pickbot_joint_pubisher_object.set_joints()
    self.set_target_object(random_object=self._random_object, random_position=self._random_position)
    self.gazebo.unpauseSim()
    self.controllers_object.turn_on_controllers()
    self.gazebo.change_gravity(0, 0, -9.81)
    self._check_all_systems_ready()

    ###### TEST #########
    # init_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
    # self.pickbot_joint_pubisher_object.move_joints(init_position)
    #
    # start_ros_time = rospy.Time.now()
    # while True:
    #     elapsed_time = rospy.Time.now() - start_ros_time
    #     if np.isclose(init_position, self.joints_state.position, rtol=0.0, atol=0.01).all():
    #         break
    #     elif elapsed_time > rospy.Duration(2):  # time out
    #         break
    ###### END of TEST ########

    last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
    with open('last_position.yml', 'w') as yaml_file:
        yaml.dump(last_position, yaml_file, default_flow_style=False)
    with open('contact_1_force.yml', 'w') as yaml_file:
        yaml.dump(0.0, yaml_file, default_flow_style=False)
    with open('contact_2_force.yml', 'w') as yaml_file:
        yaml.dump(0.0, yaml_file, default_flow_style=False)
    with open('collision.yml', 'w') as yaml_file:
        yaml.dump(False, yaml_file, default_flow_style=False)

    observation = self.get_obs()
    print("After RESET Joint: {}".format(np.around(observation[1:7], decimals=3)))

    # if sys_exit:
    #     print("##################################################")
    #     print("############# Joint near Pi ######################")
    #     print("Reset_target_pos: {}".format(reset_target_pos))
    #     print("##################################################")

    # get the maximum distance to the object to calculate the reward
    self.max_distance, _ = U.get_distance_gripper_to_object()
    self.min_distance = self.max_distance
    self.gazebo.pauseSim()
    state = U.get_state(observation)
    self._update_episode()
    self.gazebo.unpauseSim()
    return state
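# --- Illustrative sketch (not part of the environment code above) ---
# reset() re-initialises the bookkeeping YAML files one by one. The repeated
# open/dump blocks could be collapsed into a single helper over the same file
# names; a sketch:
import yaml


def reset_bookkeeping_files(last_position):
    """Write the default contents of the per-episode bookkeeping files."""
    defaults = {
        'last_position.yml': list(last_position),
        'contact_1_force.yml': 0.0,
        'contact_2_force.yml': 0.0,
        'collision.yml': False,
    }
    for path, value in defaults.items():
        with open(path, 'w') as f:
            yaml.dump(value, f, default_flow_style=False)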
def step(self, action):
    """
    Given the action selected by the learning algorithm, perform the corresponding movement of the robot.

    return: the state of the robot, the corresponding reward for the step and whether it is done (terminal state)

    1) Read the last published joint positions from YAML
    2) Define the new joint positions according to the chosen action
    3) Write the joint positions into a YAML file to save the last published joints for the next step
    4) Unpause, move to that position for a defined time, pause
    5) Get observations and pause the simulation
    6) Convert observations into a state
    7) Unpause the simulation, check if it is done, calculate done_reward and pause the simulation again
    8) Calculate the reward based on the observation and done_reward
    9) Unpause so that topics can be received in the next step
    10) Return state, reward, done
    """
    # 1) read last_position out of the YAML file
    with open("last_position.yml", 'r') as stream:
        try:
            last_position = yaml.load(stream, Loader=yaml.Loader)
        except yaml.YAMLError as exc:
            print(exc)

    # 2) get the new joint positions according to the chosen action
    next_action_position = self.get_action_to_position(action, last_position)

    # 3) write last_position into the YAML file
    with open('last_position.yml', 'w') as yaml_file:
        yaml.dump(next_action_position, yaml_file, default_flow_style=False)

    # 4) unpause, move to the position for a certain time
    self.gazebo.unpauseSim()
    self.pickbot_joint_publisher_object.move_joints(next_action_position)
    time.sleep(self.running_step)

    # 5) Get observations, update the minimum distance, and pause the simulation
    observation = self.get_obs()
    if observation[0] < self.min_distance:
        self.min_distance = observation[0]
    self.gazebo.pauseSim()

    # 6) Convert observations into a state
    state = U.get_state(observation)

    # 7) Unpause the simulation, check if it is done, calculate done_reward
    self.gazebo.unpauseSim()
    done, done_reward, invalid_contact = self.is_done(observation, last_position)
    self.gazebo.pauseSim()

    # 8) Calculate the reward based on the observation and done_reward and update the accumulated episode reward
    # reward = self.compute_reward(observation, done_reward, invalid_contact)
    reward = UMath.compute_reward(observation, done_reward, invalid_contact)
    self.accumulated_episode_reward += reward

    # 9) Unpause so that topics can be received in the next step
    self.gazebo.unpauseSim()
    self.episode_steps += 1

    # 10) Return state, reward, done
    return state, reward, done, {}
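# --- Illustrative sketch (not part of the environment code above) ---
# step() above round-trips the last commanded joint positions through
# last_position.yml with yaml.load(..., Loader=yaml.Loader). Since the file
# only ever holds plain data (a list of floats), yaml.safe_load/yaml.safe_dump
# would do the same job without allowing arbitrary object construction; a
# sketch of such a round-trip:
import yaml


def write_last_position(position, path='last_position.yml'):
    """Persist the last commanded joint positions as plain YAML."""
    with open(path, 'w') as f:
        yaml.safe_dump([float(p) for p in position], f, default_flow_style=False)


def read_last_position(path='last_position.yml'):
    """Load the joint positions written by write_last_position()."""
    with open(path, 'r') as f:
        return yaml.safe_load(f)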
def step(self, action):
    """
    Given the action selected by the learning algorithm, perform the corresponding movement of the robot.

    return: the state of the robot, the corresponding reward for the step and whether it is done (terminal state)

    1) Read the last joint positions by getting the observation before acting
    2) Get the new joint positions according to the chosen action (actions here are the joint increments)
    3) Publish the joint positions to MoveIt and busy-wait until the movement is complete
    4) Get the new observation after performing the action
    5) Convert observations into a state
    6) Check if the task is done or a crash happened and calculate done_reward
    7) Calculate the reward based on the observation and done_reward
    8) Return state, reward, done
    """
    # print("############################")
    # print("action: {}".format(action))

    self.movement_complete.data = False

    # 1) Read the last joint positions by getting the observation before acting
    old_observation = self.get_obs()

    # 2) Get the new joint positions according to the chosen action (actions here are the joint increments)
    if self._joint_increment is None:
        next_action_position = action
    else:
        next_action_position = self.get_action_to_position(action, old_observation[1:7])

    # 3) Move to the position and wait for MoveIt to complete the execution
    self.publisher_to_moveit_object.pub_joints_to_moveit(next_action_position)
    # rospy.wait_for_message("/pickbot/movement_complete", Bool)
    while not self.movement_complete.data:
        pass

    start_ros_time = rospy.Time.now()
    while True:
        # Check collision:
        # invalid_collision = self.get_collisions()
        # if invalid_collision:
        #     print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
        #     observation = self.get_obs()
        #     reward = UMath.compute_reward(observation, -200, True)
        #     observation = self.get_obs()
        #     print("Test Joint: {}".format(np.around(observation[1:7], decimals=3)))
        #     return U.get_state(observation), reward, True, {}

        elapsed_time = rospy.Time.now() - start_ros_time
        if np.isclose(next_action_position, self.joints_state.position, rtol=0.0, atol=0.01).all():
            break
        elif elapsed_time > rospy.Duration(2):  # time out
            break

    # time.sleep(self.running_step)
    """
    # Execute the action as long as the current position is not yet close to the
    # target position, there is no invalid collision and the time spent in the
    # while loop stays below the limit, to avoid being stuck touching the object
    # and not being able to reach the desired position.
    time1 = time.time()
    while np.linalg.norm(np.asarray(self.joints_state.position) - np.asarray(next_action_position)) > 0.1 \
            and self.get_collisions() == False and time.time() - time1 < 0.1:
        rospy.loginfo("Not yet reached target position and no collision")
    """

    # 4) Get the new observation and update min_distace after performing the action
    new_observation = self.get_obs()
    if new_observation[0] < self.min_distace:
        self.min_distace = new_observation[0]
    # print("observ: {}".format(np.around(new_observation[1:7], decimals=3)))

    # 5) Convert observations into a state
    state = U.get_state(new_observation)

    # 6) Check if it is done, calculate done_reward
    done, done_reward, invalid_contact = self.is_done(new_observation)

    # 7) Calculate the reward based on the observation and done_reward and update the accumulated episode reward
    reward = UMath.compute_reward(new_observation, done_reward, invalid_contact)

    ### TEST ###
    if done:
        joint_pos = self.joints_state.position
        print("Joint in step (done): {}".format(np.around(joint_pos, decimals=3)))
    ### END of TEST ###

    self.accumulated_episode_reward += reward
    self.episode_steps += 1

    return state, reward, done, {}
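# --- Illustrative sketch (not part of the environment code above) ---
# step() and reset() both busy-wait until the measured joint positions are
# close to the commanded ones or a two-second timeout expires. The repeated
# loop could be factored into one helper; get_current is any callable that
# returns the current joint positions (e.g. lambda: self.joints_state.position).
import numpy as np
import rospy


def wait_for_joints(target, get_current, tolerance=0.01, timeout=2.0):
    """Return True once all joints are within tolerance, False on timeout."""
    start = rospy.Time.now()
    while not rospy.is_shutdown():
        if np.isclose(target, get_current(), rtol=0.0, atol=tolerance).all():
            return True
        if rospy.Time.now() - start > rospy.Duration(timeout):
            return False
        rospy.sleep(0.01)  # yield instead of spinning at full speed
    return False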
def reset(self):
    """
    Reset the robot to its initial position and restart the controllers.

    1) Publish the initial joint positions to MoveIt
    2) Busy-wait until the movement is completed by MoveIt
    3) Set the target object to a random position
    4) Check all systems work
    5) Create YAML files for contact forces in order to get the average over 2 contacts
    6) Create a YAML file for collision to make sure a collision is registered despite the high noise in the topic
    7) Get observations and return the current state
    8) Publish the episode reward, set the accumulated reward back to 0 and increment the episode number
    9) Return state
    """
    # print("Joint (reset): {}".format(np.around(self.joints_state.position, decimals=3)))

    init_joint_pos = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
    self.publisher_to_moveit_object.set_joints(init_joint_pos)

    # print(">>>>>>>>>>>>>>>>>>> RESET: waiting for the movement to complete")
    # rospy.wait_for_message("/pickbot/movement_complete", Bool)
    while not self.movement_complete.data:
        pass
    # print(">>>>>>>>>>>>>>>>>>> RESET: Waiting complete")

    start_ros_time = rospy.Time.now()
    while True:
        # Check collision:
        # invalid_collision = self.get_collisions()
        # if invalid_collision:
        #     print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
        #     observation = self.get_obs()
        #     reward = UMath.compute_reward(observation, -200, True)
        #     observation = self.get_obs()
        #     print("Test Joint: {}".format(np.around(observation[1:7], decimals=3)))
        #     return U.get_state(observation), reward, True, {}

        elapsed_time = rospy.Time.now() - start_ros_time
        if np.isclose(init_joint_pos, self.joints_state.position, rtol=0.0, atol=0.01).all():
            break
        elif elapsed_time > rospy.Duration(2):  # time out
            break

    self.set_target_object(random_object=self._random_object, random_position=self._random_position)
    self._check_all_systems_ready()

    with open('contact_1_force.yml', 'w') as yaml_file:
        yaml.dump(0.0, yaml_file, default_flow_style=False)
    with open('contact_2_force.yml', 'w') as yaml_file:
        yaml.dump(0.0, yaml_file, default_flow_style=False)
    with open('collision.yml', 'w') as yaml_file:
        yaml.dump(False, yaml_file, default_flow_style=False)

    observation = self.get_obs()
    self.object_position = observation[9:12]
    # print("Joint (after): {}".format(np.around(observation[1:7], decimals=3)))

    # get the maximum distance to the object to calculate the reward
    self.max_distance, _ = U.get_distance_gripper_to_object()
    self.min_distace = self.max_distance

    state = U.get_state(observation)
    self._update_episode()
    return state
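# --- Illustrative sketch (not part of the environment code above) ---
# step() and reset() spin on `while not self.movement_complete.data: pass`
# until MoveIt reports completion. A variant that yields the CPU and gives up
# after a timeout, assuming the same movement_complete Bool flag, could be:
import rospy


def wait_for_movement_complete(env, timeout=10.0, poll=0.01):
    """Wait until env.movement_complete.data is True or the timeout expires."""
    start = rospy.Time.now()
    while not env.movement_complete.data:
        if rospy.Time.now() - start > rospy.Duration(timeout):
            return False
        rospy.sleep(poll)
    return True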