def reset(self): """ Reset The Robot to its initial Position and restart the Controllers 1) Change Gravity to 0 ->That arm doesnt fall 2) Turn Controllers off 3) Pause Simulation 4) Delete previous target object if randomly chosen object is set to True 4) Reset Simulation 5) Set Model Pose to desired one 6) Unpause Simulation 7) Turn on Controllers 8) Restore Gravity 9) Get Observations and return current State 10) Check all Systems work 11) Spawn new target 12) Pause Simulation 13) Write initial Position into Yaml File 14) Create YAML Files for contact forces in order to get the average over 2 contacts 15) Create YAML Files for collision to make shure to see a collision due to high noise in topic 16) Unpause Simulation cause in next Step Sysrem must be running otherwise no data is seen by Subscribers 17) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number 18) Return State """ self.gazebo.change_gravity(0, 0, 0) self.controllers_object.turn_off_controllers() self.gazebo.pauseSim() self.gazebo.resetSim() # randomly set the wrist_3 to -90 until 90 degree init_position = [1.5, -1.2, 1.4, -1.87, -1.57, 1.57] self.pickbot_joint_publisher_object.set_joints(init_position) self.set_target_object(random_object=self._random_object, random_position=self._random_position) self.gazebo.unpauseSim() self.controllers_object.turn_on_controllers() self.gazebo.change_gravity(0, 0, -9.81) self._check_all_systems_ready() # self.randomly_spawn_object() last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0] with open('last_position.yml', 'w') as yaml_file: yaml.dump(last_position, yaml_file, default_flow_style=False) with open('contact_1_force.yml', 'w') as yaml_file: yaml.dump(0.0, yaml_file, default_flow_style=False) with open('contact_2_force.yml', 'w') as yaml_file: yaml.dump(0.0, yaml_file, default_flow_style=False) with open('collision.yml', 'w') as yaml_file: yaml.dump(False, yaml_file, default_flow_style=False) observation = self.get_obs() # get maximum distance to the object to calculate reward self.max_distance, _ = U.get_distance_gripper_to_object(self.object_height) self.min_distance = self.max_distance self.gazebo.pauseSim() state = U.get_state(observation) self._update_episode() self.gazebo.unpauseSim() return state
def __init__(self, joint_increment_value=0.02, sim_time_factor=0.001, running_step=0.001, random_object=False, random_position=False, use_object_type=False, env_object_type='free_shapes', load_init_pos=False): """ initializing all the relevant variables and connections :param joint_increment_value: increment of the joints :param running_step: gazebo simulation time factor :param random_object: spawn random object in the simulation :param random_position: change object position in each reset :param use_object_type: assign IDs to objects and used them in the observation space :param env_object_type: object type for environment, free_shapes for boxes while others are related to use_case 'door_handle', 'combox', ... """ # Assign Parameters self._joint_increment_value = joint_increment_value self.running_step = running_step self._random_object = random_object self._random_position = random_position self._use_object_type = use_object_type self._load_init_pos = load_init_pos # Assign MsgTypes self.joints_state = JointState() self.contact_1_state = ContactsState() self.contact_2_state = ContactsState() self.collision = Bool() self.camera_rgb_state = Image() self.camera_depth_state = Image() self.contact_1_force = Vector3() self.contact_2_force = Vector3() self.gripper_state = VacuumGripperState() self._list_of_observations = [ "elbow_joint_state", "shoulder_lift_joint_state", "shoulder_pan_joint_state", "wrist_1_joint_state", "wrist_2_joint_state", "wrist_3_joint_state", "vacuum_gripper_pos_x", "vacuum_gripper_pos_y", "vacuum_gripper_pos_z", "vacuum_gripper_ori_w", "vacuum_gripper_ori_x", "vacuum_gripper_ori_y", "vacuum_gripper_ori_z", "object_pos_x", "object_pos_y", "object_pos_z", "object_ori_w", "object_ori_x", "object_ori_y", "object_ori_z", ] # if self._use_object_type: # self._list_of_observations.append("object_type") # Establishes connection with simulator """ 1) Gazebo Connection 2) Controller Connection 3) Joint Publisher """ self.gazebo = GazeboConnection(sim_time_factor=sim_time_factor) self.controllers_object = ControllersConnection() self.pickbot_joint_publisher_object = JointPub() # Define Subscribers as Sensordata """ 1) /pickbot/joint_states 2) /gripper_contactsensor_1_state 3) /gripper_contactsensor_2_state 4) /gz_collisions 5) /pickbot/gripper/state 6) /camera_rgb/image_raw 7) /camera_depth/depth/image_raw """ rospy.Subscriber("/pickbot/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gripper_contactsensor_1_state", ContactsState, self.contact_1_callback) rospy.Subscriber("/gripper_contactsensor_2_state", ContactsState, self.contact_2_callback) rospy.Subscriber("/gz_collisions", Bool, self.collision_callback) rospy.Subscriber("/pickbot/gripper/state", VacuumGripperState, self.gripper_state_callback) # rospy.Subscriber("/camera_rgb/image_raw", Image, self.camera_rgb_callback) # rospy.Subscriber("/camera_depth/depth/image_raw", Image, self.camera_depth_callback) # Define Action and state Space and Reward Range """ Action Space: Box Space with 6 values. State Space: Box Space with 20 values. It is a numpy array with shape (20,) Reward Range: -infinity to infinity """ # Directly use joint_positions as action if self._joint_increment_value is None: high_action = (math.pi - 0.05) * np.ones(6) low_action = -high_action else: # Use joint_increments as action high_action = self._joint_increment_value * np.ones(6) low_action = -high_action self.action_space = spaces.Box(low_action, high_action) self.obs_dim = 20 high = np.inf * np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) # if self._use_object_type: # high = np.append(high, 9) # low = np.append(low, 0) self.reward_range = (-np.inf, np.inf) self._seed() self.done_reward = 0 # set up everything to publish the Episode Number and Episode Reward on a rostopic self.episode_num = 0 self.accumulated_episode_reward = 0 self.episode_steps = 0 self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1) self.reward_list = [] self.episode_list = [] self.step_list = [] self.csv_name = logger.get_dir() + '/result_log' print("CSV NAME") print(self.csv_name) self.csv_success_exp = logger.get_dir( ) + '/success_exp' + datetime.datetime.now().strftime( '%Y-%m-%d_%Hh%Mmin') + '.csv' self.successful_attempts = 0 # variable to store last observation self.old_obs = self.get_obs() # object name: name of the target object # object type: index of the object name in the object list # object list: pool of the available objects, have at least one entry self.object_name = '' self.object_type_str = '' self.object_type = 0 self.object_list = U.get_target_object(env_object_type) print("object list {}".format(self.object_list)) self.object_initial_position = Pose(position=Point(x=-0.13, y=0.848, z=1.06), orientation=quaternion_from_euler( 0.002567, 0.102, 1.563)) # select first object, set object name and object type # if object is random, spawn random object # else get the first entry of object_list self.set_target_object([0, 0, 0, 0, 0, 0]) # get maximum distance to the object to calculate reward, renewed in the reset function self.max_distance, _ = U.get_distance_gripper_to_object() # The closest distance during training self.min_distance = 999 # get samples from reaching task if self._load_init_pos: import environments self.init_samples = U.load_samples_from_prev_task( os.path.dirname(environments.__file__) + "/contacts_sample/door_sample/success_exp2019-05-21_11h41min.csv" )
def get_obs(self): """ Returns the state of the robot needed for Algorithm to learn The state will be defined by a List (later converted to numpy array) of the: 1) Distance from desired point in meters 2-7) States of the 6 joints in radiants 8,9) Force in contact sensor in Newtons 10,11,12) x, y, z Position of object? MISSING 10) RGBD image self._list_of_observations = ["distance_gripper_to_object", "elbow_joint_state", "shoulder_lift_joint_state", "shoulder_pan_joint_state", "wrist_1_joint_state", "wrist_2_joint_state", "wrist_3_joint_state", "contact_1_force", "contact_2_force", "object_pos_x", "object_pos_y", "object_pos_z", "object_type", -- if use_object_type set to True "min_distance_gripper_to_object] :return: observation """ # Get Distance Object to Gripper and Object position from Service Call. # Needs to be done a second time cause we need the distance and position after the Step execution distance_gripper_to_object, position_xyz_object = U.get_distance_gripper_to_object( ) object_pos_x = position_xyz_object[0] object_pos_y = position_xyz_object[1] object_pos_z = position_xyz_object[2] # Get Joints Data out of Subscriber joint_states = self.joints_state elbow_joint_state = joint_states.position[0] shoulder_lift_joint_state = joint_states.position[1] shoulder_pan_joint_state = joint_states.position[2] wrist_1_joint_state = joint_states.position[3] wrist_2_joint_state = joint_states.position[4] wrist_3_joint_state = joint_states.position[5] for joint in joint_states.position: if joint > math.pi or joint < -math.pi: print(joint_states.name) print(np.around(joint_states.position, decimals=3)) # self.controllers_object.turn_off_controllers() # self.gazebo.pauseSim() # self.gazebo.resetSim() # U.delete_object("pickbot") # U.spawn_urdf_object("pickbot", self.pickbot_initial_position) # self.gazebo.unpauseSim() # self.controllers_object.turn_off_controllers() # # print("###############################") # print("##### Pickbot respawned #####") # print("###############################") sys.exit("Joint exceeds limit") # Get Contact Forces out of get_contact_force Functions to be able to take an average over some iterations # otherwise chances are high that not both sensors are showing contact the same time contact_1_force = self.get_contact_force_1() contact_2_force = self.get_contact_force_2() # Stack all information into Observations List observation = [] for obs_name in self._list_of_observations: if obs_name == "distance_gripper_to_object": observation.append(distance_gripper_to_object) elif obs_name == "elbow_joint_state": observation.append(elbow_joint_state) elif obs_name == "shoulder_lift_joint_state": observation.append(shoulder_lift_joint_state) elif obs_name == "shoulder_pan_joint_state": observation.append(shoulder_pan_joint_state) elif obs_name == "wrist_1_joint_state": observation.append(wrist_1_joint_state) elif obs_name == "wrist_2_joint_state": observation.append(wrist_2_joint_state) elif obs_name == "wrist_3_joint_state": observation.append(wrist_3_joint_state) elif obs_name == "contact_1_force": observation.append(contact_1_force) elif obs_name == "contact_2_force": observation.append(contact_2_force) elif obs_name == "object_pos_x": observation.append(object_pos_x) elif obs_name == "object_pos_y": observation.append(object_pos_y) elif obs_name == "object_pos_z": observation.append(object_pos_z) elif obs_name == "object_type": observation.append(self.object_type) elif obs_name == "min_distance_gripper_to_object": observation.append(self.min_distance) else: raise NameError('Observation Asked does not exist==' + str(obs_name)) return observation
def __init__(self, joint_increment=None, sim_time_factor=0.001, running_step=0.001, random_object=False, random_position=False, use_object_type=False, populate_object=False, env_object_type='free_shapes'): """ initializing all the relevant variables and connections :param joint_increment: increment of the joints :param running_step: gazebo simulation time factor :param random_object: spawn random object in the simulation :param random_position: change object position in each reset :param use_object_type: assign IDs to objects and used them in the observation space :param populate_object: to populate object(s) in the simulation using sdf file :param env_object_type: object type for environment, free_shapes for boxes while others are related to use_case 'door_handle', 'combox', ... """ # Assign Parameters self._joint_increment = joint_increment self.running_step = running_step self._random_object = random_object self._random_position = random_position self._use_object_type = use_object_type self._populate_object = populate_object # Assign MsgTypes self.joints_state = JointState() self.contact_1_state = ContactsState() self.contact_2_state = ContactsState() self.collisions = Bool() self.camera_rgb_state = Image() self.camera_depth_state = Image() self.contact_1_force = Vector3() self.contact_2_force = Vector3() self.gripper_state = VacuumGripperState() self._list_of_observations = [ "distance_gripper_to_object", "elbow_joint_state", "shoulder_lift_joint_state", "shoulder_pan_joint_state", "wrist_1_joint_state", "wrist_2_joint_state", "wrist_3_joint_state", "contact_1_force", "contact_2_force", "object_pos_x", "object_pos_y", "object_pos_z", "min_distance_gripper_to_object" ] if self._use_object_type: self._list_of_observations.append("object_type") # Establishes connection with simulator """ 1) Gazebo Connection 2) Controller Connection 3) Joint Publisher """ self.gazebo = GazeboConnection(sim_time_factor=sim_time_factor) self.controllers_object = ControllersConnection() self.pickbot_joint_pubisher_object = JointPub() # Define Subscribers as Sensordata """ 1) /pickbot/joint_states 2) /gripper_contactsensor_1_state 3) /gripper_contactsensor_2_state 4) /gz_collisions not used so far but available in the environment 5) /pickbot/gripper/state 6) /camera_rgb/image_raw 7) /camera_depth/depth/image_raw """ rospy.Subscriber("/pickbot/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gripper_contactsensor_1_state", ContactsState, self.contact_1_callback) rospy.Subscriber("/gripper_contactsensor_2_state", ContactsState, self.contact_2_callback) rospy.Subscriber("/gz_collisions", Bool, self.collision_callback) # rospy.Subscriber("/pickbot/gripper/state", VacuumGripperState, self.gripper_state_callback) # rospy.Subscriber("/camera_rgb/image_raw", Image, self.camera_rgb_callback) # rospy.Subscriber("/camera_depth/depth/image_raw", Image, self.camera_depth_callback) # Define Action and state Space and Reward Range """ Action Space: Box Space with 6 values. State Space: Box Space with 12 values. It is a numpy array with shape (12,) Reward Range: -infinity to infinity """ # Directly use joint_positions as action if self._joint_increment is None: low_action = np.array([ -(math.pi - 0.05), -(math.pi - 0.05), -(math.pi - 0.05), -(math.pi - 0.05), -(math.pi - 0.05), -(math.pi - 0.05) ]) high_action = np.array([ math.pi - 0.05, math.pi - 0.05, math.pi - 0.05, math.pi - 0.05, math.pi - 0.05, math.pi - 0.05 ]) else: # Use joint_increments as action low_action = np.array([ -self._joint_increment, -self._joint_increment, -self._joint_increment, -self._joint_increment, -self._joint_increment, -self._joint_increment ]) high_action = np.array([ self._joint_increment, self._joint_increment, self._joint_increment, self._joint_increment, self._joint_increment, self._joint_increment ]) self.action_space = spaces.Box(low_action, high_action) high = np.array([ 999, # distance_gripper_to_object math.pi, # elbow_joint_state math.pi, # shoulder_lift_joint_state math.pi, # shoulder_pan_joint_state math.pi, # wrist_1_joint_state math.pi, # wrist_2_joint_state math.pi, # wrist_3_joint_state np.finfo(np.float32).max, # contact_1_force np.finfo(np.float32).max, # contact_2_force 1, # object_pos_x 1.4, # object_pos_y 1.5, # object_pos_z 999 ]) # min_distance_gripper_to_object low = np.array([ 0, # distance_gripper_to_object -math.pi, # elbow_joint_state -math.pi, # shoulder_lift_joint_state -math.pi, # shoulder_pan_joint_state -math.pi, # wrist_1_joint_state -math.pi, # wrist_2_joint_state -math.pi, # wrist_3_joint_state 0, # contact_1_force 0, # contact_2_force -1, # object_pos_x 0, # object_pos_y 0, # object_pos_z 0 ]) # min distance if self._use_object_type: high = np.append(high, 9) low = np.append(low, 0) self.observation_space = spaces.Box(low, high) self.reward_range = (-np.inf, np.inf) self._seed() self.done_reward = 0 # set up everything to publish the Episode Number and Episode Reward on a rostopic self.episode_num = 0 self.accumulated_episode_reward = 0 self.episode_steps = 0 self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1) self.reward_list = [] self.episode_list = [] self.step_list = [] self.csv_name = logger.get_dir() + '/result_log' print("CSV NAME") print(self.csv_name) self.csv_success_exp = logger.get_dir( ) + "/success_exp" + datetime.datetime.now().strftime( '%Y-%m-%d_%Hh%Mmin') + ".csv" self.success_2_contact = 0 self.success_1_contact = 0 # object name: name of the target object # object type: index of the object name in the object list # object list: pool of the available objects, have at least one entry self.object_name = '' self.object_type_str = '' self.object_type = 0 self.object_list = U.get_target_object(env_object_type) print("object list {}".format(self.object_list)) self.object_initial_position = Pose( position=Point(x=-0.13, y=0.848, z=1.06), # x=0.0, y=0.9, z=1.05 orientation=quaternion_from_euler(0.002567, 0.102, 1.563)) self.pickbot_initial_position = Pose(position=Point(x=0.0, y=0.0, z=1.12), orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0)) if self._populate_object: # populate objects from object list self.populate_objects() # select first object, set object name and object type # if object is random, spawn random object # else get the first entry of object_list self.set_target_object(random_object=self._random_object, random_position=self._random_position) # The distance between gripper and object, when the arm is in initial pose self.max_distance, _ = U.get_distance_gripper_to_object() # The minimum distance between gripper and object during training self.min_distance = 999
def reset(self): """ Reset The Robot to its initial Position and restart the Controllers 1) Change Gravity to 0 ->That arm doesnt fall 2) Turn Controllers off 3) Pause Simulation 4) Reset Simulation 5) Set Model Pose to desired one 6) Unpause Simulation 7) Turn on Controllers 8) Restore Gravity 9) Get Observations and return current State 10) Check all Systems work 11) Pause Simulation 12) Write initial Position into Yaml File 13) Create YAML Files for contact forces in order to get the average over 2 contacts 14) Create YAML Files for collision to make shure to see a collision due to high noise in topic 15) Unpause Simulation cause in next Step Sysrem must be running otherwise no data is seen by Subscribers 16) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number 17) Return State """ ###### TEST obs = self.get_obs() print("Before RESET Joint: {}".format(np.around(obs[1:7], decimals=3))) ###### END of TEST self.gazebo.change_gravity(0, 0, 0) self.controllers_object.turn_off_controllers() self.gazebo.resetSim() self.gazebo.pauseSim() ##### TEST ######### # idx = 0 # sys_exit = False # correction_ids = [] # reset_target_pos = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # current_joint_pos = obs[1:7] # # for joint_pos in obs[1:7]: # if np.abs(joint_pos - math.pi) < 0.1: # sys_exit = True # correction_ids.append(idx) # idx += 1 # # if sys_exit: # for i in correction_ids: # print("i:{}".format(i)) # reset_target_pos[i] = 2.0 if current_joint_pos[i] > 0 else -2.0 # # self.pickbot_joint_pubisher_object.set_joints(reset_target_pos) # self.pickbot_joint_pubisher_object.set_joints([0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) #### END of TEST ##### self.pickbot_joint_pubisher_object.set_joints() self.set_target_object(random_object=self._random_object, random_position=self._random_position) self.gazebo.unpauseSim() self.controllers_object.turn_on_controllers() self.gazebo.change_gravity(0, 0, -9.81) self._check_all_systems_ready() ###### TEST ######### # init_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0] # self.pickbot_joint_pubisher_object.move_joints(init_position) # # start_ros_time = rospy.Time.now() # while True: # elapsed_time = rospy.Time.now() - start_ros_time # if np.isclose(init_position, self.joints_state.position, rtol=0.0, atol=0.01).all(): # break # elif elapsed_time > rospy.Duration(2): # time out # break ###### END of TEST ######## last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0] with open('last_position.yml', 'w') as yaml_file: yaml.dump(last_position, yaml_file, default_flow_style=False) with open('contact_1_force.yml', 'w') as yaml_file: yaml.dump(0.0, yaml_file, default_flow_style=False) with open('contact_2_force.yml', 'w') as yaml_file: yaml.dump(0.0, yaml_file, default_flow_style=False) with open('collision.yml', 'w') as yaml_file: yaml.dump(False, yaml_file, default_flow_style=False) observation = self.get_obs() print("After RESET Joint: {}".format( np.around(observation[1:7], decimals=3))) # if sys_exit: # print("##################################################") # print("############# Joint near Pi ######################") # print("Reset_target_pos: {}".format(reset_target_pos)) # print("##################################################") # get maximum distance to the object to calculate reward self.max_distance, _ = U.get_distance_gripper_to_object() self.min_distance = self.max_distance self.gazebo.pauseSim() state = U.get_state(observation) self._update_episode() self.gazebo.unpauseSim() return state
def get_action_to_position(self, action, last_position): """ Take the last published joint and increment/decrement one joint acording to action chosen :param action: Integer that goes from 0 to 11, because we have 12 actions. :return: list with all joint positions acording to chosen action """ distance = U.get_distance_gripper_to_object(self.object_height) self._joint_increment_value = 0.18 * distance[0] + 0.01 joint_states_position = last_position action_position = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] rospy.logdebug("get_action_to_position>>>" + str(joint_states_position)) if action == 0: # Increment joint3_position_controller (elbow joint) action_position[0] = joint_states_position[0] + self._joint_increment_value / 2 action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 1: # Decrement joint3_position_controller (elbow joint) action_position[0] = joint_states_position[0] - self._joint_increment_value / 2 action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 2: # Increment joint2_position_controller (shoulder_lift_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] + self._joint_increment_value / 2 action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 3: # Decrement joint2_position_controller (shoulder_lift_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] - self._joint_increment_value / 2 action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 4: # Increment joint1_position_controller (shoulder_pan_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] + self._joint_increment_value / 2 action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 5: # Decrement joint1_position_controller (shoulder_pan_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] - self._joint_increment_value / 2 action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 6: # Increment joint4_position_controller (wrist_1_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] + self._joint_increment_value action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 7: # Decrement joint4_position_controller (wrist_1_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] - self._joint_increment_value action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] elif action == 8: # Increment joint5_position_controller (wrist_2_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] + self._joint_increment_value action_position[5] = joint_states_position[5] elif action == 9: # Decrement joint5_position_controller (wrist_2_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] - self._joint_increment_value action_position[5] = joint_states_position[5] elif action == 10: # Increment joint6_position_controller (wrist_3_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] + self._joint_increment_value elif action == 11: # Decrement joint6_position_controller (wrist_3_joint) action_position[0] = joint_states_position[0] action_position[1] = joint_states_position[1] action_position[2] = joint_states_position[2] action_position[3] = joint_states_position[3] action_position[4] = joint_states_position[4] action_position[5] = joint_states_position[5] - self._joint_increment_value return action_position
def reset(self): """ Reset The Robot to its initial Position and restart the Controllers 1) Publish the initial joint_positions to MoveIt 2) Busy waiting until the movement is completed by MoveIt 3) set target_object to random position 4) Check all Systems work 5) Create YAML Files for contact forces in order to get the average over 2 contacts 6) Create YAML Files for collision to make shure to see a collision due to high noise in topic 7) Get Observations and return current State 8) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number 9) Return State """ # print("Joint (reset): {}".format(np.around(self.joints_state.position, decimals=3))) init_joint_pos = [1.5, -1.2, 1.4, -1.87, -1.57, 0] self.publisher_to_moveit_object.set_joints(init_joint_pos) # print(">>>>>>>>>>>>>>>>>>> RESET: waiting for the movement to complete") # rospy.wait_for_message("/pickbot/movement_complete", Bool) while not self.movement_complete.data: pass # print(">>>>>>>>>>>>>>>>>>> RESET: Waiting complete") start_ros_time = rospy.Time.now() while True: # Check collision: # invalid_collision = self.get_collisions() # if invalid_collision: # print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<") # observation = self.get_obs() # reward = UMath.compute_reward(observation, -200, True) # observation = self.get_obs() # print("Test Joint: {}".format(np.around(observation[1:7], decimals=3))) # return U.get_state(observation), reward, True, {} elapsed_time = rospy.Time.now() - start_ros_time if np.isclose(init_joint_pos, self.joints_state.position, rtol=0.0, atol=0.01).all(): break elif elapsed_time > rospy.Duration(2): # time out break self.set_target_object(random_object=self._random_object, random_position=self._random_position) self._check_all_systems_ready() with open('contact_1_force.yml', 'w') as yaml_file: yaml.dump(0.0, yaml_file, default_flow_style=False) with open('contact_2_force.yml', 'w') as yaml_file: yaml.dump(0.0, yaml_file, default_flow_style=False) with open('collision.yml', 'w') as yaml_file: yaml.dump(False, yaml_file, default_flow_style=False) observation = self.get_obs() self.object_position = observation[9:12] # print("Joint (after): {}".format(np.around(observation[1:7], decimals=3))) # get maximum distance to the object to calculate reward self.max_distance, _ = U.get_distance_gripper_to_object() self.min_distace = self.max_distance state = U.get_state(observation) self._update_episode() return state