def __init__(self): rospy.logdebug("Starting URSimReaching Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/target_blocks_pose", Point, self.target_point_cb) # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value = rospy.get_param("/init_joint_pose/shp") shl_init_value = rospy.get_param("/init_joint_pose/shl") elb_init_value = rospy.get_param("/init_joint_pose/elb") wr1_init_value = rospy.get_param("/init_joint_pose/wr1") wr2_init_value = rospy.get_param("/init_joint_pose/wr2") wr3_init_value = rospy.get_param("/init_joint_pose/wr3") self.init_joint_pose = [ shp_init_value, shl_init_value, elb_init_value, wr1_init_value, wr2_init_value, wr3_init_value ] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = sys.argv[1] self.pre_ctrl_type = self._ctrl_type # We init the observations self.base_orientation = Quaternion() self.target_point = Point() self.joints_state = JointState() # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed()
'finger_3_position_controller', 'joint_state_controller' ] joint_names = [ 'j2n6s300_joint_1', 'j2n6s300_joint_2', 'j2n6s300_joint_3', 'j2n6s300_joint_4', 'j2n6s300_joint_5', 'j2n6s300_joint_6', 'j2n6s300_finger_1', 'j2n6s300_finger_2', 'j2n6s300_finger_3' ] joint_positions = [0.0, 2.9, 1.3, -2.07, 1.4, 0.0, 1, 1, 1] model_config = SetModelConfigurationRequest() model_config.model_name = 'j2n6s300' model_config.urdf_param_name = '/robot_description' model_config.joint_names = joint_names model_config.joint_positions = joint_positions controllers_object = ControllersConnection(namespace='j2n6s300', controllers_list=controllers_list) pub = rospy.Publisher('/j2n6s300/effort_joint_trajectory_controller/command', JointTrajectory, queue_size=1) pubf = rospy.Publisher('/j2n6s300/effort_joint_trajectory_controller/command', JointTrajectory, queue_size=1) unpause_gazebo = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) pause_gazebo = rospy.ServiceProxy('/gazebo/pause_physics', Empty) reset_sim = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) set_model_srv = rospy.ServiceProxy('/gazebo/set_model_configuration', SetModelConfiguration) jointCmd = JointTrajectory() point = JointTrajectoryPoint() jointCmd.header.stamp = rospy.Time.now() + rospy.Duration.from_sec(0.0)
class URSimReaching(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): rospy.logdebug("Starting URSimReaching Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/target_blocks_pose", Point, self.target_point_cb) # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value = rospy.get_param("/init_joint_pose/shp") shl_init_value = rospy.get_param("/init_joint_pose/shl") elb_init_value = rospy.get_param("/init_joint_pose/elb") wr1_init_value = rospy.get_param("/init_joint_pose/wr1") wr2_init_value = rospy.get_param("/init_joint_pose/wr2") wr3_init_value = rospy.get_param("/init_joint_pose/wr3") self.init_joint_pose = [ shp_init_value, shl_init_value, elb_init_value, wr1_init_value, wr2_init_value, wr3_init_value ] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = sys.argv[1] self.pre_ctrl_type = self._ctrl_type # We init the observations self.base_orientation = Quaternion() self.target_point = Point() self.joints_state = JointState() # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def target_point_cb(self, msg): self.target_point = msg def check_all_systems_ready(self): """ We check that all systems are ready :return: """ joint_states_msg = None while joint_states_msg is None and not rospy.is_shutdown(): try: joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1) self.joints_state = joint_states_msg rospy.logdebug("Current joint_states READY") except Exception as e: rospy.logdebug( "Current joint_states not ready yet, retrying==>" + str(e)) target_pose_msg = None while target_pose_msg is None and not rospy.is_shutdown(): try: target_pose_msg = rospy.wait_for_message("/target_blocks_pose", Point, timeout=0.1) self.target_point = target_pose_msg rospy.logdebug("Reading target pose READY") except Exception as e: rospy.logdebug( "Reading target pose not ready yet, retrying==>" + str(e)) rospy.logdebug("ALL SYSTEMS READY") def get_xyz(self, q): """Get x,y,z coordinates Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_orientation(self, q): """Get Euler angles Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) orientation = mat[0:3, 0:3] roll = -orientation[1, 2] pitch = orientation[0, 2] yaw = -orientation[0, 1] return Vector3(roll, pitch, yaw) def cvt_quat_to_euler(self, quat): euler_rpy = Vector3() euler = tf.transformations.euler_from_quaternion( [self.quat.x, self.quat.y, self.quat.z, self.quat.w]) euler_rpy.x = euler[0] euler_rpy.y = euler[1] euler_rpy.z = euler[2] return euler_rpy def init_joints_pose(self, init_pos): """ We initialise the Position variable that saves the desired position where we want our joints to be :param init_pos: :return: """ self.current_joint_pose = [] self.current_joint_pose = copy.deepcopy(init_pos) return self.current_joint_pose def get_euclidean_dist(self, p_in, p_pout): """ Given a Vector3 Object, get distance from current position :param p_end: :return: """ a = numpy.array((p_in.x, p_in.y, p_in.z)) b = numpy.array((p_pout.x, p_pout.y, p_pout.z)) distance = numpy.linalg.norm(a - b) return distance def joints_state_callback(self, msg): self.joints_state = msg def get_observations(self): """ Returns the state of the robot needed for OpenAI QLearn Algorithm The state will be defined by an array :return: observation """ joint_states = self.joints_state shp_joint_ang = joint_states.position[0] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[2] wr1_joint_ang = joint_states.position[3] wr2_joint_ang = joint_states.position[4] wr3_joint_ang = joint_states.position[5] shp_joint_vel = joint_states.velocity[0] shl_joint_vel = joint_states.velocity[1] elb_joint_vel = joint_states.velocity[2] wr1_joint_vel = joint_states.velocity[3] wr2_joint_vel = joint_states.velocity[4] wr3_joint_vel = joint_states.velocity[5] q = [ shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang ] eef_x, eef_y, eef_z = self.get_xyz(q) observation = [] rospy.logdebug("List of Observations==>" + str(self.observations)) for obs_name in self.observations: if obs_name == "shp_joint_ang": observation.append(shp_joint_ang) elif obs_name == "shl_joint_ang": observation.append(shl_joint_ang) elif obs_name == "elb_joint_ang": observation.append(elb_joint_ang) elif obs_name == "wr1_joint_ang": observation.append(wr1_joint_ang) elif obs_name == "wr2_joint_ang": observation.append(wr2_joint_ang) elif obs_name == "wr3_joint_ang": observation.append(wr3_joint_ang) elif obs_name == "shp_joint_vel": observation.append(shp_joint_vel) elif obs_name == "shl_joint_vel": observation.append(shl_joint_vel) elif obs_name == "elb_joint_vel": observation.append(elb_joint_vel) elif obs_name == "wr1_joint_vel": observation.append(wr1_joint_vel) elif obs_name == "wr2_joint_vel": observation.append(wr2_joint_vel) elif obs_name == "wr3_joint_vel": observation.append(wr3_joint_vel) elif obs_name == "eef_x": observation.append(eef_x) elif obs_name == "eef_y": observation.append(eef_y) elif obs_name == "eef_z": observation.append(eef_z) else: raise NameError('Observation Asked does not exist==' + str(obs_name)) return observation def clamp_to_joint_limits(self): """ clamps self.current_joint_pose based on the joint limits self._joint_limits { "shp_max": shp_max, "shp_min": shp_min, ... } :return: """ rospy.logdebug("Clamping current_joint_pose>>>" + str(self.current_joint_pose)) shp_joint_value = self.current_joint_pose[0] shl_joint_value = self.current_joint_pose[1] elb_joint_value = self.current_joint_pose[2] wr1_joint_value = self.current_joint_pose[3] wr2_joint_value = self.current_joint_pose[4] wr3_joint_value = self.current_joint_pose[5] self.current_joint_pose[0] = max( min(shp_joint_value, self._joint_limits["shp_max"]), self._joint_limits["shp_min"]) self.current_joint_pose[1] = max( min(shl_joint_value, self._joint_limits["shl_max"]), self._joint_limits["shl_min"]) self.current_joint_pose[2] = max( min(elb_joint_value, self._joint_limits["elb_max"]), self._joint_limits["elb_min"]) self.current_joint_pose[3] = max( min(wr1_joint_value, self._joint_limits["wr1_max"]), self._joint_limits["wr1_min"]) self.current_joint_pose[4] = max( min(wr2_joint_value, self._joint_limits["wr2_max"]), self._joint_limits["wr2_min"]) self.current_joint_pose[5] = max( min(wr3_joint_value, self._joint_limits["wr3_max"]), self._joint_limits["wr3_min"]) rospy.logdebug("DONE Clamping current_joint_pose>>>" + str(self.current_joint_pose)) # Resets the state of the environment and returns an initial observation. def reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self._gz_conn.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self._gz_conn.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self._gz_conn.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_ur_joint_controllers...") self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose)) # We save that position as the current joint desired position init_pos = self.init_joints_pose(self.init_joint_pose) print("init_pos") # 4th: We Set the init pose to the jump topic so that the jump control can update # We check the jump publisher has connection if self._ctrl_type == 'traj_vel': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'vel': self._joint_pubisher.check_publishers_connection() else: rospy.logwarn("Controller type is wrong!!!!") print("check_publishers_connection ") # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.check_all_systems_ready() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self._gz_conn.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self._gz_conn.pauseSim() # self._init_obj_pose() # 8th: Get the State Discrete Stringuified version of the observations rospy.logdebug("get_observations...") observation = self.get_observations() print('Reset final') return observation def _act(self, action): if self._ctrl_type == 'traj_vel': self.pre_ctrl_type = 'traj_vel' self._joint_traj_pubisher.move_joints(action) elif self._ctrl_type == 'vel': self.pre_ctrl_type = 'vel' self._joint_pubisher.move_joints(action) else: self._joint_pubisher.move_joints(action) def step(self, action): rospy.logdebug("UR step func") # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # Act self._gz_conn.unpauseSim() self._act(action) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self._gz_conn.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.get_observations() # finally we get an evaluation based on what happened in the sim reward = self.compute_dist_rewards() done = self.check_done() return observation, reward, done, {} def compute_dist_rewards(self): return 0 def check_done(self): return True
def __init__(self): rospy.logdebug("Starting URSimDoorOpening Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy('/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/ft_sensor_topic", WrenchStamped, self.wrench_stamped_callback) rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) # Gets training parameters from param server self.desired_pose = Pose() self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = {"shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value0 = rospy.get_param("/init_joint_pose0/shp") shl_init_value0 = rospy.get_param("/init_joint_pose0/shl") elb_init_value0 = rospy.get_param("/init_joint_pose0/elb") wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1") wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2") wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3") self.init_joint_pose0 = [shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0, wr2_init_value0, wr3_init_value0] shp_init_value1 = rospy.get_param("/init_joint_pose1/shp") shl_init_value1 = rospy.get_param("/init_joint_pose1/shl") elb_init_value1 = rospy.get_param("/init_joint_pose1/elb") wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1") wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2") wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3") self.init_joint_pose1 = [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1] # print("[init_joint_pose1]: ", [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1]) shp_init_value2 = rospy.get_param("/init_joint_pose2/shp") shl_init_value2 = rospy.get_param("/init_joint_pose2/shl") elb_init_value2 = rospy.get_param("/init_joint_pose2/elb") wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1") wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2") wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3") self.init_joint_pose2 = [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2] # print("[init_joint_pose2]: ", [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2]) shp_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shp") shl_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shl") elb_after_rotate = rospy.get_param("/eelink_pose_after_rotate/elb") wr1_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr1") wr2_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr2") wr3_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr3") self.after_rotate = [shp_after_rotate, shl_after_rotate, elb_after_rotate, wr1_after_rotate, wr2_after_rotate, wr3_after_rotate] shp_after_pull = rospy.get_param("/eelink_pose_after_pull/shp") shl_after_pull = rospy.get_param("/eelink_pose_after_pull/shl") elb_after_pull = rospy.get_param("/eelink_pose_after_pull/elb") wr1_after_pull = rospy.get_param("/eelink_pose_after_pull/wr1") wr2_after_pull = rospy.get_param("/eelink_pose_after_pull/wr2") wr3_after_pull = rospy.get_param("/eelink_pose_after_pull/wr3") self.after_pull = [shp_after_pull, shl_after_pull, elb_after_pull, wr1_after_pull, wr2_after_pull, wr3_after_pull] r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive") l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive") r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower") l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower") r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring") l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring") r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive") l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive") r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower") l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower") r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring") l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring") self.init_grp_pose1 = [r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1, r_spr_value1, l_spr_value1] self.init_grp_pose2 = [r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2, r_spr_value2, l_spr_value2] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # Get the force and troque limit self.force_limit = rospy.get_param("/force_limit") self.torque_limit = rospy.get_param("/torque_limit") # We init the observations self.base_orientation = Quaternion() self.imu_link = Quaternion() self.door = Quaternion() self.quat = Quaternion() self.imu_link_rpy = Vector3() self.door_rpy = Vector3() self.link_state = LinkStates() self.wrench_stamped = WrenchStamped() self.joints_state = JointState() self.end_effector = Point() self.previous_action =[] self.counter = 0 self.max_rewards = 1 # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.observation_space = 21 #np.arange(self.get_observations().shape[0]) ## self.observation_space = 15 #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_pos_server = rospy.Service('/set_position_controller', SetBool, self._set_pos_ctrl) set_joint_traj_pos_server = rospy.Service('/set_trajectory_position_controller', SetBool, self._set_traj_pos_ctrl) set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service('/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) # set_gripper_server = rospy.Service('/set_gripper_controller', SetBool, self._set_grp_ctrl) self.pos_traj_controller = ['joint_state_controller', 'gripper_controller', 'pos_traj_controller'] self.pos_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_pos_controller", "ur_shoulder_lift_pos_controller", "ur_elbow_pos_controller", "ur_wrist_1_pos_controller", "ur_wrist_2_pos_controller", "ur_wrist_3_pos_controller"] self.vel_traj_controller = ['joint_state_controller', 'gripper_controller', 'vel_traj_controller'] self.vel_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller"] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig)
class LaelapsEnvEllipse(gym.Env): def __init__(self): # fixed frame is nearly on the origin of the ground 0,0,0.6 at laelaps body center of mass self.last_base_position = [0, 0, 0] self.distance_weight = 1 self.drift_weight = 2 self.time_step = 0.001 # default Gazebo simulation time step self.episode_number = 0 self.frames = 0 self.torques_step = [] self.euler_angles = [] self.euler_rates = [] self.base_zaxis = [] self.base_x_y = [] self.sim_t = [] self.saving_option = False # Rospy get parameters from config file: self.laelaps_model_number = rospy.get_param("/laelaps_model_number") self.ramp_model_number = rospy.get_param("/ramp_model_number") self.ramp_available = rospy.get_param("/ramp_available") self.env_goal = rospy.get_param("/env_goal") self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection( namespace="laelaps_robot", controllers_list=controllers_list) # _______________SUBSCRIBERS__________________________________________________ # give base position and quaternions rospy.Subscriber(gazebo_model_states_topic, ModelStates, self.models_callback) # give motor angle, velocity, torque rospy.Subscriber(joint_state_topic, JointState, self.joints_state_callback) rospy.Subscriber("/clock", Clock, self.clock_callback) rospy.Subscriber(rf_toe_nan_topic, Bool, self.rf_toe_nan_callback) rospy.Subscriber(rh_toe_nan_topic, Bool, self.rh_toe_nan_callback) rospy.Subscriber(lf_toe_nan_topic, Bool, self.lf_toe_nan_callback) rospy.Subscriber(lh_toe_nan_topic, Bool, self.lh_toe_nan_callback) # _______________MOTOR PUBLISHERS__________________________________________________ self.motor_pub = list() for joint in motor_joints: joint_controller = "/laelaps_robot/" + str(joint) + "/command" x = rospy.Publisher(joint_controller, Float64, queue_size=1) self.motor_pub.append(x) # # _______________Toe PUBLISHERS__________________________________________________ # toe4_pos_publisher node : RH_foot: toe1 , RF_foot: toe2, LF_foot: toe3, LH_foot: toe4 self.toe_pub = list() for idx in range(len(laelaps_feet)): toe_commands = "/laelaps_robot/toe" + str(idx + 1) + "/command" x = rospy.Publisher(toe_commands, Toe, queue_size=1) self.toe_pub.append(x) # ______________Defining observation and action space____________________________ observation_high = (self.GetObservationUpperBound() + observation_eps) observation_low = (self.GetObservationLowerBound() - observation_eps) # Four legs toe x,y are estimated by RL low = [x_low] * 4 low.extend([y_low] * 4) high = [x_high] * 4 high.extend([y_high] * 4) self.action_space = spaces.Box(low=np.array(low), high=np.array(high), dtype=np.float32) # the epsilon to avoid 0 values entering the neural network in the algorithm observation_high = (self.GetObservationUpperBound() + observation_eps) observation_low = (self.GetObservationLowerBound() - observation_eps) self.observation_space = spaces.Box(observation_low, observation_high, dtype=np.float32) # ______________Reset and seed the environment____________________________ self.seed() # seed the environment in the initial function self.init_reset() # reset the environment in the initial function def GetObservationUpperBound(self): upper_bound = np.zeros(6) # 6 observation space dimension upper_bound[0:3] = [2 * math.pi] * 3 # roll,pitch yaw upper_bound[3:6] = [2 * math.pi / self.time_step] * \ 3 # Roll, pitch, yaw rate return upper_bound def GetObservationLowerBound(self): return -1 * self.GetObservationUpperBound() def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # _______________call back functions____________________ def clock_callback(self, msg): global sim_time sim_time_s = msg.clock.secs sim_time_ns = msg.clock.nsecs * 1e-9 sim_time = sim_time_s + sim_time_ns if episode_start: if self.saving_option: self.sim_t.append(sim_time) def models_callback(self, msg): self.base_pos = msg.pose[self.laelaps_model_number].position self.base_orientation = msg.pose[self.laelaps_model_number].orientation self.base_velocity = msg.twist[self.laelaps_model_number].linear self.base_angular_velocity = msg.twist[ self.laelaps_model_number].angular if self.ramp_available: # both ramps have the same inclination self.ramp_inclination = msg.pose[ self.ramp_model_number].orientation if episode_start: if self.saving_option: self.base_zaxis.append([self.base_pos.z, self.base_velocity.z]) self.base_x_y.append([self.base_pos.x, self.base_pos.y]) euler = self.transfrom_euler_from_quaternion([ self.base_orientation.x, self.base_orientation.y, self.base_orientation.z, self.base_orientation.w ]) self.euler_angles.append(euler) self.euler_rates.append([ self.base_angular_velocity.x, self.base_angular_velocity.y, self.base_angular_velocity.z ]) def GetLaelapsBaseInfo(self): p = self.base_pos q = self.base_orientation v = self.base_velocity w = self.base_angular_velocity base_rot_x = q.x base_rot_y = q.y base_rot_z = q.z base_rot_w = q.w base_orientation = [base_rot_x, base_rot_y, base_rot_z, base_rot_w] base_p_x = p.x base_p_y = p.y base_p_z = p.z base_pos = [base_p_x, base_p_y, base_p_z] base_v_x = v.x base_v_y = v.y base_v_z = v.z base_vel = [base_v_x, base_v_y, base_v_z] base_w_x = w.x base_w_y = w.y base_w_z = w.z base_angular_vel = [base_w_x, base_w_y, base_w_z] return base_pos, base_orientation, base_vel, base_angular_vel def GetRampInclination(self): ramp_q = self.ramp_inclination ramp_rot_x = ramp_q.x ramp_rot_y = ramp_q.y ramp_rot_z = ramp_q.z ramp_rot_w = ramp_q.w ramp_orientation = [ramp_rot_x, ramp_rot_y, ramp_rot_z, ramp_rot_w] euler = self.transfrom_euler_from_quaternion(ramp_orientation) ramp_inclination = euler[1] # ramp inclination pitch (around y-axis) return ramp_inclination def joints_state_callback(self, msg): global joints_states joints_states = msg def GetMotorAngles(self): global joints_states motor_angles = joints_states.position return motor_angles def GetMotorVelocities(self): global joints_states motor_velocities = joints_states.velocity return motor_velocities def GetMotorTorques(self): global joints_states motor_torques = joints_states.effort return motor_torques if episode_start: if self.saving_option: self.torques_step.append(joints_states.effort) def rf_toe_nan_callback(self, msg): self.RF_toe_isnan = msg.data def rh_toe_nan_callback(self, msg): self.RH_toe_isnan = msg.data def lf_toe_nan_callback(self, msg): self.LF_toe_isnan = msg.data def lh_toe_nan_callback(self, msg): self.LH_toe_isnan = msg.data def GetNanToeCheck(self): return [ self.RF_toe_isnan, self.RH_toe_isnan, self.LF_toe_isnan, self.LH_toe_isnan ] # _______________Quaternion to Euler conversion_____________________________________ def transfrom_euler_from_quaternion(self, quaternion): # input quaternion should be list example: [0.06146124, 0, 0, 0.99810947] q = np.array(quaternion[:4], dtype=np.float64, copy=True) nq = np.dot(q, q) # gives scalar if nq < eps: H = np.identity(4) # identity matrix of 4x4 q *= math.sqrt(2.0 / nq) q = np.outer(q, q) H = np.array( ((1.0 - q[1, 1] - q[2, 2], q[0, 1] - q[2, 3], q[0, 2] + q[1, 3], 0.0), (q[0, 1] + q[2, 3], 1.0 - q[0, 0] - q[2, 2], q[1, 2] - q[0, 3], 0.0), (q[0, 2] - q[1, 3], q[1, 2] + q[0, 3], 1.0 - q[0, 0] - q[1, 1], 0.0), (0.0, 0.0, 0.0, 1.0)), dtype=np.float64) # Homogenous transformation matrix # Obtain euler angles from Homogenous transformation matrix: # Note that many Euler angle triplets can describe one matrix. # take only first three rows and first three coloumns = rotation matrix M = np.array(H, dtype=np.float64, copy=False)[:3, :3] i = 0 # x-axes (first coloumn) j = 1 # y-axes k = 2 # z-axis cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i]) if cy > eps: ax = math.atan2(M[k, j], M[k, k]) ay = math.atan2(-M[k, i], cy) az = math.atan2(M[j, i], M[i, i]) else: ax = math.atan2(-M[j, k], M[j, j]) ay = math.atan2(-M[k, i], cy) az = 0.0 euler = [ax, ay, az] return euler # roll, pitch ,yaw # __________________________________Get Observations____________________________________________________________ def GetObservation(self): observation = [] # returns base: position, orientation(in quaternion), linear velocity, angular velocity _, base_orientation_quaternion, _, euler_rate = self.GetLaelapsBaseInfo( ) euler = self.transfrom_euler_from_quaternion( base_orientation_quaternion) observation.extend(euler) observation.extend(euler_rate) return observation # ____________________________________ROS wait_________________________________________________________________________________ def ros_wait(self, t): # It makes the execution of the next command in the python-ROS node wait for t secs, equivalent to rospy.sleep(time) start = rospy.get_rostime() ros_time_start = start.secs + start.nsecs * 1e-9 timeout = ros_time_start + t # example 0.6 sec wait = True while wait: now = rospy.get_rostime() ros_time_now = now.secs + now.nsecs * 1e-9 if ros_time_now >= timeout: wait = False # _______________Publish commands from ROS to Gazebo__________________________________________________________________________________ def publish_threads_motor_angles(self, i, motor_angle): self.motor_pub[i].publish(motor_angle[i]) def publish_motor_angles(self, motor_angle): threads = list() for index in range(8): # logging.info("Main : create and start thread %d.", index) x = threading.Thread(target=self.publish_threads_motor_angles, args=(index, motor_angle)) threads.append(x) x.start() for index, thread in enumerate(threads): # logging.info("Main : before joining thread %d.", index) thread.join() # logging.info("Main : thread %d done", index) def publish_threads_toe(self, i, toe_commands, phase): toe_class_msg.toex = toe_commands[i] # toe_x toe_class_msg.toey = toe_commands[i + 4] # toe_y toe_class_msg.phase = phase[i] self.toe_pub[i].publish(toe_class_msg) def pubilsh_toe_commands(self, toe_x_y, phase_shift): threads = list() for index in range(len(laelaps_feet)): # toe_class_msg=Toe() x = threading.Thread(target=self.publish_threads_toe, args=(index, toe_x_y, phase_shift)) threads.append(x) x.start() for index, thread in enumerate(threads): thread.join() # _______________Reset function__________________________________________________________________________________ def init_reset(self): global init_angles, init_toe_commands, init_phase_shift self.gazebo.unpauseSim() self.gazebo.resetSim() self.gazebo.pauseSim() self.gazebo.resetJoints(init_angles) self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self.pubilsh_toe_commands(init_toe_commands, init_phase_shift) # wait some time until robot stabilizes itself in the environment self.ros_wait(0.01) self.gazebo.pauseSim() self.episode_reward = 0 return self.GetObservation() def reset(self): global init_angles, init_toe_commands, init_phase_shift, saving_path, episode_savingpath self.gazebo.unpauseSim() self.gazebo.resetSim() self.gazebo.pauseSim() # rospy.loginfo("-----RESET-----") self.gazebo.resetJoints(init_angles) # reset joint angles self.gazebo.unpauseSim() # Reset JoinStateControlers because resetSim doesnt reset TFs, generating issues with simulation time self.controllers_object.reset_controllers() self.pubilsh_toe_commands(init_toe_commands, init_phase_shift) self.ros_wait(0.01) self.gazebo.pauseSim() # self.plot_tensorboard("Episode Total Rewards", self.episode_reward,self.frames) # Initialize Episode Values # reset the variable base position to 0 self.last_base_position = [0.0, 0.0, 0.0] self.episode_step = 0 self.episode_reward = 0 self.episode_number += 1 episode_savingpath = str(saving_path) + \ "/Episode_" + str(self.episode_number) if self.saving_option: os.makedirs(episode_savingpath) self.save_time("Start") return self.GetObservation() # initial state S_0 # _______________Step function__________________________________________________________________________________ def step(self, action): global x_high, x_low, y_high, y_low, step_phase_shift # Action values from NN from [-1,1] are mapped to the action space limits action_x = interp(action[0:4], [-1, 1], [x_low, x_high]) action_y = interp(action[4:8], [-1, 1], [y_low, y_high]) action = np.append([action_x], [action_y]) # rospy.loginfo("------Step Action: %s",action) time_out = rospy.get_time() + step_time_out while True: self.gazebo.unpauseSim() self.pubilsh_toe_commands(action, step_phase_shift) self.gazebo.pauseSim() done = self.termination() if rospy.get_time() > time_out or done == True: break else: continue self.gazebo.pauseSim() reward = self.reward() self.episode_reward += reward # rospy.loginfo("-------------EnvFram idx %s : Reward in Env per Step %s-------------", self.frames ,reward) # Tensorboard # self.plot_tensorboard("env step r", reward,self.frames) self.frames += 1 self.episode_step += 1 return self.GetObservation(), reward, done, {} # ______________Episode Termination________________________________________________________________________________ def termination(self): global episode_start pos, quaternion, _, _ = self.GetLaelapsBaseInfo() roll, pitch, yaw = self.transfrom_euler_from_quaternion(quaternion) if self.ramp_available: # pitch angle of the ramp, inclined slopes goal 3.2 m ramp_pitch = self.GetRampInclination() is_fallen = math.fabs(roll) > 0.3 or math.fabs( pitch) > 0.3 + math.fabs(ramp_pitch) or math.fabs( yaw ) > 0.2 or math.fabs(pos[0]) >= self.env_goal or math.fabs( pos[1]) > 0.2 # 3.2 m is nearly the end of the ramp else: ramp_pitch = 0 # flat terrain goal 6 m is_fallen = math.fabs(roll) > 0.3 or math.fabs( pitch) > 0.3 + math.fabs(ramp_pitch) or math.fabs( yaw) > 0.2 or math.fabs( pos[0]) >= self.env_goal or math.fabs( pos[1]) > 0.2 or math.fabs(pos[2]) > 0.17 if is_fallen: self.save_time("Finish") # episode finish self.save_termination([ math.fabs(roll), math.fabs(pitch), math.fabs(yaw), math.fabs(pos[0]), math.fabs(pos[1]) ]) rospy.loginfo( "ROll: %s : %s , Pitch: %s : %s , Yaw: %s : %s , X: %s : %s , Y: %s : %s , Z: %s : %s", math.fabs(roll), math.fabs(roll) > 0.3, math.fabs(pitch), math.fabs(pitch) > 0.3 + math.fabs(ramp_pitch), math.fabs(yaw), math.fabs(yaw) > 0.2, math.fabs(pos[0]), math.fabs(pos[0]) >= 3.2, math.fabs(pos[1]), math.fabs(pos[1]) >= 0.2, math.fabs(pos[2]), math.fabs(pos[2]) > 0.17) # SAVING self.save_torques(self.torques_step, self.frames) self.save_baze_zaxis(self.base_zaxis, self.frames) self.save_baze_x_y(self.base_x_y, self.frames) self.save_euler_angles(self.euler_angles, self.frames) self.save_angular_velocities(self.euler_rates, self.frames) self.save_sim_time(self.sim_t, self.frames) self.torques_step = [] self.euler_angles = [] self.euler_rates = [] self.base_zaxis = [] self.base_x_y = [] self.sim_t = [] episode_start = False return is_fallen # _______________Reward function__________________________________________________________________________________ def reward(self): nan_check = self.GetNanToeCheck() if any(nan_check): rospy.loginfo( "---Angles Inverse Kinematics: NaN %s, try again---r=-1", nan_check) reward = -1 else: if self.ramp_available: ramp_pitch = self.GetRampInclination( ) # pitch angle of the ramp else: ramp_pitch = 0 current_base_position, _, _, _ = self.GetLaelapsBaseInfo() # Reward forward motion forward_reward = math.fabs( current_base_position[0] / math.cos(ramp_pitch)) - math.fabs( self.last_base_position[0] / math.cos(ramp_pitch)) # rospy.loginfo("current_base_position[0] %s self.last_base_position[0] %s forward_reward %s",math.fabs(current_base_position[0]),math.fabs(self.last_base_position[0]),forward_reward ) # Penalize drifting drift_reward = math.fabs(current_base_position[1]) - math.fabs( self.last_base_position[1]) # rospy.loginfo("current_base_position[1] %s self.last_base_position[1] %s drift_reward %s",math.fabs(current_base_position[1]),math.fabs(self.last_base_position[1]),drift_reward ) self.last_base_position = current_base_position reward = (self.distance_weight * forward_reward - self.drift_weight * drift_reward) return reward # ______________Saving values and ploting tensorboard_____________________________________________- def tensorboardwriter(self, folder, use_tensorboardEnV, use_tensorboardAlg): self.enable_tensorboard = use_tensorboardEnV if use_tensorboardEnV or use_tensorboardAlg: self.writer = SummaryWriter(folder) else: self.writer = [] return self.writer def savingpath(self, folder, saving_option): global saving_path self.saving_option = saving_option saving_path = folder def plot_tensorboard(self, title, var1, var2): if self.enable_tensorboard: self.writer.add_scalar(title, var1, var2) def save_actions(self, action, step_number): global episode_savingpath if self.saving_option: action = np.array(action, dtype=np.float64) np.save(str(episode_savingpath) + "/" + str(step_number), action) def save_torques(self, torques, step_number): global episode_savingpath if self.saving_option: torques = np.array(torques, dtype=np.float64) np.save( str(episode_savingpath) + "/Torques_" + str(step_number), torques) def save_euler_angles(self, euler_angles, step_number): global episode_savingpath if self.saving_option: euler_angles = np.array(euler_angles, dtype=np.float64) np.save( str(episode_savingpath) + "/EulerAngles_" + str(step_number), euler_angles) def save_angular_velocities(self, angular_velocities, step_number): global episode_savingpath if self.saving_option: angular_velocities = np.array(angular_velocities, dtype=np.float64) np.save( str(episode_savingpath) + "/AngularVelocities_" + str(step_number), angular_velocities) def save_baze_zaxis(self, base_zaxis, step_number): global episode_savingpath if self.saving_option: base_zaxis = np.array(base_zaxis, dtype=np.float64) np.save( str(episode_savingpath) + "/BaseZaxis_" + str(step_number), base_zaxis) def save_baze_x_y(self, base_x_y, step_number): global episode_savingpath if self.saving_option: base_x_y = np.array(base_x_y, dtype=np.float64) np.save( str(episode_savingpath) + "/BaseX_Y" + str(step_number), base_x_y) def save_sim_time(self, sim_t, step_number): global episode_savingpath if self.saving_option: sim_t = np.array(sim_t, dtype=np.float64) np.save( str(episode_savingpath) + "/Sim_t" + str(step_number), sim_t) def save_time(self, start_or_finish): global episode_savingpath if self.saving_option: episode_time = str(time.ctime(time.time())) np.save( str(episode_savingpath) + "/Episode_" + str(start_or_finish), episode_time) def save_termination(self, termination_critiria): global episode_savingpath if self.saving_option: np.save( str(episode_savingpath) + "/termination", termination_critiria) def close(self): rospy.logdebug("Closing RobotGazeboEnvironment") rospy.signal_shutdown("Closing RobotGazeboEnvironment")
def velocity_test(): """ Test of the time it takes to reach a certain speed """ speed_value = 10.0 wait_time = 0.1 rospy.init_node('cartpole_speed_test_node', anonymous=True, log_level=rospy.WARN) # Controllers Info controllers_list = [ 'joint_state_controller', 'pole_joint_velocity_controller', 'foot_joint_velocity_controller', ] robot_name_space = "cartpole_v0" controllers_object = ControllersConnection( namespace=robot_name_space, controllers_list=controllers_list) debug_object = MoveCartClass() start_init_physics_parameters = True reset_world_or_sim = "SIMULATION" gazebo = GazeboConnection(start_init_physics_parameters, reset_world_or_sim) rospy.loginfo("RESETING SIMULATION") gazebo.pauseSim() gazebo.resetSim() gazebo.unpauseSim() rospy.loginfo("CLOCK AFTER RESET") debug_object.get_clock_time() rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK") controllers_object.reset_controllers() rospy.loginfo("AFTER RESET CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN") debug_object.get_clock_time() rospy.loginfo("START CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("SET init pose...") debug_object.set_init_pose() rospy.loginfo("WAIT FOR GOING TO INIT POSE") time.sleep(wait_time) raw_input("Start Movement...PRESS KEY") i = 0 wait_times_m = [] while not rospy.is_shutdown(): vel_x = [speed_value] rospy.loginfo("Moving RIGHT...") debug_object.move_joints([speed_value]) delta_time = debug_object.wait_until_base_is_in_vel([speed_value]) wait_times_m = numpy.append(wait_times_m, [delta_time]) rospy.loginfo("Moving STOP...") debug_object.move_joints([0.0]) delta_time = debug_object.wait_until_base_is_in_vel(0.0) wait_times_m = numpy.append(wait_times_m, [delta_time]) raw_input("Start Movement...PRESS KEY") rospy.loginfo("Moving LEFT...") debug_object.move_joints([-1 * speed_value]) delta_time = debug_object.wait_until_base_is_in_vel([-1 * speed_value]) wait_times_m = numpy.append(wait_times_m, [delta_time]) rospy.loginfo("Moving STOP...") debug_object.move_joints([0.0]) delta_time = debug_object.wait_until_base_is_in_vel(0.0) wait_times_m = numpy.append(wait_times_m, [delta_time]) raw_input("Start Movement...PRESS KEY") i += 1 if i > 10: average_time = numpy.average(wait_times_m) rospy.logwarn("[average_time Wait Time=" + str(average_time) + "]") break
class OldMouseEnv(gym.Env): def __init__(self): number_actions = rospy.get_param('/mouse/n_actions') self.action_space = spaces.Discrete(number_actions) self._seed() #get configuration parameters self.init_roll_vel = rospy.get_param('/mouse/init_roll_vel') # Actions self.roll_speed_fixed_value = rospy.get_param('/mouse/roll_speed_fixed_value') self.roll_speed_increment_value = rospy.get_param('/mouse/roll_speed_increment_value') self.start_point = Point() self.start_point.x = rospy.get_param("/mouse/init_mouse_pose/x") self.start_point.y = rospy.get_param("/mouse/init_mouse_pose/y") self.start_point.z = rospy.get_param("/mouse/init_mouse_pose/z") # Done self.max_pitch_angle = rospy.get_param('/mouse/max_pitch_angle') # Rewards self.move_distance_reward_weight = rospy.get_param("/mouse/move_distance_reward_weight") self.y_linear_speed_reward_weight = rospy.get_param("/mouse/y_linear_speed_reward_weight") self.y_axis_angle_reward_weight = rospy.get_param("/mouse/y_axis_angle_reward_weight") self.end_episode_points = rospy.get_param("/mouse/end_episode_points") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_list = ['joint_state_controller', 'right_front_diff_drive_controller' ] self.controllers_object = ControllersConnection(namespace="mouse", controllers_list=self.controllers_list) self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self.check_all_sensors_ready() rospy.Subscriber("/mouse/joint_states", JointState, self.joints_callback) rospy.Subscriber("/mouse/odom", Odometry, self.odom_callback) self._roll_vel_pub = rospy.Publisher('/mouse/right_front_diff_drive_controller/command', Float64, queue_size=1) self.check_publishers_connection() self.gazebo.pauseSim() def _seed(self, seed=None): #overriden function self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action):#overriden function self.gazebo.unpauseSim() self.set_action(action) self.gazebo.pauseSim() obs = self._get_obs() done = self._is_done(obs) info = {} reward = self.compute_reward(obs, done) simplified_obs = self.convert_obs_to_state(obs) return simplified_obs, reward, done, info def reset(self): self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self.check_all_sensors_ready() self.set_init_pose() self.gazebo.pauseSim() self.gazebo.resetSim() self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self.check_all_sensors_ready() self.gazebo.pauseSim() self.init_env_variables() obs = self._get_obs() simplified_obs = self.convert_obs_to_state(obs) return simplified_obs def init_env_variables(self): """ Inits variables needed to be initialised each time we reset at the start of an episode. :return: """ self.total_distance_moved = 0.0 self.current_y_distance = self.get_y_dir_distance_from_start_point(self.start_point) self.roll_turn_speed = rospy.get_param('/mouse/init_roll_vel') def _is_done(self, observations): pitch_angle = observations[3] if abs(pitch_angle) > self.max_pitch_angle: rospy.logerr("WRONG Mouse Pitch Orientation==>" + str(pitch_angle)) done = True else: rospy.logdebug("Mouse Pitch Orientation Ok==>" + str(pitch_angle)) done = False return done def set_action(self, action): # We convert the actions to speed movements to send to the parent class MouseSingleDiskEnv if action == 0:# Move Speed Wheel Forwards self.roll_turn_speed = self.roll_speed_fixed_value elif action == 1:# Move Speed Wheel Backwards self.roll_turn_speed = self.roll_speed_fixed_value elif action == 2:# Stop Speed Wheel self.roll_turn_speed = 0.0 elif action == 3:# Increment Speed self.roll_turn_speed += self.roll_speed_increment_value elif action == 4:# Decrement Speed self.roll_turn_speed -= self.roll_speed_increment_value # We clamp Values to maximum rospy.logdebug("roll_turn_speed before clamp=="+str(self.roll_turn_speed)) self.roll_turn_speed = numpy.clip(self.roll_turn_speed, -1*self.roll_speed_fixed_value, self.roll_speed_fixed_value) rospy.logdebug("roll_turn_speed after clamp==" + str(self.roll_turn_speed)) # We tell the OneDiskMouse to spin the RollDisk at the selected speed self.move_joints(self.roll_turn_speed) def _get_obs(self): """ Here we define what sensor data defines our robots observations To know which Variables we have acces to, we need to read the MouseSingleDiskEnv API DOCS :return: """ # We get the orientation of the mouse in RPY roll, pitch, yaw = self.get_orientation_euler() # We get the distance from the origin y_distance = self.get_y_dir_distance_from_start_point(self.start_point) # We get the current speed of the Roll Disk current_disk_roll_vel = self.get_roll_velocity() # We get the linear speed in the y axis y_linear_speed = self.get_y_linear_speed() mouse_observations = [ round(current_disk_roll_vel, 0), round(y_distance, 1), round(roll, 1), round(pitch, 1), round(y_linear_speed,1), round(yaw, 1), ] return mouse_observations def get_orientation_euler(self): # We convert from quaternions to euler orientation_list = [self.odom.pose.pose.orientation.x, self.odom.pose.pose.orientation.y, self.odom.pose.pose.orientation.z, self.odom.pose.pose.orientation.w] roll, pitch, yaw = euler_from_quaternion(orientation_list) return roll, pitch, yaw def get_roll_velocity(self): # We get the current joint roll velocity roll_vel = self.joints.velocity[0] return roll_vel def get_y_linear_speed(self): # We get the current joint roll velocity y_linear_speed = self.odom.twist.twist.linear.y return y_linear_speed def get_y_dir_distance_from_start_point(self, start_point): """ Calculates the distance from the given point and the current position given by odometry. In this case the increase or decrease in y. :param start_point: :return: """ y_dist_dir = self.odom.pose.pose.position.y - start_point.y return y_dist_dir def compute_reward(self, observations, done): if not done: y_distance_now = observations[1] delta_distance = y_distance_now - self.current_y_distance rospy.logdebug("y_distance_now=" + str(y_distance_now)+", current_y_distance=" + str(self.current_y_distance)) rospy.logdebug("delta_distance=" + str(delta_distance)) reward_distance = delta_distance * self.move_distance_reward_weight self.current_y_distance = y_distance_now y_linear_speed = observations[4] rospy.logdebug("y_linear_speed=" + str(y_linear_speed)) reward_y_axis_speed = y_linear_speed * self.y_linear_speed_reward_weight # Negative Reward for yaw different from zero. yaw_angle = observations[5] rospy.logdebug("yaw_angle=" + str(yaw_angle)) # Worst yaw is 90 and 270 degrees, best 0 and 180. We use sin function for giving reward. sin_yaw_angle = math.sin(yaw_angle) rospy.logdebug("sin_yaw_angle=" + str(sin_yaw_angle)) reward_y_axis_angle = -1 * abs(sin_yaw_angle) * self.y_axis_angle_reward_weight # We are not intereseted in decimals of the reward, doesnt give any advatage. reward = round(reward_distance, 0) + round(reward_y_axis_speed, 0) + round(reward_y_axis_angle, 0) rospy.logdebug("reward_distance=" + str(reward_distance)) rospy.logdebug("reward_y_axis_speed=" + str(reward_y_axis_speed)) rospy.logdebug("reward_y_axis_angle=" + str(reward_y_axis_angle)) rospy.logdebug("reward=" + str(reward)) else: reward = -1*self.end_episode_points return reward def joints_callback(self, data): self.joints = data def odom_callback(self, data): self.odom = data def check_all_sensors_ready(self): self.check_joint_states_ready() self.check_odom_ready() rospy.logdebug("ALL SENSORS READY") def check_joint_states_ready(self): self.joints = None while self.joints is None and not rospy.is_shutdown(): try: self.joints = rospy.wait_for_message("/mouse/joint_states", JointState, timeout=1.0) rospy.logdebug("Current mouse/joint_states READY=>" + str(self.joints)) except: rospy.logerr("Current mouse/joint_states not ready yet, retrying for getting joint_states") return self.joints def check_odom_ready(self): self.odom = None while self.odom is None and not rospy.is_shutdown(): try: self.odom = rospy.wait_for_message("/mouse/odom", Odometry, timeout=1.0) rospy.logdebug("Current /mouse/odom READY=>" + str(self.odom)) except: rospy.logerr("Current /mouse/odom not ready yet, retrying for getting odom") return self.odom def check_publishers_connection(self): """ Checks that all the publishers are working :return: """ rate = rospy.Rate(10) # 10hz while (self._roll_vel_pub.get_num_connections() == 0 and not rospy.is_shutdown()): rospy.logdebug("No susbribers to _roll_vel_pub yet so we wait and try again") try: rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_base_pub Publisher Connected") rospy.logdebug("All Publishers READY") def move_joints(self, roll_speed): joint_speed_value = Float64() joint_speed_value.data = roll_speed rospy.logdebug("Single Disk Roll Velocity>>" + str(joint_speed_value)) self._roll_vel_pub.publish(joint_speed_value) self.wait_until_roll_is_in_vel(joint_speed_value.data) def wait_until_roll_is_in_vel(self, velocity): rate = rospy.Rate(10) start_wait_time = rospy.get_rostime().to_sec() end_wait_time = 0.0 epsilon = 0.1 v_plus = velocity + epsilon v_minus = velocity - epsilon while not rospy.is_shutdown(): joint_data = self.check_joint_states_ready() roll_vel = joint_data.velocity[0] rospy.logdebug("VEL=" + str(roll_vel) + ", ?RANGE=[" + str(v_minus) + ","+str(v_plus)+"]") are_close = (roll_vel <= v_plus) and (roll_vel > v_minus) if are_close: rospy.logdebug("Reached Velocity!") end_wait_time = rospy.get_rostime().to_sec() break rospy.logdebug("Not there yet, keep waiting...") rate.sleep() delta_time = end_wait_time- start_wait_time rospy.logdebug("[Wait Time=" + str(delta_time)+"]") return delta_time def set_init_pose(self): """Sets the Robot in its init pose """ self.move_joints(self.init_roll_vel) return True def convert_obs_to_state(self,observations): """ Converts the observations used for reward and so on to the essentials for the robot state In this case we only need the orientation of the mouse and the speed of the disc. The distance doesnt condition at all the actions """ disk_roll_vel = observations[0] y_linear_speed = observations[4] yaw_angle = observations[5] state_converted = [disk_roll_vel, y_linear_speed, yaw_angle] return state_converted
class URSimEnv(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): # We assume that a ROS node has already been created before initialising the environment # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.list_of_observations = rospy.get_param("/list_of_observations") haa_max = rospy.get_param("/joint_limits_array/haa_max") haa_min = rospy.get_param("/joint_limits_array/haa_min") hfe_max = rospy.get_param("/joint_limits_array/hfe_max") hfe_min = rospy.get_param("/joint_limits_array/hfe_min") kfe_max = rospy.get_param("/joint_limits_array/kfe_max") kfe_min = rospy.get_param("/joint_limits_array/kfe_min") self.joint_limits = { "haa_max": haa_max, "haa_min": haa_min, "hfe_max": hfe_max, "hfe_min": hfe_min, "kfe_max": kfe_max, "kfe_min": kfe_min } self.discrete_division = rospy.get_param("/discrete_division") self.maximum_base_linear_acceleration = rospy.get_param( "/maximum_base_linear_acceleration") self.maximum_base_angular_velocity = rospy.get_param( "/maximum_base_angular_velocity") self.maximum_joint_effort = rospy.get_param("/maximum_joint_effort") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") haa_init_value = rospy.get_param("/init_joint_pose/haa") hfe_init_value = rospy.get_param("/init_joint_pose/hfe") kfe_init_value = rospy.get_param("/init_joint_pose/kfe") self.init_joint_pose = [haa_init_value, hfe_init_value, kfe_init_value] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # Jump Increment Value in Radians self.jump_increment = rospy.get_param("/jump_increment") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="ur") self.ur_state_object = URState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, list_of_observations=self.list_of_observations, joint_limits=self.joint_limits, episode_done_criteria=self.episode_done_criteria, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5, discrete_division=self.discrete_division, maximum_base_linear_acceleration=self. maximum_base_linear_acceleration, maximum_base_angular_velocity=self.maximum_base_angular_velocity, maximum_joint_effort=self.maximum_joint_effort, jump_increment=self.jump_increment) self.ur_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.ur_joint_pubisher_object = JointPub() """ For this version, we consider 5 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5) Dont Move 6) Perform One Jump """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_ur_joint_controllers...") self.controllers_object.reset_ur_joint_controllers() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose)) # We save that position as the current joint desired position init_pos = self.ur_state_object.init_joints_pose(self.init_joint_pose) # 4th: We Set the init pose to the jump topic so that the jump control can update rospy.logdebug("Publish init_pose for Jump Control...>>>" + str(init_pos)) # We check the jump publisher has connection self.ur_joint_pubisher_object.check_publishers_connection() # We move the joints to position, no jump do_jump = False self.ur_joint_pubisher_object.move_joints_jump(init_pos, do_jump) # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.ur_state_object.check_all_systems_ready() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # 8th: Get the State Discrete Stringuified version of the observations rospy.logdebug("get_observations...") observation = self.ur_state_object.get_observations() state = self.get_state(observation) return state def _step(self, action): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponds to which joint is incremented next_action_position, do_jump = self.ur_state_object.get_action_to_position( action) # We move it to that pos self.gazebo.unpauseSim() self.ur_joint_pubisher_object.move_joints_jump(next_action_position, do_jump) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.ur_state_object.get_observations() # finally we get an evaluation based on what happened in the sim reward, done = self.ur_state_object.process_data() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state, reward, done, {} def get_state(self, observation): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ return self.ur_state_object.get_state_as_string(observation)
def __init__(self): # We assume that a ROS node has already been created before initialising the environment # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.list_of_observations = rospy.get_param("/list_of_observations") haa_max = rospy.get_param("/joint_limits_array/haa_max") haa_min = rospy.get_param("/joint_limits_array/haa_min") hfe_max = rospy.get_param("/joint_limits_array/hfe_max") hfe_min = rospy.get_param("/joint_limits_array/hfe_min") kfe_max = rospy.get_param("/joint_limits_array/kfe_max") kfe_min = rospy.get_param("/joint_limits_array/kfe_min") self.joint_limits = { "haa_max": haa_max, "haa_min": haa_min, "hfe_max": hfe_max, "hfe_min": hfe_min, "kfe_max": kfe_max, "kfe_min": kfe_min } self.discrete_division = rospy.get_param("/discrete_division") self.maximum_base_linear_acceleration = rospy.get_param( "/maximum_base_linear_acceleration") self.maximum_base_angular_velocity = rospy.get_param( "/maximum_base_angular_velocity") self.maximum_joint_effort = rospy.get_param("/maximum_joint_effort") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") haa_init_value = rospy.get_param("/init_joint_pose/haa") hfe_init_value = rospy.get_param("/init_joint_pose/hfe") kfe_init_value = rospy.get_param("/init_joint_pose/kfe") self.init_joint_pose = [haa_init_value, hfe_init_value, kfe_init_value] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # Jump Increment Value in Radians self.jump_increment = rospy.get_param("/jump_increment") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="ur") self.ur_state_object = URState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, list_of_observations=self.list_of_observations, joint_limits=self.joint_limits, episode_done_criteria=self.episode_done_criteria, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5, discrete_division=self.discrete_division, maximum_base_linear_acceleration=self. maximum_base_linear_acceleration, maximum_base_angular_velocity=self.maximum_base_angular_velocity, maximum_joint_effort=self.maximum_joint_effort, jump_increment=self.jump_increment) self.ur_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.ur_joint_pubisher_object = JointPub() """ For this version, we consider 5 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5) Dont Move 6) Perform One Jump """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed()
def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="monoped") self.monoped_state_object = MonopedState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5) self.monoped_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.monoped_joint_pubisher_object = JointPub() """ For this version, we consider 6 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5-6) Increment/Decrement kfe_joint """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed()
class CartPole3DEnv(gym.Env): def __init__(self): self.publishers_array = [] self._base_pub = rospy.Publisher('/cart_pole_3d/cart_joint_velocity_controller/command', Float64, queue_size=1) # self._pole_pub = rospy.Publisher('/cartpole_v0/pole_joint_velocity_controller/command', Float64, queue_size=1) self.publishers_array.append(self._base_pub) # self.publishers_array.append(self._pole_pub) self.action_space = spaces.Discrete(2) #l,r,L,R,nothing self._seed() #get configuration parameters self.min_pole_angle = rospy.get_param('/cart_pole_3d/min_angle') self.max_pole_angle = rospy.get_param('/cart_pole_3d/max_angle') self.max_base_velocity = rospy.get_param('/cart_pole_3d/cart_speed_fixed_value') self.min_base_position = -rospy.get_param('/cart_pole_3d/max_distance') self.max_base_position = rospy.get_param('/cart_pole_3d/max_distance') self.pos_step = rospy.get_param('/cart_pole_3d/pos_step') self.running_step = rospy.get_param('/cart_pole_3d/running_step') self.init_pos = rospy.get_param('/cart_pole_3d/init_cart_vel') self.wait_time = rospy.get_param('/cart_pole_3d/wait_time') self.init_internal_vars(self.init_pos) rospy.Subscriber("/cart_pole_3d/joint_states", JointState, self.joints_callback) # stablishes connection with simulator self.controllers_list = ['cart_joint_velocity_controller'] self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="cart_pole_3d", controllers_list=self.controllers_list) def init_internal_vars(self, init_pos_value): self.pos = [init_pos_value] self.joints = None #always returns the current state of the joints def joints_callback(self, data): self.joints = data def get_clock_time(self): self.clock_time = None while self.clock_time is None and not rospy.is_shutdown(): try: self.clock_time = rospy.wait_for_message("/clock", Clock, timeout=1.0) rospy.logdebug("Current clock_time READY=>" + str(self.clock_time)) except: rospy.logdebug("Current clock_time not ready yet, retrying for getting Current clock_time") return self.clock_time def _seed(self, seed=None): #overriden function self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action):#overriden function # Take action if action == 0: #LEFT rospy.logwarn("GO LEFT...") self.pos[0] -= self.pos_step elif action == 1: #RIGHT rospy.logwarn("GO RIGHT...") self.pos[0] += self.pos_step rospy.logwarn("MOVING TO POS=="+str(self.pos)) # 1st: unpause simulation rospy.logdebug("Unpause SIM...") self.gazebo.unpauseSim() self.move_joints(self.pos) rospy.logdebug("Wait for some time to execute movement, time="+str(self.running_step)) rospy.sleep(self.running_step) #wait for some time rospy.logdebug("DONE Wait for some time to execute movement, time=" + str(self.running_step)) # 3rd: pause simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # 4th: get the observation observation, done, state = self.observation_checks() # 5th: get the reward if not done: step_reward = 0 obs_reward = self.get_reward_for_observations(state) rospy.loginfo("Reward Values: Time="+str(step_reward)+",Obs="+str(obs_reward)) reward = step_reward + int(obs_reward) rospy.loginfo("TOT Reward=" + str(reward)) else: reward = -2000000 now = rospy.get_rostime() # TODO: Seems that gym version 0.8 doesnt support this very well. return observation, reward, done, {} def _reset(self): rospy.loginfo("We UNPause the simulation to start having topic data") self.gazebo.unpauseSim() rospy.loginfo("CLOCK BEFORE RESET") self.get_clock_time() rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA") # self.check_all_systems_ready() self.check_joint_states_ready() rospy.loginfo("SETTING INITIAL POSE TO AVOID") self.set_init_pose() time.sleep(self.wait_time * 2.0) #rospy.logdebug("We deactivate gravity to check any reasidual effect of reseting the simulation") #self.gazebo.change_gravity(0.0, 0.0, 0.0) rospy.loginfo("RESETING SIMULATION") self.gazebo.pauseSim() self.gazebo.resetSim() self.gazebo.unpauseSim() rospy.loginfo("CLOCK AFTER RESET") self.get_clock_time() rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK") self.controllers_object.reset_controllers() rospy.loginfo("AFTER RESET CHECKING SENSOR DATA") # self.check_all_systems_ready() self.check_joint_states_ready() rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN") self.get_clock_time() #rospy.logdebug("We reactivating gravity...") #self.gazebo.change_gravity(0.0, 0.0, -9.81) rospy.loginfo("END") # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # get the last observation got when paused, generated by the callbakc or the check_all_systems_ready # Depends on who was last observation, _, state = self.observation_checks() return observation ''' UTILITY CODE FOLLOWS HERE ''' def observation_checks(self): done = False data = self.joints # base_postion base_velocity pole angle pole velocity state = [round(data.position[1],1), round(data.velocity[1],1), round(data.position[0],3), round(data.velocity[0],3)] # pole angle pole velocity rospy.loginfo("BASEPOSITION=="+str(state[0])) rospy.loginfo("POLE ANGLE==" + str(state[2])) if (self.min_base_position >= state[0] or state[0] >= self.max_base_position): #check if the base is still within the ranges of (-2, 2) rospy.logerr("Base Ouside Limits==>min="+str(self.min_base_position)+",pos="+str(state[0])+",max="+str(self.max_base_position)) done = True if (self.min_pole_angle >= state[2] or state[2] >= self.max_pole_angle): #check if pole has toppled over rospy.logerr( "Pole Angle Ouside Limits==>min=" + str(self.min_pole_angle) + ",pos=" + str(state[2]) + ",max=" + str( self.max_pole_angle)) done = True observations = [state[2]] return observations, done, state def get_reward_for_observations(self, state): """ Gives more points for staying upright, gets data from given observations to avoid having different data than other previous functions :return:reward """ pole_angle = state[2] pole_vel = state[3] rospy.logwarn("pole_angle for reward==>" + str(pole_angle)) delta = 0.7 - abs(pole_angle) reward_pole_angle = math.exp(delta*10) # If we are moving to the left and the pole is falling left is Bad rospy.logwarn("pole_vel==>" + str(pole_vel)) pole_vel_sign = numpy.sign(pole_vel) pole_angle_sign = numpy.sign(pole_angle) rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign)) rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign)) # We want inverted signs for the speeds. We multiply by -1 to make minus positive. # global_sign + = GOOD, global_sign - = BAD base_reward = 500 if pole_vel != 0: global_sign = pole_angle_sign * pole_vel_sign * -1 reward_for_efective_movement = base_reward * global_sign else: # Is a particular case. If it doesnt move then its good also reward_for_efective_movement = base_reward reward = reward_pole_angle + reward_for_efective_movement rospy.logwarn("reward==>" + str(reward)+"= r_pole_angle="+str(reward_pole_angle)+",r_movement= "+str(reward_for_efective_movement)) return reward def check_publishers_connection(self): """ Checks that all the publishers are working :return: """ rate = rospy.Rate(10) # 10hz while (self._base_pub.get_num_connections() == 0 and not rospy.is_shutdown()): rospy.loginfo("No susbribers to _base_pub yet so we wait and try again") try: rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.loginfo("_base_pub Publisher Connected") def check_all_systems_ready(self, init=True): self.base_position = None while self.base_position is None and not rospy.is_shutdown(): try: self.base_position = rospy.Subscriber("/cart_pole_3d/joint_states", JointState, timeout=1.0) rospy.logdebug("Current /cart_pole_3d/joint_states READY=>"+str(self.base_position)) if init: # We Check all the sensors are in their initial values positions_ok = all(abs(i) <= 1.0e-02 for i in self.base_position.position) velocity_ok = all(abs(i) <= 1.0e-02 for i in self.base_position.velocity) efforts_ok = all(abs(i) <= 1.0e-01 for i in self.base_position.effort) base_data_ok = positions_ok and velocity_ok and efforts_ok rospy.logdebug("Checking Init Values Ok=>" + str(base_data_ok)) except: rospy.logerr("Current /cart_pole_3d/joint_states not ready yet, retrying for getting joint_states") rospy.logdebug("ALL SYSTEMS READY") def check_joint_states_ready(self): self.base_position = None while self.base_position is None and not rospy.is_shutdown(): try: self.base_position = rospy.wait_for_message("/cart_pole_3d/joint_states", JointState, timeout=1.0) # check response from this topic for 1 second. if don't received respone, it mean not ready. # Assure data channels are working perfectly. rospy.logdebug("Current /cart_pole_3d/joint_states READY=>" + str(self.base_position)) except: rospy.logerr("Current /cart_pole_3d/joint_states not ready yet, retrying for getting joint_states") def move_joints(self, joints_array): joint_value = Float64() joint_value.data = joints_array[0] rospy.logdebug("Single Base JointsPos>>"+str(joint_value)) self._base_pub.publish(joint_value) def set_init_pose(self): """ Sets joints to initial position [0,0,0] :return: """ self.check_publishers_connection() # Reset Internal pos variable self.init_internal_vars(self.init_pos) self.move_joints(self.pos)
class URSimDoorOpening(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): # rospy.logdebug("Starting URSimDoorOpening Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/ft_sensor_topic", WrenchStamped, self.wrench_stamped_callback) rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) rospy.Subscriber("/robotiq/rightcam/image_raw_right", Image, self.r_image_callback) rospy.Subscriber("/robotiq/leftcam/image_raw_left", Image, self.l_image_callback) # Gets training parameters from param server self.running_step = rospy.get_param("/running_step") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value0 = rospy.get_param("/init_joint_pose0/shp") shl_init_value0 = rospy.get_param("/init_joint_pose0/shl") elb_init_value0 = rospy.get_param("/init_joint_pose0/elb") wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1") wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2") wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3") self.init_joint_pose0 = [ shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0, wr2_init_value0, wr3_init_value0 ] shp_init_value1 = rospy.get_param("/init_joint_pose1/shp") shl_init_value1 = rospy.get_param("/init_joint_pose1/shl") elb_init_value1 = rospy.get_param("/init_joint_pose1/elb") wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1") wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2") wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3") self.init_joint_pose1 = [ shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1 ] shp_init_value2 = rospy.get_param("/init_joint_pose2/shp") shl_init_value2 = rospy.get_param("/init_joint_pose2/shl") elb_init_value2 = rospy.get_param("/init_joint_pose2/elb") wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1") wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2") wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3") self.init_joint_pose2 = [ shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2 ] r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive") l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive") r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower") l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower") r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring") l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring") r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive") l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive") r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower") l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower") r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring") l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring") init_pos0 = self.init_joints_pose(self.init_joint_pose0) self.arr_init_pos0 = np.array(init_pos0, dtype='float32') init_pos1 = self.init_joints_pose(self.init_joint_pose1) self.arr_init_pos1 = np.array(init_pos1, dtype='float32') init_pos2 = self.init_joints_pose(self.init_joint_pose2) self.arr_init_pos2 = np.array(init_pos2, dtype='float32') self.init_grp_pose1 = [ r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1, r_spr_value1, l_spr_value1 ] self.init_grp_pose2 = [ r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2, r_spr_value2, l_spr_value2 ] init_g_pos1 = self.init_joints_pose(self.init_grp_pose1) self.arr_init_g_pos1 = np.array(init_g_pos1, dtype='float32') init_g_pos2 = self.init_joints_pose(self.init_grp_pose2) self.arr_init_g_pos2 = np.array(init_g_pos2, dtype='float32') # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # Get the force and troque limit self.force_limit = rospy.get_param("/force_limit") self.torque_limit = rospy.get_param("/torque_limit") # Get tolerances of door_frame self.tolerances = rospy.get_param("/door_frame_tolerances") # Get observation parameters self.joint_n = rospy.get_param("/obs_params/joint_n") self.eef_n = rospy.get_param("/obs_params/eef_n") self.eef_rpy_n = rospy.get_param("/obs_params/eef_rpy_n") self.force_n = rospy.get_param("/obs_params/force_n") self.torque_n = rospy.get_param("/obs_params/torque_n") self.image_n = rospy.get_param("/obs_params/image_n") self.min_static_limit = rospy.get_param("/min_static_limit") self.max_static_limit = rospy.get_param("/max_static_limit") # We init the observations self.base_orientation = Quaternion() self.imu_link = Quaternion() self.door = Quaternion() self.door_frame = Point() self.quat = Quaternion() self.imu_link_rpy = Vector3() self.door_rpy = Vector3() self.link_state = LinkStates() self.wrench_stamped = WrenchStamped() self.joints_state = JointState() self.right_image = Image() self.right_image_ini = [] self.left_image = Image() self.lift_image_ini = [] self.end_effector = Point() self.previous_action = copy.deepcopy(self.arr_init_pos2) self.counter = 0 self.max_rewards = 1 # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(n_act) self.observation_space = obs_dim #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_pos_server = rospy.Service('/set_position_controller', SetBool, self._set_pos_ctrl) set_joint_traj_pos_server = rospy.Service( '/set_trajectory_position_controller', SetBool, self._set_traj_pos_ctrl) set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service( '/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) self.pos_traj_controller = [ 'joint_state_controller', 'gripper_controller', 'pos_traj_controller' ] self.pos_controller = [ "joint_state_controller", "gripper_controller", "ur_shoulder_pan_pos_controller", "ur_shoulder_lift_pos_controller", "ur_elbow_pos_controller", "ur_wrist_1_pos_controller", "ur_wrist_2_pos_controller", "ur_wrist_3_pos_controller" ] self.vel_traj_controller = [ 'joint_state_controller', 'gripper_controller', 'vel_traj_controller' ] self.vel_controller = [ "joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller" ] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig) def check_stop_flg(self): if self.stop_flag is False: return False else: return True def _start_trainnig(self, req): rospy.logdebug("_start_trainnig!!!!") self.stop_flag = False return SetBoolResponse(True, "_start_trainnig") def _stop_trainnig(self, req): rospy.logdebug("_stop_trainnig!!!!") self.stop_flag = True return SetBoolResponse(True, "_stop_trainnig") def _set_pos_ctrl(self, req): rospy.wait_for_service('set_position_controller') self._ctrl_conn.stop_controllers(self.pos_traj_controller) self._ctrl_conn.start_controllers(self.pos_controller) self._ctrl_type = 'pos' return SetBoolResponse(True, "_set_pos_ctrl") def _set_traj_pos_ctrl(self, req): rospy.wait_for_service('set_trajectory_position_controller') self._ctrl_conn.stop_controllers(self.pos_controller) self._ctrl_conn.start_controllers(self.pos_traj_controller) self._ctrl_type = 'traj_pos' return SetBoolResponse(True, "_set_traj_pos_ctrl") def _set_vel_ctrl(self, req): rospy.wait_for_service('set_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_traj_controller) self._ctrl_conn.start_controllers(self.vel_controller) self._ctrl_type = 'vel' return SetBoolResponse(True, "_set_vel_ctrl") def _set_traj_vel_ctrl(self, req): rospy.wait_for_service('set_trajectory_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_controller) self._ctrl_conn.start_controllers(self.vel_traj_controller) self._ctrl_type = 'traj_vel' return SetBoolResponse(True, "_set_traj_vel_ctrl") # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def check_all_systems_ready(self): """ We check that all systems are ready :return: """ joint_states_msg = None while joint_states_msg is None and not rospy.is_shutdown(): try: joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1) self.joints_state = joint_states_msg rospy.logdebug("Current joint_states READY") except Exception as e: self._ctrl_conn.start_controllers( controllers_on="joint_state_controller") rospy.logdebug( "Current joint_states not ready yet, retrying==>" + str(e)) link_states_msg = None while link_states_msg is None and not rospy.is_shutdown(): try: link_states_msg = rospy.wait_for_message("/gazebo/link_states", LinkStates, timeout=0.1) self.link_states = link_states_msg rospy.logdebug("Reading link_states READY") except Exception as e: rospy.logdebug( "Reading link_states not ready yet, retrying==>" + str(e)) rospy.logdebug("ALL SYSTEMS READY") def get_xyz(self, q): """Get x,y,z coordinates Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_current_xyz(self): """Get x,y,z coordinates according to currrent joint angles Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ joint_states = self.joints_state shp_joint_ang = joint_states.position[0] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[2] wr1_joint_ang = joint_states.position[3] wr2_joint_ang = joint_states.position[4] wr3_joint_ang = joint_states.position[5] q = [ shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang ] mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_orientation(self, q): """Get Euler angles Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) orientation = mat[0:3, 0:3] roll = -orientation[1, 2] pitch = orientation[0, 2] yaw = -orientation[0, 1] return Vector3(roll, pitch, yaw) def cvt_quat_to_euler(self, quat): euler_rpy = Vector3() euler = euler_from_quaternion( [self.quat.x, self.quat.y, self.quat.z, self.quat.w]) euler_rpy.x = euler[0] euler_rpy.y = euler[1] euler_rpy.z = euler[2] return euler_rpy def init_joints_pose(self, init_pos): """ We initialise the Position variable that saves the desired position where we want our joints to be :param init_pos: :return: """ self.current_joint_pose = [] self.current_joint_pose = copy.deepcopy(init_pos) return self.current_joint_pose def get_euclidean_dist(self, p_in, p_pout): """ Given a Vector3 Object, get distance from current position :param p_end: :return: """ a = numpy.array((p_in.x, p_in.y, p_in.z)) b = numpy.array((p_pout.x, p_pout.y, p_pout.z)) distance = numpy.linalg.norm(a - b) return distance def joints_state_callback(self, msg): self.joints_state = msg def wrench_stamped_callback(self, msg): self.wrench_stamped = msg def link_state_callback(self, msg): self.link_state = msg self.end_effector = self.link_state.pose[12] self.imu_link = self.link_state.pose[5] self.door_frame = self.link_state.pose[1] self.door = self.link_state.pose[2] def r_image_callback(self, msg): self.right_image = msg def l_image_callback(self, msg): self.left_image = msg def get_observations(self): """ Returns the state of the robot needed for OpenAI QLearn Algorithm The state will be defined by an array :return: observation """ joint_states = self.joints_state eef_rpy = Vector3() self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("[force]", self.force.x, self.force.y, self.force.z) # print("[torque]", self.torque.x, self.torque.y, self.torque.z) shp_joint_ang = joint_states.position[2] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[0] wr1_joint_ang = joint_states.position[9] wr2_joint_ang = joint_states.position[10] wr3_joint_ang = joint_states.position[11] shp_joint_vel = joint_states.velocity[2] shl_joint_vel = joint_states.velocity[1] elb_joint_vel = joint_states.velocity[0] wr1_joint_vel = joint_states.velocity[9] wr2_joint_vel = joint_states.velocity[10] wr3_joint_vel = joint_states.velocity[11] q = [ shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang ] eef_x, eef_y, eef_z = self.get_xyz(q) eef_x_ini, eef_y_ini, eef_z_ini = self.get_xyz(self.init_joint_pose2) eef_rpy = self.get_orientation(q) eef_rpy_ini = self.get_orientation(self.init_joint_pose2) r_image = self.right_image l_image = self.left_image observation = [] # rospy.logdebug("List of Observations==>"+str(self.observations)) for obs_name in self.observations: if obs_name == "shp_joint_ang": observation.append( (shp_joint_ang - self.init_joint_pose2[0]) * self.joint_n) elif obs_name == "shl_joint_ang": observation.append( (shl_joint_ang - self.init_joint_pose2[1]) * self.joint_n) elif obs_name == "elb_joint_ang": observation.append( (elb_joint_ang - self.init_joint_pose2[2]) * self.joint_n) elif obs_name == "wr1_joint_ang": observation.append( (wr1_joint_ang - self.init_joint_pose2[3]) * self.joint_n) elif obs_name == "wr2_joint_ang": observation.append( (wr2_joint_ang - self.init_joint_pose2[4]) * self.joint_n) elif obs_name == "wr3_joint_ang": observation.append( (wr3_joint_ang - self.init_joint_pose2[5]) * self.joint_n) elif obs_name == "shp_joint_vel": observation.append(shp_joint_vel) elif obs_name == "shl_joint_vel": observation.append(shl_joint_vel) elif obs_name == "elb_joint_vel": observation.append(elb_joint_vel) elif obs_name == "wr1_joint_vel": observation.append(wr1_joint_vel) elif obs_name == "wr2_joint_vel": observation.append(wr2_joint_vel) elif obs_name == "wr3_joint_vel": observation.append(wr3_joint_vel) elif obs_name == "eef_x": observation.append((eef_x - eef_x_ini) * self.eef_n) elif obs_name == "eef_y": observation.append((eef_y - eef_y_ini) * self.eef_n) elif obs_name == "eef_z": observation.append((eef_z - eef_z_ini) * self.eef_n) elif obs_name == "eef_rpy_x": observation.append( (eef_rpy.x - eef_rpy_ini.x) * self.eef_rpy_n) elif obs_name == "eef_rpy_y": observation.append( (eef_rpy.y - eef_rpy_ini.y) * self.eef_rpy_n) elif obs_name == "eef_rpy_z": observation.append( (eef_rpy.z - eef_rpy_ini.z) * self.eef_rpy_n) elif obs_name == "force_x": observation.append((self.force.x - self.force_ini.x) / self.force_limit * self.force_n) elif obs_name == "force_y": observation.append((self.force.y - self.force_ini.y) / self.force_limit * self.force_n) elif obs_name == "force_z": observation.append((self.force.z - self.force_ini.z) / self.force_limit * self.force_n) elif obs_name == "torque_x": observation.append((self.torque.x - self.torque_ini.x) / self.torque_limit * self.torque_n) elif obs_name == "torque_y": observation.append((self.torque.y - self.torque_ini.y) / self.torque_limit * self.torque_n) elif obs_name == "torque_z": observation.append((self.torque.z - self.torque_ini.z) / self.torque_limit * self.torque_n) elif obs_name == "image_data": for x in range(0, 28): observation.append( (ord(r_image.data[x]) - ord(self.right_image_ini.data[x])) * self.image_n) for x in range(0, 28): observation.append( (ord(l_image.data[x]) - ord(self.left_image_ini.data[x])) * self.image_n) else: raise NameError('Observation Asked does not exist==' + str(obs_name)) # print("observation", list(map(round, observation, [3]*len(observation)))) return observation def clamp_to_joint_limits(self): """ clamps self.current_joint_pose based on the joint limits self._joint_limits { "shp_max": shp_max, "shp_min": shp_min, ... } :return: """ rospy.logdebug("Clamping current_joint_pose>>>" + str(self.current_joint_pose)) shp_joint_value = self.current_joint_pose[0] shl_joint_value = self.current_joint_pose[1] elb_joint_value = self.current_joint_pose[2] wr1_joint_value = self.current_joint_pose[3] wr2_joint_value = self.current_joint_pose[4] wr3_joint_value = self.current_joint_pose[5] self.current_joint_pose[0] = max( min(shp_joint_value, self._joint_limits["shp_max"]), self._joint_limits["shp_min"]) self.current_joint_pose[1] = max( min(shl_joint_value, self._joint_limits["shl_max"]), self._joint_limits["shl_min"]) self.current_joint_pose[2] = max( min(elb_joint_value, self._joint_limits["elb_max"]), self._joint_limits["elb_min"]) self.current_joint_pose[3] = max( min(wr1_joint_value, self._joint_limits["wr1_max"]), self._joint_limits["wr1_min"]) self.current_joint_pose[4] = max( min(wr2_joint_value, self._joint_limits["wr2_max"]), self._joint_limits["wr2_min"]) self.current_joint_pose[5] = max( min(wr3_joint_value, self._joint_limits["wr3_max"]), self._joint_limits["wr3_min"]) rospy.logdebug("DONE Clamping current_joint_pose>>>" + str(self.current_joint_pose)) def first_reset(self): # print("first reset") jointtrajpub = JointTrajPub() for update in range(500): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos0) time.sleep(0.5) for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos1) time.sleep(0.5) # Resets the state of the environment and returns an initial observation. def reset(self): self.max_knob_rotation = 0 self.max_door_rotation = 0 self.max_wirst3 = 0 self.min_wirst3 = 0 self.max_wirst2 = 0 self.min_wirst2 = 0 self.max_wirst1 = 0 self.min_wirst1 = 0 self.max_elb = 0 self.min_elb = 0 self.max_shl = 0 self.min_shl = 0 self.max_shp = 0 self.min_shp = 0 self.max_force_x = 0 self.min_force_x = 0 self.max_force_y = 0 self.min_force_y = 0 self.max_force_z = 0 self.min_force_z = 0 self.max_torque_x = 0 self.min_torque_x = 0 self.max_torque_y = 0 self.min_torque_y = 0 self.max_torque_z = 0 self.min_torque_z = 0 # Go to initial position self._gz_conn.unpauseSim() # rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose0)) jointtrajpub = JointTrajPub() for update in range(200): jointtrajpub.GrpCommand(self.arr_init_g_pos1) # time.sleep(1) for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos2) time.sleep(1) for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos1) time.sleep(1) # 0st: We pause the Simulator # rospy.logdebug("Pausing SIM...") self._gz_conn.pauseSim() # 1st: resets the simulation to initial values # rospy.logdebug("Reset SIM...") self._gz_conn.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation # rospy.logdebug("Remove Gravity...") self._gz_conn.change_gravity_zero() # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems # rospy.logdebug("reset_ur_joint_controllers...") self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type) # 3rd: resets the robot to initial conditions # rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose1)) # rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose2)) self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("self.force", self.force) # print("self.torque", self.torque) self.force_ini = copy.deepcopy(self.force) self.torque_ini = copy.deepcopy(self.torque) # We save that position as the current joint desired position # 4th: We Set the init pose to the jump topic so that the jump control can update # We check the jump publisher has connection if self._ctrl_type == 'traj_pos': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'pos': self._joint_pubisher.check_publishers_connection() elif self._ctrl_type == 'traj_vel': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'vel': self._joint_pubisher.check_publishers_connection() else: rospy.logwarn("Controller type is wrong!!!!") # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints # rospy.logdebug("check_all_systems_ready...") self.check_all_systems_ready() # 6th: We restore the gravity to original # rospy.logdebug("Restore Gravity...") self._gz_conn.adjust_gravity() for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos2) time.sleep(1) for update in range(200): jointtrajpub.GrpCommand(self.arr_init_g_pos2) time.sleep(1) # 7th: pauses simulation # rospy.logdebug("Pause SIM...") self._gz_conn.pauseSim() self.right_image_ini = copy.deepcopy(self.right_image) self.left_image_ini = copy.deepcopy(self.left_image) # 8th: Get the State Discrete Stringuified version of the observations # rospy.logdebug("get_observations...") observation = self.get_observations() # print("[observations]", observation) return observation def _act(self, action): if self._ctrl_type == 'traj_pos': self.pre_ctrl_type = 'traj_pos' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'pos': self.pre_ctrl_type = 'pos' self._joint_pubisher.move_joints(action) elif self._ctrl_type == 'traj_vel': self.pre_ctrl_type = 'traj_vel' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'vel': self.pre_ctrl_type = 'vel' self._joint_pubisher.move_joints(action) else: self._joint_pubisher.move_joints(action) def training_ok(self): rate = rospy.Rate(1) while self.check_stop_flg() is True: rospy.logdebug("stop_flag is ON!!!!") self._gz_conn.unpauseSim() if self.check_stop_flg() is False: break rate.sleep() def step(self, action, update): ''' ('action: ', array([ 0., 0. , -0., -0., -0. , 0. ], dtype=float32)) ''' # rospy.logdebug("UR step func") # define the logger self.training_ok() # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # Act self._gz_conn.unpauseSim() if self.max_wirst3 < action[5]: self.max_wirst3 = action[5] if self.min_wirst3 > action[5]: self.min_wirst3 = action[5] if self.max_wirst2 < action[4]: self.max_wirst2 = action[4] if self.min_wirst2 > action[4]: self.min_wirst2 = action[4] if self.max_wirst1 < action[3]: self.max_wirst1 = action[3] if self.min_wirst1 > action[3]: self.min_wirst1 = action[3] if self.max_elb < action[2]: self.max_elb = action[2] if self.min_elb > action[2]: self.min_elb = action[2] if self.max_shl < action[1]: self.max_shl = action[1] if self.min_shl > action[1]: self.min_shl = action[1] if self.max_shp < action[0]: self.max_shp = action[0] if self.min_shp > action[0]: self.min_shp = action[0] # print("action", action) action = action + self.arr_init_pos2 # action = [1.488122534496775, -1.4496597816566892, 2.4377209990850974, 2.168370898415174, -1.4670589583209175, 1.4] self._act(action) self.wrench_stamped self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("force", self.force) # print("torque", self.torque) if self.max_force_x < self.force.x: self.max_force_x = self.force.x if self.min_force_x > self.force.x: self.min_force_x = self.force.x if self.max_force_y < self.force.y: self.max_force_y = self.force.y if self.min_force_y > self.force.y: self.min_force_y = self.force.y if self.max_force_z < self.force.z: self.max_force_z = self.force.z if self.min_force_z > self.force.z: self.min_force_z = self.force.z if self.max_torque_x < self.torque.x: self.max_torque_x = self.torque.x if self.min_torque_x > self.torque.x: self.min_torque_x = self.torque.x if self.max_torque_y < self.torque.y: self.max_torque_y = self.torque.y if self.min_torque_y > self.torque.y: self.min_torque_y = self.torque.y if self.max_torque_z < self.torque.z: self.max_torque_z = self.torque.z if self.min_torque_z > self.torque.z: self.min_torque_z = self.torque.z if self.force_limit < self.force.x or self.force.x < -self.force_limit: self._act(self.previous_action) # print("force.x over the limit") elif self.force_limit < self.force.y or self.force.y < -self.force_limit: self._act(self.previous_action) # print("force.y over the limit") elif self.force_limit < self.force.z or self.force.z < -self.force_limit: self._act(self.previous_action) # print("force.z over the limit") elif self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit: self._act(self.previous_action) # print("torque.x over the limit") elif self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit: self._act(self.previous_action) # print("torque.y over the limit") elif self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit: self._act(self.previous_action) # print("torque.z over the limit") else: self.previous_action = copy.deepcopy(action) # print("True") self.min_static_taxel0 = 0 self.min_static_taxel1 = 0 self.max_static_taxel0 = 0 self.max_static_taxel1 = 0 r_image = self.right_image l_image = self.left_image for x in range(0, 28): if self.min_static_taxel0 > (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n: self.min_static_taxel0 = (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n if self.min_static_taxel1 > (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n: self.min_static_taxel1 = (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n if self.max_static_taxel0 < (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n: self.max_static_taxel0 = (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n if self.max_static_taxel1 < (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n: self.max_static_taxel1 = (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n # print("min, max taxel", self.min_static_taxel0, self.max_static_taxel0, self.min_static_taxel1, self.max_static_taxel1) # Then we send the command to the robot and let it go for running_step seconds time.sleep(self.running_step) self._gz_conn.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.get_observations() # finally we get an evaluation based on what happened in the sim reward = self.compute_dist_rewards(action, update) done = self.check_done(update) return observation, reward, done, {} def compute_dist_rewards(self, action, update): self.quat = self.door.orientation self.door_rpy = self.cvt_quat_to_euler(self.quat) self.quat = self.imu_link.orientation self.imu_link_rpy = self.cvt_quat_to_euler(self.quat) compute_rewards = 0 knob_c = 100 #1 rotation of knob(+) knob_bonus_c = 10 #2 bonus of knob rotation(+) panel_c = 50 #3 door panel open(+) panel_b_c = 50 #4 door panel before open(-) tolerances_c = 50 #5 movement of door frame(-) force_c = 1 #6 over force limit1(-) taxel_c = 100 #7 release the knob(-) act_0_n = 10 #8 action[0] (-) act_1_n = 10 # action[1] (-) act_2_n = 10 # action[2] (-) act_3_n = 10 # action[3] (-) act_4_n = 10 # action[4] (-) act_5_n = 10 # action[5] (-) #1 rotation of knob(+), #2 bonus of knob rotation(+), #3 door panel open(+), if self.imu_link_rpy.x < 0.8: compute_rewards = self.imu_link_rpy.x * knob_c print("reward_knob_rotation", compute_rewards) if 0.4 > self.imu_link_rpy.x > 0.2: compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c elif 0.6 > self.imu_link_rpy.x > 0.4: compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c * 2 elif 0.8 > self.imu_link_rpy.x > 0.6: compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c * 3 else: compute_rewards = 0.8 * knob_c + knob_bonus_c * 3 + ( 1.5708061 - self.imu_link_rpy.z) * panel_c #5 movement of door frame(-) if abs(self.door_frame.position.x + 0.0659) > self.tolerances or abs( self.door_frame.position.y - 0.5649) > self.tolerances or abs(self.door_frame.position.z - 0.0999) > self.tolerances: compute_rewards -= tolerances_c * (n_step - update) / n_step + tolerances_c print("door_frame limit", compute_rewards) #6 over force limit1(-) if self.force_limit < self.force.x or self.force.x < -self.force_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("force_x limit", compute_rewards) if self.force_limit < self.force.y or self.force.y < -self.force_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("force_y limit", compute_rewards) if self.force_limit < self.force.z or self.force.z < -self.force_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("force_z limit", compute_rewards) if self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("torque_x limit", compute_rewards) if self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("torque_y limit", compute_rewards) if self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("torque_z limit", compute_rewards) #7 release the knob(-) if self.min_static_taxel0 < self.min_static_limit and self.min_static_taxel1 < self.min_static_limit: compute_rewards -= taxel_c * (n_step - update) / n_step + taxel_c print("min_static limit", compute_rewards) if self.max_static_taxel0 > self.max_static_limit and self.max_static_taxel1 > self.max_static_limit: compute_rewards -= taxel_c * (n_step - update) / n_step + taxel_c print("max_static limit", compute_rewards) #8 joint(+, -) action = action - self.arr_init_pos2 if action[5] < -0.005: compute_rewards -= (-0.005 - action[5]) * act_5_n print("action5 limit", compute_rewards) elif 1 < action[5]: compute_rewards -= (action[5] - 1) * act_5_n print("action5 limit", compute_rewards) if action[4] < -0.005: compute_rewards -= (-0.005 - action[4]) * act_4_n print("action4 limit", compute_rewards) elif 0.03 < action[4]: compute_rewards -= (action[4] - 0.03) * act_4_n print("action4 limit", compute_rewards) if action[3] < -0.023: compute_rewards -= (-0.023 - action[3]) * act_3_n print("action3 limit", compute_rewards) elif 0.005 < action[3]: compute_rewards -= (action[3] - 0.005) * act_3_n print("action3 limit", compute_rewards) if action[2] < -0.005: compute_rewards -= (-0.005 - action[2]) * act_2_n print("action2 limit", compute_rewards) elif 0.14 < action[2]: compute_rewards -= (action[2] - 0.14) * act_2_n print("action2 limit", compute_rewards) if action[1] < -0.11: compute_rewards -= (-0.11 - action[1]) * act_1_n print("action1 limit", compute_rewards) elif 0.005 < action[1]: compute_rewards -= (action[1] - 0.005) * act_1_n print("action1 limit", compute_rewards) if action[0] < -0.015: compute_rewards -= (-0.015 - action[0]) * act_0_n print("action0 limit", compute_rewards) if 0.005 < action[0]: compute_rewards -= (action[0] - 0.005) * act_0_n print("action0 limit", compute_rewards) if self.max_knob_rotation < self.imu_link_rpy.x: self.max_knob_rotation = self.imu_link_rpy.x if self.max_door_rotation < 1.5708061 - self.imu_link_rpy.z: self.max_door_rotation = 1.5708061 - self.imu_link_rpy.z # print("imu_link_rpy", self.imu_link_rpy) # print("door_frame", self.door_frame.position.x + 0.0659, self.door_frame.position.y - 0.5649, self.door_frame.position.z - 0.0999) return compute_rewards def check_done(self, update): if update > 1: if abs(self.door_frame.position.x + 0.0659 ) > self.tolerances or abs(self.door_frame.position.y - 0.5649) > self.tolerances or abs( self.door_frame.position.z - 0.0999) > self.tolerances: print( "########## door frame position over the limit ##########", update) return True elif self.min_static_taxel0 < self.min_static_limit and self.min_static_taxel1 < self.min_static_limit: print("########## static_taxles over the min_limit ##########", update) return True elif self.max_static_taxel0 > self.max_static_limit and self.max_static_taxel1 > self.max_static_limit: print("########## static_taxles over the max_limit ##########", update) return True else: return False else: return False
class JointPub(object): def __init__(self): self.publishers_array = [] self._shoulder_pan_joint_pub = rospy.Publisher( '/ur_shoulder_pan_vel_controller/command', Float64, queue_size=1) self._shoulder_lift_joint_pub = rospy.Publisher( '/ur_shoulder_lift_vel_controller/command', Float64, queue_size=1) self._elbow_vel_joint_pub = rospy.Publisher( '/ur_elbow_vel_controller/command', Float64, queue_size=1) self._wrist_1_joint_pub = rospy.Publisher( '/ur_wrist_1_vel_controller/command', Float64, queue_size=1) self._wrist_2_joint_pub = rospy.Publisher( '/ur_wrist_2_vel_controller/command', Float64, queue_size=1) self._wrist_3_joint_pub = rospy.Publisher( '/ur_wrist_3_vel_controller/command', Float64, queue_size=1) self.publishers_array.append(self._shoulder_pan_joint_pub) self.publishers_array.append(self._shoulder_lift_joint_pub) self.publishers_array.append(self._elbow_vel_joint_pub) self.publishers_array.append(self._wrist_1_joint_pub) self.publishers_array.append(self._wrist_2_joint_pub) self.publishers_array.append(self._wrist_3_joint_pub) self._ctrl_conn = ControllersConnection(namespace="") self._ctrl_conn.load_controllers("ur_shoulder_pan_vel_controller") self._ctrl_conn.load_controllers("ur_shoulder_lift_vel_controller") self._ctrl_conn.load_controllers("ur_elbow_vel_controller") self._ctrl_conn.load_controllers("ur_wrist_1_vel_controller") self._ctrl_conn.load_controllers("ur_wrist_2_vel_controller") self._ctrl_conn.load_controllers("ur_wrist_3_vel_controller") def set_init_pose(self, init_pose): """ Sets joints to initial position [0,0,0] :return: The init Pose """ self.check_publishers_connection() self.move_joints(init_pose) def check_publishers_connection(self): """ Checks that all the publishers are working :return: """ rate = rospy.Rate(1) # 1hz while (self._shoulder_pan_joint_pub.get_num_connections() == 0): rospy.logdebug( "No susbribers to _shoulder_pan_joint_pub yet so we wait and try again" ) try: self._ctrl_conn.start_controllers( controllers_on="ur_shoulder_pan_vel_controller") rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_shoulder_pan_joint_pub Publisher Connected") while (self._shoulder_lift_joint_pub.get_num_connections() == 0): rospy.logdebug( "No susbribers to _shoulder_lift_joint_pub yet so we wait and try again" ) try: self._ctrl_conn.start_controllers( controllers_on="ur_shoulder_lift_vel_controller") rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_shoulder_lift_joint_pub Publisher Connected") while (self._elbow_vel_joint_pub.get_num_connections() == 0): rospy.logdebug( "No susbribers to _elbow_vel_joint_pub yet so we wait and try again" ) try: self._ctrl_conn.start_controllers( controllers_on="ur_elbow_vel_controller") rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_elbow_vel_joint_pub Publisher Connected") while (self._wrist_1_joint_pub.get_num_connections() == 0): rospy.logdebug( "No susbribers to _wrist_1_joint_pub yet so we wait and try again" ) try: self._ctrl_conn.start_controllers( controllers_on="ur_wrist_1_vel_controller") rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_wrist_1_joint_pub Publisher Connected") while (self._wrist_2_joint_pub.get_num_connections() == 0): rospy.logdebug( "No susbribers to _wrist_2_joint_pub yet so we wait and try again" ) try: self._ctrl_conn.start_controllers( controllers_on="ur_wrist_2_vel_controller") rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_wrist_2_joint_pub Publisher Connected") while (self._wrist_3_joint_pub.get_num_connections() == 0): rospy.logdebug( "No susbribers to _wrist_3_joint_pub yet so we wait and try again" ) try: self._ctrl_conn.start_controllers( controllers_on="ur_wrist_3_vel_controller") rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_wrist_3_joint_pub Publisher Connected") rospy.logdebug("All Joint Publishers READY") def move_joints(self, joints_array): i = 0 for publisher_object in self.publishers_array: joint_value = Float64() joint_value.data = joints_array[i] rospy.logdebug("JointsPos>>" + str(joint_value)) publisher_object.publish(joint_value) i += 1 def start_loop(self, rate_value=2.0): rospy.logdebug("Start Loop") pos1 = [-2.873, 0.0, 6.28, 3.015, 1.21, 1.264, -0.97] pos2 = [-2.873, 0.0, 6.28, 3.015, 1.21, 1.264, 0.97] position = "pos1" rate = rospy.Rate(rate_value) while not rospy.is_shutdown(): if position == "pos1": self.move_joints(pos1) position = "pos2" else: self.move_joints(pos2) position = "pos1" rate.sleep() def start_sinus_loop(self, rate_value=2.0): rospy.logdebug("Start Loop") w = 0.0 x = 1.0 * math.sin(w) #pos_x = [0.0,0.0,x] pos_x = [x, 0.0, 0.0] #pos_x = [0.0, x, 0.0] rate = rospy.Rate(rate_value) while not rospy.is_shutdown(): self.move_joints(pos_x) w += 0.05 x = 1.0 * math.sin(w) #pos_x = [0.0, 0.0, x] pos_x = [x, 0.0, 0.0] #pos_x = [0.0, x, 0.0] print(x) rate.sleep()
class BaseEnv(gym.Env): def __init__(self, robot_name_space, controllers_list, reset_controls, start_init_physics_parameters=True, reset_world_or_sim="SIMULATION"): rospy.logdebug("START init BaseEnv") self.controllers_object = ControllersConnection( namespace=robot_name_space, controllers_list=controllers_list) self.reset_controls = reset_controls self.seed() rospy.logdebug("END init BaseEnv") # Env methods def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): """ Function executed each time step. Here we get the action execute it in a time step and retrieve the observations generated by that action. :param action: :return: obs, reward, done, info """ """ Here we should convert the action num to movement action, execute the action in the simulation and get the observations result of performing that action. """ rospy.logdebug("START STEP ROS") self._set_action(action) obs = self._get_obs() done = self._is_done(obs) info = {} reward = self._compute_reward(obs, done) rospy.logdebug("END STEP ROS") return obs, reward, done, info def reset(self): rospy.logdebug("Reseting BaseEnv") self._init_env_variables() obs = self._get_obs() rospy.logdebug("END Reseting BaseEnv") return obs def close(self): """ Function executed when closing the environment. Use it for closing GUIS and other systems that need closing. :return: """ rospy.logdebug("Closing BaseEnv") rospy.signal_shutdown("Closing BaseEnv") def _reset_sim(self): """Resets a simulation """ rospy.logdebug("RESET SIM START") if self.reset_controls: rospy.logdebug("RESET CONTROLLERS") self.controllers_object.reset_controllers() self._check_all_systems_ready() self._set_init_pose() self.controllers_object.reset_controllers() self._check_all_systems_ready() else: rospy.logwarn("DONT RESET CONTROLLERS") self._check_all_systems_ready() self._set_init_pose() self._check_all_systems_ready() rospy.logdebug("RESET SIM END") return True def _set_init_pose(self): """Sets the Robot in its init pose """ raise NotImplementedError() def _check_all_systems_ready(self): """ Checks that all the sensors, publishers and other simulation systems are operational. """ raise NotImplementedError() def _get_obs(self): """Returns the observation. """ raise NotImplementedError() def _init_env_variables(self): """Inits variables needed to be initialised each time we reset at the start of an episode. """ raise NotImplementedError() def _set_action(self, action): """Applies the given action to the simulation. """ raise NotImplementedError() def _is_done(self, observations): """Indicates whether or not the episode is done ( the robot has fallen for example). """ raise NotImplementedError() def _compute_reward(self, observations, done): """Calculates the reward to give based on the observations given. """ raise NotImplementedError() def _env_setup(self, initial_qpos): """Initial configuration of the environment. Can be used to configure initial state and extract information from the simulation. """ raise NotImplementedError()
class OldMovingCubeEnv(gym.Env): def __init__(self): self.publishers_array = [] self._roll_vel_pub = rospy.Publisher( '/moving_cube/inertia_wheel_roll_joint_velocity_controller/command', Float64, queue_size=1) self.publishers_array.append(self._roll_vel_pub) self.action_space = spaces.Discrete(3) #l,r,nothing self._seed() #get configuration parameters self.wait_time = rospy.get_param('/moving_cube/wait_time') self.running_step = rospy.get_param('/moving_cube/running_step') self.speed_step = rospy.get_param('/moving_cube/speed_step') self.init_roll_vel = rospy.get_param('/moving_cube/init_roll_vel') self.init_internal_vars(self.init_roll_vel) rospy.Subscriber("/moving_cube/joint_states", JointState, self.joints_callback) rospy.Subscriber("/moving_cube/odom", Odometry, self.odom_callback) # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection( namespace="moving_cube") def init_internal_vars(self, init_roll_vel_value): self.roll_vel = [init_roll_vel_value] self.joints = None #always returns the current state of the joints def joints_callback(self, data): self.joints = data def odom_callback(self, data): self.odom = data def get_clock_time(self): self.clock_time = None while self.clock_time is None and not rospy.is_shutdown(): try: self.clock_time = rospy.wait_for_message("/clock", Clock, timeout=1.0) rospy.logdebug("Current clock_time READY=>" + str(self.clock_time)) except: rospy.logdebug( "Current clock_time not ready yet, retrying for getting Current clock_time" ) return self.clock_time def _seed(self, seed=None): #overriden function self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): #overriden function # Take action if action == 0: #LEFT rospy.logwarn("GO LEFT...") self.roll_vel[0] -= self.speed_step elif action == 1: #RIGHT rospy.logwarn("GO RIGHT...") self.roll_vel[0] += self.speed_step elif action == 1: #STOP rospy.logwarn("STOP...") self.roll_vel[0] = 0.0 rospy.logwarn("MOVING TO SPEED==" + str(self.roll_vel)) # 1st: unpause simulation rospy.logdebug("Unpause SIM...") self.gazebo.unpauseSim() self.move_joints(self.roll_vel) rospy.logdebug("Wait for some time to execute movement, time=" + str(self.running_step)) rospy.sleep(self.running_step) #wait for some time rospy.logdebug("DONE Wait for some time to execute movement, time=" + str(self.running_step)) # 3rd: pause simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # 4th: get the observation observation, done, state = self.observation_checks() # 5th: get the reward if not done: step_reward = 0 obs_reward = self.get_reward_for_observations(state) rospy.loginfo("Reward Values: Time=" + str(step_reward) + ",Obs=" + str(obs_reward)) reward = step_reward + int(obs_reward) rospy.loginfo("TOT Reward=" + str(reward)) else: reward = -2000000 return observation, reward, done, {} def _reset(self): rospy.logdebug("We UNPause the simulation to start having topic data") self.gazebo.unpauseSim() rospy.logdebug("CLOCK BEFORE RESET") self.get_clock_time() rospy.logdebug("SETTING INITIAL POSE TO AVOID") self.set_init_pose() time.sleep(self.wait_time * 2.0) rospy.logdebug("AFTER INITPOSE CHECKING SENSOR DATA") self.check_all_systems_ready() rospy.logdebug("RESETING SIMULATION") self.gazebo.pauseSim() self.gazebo.resetSim() self.gazebo.unpauseSim() rospy.logdebug("CLOCK AFTER RESET") self.get_clock_time() rospy.logdebug( "RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK") self.controllers_object.reset_cartpole_joint_controllers() rospy.logdebug("AFTER RESET CHECKING SENSOR DATA") self.check_all_systems_ready() rospy.logdebug("CLOCK AFTER SENSORS WORKING AGAIN") self.get_clock_time() rospy.logdebug("END") # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # get the last observation got when paused, generated by the callbakc or the check_all_systems_ready # Depends on who was last observation, _, state = self.observation_checks() return observation ''' UTILITY CODE FOLLOWS HERE ''' def observation_checks(self): done = False # State defined by the speed of the disk and position in the world plane self.get_distance_from_point(pstart, p_end) state = [ round(self.joints.velocity[0], 1), round(self.odom.pose.pose.position.x, 1), round(self.odom.pose.pose.position.y, 1) ] # TODO: Create Correct Observation if (self.min_base_position >= state[0] or state[0] >= self.max_base_position ): #check if the base is still within the ranges of (-2, 2) rospy.logerr("Base Ouside Limits==>min=" + str(self.min_base_position) + ",pos=" + str(state[0]) + ",max=" + str(self.max_base_position)) done = True if (self.min_pole_angle >= state[2] or state[2] >= self.max_pole_angle): #check if pole has toppled over rospy.logerr("Pole Angle Ouside Limits==>min=" + str(self.min_pole_angle) + ",pos=" + str(state[2]) + ",max=" + str(self.max_pole_angle)) done = True observations = [state[2]] return observations, done, state def get_distance_from_point(self, pstart, p_end): """ Given a Vector3 Object, get distance from current position :param p_end: :return: """ a = numpy.array((pstart.x, pstart.y, pstart.z)) b = numpy.array((p_end.x, p_end.y, p_end.z)) distance = numpy.linalg.norm(a - b) return distance def get_reward_for_observations(self, state): """ Gives more points for staying upright, gets data from given observations to avoid having different data than other previous functions :return:reward """ pole_angle = state[2] pole_vel = state[3] rospy.logwarn("pole_angle for reward==>" + str(pole_angle)) delta = 0.7 - abs(pole_angle) reward_pole_angle = math.exp(delta * 10) # If we are moving to the left and the pole is falling left is Bad rospy.logwarn("pole_vel==>" + str(pole_vel)) pole_vel_sign = numpy.sign(pole_vel) pole_angle_sign = numpy.sign(pole_angle) rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign)) rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign)) # We want inverted signs for the speeds. We multiply by -1 to make minus positive. # global_sign + = GOOD, global_sign - = BAD base_reward = 500 if pole_vel != 0: global_sign = pole_angle_sign * pole_vel_sign * -1 reward_for_efective_movement = base_reward * global_sign else: # Is a particular case. If it doesnt move then its good also reward_for_efective_movement = base_reward reward = reward_pole_angle + reward_for_efective_movement rospy.logwarn("reward==>" + str(reward) + "= r_pole_angle=" + str(reward_pole_angle) + ",r_movement= " + str(reward_for_efective_movement)) return reward def check_publishers_connection(self): """ Checks that all the publishers are working :return: """ rate = rospy.Rate(10) # 10hz while (self._roll_vel_pub.get_num_connections() == 0 and not rospy.is_shutdown()): rospy.logdebug( "No susbribers to _roll_vel_pub yet so we wait and try again") try: rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_base_pub Publisher Connected") rospy.logdebug("All Publishers READY") def check_all_systems_ready(self, init=True): self.disk_joints_data = None while self.disk_joints_data is None and not rospy.is_shutdown(): try: self.disk_joints_data = rospy.wait_for_message( "/moving_cube/joint_states", JointState, timeout=1.0) rospy.logdebug("Current moving_cube/joint_states READY=>" + str(self.disk_joints_data)) except: rospy.logerr( "Current moving_cube/joint_states not ready yet, retrying for getting joint_states" ) self.cube_odom_data = None while self.disk_joints_data is None and not rospy.is_shutdown(): try: self.cube_odom_data = rospy.wait_for_message( "/moving_cube/odom", Odometry, timeout=1.0) rospy.logdebug("Current /moving_cube/odom READY=>" + str(self.cube_odom_data)) except: rospy.logerr( "Current /moving_cube/odom not ready yet, retrying for getting odom" ) rospy.logdebug("ALL SYSTEMS READY") def move_joints(self, joints_array): i = 0 for joint_value in joints_array: joint_value = Float64() joint_value.data = joints_array[0] rospy.logdebug("Single Disk Joints Velocity>>" + str(joint_value)) self.publishers_array[i].publish(joint_value) i += 0 def set_init_pose(self): """ Sets Roll Disk to initial vel [0.0] :return: """ self.check_publishers_connection() # Reset Internal pos variable self.init_internal_vars(self.init_roll_vel) self.move_joints(self.roll_vel)
class JointTrajPub(object): def __init__(self): """ Publish trajectory_msgs::JointTrajectory for velocity control """ self._joint_traj_pub = rospy.Publisher('/vel_traj_controller/command', JointTrajectory, queue_size=10) self._ctrl_conn = ControllersConnection(namespace="") self._ctrl_conn.load_controllers("vel_traj_controller") def set_init_pose(self, init_pose): """ Sets joints to initial position [0,0,0] :return: The init Pose """ self.check_publishers_connection() self.move_joints(init_pose) def check_publishers_connection(self): """ Checks that all the publishers are working :return: """ rate = rospy.Rate(1) # 1hz while (self._joint_traj_pub.get_num_connections() == 0): rospy.logdebug( "No subscribers to vel_traj_controller yet so we wait and try again" ) try: self._ctrl_conn.start_controllers( controllers_on="vel_traj_controller") rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_joint_traj_pub Publisher Connected") rospy.logdebug("All Joint Publishers READY") def move_joints(self, joints_array): vel_cmd = JointTrajectory() vel_cmd.joint_names = [ "shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint" ] vel_cmd.header.stamp = rospy.Time.now() # create a JTP instance and configure it jtp = JointTrajectoryPoint(positions=[0] * 6, velocities=[0] * 6, time_from_start=rospy.Duration(.0)) jtp.velocities = [ joints_array[0], joints_array[1], joints_array[2], joints_array[3], joints_array[4], joints_array[5] ] # setup the reset of the pt vel_cmd.points = [jtp] self._joint_traj_pub.publish(vel_cmd) def jointTrajectoryCommand(self, joints_array): rospy.loginfo("jointTrajectoryCommand") try: rospy.loginfo(rospy.get_rostime().to_sec()) while rospy.get_rostime().to_sec() == 0.0: time.sleep(0.1) rospy.loginfo(rospy.get_rostime().to_sec()) jt = JointTrajectory() jt.header.stamp = rospy.Time.now() jt.header.frame_id = "ur5" jt.joint_names.append("shoulder_pan_joint") jt.joint_names.append("shoulder_lift_joint") jt.joint_names.append("elbow_joint") jt.joint_names.append("wrist_1_joint") jt.joint_names.append("wrist_2_joint") jt.joint_names.append("wrist_3_joint") dt = 0.01 p = JointTrajectoryPoint() p.positions.append(joints_array[0]) p.positions.append(joints_array[1]) p.positions.append(joints_array[2]) p.positions.append(joints_array[3]) p.positions.append(joints_array[4]) p.positions.append(joints_array[5]) jt.points.append(p) # set duration jt.points[0].time_from_start = rospy.Duration.from_sec(dt) rospy.loginfo("Test: velocities") self._joint_traj_pub.publish(jt) except rospy.ROSInterruptException: pass def start_loop(self, rate_value=2.0): rospy.logdebug("Start Loop") pos1 = [-2.873, 0.0, 6.28, 3.015, 1.21, 1.264, -0.97] pos2 = [-2.873, 0.0, 6.28, 3.015, 1.21, 1.264, 0.97] position = "pos1" rate = rospy.Rate(rate_value) while not rospy.is_shutdown(): if position == "pos1": self.move_joints(pos1) position = "pos2" else: self.move_joints(pos2) position = "pos1" rate.sleep() def start_sinus_loop(self, rate_value=2.0): rospy.logdebug("Start Loop") w = 0.0 x = 1.0 * math.sin(w) #pos_x = [0.0,0.0,x] pos_x = [x, 0.0, 0.0] #pos_x = [0.0, x, 0.0] rate = rospy.Rate(rate_value) while not rospy.is_shutdown(): self.move_joints(pos_x) w += 0.05 x = 1.0 * math.sin(w) #pos_x = [0.0, 0.0, x] pos_x = [x, 0.0, 0.0] #pos_x = [0.0, x, 0.0] print(x) rate.sleep()
break rospy.loginfo("Not there yet, keep waiting...") rate.sleep() delta_time = end_wait_time- start_wait_time rospy.logwarn("[Wait Time=" + str(delta_time)+"]") return delta_time if __name__ == '__main__': position = 0.0 position = float(sys.argv[1]) rospy.init_node('debug_test_node', anonymous=True, log_level=rospy.WARN) rospy.logwarn("[position=" + str(position) + "]") wait_time = 0.1 controllers_object = ControllersConnection(namespace="cartpole_v0") debug_object = MoveCartClass() gazebo = GazeboConnection() rospy.loginfo("RESETING SIMULATION") gazebo.pauseSim() gazebo.resetSim() gazebo.unpauseSim() rospy.loginfo("CLOCK AFTER RESET") debug_object.get_clock_time() rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK") controllers_object.reset_cartpole_joint_controllers() rospy.loginfo("AFTER RESET CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN")
def __init__(self): rospy.logdebug("Starting URSimReaching Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/target_blocks_pose", Point, self.target_point_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) # Gets training parameters from param server self.desired_pose = Pose() self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value = rospy.get_param("/init_joint_pose/shp") shl_init_value = rospy.get_param("/init_joint_pose/shl") elb_init_value = rospy.get_param("/init_joint_pose/elb") wr1_init_value = rospy.get_param("/init_joint_pose/wr1") wr2_init_value = rospy.get_param("/init_joint_pose/wr2") wr3_init_value = rospy.get_param("/init_joint_pose/wr3") self.init_joint_pose = [ shp_init_value, shl_init_value, elb_init_value, wr1_init_value, wr2_init_value, wr3_init_value ] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # We init the observations self.base_orientation = Quaternion() self.target_point = Point() self.link_state = LinkStates() self.joints_state = JointState() self.end_effector = Point() self.distance = 0. # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.observation_space = 15 #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service( '/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) self.vel_traj_controller = [ 'joint_state_controller', 'gripper_controller', 'vel_traj_controller' ] self.vel_controller = [ "joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller" ] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig)
def position_reset_test(): """ Test of position accuracy and reset system. """ position = 0.0 position = float(sys.argv[1]) rospy.init_node('debug_test_node', anonymous=True, log_level=rospy.WARN) rospy.logwarn("[position=" + str(position) + "]") wait_time = 0.1 controllers_object = ControllersConnection(namespace="cartpole_v0") debug_object = MoveCartClass() gazebo = GazeboConnection() rospy.loginfo("RESETING SIMULATION") gazebo.pauseSim() gazebo.resetSim() gazebo.unpauseSim() rospy.loginfo("CLOCK AFTER RESET") debug_object.get_clock_time() rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK") controllers_object.reset_controllers() rospy.loginfo("AFTER RESET CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN") debug_object.get_clock_time() rospy.loginfo("START CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("SET init pose...") debug_object.set_init_pose() rospy.loginfo("WAIT FOR GOING TO INIT POSE") time.sleep(wait_time) raw_input("Start Movement...PRESS KEY") i = 0 wait_times_m = [] while not rospy.is_shutdown(): pos_x = [position] debug_object.move_joints(pos_x) delta_time = debug_object.wait_until_base_is_in_pos(position) wait_times_m = numpy.append(wait_times_m, [delta_time]) debug_object.move_joints([0.0]) delta_time = debug_object.wait_until_base_is_in_pos(0.0) wait_times_m = numpy.append(wait_times_m, [delta_time]) i += 1 if i > 10: average_time = numpy.average(wait_times_m) rospy.logwarn("[average_time Wait Time=" + str(average_time) + "]") break rospy.loginfo("END CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("CLOCK BEFORE RESET") debug_object.get_clock_time() # Reset Sim raw_input("END Start Movement...PRESS KEY") rospy.loginfo("SETTING INITIAL POSE TO AVOID") debug_object.set_init_pose() time.sleep(wait_time * 2.0) rospy.loginfo("AFTER INITPOSE CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo( "We deactivate gravity to check any reasidual effect of reseting the simulation" ) gazebo.change_gravity(0.0, 0.0, 0.0) rospy.loginfo("RESETING SIMULATION") gazebo.pauseSim() gazebo.resetSim() gazebo.unpauseSim() rospy.loginfo("CLOCK AFTER RESET") debug_object.get_clock_time() rospy.loginfo("RESETING CONTROLLERS SO THAT IT DOESNT WAIT FOR THE CLOCK") controllers_object.reset_controllers() rospy.loginfo("AFTER RESET CHECKING SENSOR DATA") debug_object.check_all_systems_ready() rospy.loginfo("CLOCK AFTER SENSORS WORKING AGAIN") debug_object.get_clock_time() rospy.loginfo("We reactivating gravity...") gazebo.change_gravity(0.0, 0.0, -9.81) rospy.loginfo("END")
class SimplicityEnv(gym.Env): def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="simplicity") self.simplicity_state_object = SimplicityState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5 ) self.simplicity_state_object.set_desired_world_point(self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.simplicity_joint_pubisher_object = JointPub() """ For this version, we consider 6 actions 1-2: Increment/Decrement front_left_shoulder 3-4: Increment/Decrement front_left_thigh 5-6: Increment/Decrement front_left_shank 7-8: Increment/Decrement front_right_shoulder 9-10: Increment/Decrement front_right_thigh 11-12: Increment/Decrement front_right_shank 13-14: Increment/Decrement back_left_shoulder 15-16: Increment/Decrement back_left_thigh 17-18: Increment/Decrement back_left_shank 19-20: Increment/Decrement back_right_shoulder 21-22: Increment/Decrement back_right_thigh 23-24: Increment/Decrement back_right_shank """ self.action_space = spaces.Discrete(24) self.reward_range = (-np.inf, np.inf) self.seed() # A function to initialize the random generator def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_robot_joint_controllers...") self.controllers_object.reset_simplicity_joint_controllers() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose...") self.simplicity_joint_pubisher_object.set_init_pose() # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.simplicity_state_object.check_all_systems_ready() rospy.logdebug("get_observations...") observation = self.simplicity_state_object.get_observations() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state def step(self, action): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponsd to which joint is incremented next_action_position = self.simplicity_state_object.get_action_to_position(action) # We move it to that pos self.gazebo.unpauseSim() self.simplicity_joint_pubisher_object.move_joints(next_action_position) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.simplicity_state_object.get_observations() # finally we get an evaluation based on what happened in the sim reward,done = self.simplicity_state_object.process_data() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state, reward, done, {} def get_state(self, observation): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ return self.simplicity_state_object.get_state_as_string(observation)
class URSimDoorOpening(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): rospy.logdebug("Starting URSimDoorOpening Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy('/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/ft_sensor_topic", WrenchStamped, self.wrench_stamped_callback) rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) # Gets training parameters from param server self.desired_pose = Pose() self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = {"shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value0 = rospy.get_param("/init_joint_pose0/shp") shl_init_value0 = rospy.get_param("/init_joint_pose0/shl") elb_init_value0 = rospy.get_param("/init_joint_pose0/elb") wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1") wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2") wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3") self.init_joint_pose0 = [shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0, wr2_init_value0, wr3_init_value0] shp_init_value1 = rospy.get_param("/init_joint_pose1/shp") shl_init_value1 = rospy.get_param("/init_joint_pose1/shl") elb_init_value1 = rospy.get_param("/init_joint_pose1/elb") wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1") wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2") wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3") self.init_joint_pose1 = [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1] # print("[init_joint_pose1]: ", [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1]) shp_init_value2 = rospy.get_param("/init_joint_pose2/shp") shl_init_value2 = rospy.get_param("/init_joint_pose2/shl") elb_init_value2 = rospy.get_param("/init_joint_pose2/elb") wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1") wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2") wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3") self.init_joint_pose2 = [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2] # print("[init_joint_pose2]: ", [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2]) shp_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shp") shl_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shl") elb_after_rotate = rospy.get_param("/eelink_pose_after_rotate/elb") wr1_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr1") wr2_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr2") wr3_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr3") self.after_rotate = [shp_after_rotate, shl_after_rotate, elb_after_rotate, wr1_after_rotate, wr2_after_rotate, wr3_after_rotate] shp_after_pull = rospy.get_param("/eelink_pose_after_pull/shp") shl_after_pull = rospy.get_param("/eelink_pose_after_pull/shl") elb_after_pull = rospy.get_param("/eelink_pose_after_pull/elb") wr1_after_pull = rospy.get_param("/eelink_pose_after_pull/wr1") wr2_after_pull = rospy.get_param("/eelink_pose_after_pull/wr2") wr3_after_pull = rospy.get_param("/eelink_pose_after_pull/wr3") self.after_pull = [shp_after_pull, shl_after_pull, elb_after_pull, wr1_after_pull, wr2_after_pull, wr3_after_pull] r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive") l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive") r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower") l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower") r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring") l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring") r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive") l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive") r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower") l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower") r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring") l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring") self.init_grp_pose1 = [r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1, r_spr_value1, l_spr_value1] self.init_grp_pose2 = [r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2, r_spr_value2, l_spr_value2] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # Get the force and troque limit self.force_limit = rospy.get_param("/force_limit") self.torque_limit = rospy.get_param("/torque_limit") # We init the observations self.base_orientation = Quaternion() self.imu_link = Quaternion() self.door = Quaternion() self.quat = Quaternion() self.imu_link_rpy = Vector3() self.door_rpy = Vector3() self.link_state = LinkStates() self.wrench_stamped = WrenchStamped() self.joints_state = JointState() self.end_effector = Point() self.previous_action =[] self.counter = 0 self.max_rewards = 1 # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.observation_space = 21 #np.arange(self.get_observations().shape[0]) ## self.observation_space = 15 #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_pos_server = rospy.Service('/set_position_controller', SetBool, self._set_pos_ctrl) set_joint_traj_pos_server = rospy.Service('/set_trajectory_position_controller', SetBool, self._set_traj_pos_ctrl) set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service('/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) # set_gripper_server = rospy.Service('/set_gripper_controller', SetBool, self._set_grp_ctrl) self.pos_traj_controller = ['joint_state_controller', 'gripper_controller', 'pos_traj_controller'] self.pos_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_pos_controller", "ur_shoulder_lift_pos_controller", "ur_elbow_pos_controller", "ur_wrist_1_pos_controller", "ur_wrist_2_pos_controller", "ur_wrist_3_pos_controller"] self.vel_traj_controller = ['joint_state_controller', 'gripper_controller', 'vel_traj_controller'] self.vel_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller"] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig) def check_stop_flg(self): if self.stop_flag is False: return False else: return True def _start_trainnig(self, req): rospy.logdebug("_start_trainnig!!!!") self.stop_flag = False return SetBoolResponse(True, "_start_trainnig") def _stop_trainnig(self, req): rospy.logdebug("_stop_trainnig!!!!") self.stop_flag = True return SetBoolResponse(True, "_stop_trainnig") def _set_pos_ctrl(self, req): rospy.wait_for_service('set_position_controller') self._ctrl_conn.stop_controllers(self.pos_traj_controller) self._ctrl_conn.start_controllers(self.pos_controller) self._ctrl_type = 'pos' return SetBoolResponse(True, "_set_pos_ctrl") def _set_traj_pos_ctrl(self, req): rospy.wait_for_service('set_trajectory_position_controller') self._ctrl_conn.stop_controllers(self.pos_controller) self._ctrl_conn.start_controllers(self.pos_traj_controller) self._ctrl_type = 'traj_pos' return SetBoolResponse(True, "_set_traj_pos_ctrl") def _set_vel_ctrl(self, req): rospy.wait_for_service('set_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_traj_controller) self._ctrl_conn.start_controllers(self.vel_controller) self._ctrl_type = 'vel' return SetBoolResponse(True, "_set_vel_ctrl") def _set_traj_vel_ctrl(self, req): rospy.wait_for_service('set_trajectory_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_controller) self._ctrl_conn.start_controllers(self.vel_traj_controller) self._ctrl_type = 'traj_vel' return SetBoolResponse(True, "_set_traj_vel_ctrl") # def _set_grp_ctrl(self, req): # rospy.wait_for_service('set_gripper_controller') # self._ctrl_conn.start_controllers(self.gripper_controller) # return SetBoolResponse(True, "_set_grp_ctrl") # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def link_state_callback(self, msg): self.link_state = msg self.end_effector = self.link_state.pose[12] self.imu_link = self.link_state.pose[5] self.door = self.link_state.pose[2] # def target_point_callback(self, msg): # self.target_point = msg def check_all_systems_ready(self): """ We check that all systems are ready :return: """ joint_states_msg = None while joint_states_msg is None and not rospy.is_shutdown(): try: joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1) self.joints_state = joint_states_msg rospy.logdebug("Current joint_states READY") except Exception as e: self._ctrl_conn.start_controllers(controllers_on="joint_state_controller") rospy.logdebug("Current joint_states not ready yet, retrying==>"+str(e)) link_states_msg = None while link_states_msg is None and not rospy.is_shutdown(): try: link_states_msg = rospy.wait_for_message("/gazebo/link_states", LinkStates, timeout=0.1) self.link_states = link_states_msg rospy.logdebug("Reading link_states READY") except Exception as e: rospy.logdebug("Reading link_states not ready yet, retrying==>"+str(e)) rospy.logdebug("ALL SYSTEMS READY") def get_xyz(self, q): """Get x,y,z coordinates Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_current_xyz(self): """Get x,y,z coordinates according to currrent joint angles Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ joint_states = self.joints_state shp_joint_ang = joint_states.position[0] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[2] wr1_joint_ang = joint_states.position[3] wr2_joint_ang = joint_states.position[4] wr3_joint_ang = joint_states.position[5] q = [shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang] mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_orientation(self, q): """Get Euler angles Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) orientation = mat[0:3, 0:3] roll = -orientation[1, 2] pitch = orientation[0, 2] yaw = -orientation[0, 1] return Vector3(roll, pitch, yaw) def cvt_quat_to_euler(self, quat): euler_rpy = Vector3() euler = euler_from_quaternion([self.quat.x, self.quat.y, self.quat.z, self.quat.w]) euler_rpy.x = euler[0] euler_rpy.y = euler[1] euler_rpy.z = euler[2] return euler_rpy def init_joints_pose(self, init_pos): """ We initialise the Position variable that saves the desired position where we want our joints to be :param init_pos: :return: """ self.current_joint_pose =[] self.current_joint_pose = copy.deepcopy(init_pos) # print("[current_joint_pose]:", self.current_joint_pose, type(self.current_joint_pose)) return self.current_joint_pose def get_euclidean_dist(self, p_in, p_pout): """ Given a Vector3 Object, get distance from current position :param p_end: :return: """ a = numpy.array((p_in.x, p_in.y, p_in.z)) b = numpy.array((p_pout.x, p_pout.y, p_pout.z)) distance = numpy.linalg.norm(a - b) return distance def joints_state_callback(self,msg): self.joints_state = msg def wrench_stamped_callback(self,msg): self.wrench_stamped = msg def get_observations(self): """ Returns the state of the robot needed for OpenAI QLearn Algorithm The state will be defined by an array :return: observation """ joint_states = self.joints_state self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("[force]", self.force.x, self.force.y, self.force.z) # print("[torque]", self.torque.x, self.torque.y, self.torque.z) shp_joint_ang = joint_states.position[2] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[0] wr1_joint_ang = joint_states.position[9] wr2_joint_ang = joint_states.position[10] wr3_joint_ang = joint_states.position[11] shp_joint_vel = joint_states.velocity[2] shl_joint_vel = joint_states.velocity[1] elb_joint_vel = joint_states.velocity[0] wr1_joint_vel = joint_states.velocity[9] wr2_joint_vel = joint_states.velocity[10] wr3_joint_vel = joint_states.velocity[11] q = [shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang] eef_x, eef_y, eef_z = self.get_xyz(q) observation = [] # rospy.logdebug("List of Observations==>"+str(self.observations)) for obs_name in self.observations: if obs_name == "shp_joint_ang": observation.append(shp_joint_ang) elif obs_name == "shl_joint_ang": observation.append(shl_joint_ang) elif obs_name == "elb_joint_ang": observation.append(elb_joint_ang) elif obs_name == "wr1_joint_ang": observation.append(wr1_joint_ang) elif obs_name == "wr2_joint_ang": observation.append(wr2_joint_ang) elif obs_name == "wr3_joint_ang": observation.append(wr3_joint_ang) elif obs_name == "shp_joint_vel": observation.append(shp_joint_vel) elif obs_name == "shl_joint_vel": observation.append(shl_joint_vel) elif obs_name == "elb_joint_vel": observation.append(elb_joint_vel) elif obs_name == "wr1_joint_vel": observation.append(wr1_joint_vel) elif obs_name == "wr2_joint_vel": observation.append(wr2_joint_vel) elif obs_name == "wr3_joint_vel": observation.append(wr3_joint_vel) elif obs_name == "eef_x": observation.append(eef_x) elif obs_name == "eef_y": observation.append(eef_y) elif obs_name == "eef_z": observation.append(eef_z) elif obs_name == "force_x": observation.append(self.force.x) elif obs_name == "force_y": observation.append(self.force.y) elif obs_name == "force_z": observation.append(self.force.z) elif obs_name == "torque_x": observation.append(self.torque.x) elif obs_name == "torque_y": observation.append(self.torque.y) elif obs_name == "torque_z": observation.append(self.torque.z) else: raise NameError('Observation Asked does not exist=='+str(obs_name)) return observation def clamp_to_joint_limits(self): """ clamps self.current_joint_pose based on the joint limits self._joint_limits { "shp_max": shp_max, "shp_min": shp_min, ... } :return: """ rospy.logdebug("Clamping current_joint_pose>>>" + str(self.current_joint_pose)) shp_joint_value = self.current_joint_pose[0] shl_joint_value = self.current_joint_pose[1] elb_joint_value = self.current_joint_pose[2] wr1_joint_value = self.current_joint_pose[3] wr2_joint_value = self.current_joint_pose[4] wr3_joint_value = self.current_joint_pose[5] self.current_joint_pose[0] = max(min(shp_joint_value, self._joint_limits["shp_max"]), self._joint_limits["shp_min"]) self.current_joint_pose[1] = max(min(shl_joint_value, self._joint_limits["shl_max"]), self._joint_limits["shl_min"]) self.current_joint_pose[2] = max(min(elb_joint_value, self._joint_limits["elb_max"]), self._joint_limits["elb_min"]) self.current_joint_pose[3] = max(min(wr1_joint_value, self._joint_limits["wr1_max"]), self._joint_limits["wr1_min"]) self.current_joint_pose[4] = max(min(wr2_joint_value, self._joint_limits["wr2_max"]), self._joint_limits["wr2_min"]) self.current_joint_pose[5] = max(min(wr3_joint_value, self._joint_limits["wr3_max"]), self._joint_limits["wr3_min"]) rospy.logdebug("DONE Clamping current_joint_pose>>>" + str(self.current_joint_pose)) # Resets the state of the environment and returns an initial observation. def reset(self): # Go to initial position self._gz_conn.unpauseSim() rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose0)) init_pos0 = self.init_joints_pose(self.init_joint_pose0) self.arr_init_pos0 = np.array(init_pos0, dtype='float32') init_pos1 = self.init_joints_pose(self.init_joint_pose1) self.arr_init_pos1 = np.array(init_pos1, dtype='float32') init_g_pos1 = self.init_joints_pose(self.init_grp_pose1) arr_init_g_pos1 = np.array(init_g_pos1, dtype='float32') jointtrajpub = JointTrajPub() for update in range(500): jointtrajpub.GrpCommand(arr_init_g_pos1) time.sleep(2) for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos1) time.sleep(0.3) for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos0) time.sleep(0.3) # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self._gz_conn.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self._gz_conn.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self._gz_conn.change_gravity_zero() # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_ur_joint_controllers...") self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose1)) rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose2)) # We save that position as the current joint desired position # print("[init_joint_pose1]:", self.init_joint_pose1, type(self.init_joint_pose1)) init_pos2 = self.init_joints_pose(self.init_joint_pose2) self.arr_init_pos2 = np.array(init_pos2, dtype='float32') after_rotate = self.init_joints_pose(self.after_rotate) self.arr_after_rotate = np.array(after_rotate, dtype='float32') after_pull = self.init_joints_pose(self.after_pull) self.arr_after_pull = np.array(after_pull, dtype='float32') init_g_pos2 = self.init_joints_pose(self.init_grp_pose2) arr_init_g_pos2 = np.array(init_g_pos2, dtype='float32') # 4th: We Set the init pose to the jump topic so that the jump control can update # We check the jump publisher has connection if self._ctrl_type == 'traj_pos': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'pos': self._joint_pubisher.check_publishers_connection() elif self._ctrl_type == 'traj_vel': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'vel': self._joint_pubisher.check_publishers_connection() else: rospy.logwarn("Controller type is wrong!!!!") # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.check_all_systems_ready() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self._gz_conn.adjust_gravity() for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos1) time.sleep(0.3) for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos2) time.sleep(0.3) for update in range(50): jointtrajpub.GrpCommand(arr_init_g_pos2) time.sleep(2) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self._gz_conn.pauseSim() # 8th: Get the State Discrete Stringuified version of the observations rospy.logdebug("get_observations...") observation = self.get_observations() # print("[observations]", observation) return observation def _act(self, action): if self._ctrl_type == 'traj_pos': self.pre_ctrl_type = 'traj_pos' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'pos': self.pre_ctrl_type = 'pos' self._joint_pubisher.move_joints(action) elif self._ctrl_type == 'traj_vel': self.pre_ctrl_type = 'traj_vel' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'vel': self.pre_ctrl_type = 'vel' self._joint_pubisher.move_joints(action) else: self._joint_pubisher.move_joints(action) def training_ok(self): rate = rospy.Rate(1) while self.check_stop_flg() is True: rospy.logdebug("stop_flag is ON!!!!") self._gz_conn.unpauseSim() if self.check_stop_flg() is False: break rate.sleep() def step(self, action): ''' ('action: ', array([ 0., 0. , -0., -0., -0. , 0. ], dtype=float32)) ''' rospy.logdebug("UR step func") # define the logger self.training_ok() # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # Act self._gz_conn.unpauseSim() action = action + self.arr_init_pos2 self._act(action) # print("[action]", action) self.wrench_stamped self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("wrench", self.wrench_stamped, type(self.wrench_stamped)) #<class 'geometry_msgs.msg._WrenchStamped.WrenchStamped'> # print("[force]", self.force.x, self.force.y, self.force.z) # print("[torque]", self.torque.x, self.torque.y, self.torque.z) ''' if self.force_limit < self.force.x or self.force.x < -self.force_limit: self._act(self.previous_action) # print("force.x over the limit") # print("[previous_action]", self.previous_action) elif self.force_limit < self.force.y or self.force.y < -self.force_limit: self._act(self.previous_action) # print("force.y over the limit") # print("[previous_action]", self.previous_action) elif self.force_limit < self.force.z or self.force.z < -self.force_limit: self._act(self.previous_action) # print("force.z over the limit") # print("[previous_action]", self.previous_action) elif self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit: self._act(self.previous_action) # print("torque.x over the limit") # print("[previous_action]", self.previous_action) elif self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit: self._act(self.previous_action) # print("torque.y over the limit") # print("[previous_action]", self.previous_action) elif self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit: self._act(self.previous_action) # print("torque.z over the limit") # print("[previous_action]", self.previous_action) else: self.previous_action = copy.deepcopy(action) # print("[action]", action) ''' # Then we send the command to the robot and let it go for running_step seconds time.sleep(self.running_step) self._gz_conn.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.get_observations() # print("[observations]", observation) # finally we get an evaluation based on what happened in the sim reward = self.compute_dist_rewards() done = self.check_done() return observation, reward, done, {} def compute_dist_rewards(self): self.quat = self.door.orientation self.door_rpy = self.cvt_quat_to_euler(self.quat) self.quat = self.imu_link.orientation self.imu_link_rpy = self.cvt_quat_to_euler(self.quat) compute_rewards = 0 # self.rpy = self.cvt_quat_to_euler(Quaternion(0.0, 0.0, 0.7071, 0.7071)) #print ("[self.target_point]: ", [self.target_point.x, self.target_point.y, self.target_point.z]) #print ("(self.get_current_xyz(): ", self.get_current_xyz()) #end_effector_pos = np.array([self.end_effector.position.x, self.end_effector.position.y, self.end_effector.position.z]) #self.distance = np.linalg.norm(end_effector_pos - [self.target_point.x, self.target_point.y, self.target_point.z], axis=0) # print ("[door]: ", [self.door.orientation.x, self.door.orientation.y, self.door.orientation.z, self.door.orientation.w]) # print ("[imu_link]: ", [self.imu_link.orientation.x, self.imu_link.orientation.y, self.imu_link.orientation.z, self.imu_link.orientation.w]) # print ("[door_rpy]: ", [self.door_rpy.x, self.door_rpy.y, self.door_rpy.z]) # [-3.141590232638843, 4.64637166410168e-06, 3.1407993185850303] # => [-3.141587417428544, 6.811796590263218e-05, 2.8971100347923704] # print ("[self.imu_link_rpy]: ", [self.imu_link_rpy.x, self.imu_link_rpy.y, self.imu_link_rpy.z]) # [-5.017238272290064e-06, 2.560885641286913e-07, 1.5707993173228965] # => [1.2205817198134408, -4.341035340318689e-06, 1.3270298472237638] # print ("[type]: ", type(self.door_rpy), type(self.end_effector.position)) # print ("[rpy]: ", [self.rpy.x, self.rpy.y, self.rpy.z]) # self.counter = self.counter + 1 # return self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z # for door opening compute_rewards = self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z return compute_rewards # for door opening # close #('[door_rpy]: ', [-3.141589494723927, 5.371950050444065e-06, 3.140803800525111]) #('[self.imu_link_rpy]: ', [-5.406752832019292e-06, 6.896590419417709e-06, 1.5708061011106662]) # open #('[door_rpy]: ', [3.1374584007550737, -0.0018131747911100536, 2.764050391990123(delta=0.3768)]) #('[self.imu_link_rpy]: ', [1.0934212049101757(delta=1.0934), -0.004085577221111396, 1.1941435185763472(delta=0.3768)]) def check_done(self): # if 3.1408 - self.door_rpy.z > 3.14 / 180 * 10: # for door opening if self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z > 3000: # for door opening print("done") return True else : return False
def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="simplicity") self.simplicity_state_object = SimplicityState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5 ) self.simplicity_state_object.set_desired_world_point(self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.simplicity_joint_pubisher_object = JointPub() """ For this version, we consider 6 actions 1-2: Increment/Decrement front_left_shoulder 3-4: Increment/Decrement front_left_thigh 5-6: Increment/Decrement front_left_shank 7-8: Increment/Decrement front_right_shoulder 9-10: Increment/Decrement front_right_thigh 11-12: Increment/Decrement front_right_shank 13-14: Increment/Decrement back_left_shoulder 15-16: Increment/Decrement back_left_thigh 17-18: Increment/Decrement back_left_shank 19-20: Increment/Decrement back_right_shoulder 21-22: Increment/Decrement back_right_thigh 23-24: Increment/Decrement back_right_shank """ self.action_space = spaces.Discrete(24) self.reward_range = (-np.inf, np.inf) self.seed()
class CartPole3DEnv(gym.Env): #class inherit from gym.Env. def __init__(self): # We initialize and define init function. number_actions = rospy.get_param('/cart_pole_3d/n_actions') self.action_space = spaces.Discrete(number_actions) self.state_size = 3 self._seed() # get configuration parameters self.init_cart_vel = rospy.get_param('/cart_pole_3d/init_cart_vel') # Actions self.cart_speed_fixed_value = rospy.get_param('/cart_pole_3d/cart_speed_fixed_value') self.start_point = Point() self.start_point.x = rospy.get_param("/cart_pole_3d/init_cube_pose/x") self.start_point.y = rospy.get_param("/cart_pole_3d/init_cube_pose/y") self.start_point.z = rospy.get_param("/cart_pole_3d/init_cube_pose/z") # Done self.max_angle = rospy.get_param('/cart_pole_3d/max_angle') self.max_distance = rospy.get_param('/cart_pole_3d/max_distance') # Rewards self.keep_pole_up_reward = rospy.get_param("/cart_pole_3d/keep_pole_up_reward") self.end_episode_points = rospy.get_param("/cart_pole_3d/end_episode_points") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_list = ['joint_state_controller', 'cart_joint_velocity_controller' ] self.controllers_object = ControllersConnection(namespace="cart_pole_3d", controllers_list=self.controllers_list) """ Namespace of robot is the namespace in front of your controller. Controllers_list => that we want to reset at each time that we call the reset controllers. This case we just have two: joint_state_controller, inertia_wheel_roll_joint_velocity_controller We need to create a function which gets retrieves those controllers. """ self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self.check_all_sensors_ready() rospy.Subscriber("/cart_pole_3d/joint_states", JointState, self.joints_callback) rospy.Subscriber("/cart_pole_3d/odom", Odometry, self.odom_callback) # rospy.Subscriber("/cart_pole_3d/imu", Imu, self.imu_callback) self._cart_velocity_publisher = rospy.Publisher('/cart_pole_3d/cart_joint_velocity_controller/command', Float64, queue_size=1) self.check_publishers_connection() self.gazebo.pauseSim() def _seed(self, seed=None): # overriden function self.np_random, seed = seeding.np_random(seed) return [seed] def _step(self, action): # overriden function self.gazebo.unpauseSim() self.set_action(action) self.gazebo.pauseSim() obs = self._get_obs() done = self._is_done(obs) info = {} reward = self.compute_reward(obs, done) simplified_obs = self.convert_obs_to_state(obs) return simplified_obs, reward, done, info def _reset(self): # We are using a virtual function defined in the gym infrastructure. """ Everytime we start episode, robot will be the same place and configurations. Because otherwise it won't learn correctly and we can't iterated. => basic stuff for Reinforcement learning. :return: """ self.gazebo.unpauseSim() """ why we need to unpauseSim because resetting controllers and for checking the sensors, we need the simulation to be running because otherwise we don't have any sensory data and we don't have access to the controller reset functions services they won't work and tell you to hit play. => it is very important. """ self.controllers_object.reset_controllers() self.check_all_sensors_ready() self.set_init_pose() #initialized robot self.gazebo.pauseSim() self.gazebo.resetSim() self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self.check_all_sensors_ready() self.gazebo.pauseSim() self.init_env_variables() obs = self._get_obs() simplified_obs = self.convert_obs_to_state(obs) return simplified_obs def init_env_variables(self): """ Inits variables needed to be initialised each time we reset at the start of an episode. :return: """ self.total_distance_moved = 0.0 self.current_y_distance = self.get_y_dir_distance_from_start_point(self.start_point) self.cart_current_speed = rospy.get_param('/cart_pole_3d/init_cart_vel') def _is_done(self, observations): # MAximum distance to travel permited in meters from origin max_upper_distance = self.max_distance max_lower_distance = -self.max_distance max_angle_allowed = self.max_angle if (observations[1] > max_upper_distance or observations[1] < max_lower_distance): rospy.logerr("Cart Too Far==>" + str(observations[1])) done = True elif(observations[2] > max_angle_allowed or observations[2] < -max_angle_allowed): rospy.logerr("Pole excceed allowed angle =>" + str(observations[2])) done = True else: # rospy.loginfo("Cart NOT Too Far==>" + str(observations[1])) # rospy.loginfo("Pole still in range==>" + str(observations[2])) done = False return done def set_action(self, action): # We convert the actions to speed movements to send to the parent class CubeSingleDiskEnv if action == 0: # Move left self.cart_current_speed = self.cart_speed_fixed_value elif action == 1: # Move right self.cart_current_speed = -1*self.cart_speed_fixed_value # We clamp Values to maximum rospy.logdebug("cart_current_speed before clamp==" + str(self.cart_current_speed)) self.cart_current_speed = numpy.clip(self.cart_current_speed, -1 * self.cart_speed_fixed_value, self.cart_speed_fixed_value) rospy.logdebug("cart_current_speed after clamp==" + str(self.cart_current_speed)) self.move_joints(self.cart_current_speed) def _get_obs(self): """ Here we define what sensor data defines our robots observations To know which Variables we have acces to, we need to read the MyCubeSingleDiskEnv API DOCS :return: """ # We get the angle of the pole_angle pole_angle = round(self.joints.position[1],3) # We get the distance from the origin y_distance = self.get_y_dir_distance_from_start_point(self.start_point) # We get the current speed of the cart current_cart_speed_vel = self.get_cart_velocity() cube_observations = [ round(current_cart_speed_vel, 1), round(y_distance, 1), round(pole_angle, 1) ] return cube_observations def get_cart_velocity(self): # We get the current joint roll velocity roll_vel = self.joints.velocity[1] return roll_vel def get_y_linear_speed(self): # We get the current joint roll velocity y_linear_speed = self.odom.twist.twist.linear.y return y_linear_speed def get_y_dir_distance_from_start_point(self, start_point): """ Calculates the distance from the given point and the current position given by odometry. In this case the increase or decrease in y. :param start_point: :return: """ y_dist_dir = self.odom.pose.pose.position.y - start_point.y return y_dist_dir def get_pole_angle(self): return self.imu.orientation.y def compute_reward(self, observations, done): speed = observations[0] distance = observations[1] angle = observations[2] # if not done: # # positive for keeping the pole up # reward_keep_pole_up = self.keep_pole_up_reward # # nigative Reinforcement # reward_distance = distance * -2 # # # positive reward on angle # reward_angle = 10 / ((abs(angle) * 1)+1.1) # # reward = reward_angle + reward_distance + reward_keep_pole_up # # rospy.loginfo("Reward_distance=" + str(reward_distance)) # rospy.loginfo("Reward_angle= " + str(reward_angle)) # else: # reward = -1 * self.end_episode_points # # return reward # rospy.loginfo("pole_angle for reward==>" + str(angle)) delta = 0.7 - abs(angle) reward_pole_angle = math.exp(delta*10) # If we are moving to the left and the pole is falling left is Bad # rospy.logwarn("pole_vel==>" + str(speed)) pole_vel_sign = numpy.sign(speed) pole_angle_sign = numpy.sign(angle) # rospy.logwarn("pole_vel sign==>" + str(pole_vel_sign)) # rospy.logwarn("pole_angle sign==>" + str(pole_angle_sign)) # We want inverted signs for the speeds. We multiply by -1 to make minus positive. # global_sign + = GOOD, global_sign - = BAD base_reward = 500 if speed != 0: global_sign = pole_angle_sign * pole_vel_sign * -1 reward_for_efective_movement = base_reward * global_sign else: # Is a particular case. If it doesnt move then its good also reward_for_efective_movement = base_reward reward = reward_pole_angle + reward_for_efective_movement # rospy.logwarn("reward==>" + str(reward)+"= r_pole_angle="+str(reward_pole_angle)+",r_movement= "+str(reward_for_efective_movement)) return reward def joints_callback(self, data): self.joints = data def odom_callback(self, data): self.odom = data def check_all_sensors_ready(self): self.check_joint_states_ready() self.check_odom_ready() # self.check_imu_ready() rospy.logdebug("ALL SENSORS READY") def check_joint_states_ready(self): self.joints = None while self.joints is None and not rospy.is_shutdown(): try: self.joints = rospy.wait_for_message("/cart_pole_3d/joint_states", JointState, timeout=1.0) # check response from this topic for 1 second. if don't received respone, it mean not ready. # Assure data channels are working perfectly. rospy.logdebug("Current cart_pole_3d/joint_states READY=>" + str(self.joints)) except: rospy.logerr("Current cart_pole_3d/joint_states not ready yet, retrying for getting joint_states") return self.joints def check_odom_ready(self): self.odom = None while self.odom is None and not rospy.is_shutdown(): try: self.odom = rospy.wait_for_message("/cart_pole_3d/odom", Odometry, timeout=1.0) rospy.logdebug("Current /cart_pole_3d/odom READY=>" + str(self.odom)) except: rospy.logerr("Current /cart_pole_3d/odom not ready yet, retrying for getting odom") return self.odom def check_publishers_connection(self): """ Checks that all the publishers are working :return: """ rate = rospy.Rate(10) # 10hz while (self._cart_velocity_publisher.get_num_connections() == 0 and not rospy.is_shutdown()): rospy.logdebug("No susbribers to _cart_velocity_publisher yet so we wait and try again") try: rate.sleep() except rospy.ROSInterruptException: # This is to avoid error when world is rested, time when backwards. pass rospy.logdebug("_base_pub Publisher Connected") rospy.logdebug("All Publishers READY") def move_joints(self, cart_speed): joint_speed_value = Float64() joint_speed_value.data = cart_speed # rospy.logwarn("cart Velocity >>>>>>>" + str(joint_speed_value)) self._cart_velocity_publisher.publish(joint_speed_value) def set_init_pose(self): """Sets the Robot in its init pose """ self.move_joints(self.init_cart_vel) return True def convert_obs_to_state(self, observations): """ Converts the observations used for reward and so on to the essentials for the robot state In this case we only need the orientation of the cube and the speed of the disc. The distance doesnt condition at all the actions """ speed = observations[0] distance = observations[1] angle = observations[2] state_converted = [speed, distance, angle] return state_converted
class RobotGazeboEnv(gym.Env): def __init__(self, robot_name_space, controllers_list, reset_controls, start_init_physics_parameters=True, reset_world_or_sim="SIMULATION"): # To reset Simulations rospy.logdebug("START init RobotGazeboEnv") self.gazebo = GazeboConnection(start_init_physics_parameters, reset_world_or_sim) self.controllers_object = ControllersConnection( namespace=robot_name_space, controllers_list=controllers_list) self.reset_controls = reset_controls self.seed() # Set up ROS related variables self.episode_num = 0 self.cumulated_episode_reward = 0 self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1) rospy.logdebug("END init RobotGazeboEnv") # Env methods def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def step(self, action): """ Function executed each time step. Here we get the action execute it in a time step and retrieve the observations generated by that action. :param action: :return: obs, reward, done, info """ """ Here we should convert the action num to movement action, execute the action in the simulation and get the observations result of performing that action. """ rospy.logdebug("START STEP OpenAIROS") self.gazebo.unpauseSim() self._set_action(action) self.gazebo.pauseSim() obs = self._get_obs() done = self._is_done(obs) info = {} reward = self._compute_reward(obs, done) self.cumulated_episode_reward += reward rospy.logdebug("END STEP OpenAIROS") return obs, reward, done, info def reset(self): rospy.logdebug("Reseting RobotGazeboEnvironment") self._reset_sim() self._init_env_variables() self._update_episode() obs = self._get_obs() rospy.logdebug("END Reseting RobotGazeboEnvironment") return obs def close(self): """ Function executed when closing the environment. Use it for closing GUIS and other systems that need closing. :return: """ rospy.logdebug("Closing RobotGazeboEnvironment") rospy.signal_shutdown("Closing RobotGazeboEnvironment") def _update_episode(self): """ Publishes the cumulated reward of the episode and increases the episode number by one. :return: """ rospy.logwarn("PUBLISHING REWARD...") self._publish_reward_topic(self.cumulated_episode_reward, self.episode_num) rospy.logwarn("PUBLISHING REWARD...DONE=" + str(self.cumulated_episode_reward) + ",EP=" + str(self.episode_num)) self.episode_num += 1 self.cumulated_episode_reward = 0 def _publish_reward_topic(self, reward, episode_number=1): """ This function publishes the given reward in the reward topic for easy access from ROS infrastructure. :param reward: :param episode_number: :return: """ reward_msg = RLExperimentInfo() reward_msg.episode_number = episode_number reward_msg.episode_reward = reward self.reward_pub.publish(reward_msg) # Extension methods # ---------------------------- def _reset_sim(self): """Resets a simulation """ rospy.logdebug("RESET SIM START") if self.reset_controls: rospy.logdebug("RESET CONTROLLERS") self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self._check_all_systems_ready() self._set_init_pose() self.gazebo.pauseSim() self.gazebo.resetSim() self.gazebo.unpauseSim() self.controllers_object.reset_controllers() self._check_all_systems_ready() self.gazebo.pauseSim() else: rospy.logwarn("DONT RESET CONTROLLERS") self.gazebo.unpauseSim() self._check_all_systems_ready() self._set_init_pose() self.gazebo.pauseSim() self.gazebo.resetSim() self.gazebo.unpauseSim() self._check_all_systems_ready() self.gazebo.pauseSim() rospy.logdebug("RESET SIM END") return True def _set_init_pose(self): """Sets the Robot in its init pose """ raise NotImplementedError() def _check_all_systems_ready(self): """ Checks that all the sensors, publishers and other simulation systems are operational. """ raise NotImplementedError() def _get_obs(self): """Returns the observation. """ raise NotImplementedError() def _init_env_variables(self): """Inits variables needed to be initialised each time we reset at the start of an episode. """ raise NotImplementedError() def _set_action(self, action): """Applies the given action to the simulation. """ raise NotImplementedError() def _is_done(self, observations): """Indicates whether or not the episode is done ( the robot has fallen for example). """ raise NotImplementedError() def _compute_reward(self, observations, done): """Calculates the reward to give based on the observations given. """ raise NotImplementedError() def _env_setup(self, initial_qpos): """Initial configuration of the environment. Can be used to configure initial state and extract information from the simulation. """ raise NotImplementedError()
def __init__(self): # fixed frame is nearly on the origin of the ground 0,0,0.6 at laelaps body center of mass self.last_base_position = [0, 0, 0] self.distance_weight = 1 self.drift_weight = 2 self.time_step = 0.001 # default Gazebo simulation time step self.episode_number = 0 self.frames = 0 self.torques_step = [] self.euler_angles = [] self.euler_rates = [] self.base_zaxis = [] self.base_x_y = [] self.sim_t = [] self.saving_option = False # Rospy get parameters from config file: self.laelaps_model_number = rospy.get_param("/laelaps_model_number") self.ramp_model_number = rospy.get_param("/ramp_model_number") self.ramp_available = rospy.get_param("/ramp_available") self.env_goal = rospy.get_param("/env_goal") self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection( namespace="laelaps_robot", controllers_list=controllers_list) # _______________SUBSCRIBERS__________________________________________________ # give base position and quaternions rospy.Subscriber(gazebo_model_states_topic, ModelStates, self.models_callback) # give motor angle, velocity, torque rospy.Subscriber(joint_state_topic, JointState, self.joints_state_callback) rospy.Subscriber("/clock", Clock, self.clock_callback) rospy.Subscriber(rf_toe_nan_topic, Bool, self.rf_toe_nan_callback) rospy.Subscriber(rh_toe_nan_topic, Bool, self.rh_toe_nan_callback) rospy.Subscriber(lf_toe_nan_topic, Bool, self.lf_toe_nan_callback) rospy.Subscriber(lh_toe_nan_topic, Bool, self.lh_toe_nan_callback) # _______________MOTOR PUBLISHERS__________________________________________________ self.motor_pub = list() for joint in motor_joints: joint_controller = "/laelaps_robot/" + str(joint) + "/command" x = rospy.Publisher(joint_controller, Float64, queue_size=1) self.motor_pub.append(x) # # _______________Toe PUBLISHERS__________________________________________________ # toe4_pos_publisher node : RH_foot: toe1 , RF_foot: toe2, LF_foot: toe3, LH_foot: toe4 self.toe_pub = list() for idx in range(len(laelaps_feet)): toe_commands = "/laelaps_robot/toe" + str(idx + 1) + "/command" x = rospy.Publisher(toe_commands, Toe, queue_size=1) self.toe_pub.append(x) # ______________Defining observation and action space____________________________ observation_high = (self.GetObservationUpperBound() + observation_eps) observation_low = (self.GetObservationLowerBound() - observation_eps) # Four legs toe x,y are estimated by RL low = [x_low] * 4 low.extend([y_low] * 4) high = [x_high] * 4 high.extend([y_high] * 4) self.action_space = spaces.Box(low=np.array(low), high=np.array(high), dtype=np.float32) # the epsilon to avoid 0 values entering the neural network in the algorithm observation_high = (self.GetObservationUpperBound() + observation_eps) observation_low = (self.GetObservationLowerBound() - observation_eps) self.observation_space = spaces.Box(observation_low, observation_high, dtype=np.float32) # ______________Reset and seed the environment____________________________ self.seed() # seed the environment in the initial function self.init_reset() # reset the environment in the initial function
def __init__(self): # We assume that a ROS node has already been created # before initialising the environment self.set_link = rospy.ServiceProxy('/gazebo/set_link_state', SetLinkState) rospy.wait_for_service('/gazebo/set_link_state') # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param( "/desired_pose/position/x") self.desired_pose.position.y = rospy.get_param( "/desired_pose/position/y") self.desired_pose.position.z = rospy.get_param( "/desired_pose/position/z") self.desired_pose.orientation.x = rospy.get_param( "/desired_pose/orientation/x") self.desired_pose.orientation.y = rospy.get_param( "/desired_pose/orientation/y") self.desired_pose.orientation.z = rospy.get_param( "/desired_pose/orientation/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_vel = rospy.get_param("/max_vel") self.min_vel = rospy.get_param("/min_vel") self.max_acc = rospy.get_param("/max_acc") self.min_acc = rospy.get_param("/min_acc") self.max_jerk = rospy.get_param("/max_jerk") self.min_jerk = rospy.get_param("/min_jerk") self.max_snap = rospy.get_param("/max_snap") self.min_snap = rospy.get_param("/min_snap") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.num_action_space = 15 high = np.array([1. for _ in range(self.num_action_space)]) low = np.array([-1. for _ in range(self.num_action_space)]) self.action_space = spaces.Box(low=low, high=high) # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="monoped") self.multi_uav_state_object = MultiUavState( max_vel=self.max_vel, min_vel=self.min_vel, max_acc=self.max_acc, min_acc=self.min_acc, max_jerk=self.max_jerk, min_jerk=self.min_jerk, max_snap=self.max_snap, min_snap=self.min_snap, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, done_reward=self.done_reward, alive_reward=self.alive_reward) self.multi_uav_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z, self.desired_pose.orientation.x, self.desired_pose.orientation.y, self.desired_pose.orientation.z) self.mellingers = [ MellingerMARL('hummingbird', 0, 0.95, 0.0, 0.35), MellingerMARL('hummingbird', 1, 0.0, 0.95, 0.35), MellingerMARL('hummingbird', 2, -0.95, 0.0, 0.35), MellingerMARL('hummingbird', 3, 0.0, -0.95, 0.35) ] """ For this version, we consider 6 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5-6) Increment/Decrement kfe_joint """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() print("end of init...")