def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.lastDistance = 0 #QinjieLin self.currentDistance = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.respawn_goal = Respawn() self.resetX = rospy.get_param('/x_pos', 0.0) self.resetY = rospy.get_param('/y_pos', 0.0) self.resetZ = rospy.get_param('/z_pos', 0.0) self.resetYaw = rospy.get_param('/yam_angle', 0.0) self.resetQua = quaternion_from_euler(0.0, 0.0, self.resetYaw) self.startTime = time.time() self.endTime = 0 self.numSuccess = 0 self.thetaStep = rospy.get_param('/theta_step', 0.0) self.thetaDistance = rospy.get_param('/theta_distance', 0.0) self.thetaCollision = rospy.get_param('/theta_collision', 0.0) self.thetaTurning = rospy.get_param('/theta_turning', 0.0) self.thetaClearance = rospy.get_param('/theta_clearance', 0.0) self.thetaGOal = rospy.get_param('/theta_goal', 0.0) self.laserStep = rospy.get_param('/laser_step', 60)
def __init__(self, ): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.lastDistance = 0 #QinjieLin self.currentDistance = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) # self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) # self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) # self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.resetX = rospy.get_param('/x_pos', 0.0) self.resetY = rospy.get_param('/y_pos', 0.0) self.resetZ = rospy.get_param('/z_pos', 0.0) self.resetYaw = rospy.get_param('/yaw_angle', 0.0) self.resetQua = quaternion_from_euler(0.0, 0.0, self.resetYaw) self.startTime = time.time() self.endTime = 0 self.numSuccess = 0 self.dis_weight = rospy.get_param('/dis_weight', 0.0) self.lin_weight = rospy.get_param('/lin_weight', 0.0) self.obs_weight = rospy.get_param('/obs_weight', 0.0) self.ori_weight = rospy.get_param('/ori_weight', 0.0) self.col_weight = rospy.get_param('/col_weight', 0.0) self.goal_weight = rospy.get_param('/goal_weight', 0.0) self.rot_weight = rospy.get_param('/rot_weight', 0.0) self.laserStep = rospy.get_param('/laser_step', 60) self.stepTime = rospy.get_param('/step_time', 0.5)
def __init__(self, index=-1): if (index == -1): print("########## index == -1 ##########") self.cmd_topic = "/cmd_vel" self.odom_topic = "/odom" self.scan_topic = "/base_scan" self.crash_topic = "/is_crashed" else: self.cmd_topic = "robot_" + str(index) + "/cmd_vel" self.odom_topic = "robot_" + str(index) + "/odom" self.scan_topic = "robot_" + str(index) + "/base_scan" self.crash_topic = "robot_" + str(index) + "/is_crashed" self.reset_service = 'reset_positions' self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.lastDistance = 0 self.currentDistance = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher(self.cmd_topic, Twist, queue_size=5) self.sub_odom = rospy.Subscriber(self.odom_topic, Odometry, self.getOdometry) self.sub_crash = rospy.Subscriber(self.crash_topic, Int8, self.crash_callback) self.reset_stage = rospy.ServiceProxy(self.reset_service, Empty) self.respawn_goal = Respawn() self.resetX = rospy.get_param('/x_pos', 0.0) self.resetY = rospy.get_param('/y_pos', 0.0) self.resetZ = rospy.get_param('/z_pos', 0.0) self.resetYaw = rospy.get_param('/yaw_angle', 0.0) self.resetQua = quaternion_from_euler(0.0, 0.0, self.resetYaw) self.startTime = time.time() self.endTime = 0 self.numSuccess = 0 self.dis_weight = rospy.get_param('/dis_weight', 0.0) self.lin_weight = rospy.get_param('/lin_weight', 0.0) self.obs_weight = rospy.get_param('/obs_weight', 0.0) self.ori_weight = rospy.get_param('/ori_weight', 0.0) self.col_weight = rospy.get_param('/col_weight', 0.0) self.goal_weight = rospy.get_param('/goal_weight', 0.0) self.rot_weight = rospy.get_param('/rot_weight', 0.0) self.laserStep = rospy.get_param('/laser_step', 60) self.stepTime = rospy.get_param('/step_time', 0.5) self.realWorldFlag = rospy.get_param('/real_world') self.simScale = 500.0 if (self.realWorldFlag): self.simScale = 1.0 rospy.sleep(1.)
def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn()
def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.direction_of_movement = 0 self.scan_values = None self.initGoal = True self.max_scan_value = float(3.5) self.action_size = action_size self.get_goalbox = False self.current_position = Pose() self.pub_cmd_vel = rospy.Publisher('/mobile_base/commands/velocity', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.respawn_goal = Respawn()
def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.past_distance = 0. #Keys CTRL + c will stop script rospy.on_shutdown(self.shutdown)
def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher( '/hydrone_aerial_underwater/cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber( '/hydrone_aerial_underwater/ground_truth/odometry', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_world', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.past_distance = 0. self.stopped = 0 #Keys CTRL + c will stop script rospy.on_shutdown(self.shutdown)
def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.past_distance = 0. self.past_action = np.array([0.,0.]) obs_high = np.concatenate((np.array([3.5] * 10) ,np.array([0.22, 2.0]), np.array([10.]), np.array([10.]) )) obs_low = np.concatenate((np.array([0.] * 10) ,np.array([0.0, -2.0]), np.array([-10.]), np.array([-10.]) )) self.action_space = spaces.Box(low=np.array([0., -2.0]) ,high=np.array([0.22, 2.0]), dtype=np.float32) self.observation_space = spaces.Box(low=obs_low , high=obs_high, dtype=np.float32) #Keys CTRL + c will stop script rospy.on_shutdown(self.shutdown)
def __init__(self, action_size, prev_succ=0): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=10) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.set_pos_proxy = rospy.ServiceProxy('gazebo/set_model_state', SetModelState) self.respawn_goal = Respawn(prev_succ=prev_succ) self.colliding = False self.spawn_pos = ModelState() self.spawn_pos.pose.position.x = BOT_INIT_X self.spawn_pos.pose.position.y = BOT_INIT_Y self.spawn_pos.model_name = "turtlebot3_burger"
class Env(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.past_distance = 0.0 def unpause_proxy(self): self.unpause_proxy def pause_proxy(self): self.pause_proxy def getGoalDistace(self): goal_distance = math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan, past_action): scan_range = [] heading = self.heading min_range = 0.15 done = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): scan_range.append(3.5) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i]) if min_range > min(scan_range) > 0: done = True current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.2: self.get_goalbox = True return scan_range + [ past_action[0], past_action[1], heading, current_distance ], done def setReward(self, state, done, action): current_distance = state[-1] heading = state[-2] distance_rate = (self.past_distance - current_distance) reward = (500 * distance_rate) self.past_distance = current_distance if done: rospy.loginfo("Collision!!") reward = -150 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") reward = 200 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action, past_action): #print "Action: ", action ang_vel = action[1] if ang_vel >= 1.0: ang_vel = 1.0 if ang_vel <= -1.0: ang_vel = -1.0 vel_cmd = Twist() vel_cmd.linear.x = action[0] * 0.3 vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data, past_action) reward = self.setReward(state, done, action) return np.asarray(state), reward, done def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False self.goal_distance = self.getGoalDistace() state, done = self.getState(data, [0., 0.]) return np.asarray(state)
class Environment(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.direction_of_movement = 0 self.scan_values = None self.initGoal = True self.max_scan_value = float(3.5) self.action_size = action_size self.get_goalbox = False self.current_position = Pose() self.pub_cmd_vel = rospy.Publisher('/mobile_base/commands/velocity', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.respawn_goal = Respawn() def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.current_position.x, self.goal_y - self.current_position.y), 2) return goal_distance def getOdometry(self, odom): self.current_position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.current_position.y, self.goal_x - self.current_position.x) #print self.current_position.x + self.current_position.y direction_of_movement = goal_angle - yaw if direction_of_movement > pi: direction_of_movement -= 2 * pi elif direction_of_movement < -pi: direction_of_movement += 2 * pi self.direction_of_movement = round(direction_of_movement, 2) def getState(self, scan): scan_range = [] direction_of_movement = self.direction_of_movement min_dist_for_collision = 0.13 done = False nr_of_samples = 23 num_skipped_angles = int(len(scan.ranges) / nr_of_samples) for i in range(len(scan.ranges)): base = i % num_skipped_angles if base == 0: if scan.ranges[i] == float('Inf'): scan_range.append(self.max_scan_value) elif scan.ranges[i] > self.max_scan_value: scan_range.append(self.max_scan_value) elif np.isnan(scan.ranges[i]): scan_range.append(float(0)) else: scan_value = float('%.2f' % scan.ranges[i]) scan_range.append(scan_value) closest_obstacle_distance = round(min(scan_range), 2) # rounded to ndigits = 2 closest_obstacle_angle = np.argmin(scan_range) # number of readings that are less than the minimum distance for collision possible_collision_readings = sum(i < min_dist_for_collision for i in scan_range) percentage_possible_collision_readings = float( possible_collision_readings) / len(scan_range) * 100 # > 30% of scan values are showing a possible collision if percentage_possible_collision_readings > 30.0: done = True if min_dist_for_collision > min(scan_range) > 0: # check done = True current_distance = round( math.hypot(self.goal_x - self.current_position.x, self.goal_y - self.current_position.y), 2) # if robot is 0.5m away from the goal, set the goal box to true - the goal has been reached if current_distance < 0.3: self.get_goalbox = True return scan_range + [ direction_of_movement, current_distance, closest_obstacle_distance, closest_obstacle_angle ], done def setReward(self, state, done, action): yaw_reward = [] current_distance_from_goal = state[-3] direction_of_movement = state[-4] for i in range(5): angle = pi / 4 + direction_of_movement + (pi / 8 * i) turn_reward = 1 - 4 * math.fabs( 0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0]) yaw_reward.append(turn_reward) distance_rate = 2**(current_distance_from_goal / self.goal_distance) reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate) if done: rospy.loginfo("Collision!!") reward = -150 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") reward = 200 self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action): max_angular_velocity = 1.5 angular_velocity = ( (self.action_size - 1) / 2 - action) * max_angular_velocity * 0.5 move_velocity_cmd = Twist() move_velocity_cmd.linear.x = 0.15 move_velocity_cmd.angular.z = angular_velocity self.pub_cmd_vel.publish(move_velocity_cmd) laser_scan_data = None while laser_scan_data is None: try: laser_scan_data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(laser_scan_data) reward = self.setReward(state, done, action) return np.asarray(state), reward, done def reset(self): # reset environment os.system('rosservice call /gazebo/reset_world "{}"') # set up the odometry reset publisher reset_odom = rospy.Publisher('/mobile_base/commands/reset_odometry', Empty, queue_size=10) # reset odometry (these messages take a few iterations to get through) timer = time() while time() - timer < 0.25: reset_odom.publish(Empty()) laser_scan_data = None # make sure laser scan values exist while laser_scan_data is None: try: laser_scan_data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass # States last 4 elements are important -4 is heading, -3 is current distance, -2 is obstacle min range, and the -1 is obstacle angle. # if has reached a previous goal, set a new goal if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False self.goal_distance = self.getGoalDistace() state, done = self.getState(laser_scan_data) return np.asarray(state)
class Env(): def __init__(self, action_size, prev_succ=0): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=10) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.set_pos_proxy = rospy.ServiceProxy('gazebo/set_model_state', SetModelState) self.respawn_goal = Respawn(prev_succ=prev_succ) self.colliding = False self.spawn_pos = ModelState() self.spawn_pos.pose.position.x = BOT_INIT_X self.spawn_pos.pose.position.y = BOT_INIT_Y self.spawn_pos.model_name = "turtlebot3_burger" def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.13 done = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): scan_range.append(3.5) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i]) if min_range > min(scan_range) > 0: done = True current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.2: self.get_goalbox = True return scan_range + [heading, current_distance], done def setReward(self, state, done, action): yaw_reward = [] current_distance = state[-1] heading = state[-2] for i in range(10): angle = -pi / 4 + heading + (pi / 8 * (i % 5)) + pi / 2 tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0]) if i > 4: tr = tr * -1 yaw_reward.append(tr) distance_rate = 2**(current_distance / self.goal_distance) reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate) - 1 #reward = 3 - distance_rate if done: if self.colliding == False: self.colliding = True rospy.loginfo("Collision!!") reward = colli_reward #self.pub_cmd_vel.publish(Twist()) else: self.colliding = False if self.get_goalbox: rospy.loginfo("Goal!!") reward = 750 self.pub_cmd_vel.publish(Twist()) #self.get_goalbox = False return reward def stopHere(self): self.pub_cmd_vel.publish(Twist()) def step(self, action): max_angular_vel = 1.5 ang_vel = ((self.action_size / 2 - 1) / 2 - (action % 5)) * max_angular_vel * 0.5 vel_cmd = Twist() vel_cmd.linear.x = 0.15 if action > 4: vel_cmd.linear.x = -0.15 vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=10) except: pass state, done = self.getState(data) reward = self.setReward(state, done, action) goalbox = False if self.get_goalbox: goalbox = True self.get_goalbox = False return np.asarray(state), reward, done, goalbox def reset(self, win=False): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") if self.initGoal: self.respawn_goal.respawnModel() else: rospy.wait_for_service('gazebo/set_model_state') if win: if WORLD_NAME != 'boxes_1': new_x = random.random( ) * bot_xrange - bot_xrange / 2 + bot_xcenter new_y = random.random( ) * bot_yrange - bot_yrange / 2 + bot_ycenter flag = self.respawn_goal.judge_goal_collision(new_x, new_y) while flag: #print('bot gene colli') new_x = random.random( ) * bot_xrange - bot_xrange / 2 + bot_xcenter new_y = random.random( ) * bot_yrange - bot_yrange / 2 + bot_ycenter flag = self.respawn_goal.judge_goal_collision( new_x, new_y) self.spawn_pos.pose.position.x = new_x self.spawn_pos.pose.position.y = new_y else: goal_x = -1.3 goal_y = -4.3 gene_dis = self.respawn_goal.get_gene_dis() flag = True while flag: theta = random.random() * 2 * math.pi new_x = goal_x + gene_dis * math.cos(theta) new_y = goal_y + gene_dis * math.sin(theta) flag = self.respawn_goal.judge_goal_collision( new_x, new_y) self.spawn_pos.pose.position.x = new_x self.spawn_pos.pose.position.y = new_y #STATE = ModelStates() #STATE.pose = [self.spawn_pos.pose] #STATE.twist = [self.spawn_pos.twist] #STATE.name = ["turtlebot3_burger"] try: self.set_pos_proxy(self.spawn_pos) print('set_model_state service') except (rospy.ServiceException) as e: print("gazebo/set_model_state service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=10) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition( self.spawn_pos) self.initGoal = False else: self.goal_x, self.goal_y = self.respawn_goal.getPosition( self.spawn_pos, win, delete=win) self.goal_distance = self.getGoalDistace() state, done = self.getState(data) return np.asarray(state)
class Env(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.last_distance = 0 self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): #得到里程计 self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation #方向 orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) #得到yaw航向角 goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) #得到弧度值 heading = goal_angle - yaw #目标和自身角度的差 if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) #得到从当前方向转到面向目标点的转向 def getState(self, scan, linear, ang_vel): scan_range = [] heading = self.heading min_range = 0.180 done = False #print("scan.ranges:", len(scan.ranges)) for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): #等于正无穷 scan_range.append(3.5) elif np.isnan(scan.ranges[i]): #判断是否为空值 scan_range.append(0) else: scan_range.append(scan.ranges[i]) #obstacle_min_range = round(min(scan_range), 2) #障碍物距离最小的地方 #print("min_distance=", obstacle_min_range) #obstacle_angle = np.argmin(scan_range) #给出最小值的下标 找到障碍物在哪 #print("min_distance_index=", obstacle_angle) if min_range > min(scan_range) > 0: done = True #撞到done置为True # 返回所有参数的平方和的平方根 current_distance = self.getGoalDistace() if current_distance < 0.2: #目标与机器人的距离 self.get_goalbox = True done = True return scan_range + [heading, current_distance, linear, ang_vel ], done #相对位置以及上一时刻的线速度和角速度 def setReward(self, state, done): obstacle_min_range = state[-2] current_distance = state[-3] heading = state[-4] reward = (self.last_distance - current_distance) - 10 * math.cos(heading) ob_reward = 0 if obstacle_min_range < 0.5: ob_reward += -20 reward = round(reward + ob_reward, 3) if done and self.get_goalbox is False: rospy.loginfo("Collision!!") reward = -200 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") reward = 200 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) #self.goal_distance = self.getGoalDistace() self.get_goalbox = False #self.last_distance = self.getGoalDistace() return reward def step(self, action): self.last_distance = self.getGoalDistace() ang_vel = action[0] linear = action[1] vel_cmd = Twist( ) #指线速度角速度的消息类型,通常是用在运动话题/cmd_vel中,被base controller节点监听 vel_cmd.linear.x = linear #线速度 vel_cmd.angular.z = ang_vel #角速度 #print("ang_val = " + str(ang_vel) + " ,linear = " + str(linear)) self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data, linear, ang_vel) reward = self.setReward(state, done) #print("step_reward=" + str(reward)) self.last_distance = self.getGoalDistace() return np.asarray(state), reward, done #states,reward,isget def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False #self.goal_distance = self.getGoalDistace() #self.last_distance = self.getGoalDistace() print(self.goal_x) state, done = self.getState(data, 0, 0) return np.asarray(state)
class Env(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() def getGoalDistace(self): goal_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): #得到里程计 self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation #方向 orientation_list = [orientation.x, orientation.y, orientation.z, orientation.w] _, _, yaw = euler_from_quaternion(orientation_list) #四元数中的欧拉 goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x)#得到弧度值 heading = goal_angle - yaw #目标和自身角度的差 if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) #得到转向 def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.2 done = False print("scan.ranges:%d"%len(scan_range)) for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): #等于正无穷 scan_range.append(3.5) elif np.isnan(scan.ranges[i]): #判断是否为空值 scan_range.append(0) else: scan_range.append(scan.ranges[i]) obstacle_min_range = round(min(scan_range), 2) #障碍物距离最小的地方 print("min_distance=%0.2f"%obstacle_min_range) obstacle_angle = np.argmin(scan_range) #给出最小值的下标 找到障碍物在哪 print("min_distance_index=%d"%obstacle_angle) if min_range > min(scan_range) > 0: done = True #撞到done置为True # 返回所有参数的平方和的平方根 current_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.2: #目标与机器人的距离 self.get_goalbox = True return scan_range + [heading, current_distance, obstacle_min_range, obstacle_angle], done def setReward(self, state, done, action): current_distance = state[-3] heading = state[-4] angle = -pi / 4 + heading + (pi / 8) + pi / 2 tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0]) distance_rate = 2 ** (current_distance / self.goal_distance) # if obstacle_min_range < 0.5: # ob_reward = -5 # else: # ob_reward = 0 reward = -2 * (distance_rate+tr) #print("setReward:") #print(reward) if done: rospy.loginfo("Collision!!") reward = -200 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") reward = 800 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition(True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action): max_angular_vel = 1.5 ang_vel = action vel_cmd = Twist() #指线速度角速度的消息类型,通常是用在运动话题/cmd_vel中,被base controller节点监听 vel_cmd.linear.x = 0.15 #线速度 vel_cmd.angular.z = round(ang_vel,2) #角速度 print("ang_val=%0.2f" %ang_vel) self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, action) print("step_reward:%.2f"%reward) return np.asarray(state), reward, done #states,reward,isget def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False self.goal_distance = self.getGoalDistace() state, done= self.getState(data) return np.asarray(state)
class Env(): def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.goal_numbers = 10 self.collision_numbers = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.13 # bateu collision = False goal = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): scan_range.append(3.5) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i]) if min_range > min(scan_range) > 0: collision = True current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.2: goal = True return scan_range, current_distance, collision, goal def shutdown(self): rospy.loginfo("Terminado atividade do TurtleBot") self.pub_cmd_vel.publish(Twist()) rospy.sleep(1) rospy.signal_shutdown("The End") def report_goal_distance(self, distance, collision, goal): if collision: rospy.loginfo("Collision!!") self.collision_numbers += 1 if goal: rospy.loginfo("Goal!!") self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() distance = self.goal_distance rospy.loginfo("Target position: x-> %s, y-> %s!!", self.goal_x, self.goal_y) self.goal_numbers -= 1 rospy.loginfo( "Number of targets %s / distance to curent goal %s / collission number %s", self.goal_numbers, distance, self.collision_numbers) def step(self, action): liner_vel = action[0] ang_vel = action[1] if self.goal_numbers == 0: self.shutdown() vel_cmd = Twist() vel_cmd.linear.x = liner_vel vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, distance, collision, goal = self.getState(data) self.report_goal_distance(distance, collision, goal) return np.asarray(state) def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False rospy.loginfo("Target position: x-> %s, y-> %s!!", self.goal_x, self.goal_y) self.goal_distance = self.getGoalDistace() state, distance, collision, goal = self.getState(data) self.report_goal_distance(distance, collision, goal) return np.asarray(state)
class Env(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() def getGoalDistace(self): # function called when reset or goal achieved # meaning goal_dist only calculated for the beginning of episode goal_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): # Odometry : using data from sensors to calculate the robot's change in position over time self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation # not sure what orientation.w is orientation_list = [orientation.x, orientation.y, orientation.z, orientation.w] # yaw means rotation about the z-axis : rotating right or left # euler_from_quarternion : change from [x,y,z,w] to [roll, pitch, yaw] _, _, yaw = euler_from_quaternion(orientation_list) # which direction the robot should rotate to in order to reach the goal # somewhat like calculation of arctan with some conditional values goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) # is it going in the right direction? heading = goal_angle - yaw # too much off to the right? if heading > pi: # correction seems way~~~~ to big # is there going to be some adjustments? # OR # it could be that the robot only can do heading : within range of pi and -pi # so correcting the numbers just incase heading -= 2 * pi # too much off to the left elif heading < -pi: heading += 2 * pi # yeah but lets keep the floating point simple self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading # proximity to obstacle for the bot to collide # (edit1) min_range from 0.13 to 0.26 min_range = 0.26 # done means collision done = False # scanned by robot for i in range(len(scan.ranges)): # nothing within range if scan.ranges[i] == float('Inf'): # nothing within range 3.5 scan_range.append(3.5) # so close that dist = NAN # this would mean not just collision but also fusion, combining of the bot and the obstacle elif np.isnan(scan.ranges[i]): scan_range.append(0) # otherwise just use the scan results else: scan_range.append(scan.ranges[i]) # out of the scanned distances, which one shows how close the bot is to an obstacle? obstacle_min_range = round(min(scan_range), 2) # and in which direction? # for direction, just track which laser gave the min result by argmin obstacle_angle = np.argmin(scan_range) # yeah so condition for collision # is min(scan_range) > 0: necessary? debugging efforts for scan_range of negative or 0 which shouldn't occur? # (edit1) from min(scan_range) > 0 to min(scan_range) >= 0 if min_range > min(scan_range) >= 0: done = True # so current distance would mean how much further to go for the GOAL current_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y),2) # if close enough, let it be a GOAL if current_distance < 0.2: self.get_goalbox = True # concatenation with scan_range and heading, ... , obstanle_angle to return as STATE return scan_range + [heading, current_distance, obstacle_min_range, obstacle_angle], done def setReward(self, state, done, action): yaw_reward = [] # state = [scan_range ...., heading, current_distance, obstacle_min_range, obstacle_angle] obstacle_min_range = state[-2] current_distance = state[-3] heading = state[-4] # five times for five actions # ACTION # linear velocity is always 0.15m/s # angular velocity = { 0 : -1.5rad/s, 1 : -0.75rad/s, 2 : 0rad/s, 3 : 0.75rad/s, 4 : 1.5rad/s} for i in range(5): # angle # 0 : heading + pi/4 # 1 : heading + 3pi/8 # 2 : heading + pi/2 # 3 : heading + 5pi/8 # 3 : heading + 3pi/4 # note heading is within [-pi, pi] angle = -pi / 4 + heading + (pi / 8 * i) + pi / 2 # fabs = floation absolute value # modf = tuple of fraction and integer part # hmm basically seems to be correcting the equation for angle, # so remaining would be something like # tr = 1 - 4 * (heading + (pi / 8 * i) corrected to the appropriate units) # so it would result as a table that shouls # action (rotation) and how the heading is in the right direction # totally on track : tr = 1, # off track : tr < 1 or even tr < 0 tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0]) yaw_reward.append(tr) # the closer to GOAL, to smaller this value # exponentially distance_rate = 2 ** (current_distance / self.goal_distance) # if too close to obstacle, negative reward if obstacle_min_range < 0.5: ob_reward = -5 else: ob_reward = 0 # so reward for each action would be returned as, # on track? how far? safe distance from obstacles? reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate) + ob_reward # or just extreme values in the case of collision if done: rospy.loginfo("Collision!!") reward = -500 self.pub_cmd_vel.publish(Twist()) ### no need to do anything after collision? ### get state always resets done = False, but is that enough to prevent getting stuck? ### such implementation could be in dqn.py # and goal # note that immediately after goal, # it respawns, goal dist calculated, and goal = false which means goal box should be found again ### is this enough for reset? if self.get_goalbox: rospy.loginfo("Goal!!") reward = 1000 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition(True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action): max_angular_vel = 1.5 # action_size defined in dqn.py ang_vel = ((self.action_size - 1)/2 - action) * max_angular_vel * 0.5 vel_cmd = Twist() # fixed linear vel as commented above vel_cmd.linear.x = 0.15 vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: ### if my laptop is too slow, would increasing the timeout prevent errors? data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, action) return np.asarray(state), reward, done def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass # with condition, goal is resetted if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False self.goal_distance = self.getGoalDistace() state, done = self.getState(data) return np.asarray(state)
class Env(): def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher( '/hydrone_aerial_underwater/cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber( '/hydrone_aerial_underwater/ground_truth/odometry', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_world', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.past_distance = 0. self.stopped = 0 #Keys CTRL + c will stop script rospy.on_shutdown(self.shutdown) def shutdown(self): #you can stop turtlebot by publishing an empty Twist #message rospy.loginfo("Stopping Simulation") self.pub_cmd_vel.publish(Twist()) rospy.sleep(1) def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) self.past_distance = goal_distance return goal_distance def getOdometry(self, odom): self.past_position = copy.deepcopy(self.position) self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) #print 'yaw', yaw #print 'gA', goal_angle heading = goal_angle - yaw #print 'heading', heading if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 3) def getState(self, scan, past_action): scan_range = [] heading = self.heading min_range = 0.135 done = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): scan_range.append(3.5) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i]) if min_range > min(scan_range) > 0: done = True for pa in past_action: scan_range.append(pa) current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.25: self.get_goalbox = True # print(heading, current_distance) return scan_range + [heading, current_distance], done def setReward(self, state, scan, done): current_distance = state[-1] heading = state[-2] #print('cur:', current_distance, self.past_distance) reward = 0 # distance_rate = (self.past_distance - current_distance) # if distance_rate > 0: # # reward = 200.*distance_rate # reward = 1 # if (abs(self.heading) < math.pi/2): # if abs(self.heading) <= 0.01: # self.heading = 0.01 # if abs(1.0/(self.heading)) < 10.0 and (scan.ranges[0] > current_distance): # reward += abs(1.0/(self.heading))/scan.ranges[0] # elif abs(1.0/(self.heading)) > 10.0 and (scan.ranges[0] > current_distance): # reward += 10.0/scan.ranges[0] # if (current_distance < 1.0): # reward += 1.0/current_distance # # if (current_distance >= 1.0): # # reward += -0.1*current_distance # print(abs(1.0/(self.heading)), scan.ranges[0], current_distance) # print(reward) # # # if distance_rate == 0: # # # reward = 0. # if distance_rate <= 0: # # reward = -8. # reward = -1.0/current_distance. # print(reward) #angle_reward = math.pi - abs(heading) #print('d', 500*distance_rate) #reward = 500.*distance_rate #+ 3.*angle_reward # self.past_distance = current_distance # a, b, c, d = float('{0:.3f}'.format(self.position.x)), float('{0:.3f}'.format(self.past_position.x)), float('{0:.3f}'.format(self.position.y)), float('{0:.3f}'.format(self.past_position.y)) # if a == b and c == d: # # rospy.loginfo('\n<<<<<Stopped>>>>>\n') # # print('\n' + str(a) + ' ' + str(b) + ' ' + str(c) + ' ' + str(d) + '\n') # self.stopped += 1 # print(self.stopped) # if self.stopped == 20: # rospy.loginfo('Robot is in the same 10 times in a row') # self.stopped = 0 # done = True data = None while data is None: try: data = rospy.wait_for_message( '/hydrone_aerial_underwater/scan', LaserScan, timeout=5) except: pass # print(data.ranges) if (min(data.ranges) < 0.6): done = True # else: # # rospy.loginfo('\n>>>>> not stopped>>>>>\n') # self.stopped = 0 if done: rospy.loginfo("Collision!!") # reward = -550. reward = -10. self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") # reward = 500. reward = 100. self.pub_cmd_vel.publish(Twist()) if world and target_not_movable: self.reset() self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward, done def step(self, action, past_action): linear_vel_x = action[0] angular_vel_z = action[1] # angular_vel_z = action[2] vel_cmd = Twist() vel_cmd.linear.x = linear_vel_x # vel_cmd.linear.y = linear_vel_y vel_cmd.angular.z = angular_vel_z self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message( '/hydrone_aerial_underwater/scan', LaserScan, timeout=5) except: pass state, done = self.getState(data, past_action) reward, done = self.setReward(state, data, done) return np.asarray(state), reward, done def reset(self): #print('aqui2_____________---') rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message( '/hydrone_aerial_underwater/scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False else: self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() # state, _ = self.getState(data, [0.,0., 0.0]) state, _ = self.getState(data, [0., 0.]) return np.asarray(state)
class Env(): def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.past_distance = 0. #Keys CTRL + c will stop script rospy.on_shutdown(self.shutdown) def shutdown(self): #you can stop turtlebot by publishing an empty Twist #message rospy.loginfo("Stopping TurtleBot") self.pub_cmd_vel.publish(Twist()) rospy.sleep(1) def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) self.past_distance = goal_distance return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) #print('yaw', yaw) #print('gA', goal_angle) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 3) #print('heading', heading) def getState(self, scan, past_action): scan_range = [] heading = self.heading min_range = 0.17 done = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): scan_range.append(3.5) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i]) if min_range > min(scan_range) > 0: done = True for pa in past_action: scan_range.append(pa) current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.2: self.get_goalbox = True return scan_range + [heading, current_distance], done def setReward(self, state, done): current_distance = state[-1] heading = state[-2] #print('cur:', current_distance, self.past_distance) reward = 0. for i in range(10): if state[i] <= 0.4: reward = reward - 20 print("\nwarning!\n") #print(reward) break #print("\n-50\n") distance_rate = (self.past_distance - current_distance) if distance_rate > 0: reward += 300. * distance_rate #if distance_rate == 0: # reward = -10. if distance_rate <= 0: reward -= 8. #print('original',reward) reward -= abs(heading) * 50 #print('new',reward) #print('d', 500*distance_rate) #reward = 500.*distance_rate #+ 3.*angle_reward self.past_distance = current_distance if done: print("collision") rospy.loginfo("Collision!!") reward = -550. self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") reward = 500. self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action, past_action): linear_vel = action[0] ang_vel = action[1] vel_cmd = Twist() vel_cmd.linear.x = linear_vel vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data, past_action) reward = self.setReward(state, done) return np.asarray(state), reward, done def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False else: self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() state, done = self.getState(data, [0., 0.]) return np.asarray(state)
step += 1 if __name__ == '__main__': #comm = MPI.COMM_WORLD #rank = comm.Get_rank() #size = comm.Get_size() rospy.init_node("turtlebot3_ppo_v3", anonymous=None) filename = rospy.get_param('/filename', "autoRL_2") load_ep = rospy.get_param('/load_ep', 800) policy_path = os.path.dirname(os.path.abspath( __file__)) + "/../../../../../../clever/saved_model_ppo/" file = policy_path + filename + '/Stage1_%d' % load_ep print("filename:", file) respawn_goal = Respawn() print("begin to load world") env = TurtlebotWorld(OBS_SIZE, index=1, num_env=NUM_ENV) print("generate the world") reward = None action_bound = [[0, -3], [1, 3]] policy_path = 'policy' # policy = MLPPolicy(obs_size, act_size) policy = CNNPolicy(frames=LASER_HIST, action_space=2) policy.cuda() opt = Adam(policy.parameters(), lr=LEARNING_RATE) mse = nn.MSELoss() if not os.path.exists(policy_path):
class Env(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.goal_reached = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.stage = rospy.get_param('stage_number') def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.13 done = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): scan_range.append(3.5) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i]) obstacle_min_range = round(min(scan_range), 2) obstacle_angle = np.argmin(scan_range) if min_range > min(scan_range) > 0: done = True current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.2: self.get_goalbox = True self.goal_reached = True if self.stage == 1: return scan_range + [heading, current_distance], done else: return scan_range + [ heading, current_distance, obstacle_min_range, obstacle_angle ], done def setReward(self, state, done, action): yaw_reward = [] if self.stage == 1: current_distance = state[-1] heading = state[-2] else: obstacle_min_range = state[-2] current_distance = state[-3] heading = state[-4] additional_term = -pi / 4 * (action[0] - 1) angle = -pi / 4 + heading + additional_term + pi / 2 tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0]) yaw_reward = tr distance_rate = 2**( current_distance / self.goal_distance) if self.goal_distance > 0 else 100 if self.stage == 4: if obstacle_min_range < 0.5: ob_reward = -5 else: ob_reward = 0 reward = ((round(yaw_reward * 5, 2)) * distance_rate) + ob_reward else: reward = ((round(yaw_reward * 5, 2)) * distance_rate) if done: rospy.loginfo("Collision!!") if self.stage == 1: reward = -200 elif self.stage == 4: reward = -500 else: reward = -150 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") if self.stage == 4: reward = 1000 else: reward = 200 self.pub_cmd_vel.publish(Twist()) goal_count = rospy.get_param('goal_count') if goal_count == 4: goal_count += 1 rospy.set_param('goal_count', goal_count) if goal_count < 4: goal_count += 1 rospy.set_param('goal_count', goal_count) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action): #max_angular_vel = 1.5 #ang_vel = ((self.action_size - 1)/2 - action) * max_angular_vel * 0.5 max_angular_vel = 2 min_angular_vel = -2 sigma = (max_angular_vel - min_angular_vel) / 2 mean = (max_angular_vel + min_angular_vel) / 2 ang_vel = sigma * action[0] + mean # max_linear_velocity = 0.5 # min_linear_velocity = 0 # sigma_l = (max_linear_velocity - min_linear_velocity) / 2 # mean_l = (max_linear_velocity + min_linear_velocity) / 2 # linear_velocity = sigma_l * action[1] + mean_l linear_velocity = 0.15 vel_cmd = Twist() vel_cmd.linear.x = linear_velocity vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, action) return np.asarray(state), reward, done def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False self.goal_distance = self.getGoalDistace() state, done = self.getState(data) return np.asarray(state)
class Env(): def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.lastDistance = 0 #QinjieLin self.currentDistance = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.respawn_goal = Respawn() self.resetX = rospy.get_param('/x_pos', 0.0) self.resetY = rospy.get_param('/y_pos', 0.0) self.resetZ = rospy.get_param('/z_pos', 0.0) self.resetYaw = rospy.get_param('/yam_angle', 0.0) self.resetQua = quaternion_from_euler(0.0, 0.0, self.resetYaw) self.startTime = time.time() self.endTime = 0 self.numSuccess = 0 self.thetaStep = rospy.get_param('/theta_step', 0.0) self.thetaDistance = rospy.get_param('/theta_distance', 0.0) self.thetaCollision = rospy.get_param('/theta_collision', 0.0) self.thetaTurning = rospy.get_param('/theta_turning', 0.0) self.thetaClearance = rospy.get_param('/theta_clearance', 0.0) self.thetaGOal = rospy.get_param('/theta_goal', 0.0) self.laserStep = rospy.get_param('/laser_step', 60) def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.13 done = False full_scan_range = [] range_dim = int(self.laserStep) num_dim = int(len(scan.ranges) / range_dim) for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): full_scan_range.append(3.5) elif np.isnan(scan.ranges[i]): full_scan_range.append(0) else: full_scan_range.append(scan.ranges[i]) for i in range(num_dim): begin = i * range_dim end = (i + 1) * range_dim - 1 if (end >= len(scan.ranges)): end = -1 scan_range.append(min(full_scan_range[begin:end])) obstacle_min_range = round(min(scan_range), 2) obstacle_angle = np.argmin(scan_range) if min_range > min(scan_range) > 0: done = True self.lastDistance = self.currentDistance #qinjielin current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) self.currentDistance = current_distance #qinjielin if current_distance < 0.2: self.get_goalbox = True return scan_range + [ heading, current_distance, obstacle_min_range, obstacle_angle ], done # return [heading, current_distance, obstacle_min_range, obstacle_angle], done def setReward(self, state, done, action): current_distance = state[-3] heading = state[-4] obs_min_range = state[-2] rStep = 1 rDistance = -1 * (current_distance) rCollision = 0 if done: rospy.loginfo("Collision!!") rCollision = 1 self.pub_cmd_vel.publish(Twist()) rTurning = -1 * (math.fabs(action[0])) rClearance = obs_min_range rGoal = 0 if self.get_goalbox: rospy.loginfo("Goal!!") self.numSuccess += 1 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance self.get_goalbox = False self.endTime = time.time() exeTime = self.endTime - self.startTime self.startTime = time.time() print("exetime:", exeTime) reward = self.thetaStep * rStep + self.thetaDistance * rDistance + self.thetaCollision * rCollision + self.thetaTurning * rTurning + self.thetaClearance * rClearance + self.thetaGOal * rGoal return reward def step(self, action): max_angular_vel = 2.0 max_linear_vel = 0.3 ang_vel = ((action[0] / 2)) * max_angular_vel #0.15 linear_vel = ((action[1] + 2) / 4) * max_linear_vel #0.15 # ang_vel=action[0] # linear_vel = action[1] if (math.fabs(ang_vel) < 0.5): ang_vel = 0 # if(linear_vel<0.15): # linear_vel = 0 vel_cmd = Twist() vel_cmd.linear.x = linear_vel # change when learning vel_cmd.angular.z = ang_vel # change when learing tansAction = [ang_vel, linear_vel] self.pub_cmd_vel.publish(vel_cmd) rospy.sleep(0.5) #qinjielin data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, tansAction) return np.asarray(state), reward, done def reset(self): # print("waiting service") # rospy.wait_for_service('gazebo/reset_simulation') # # print("got service") # try: # self.reset_proxy() # except (rospy.ServiceException) as e: # print("gazebo/reset_simulation service call failed") self.reset_turtlebot3() # print("getting scan") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: # print("getting scan pass") pass # print("got scan") if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False # print("init goal done ") self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance self.startTime = time.time() state, done = self.getState(data) # print("env rest done") return np.asarray(state) def reset_turtlebot3(self): turState = ModelState() turState.model_name = "turtlebot3_burger" turState.pose.position.x = self.resetX turState.pose.position.y = self.resetY turState.pose.position.z = self.resetZ turState.pose.orientation.x = self.resetQua[0] turState.pose.orientation.y = self.resetQua[1] turState.pose.orientation.z = self.resetQua[2] turState.pose.orientation.w = self.resetQua[3] rospy.wait_for_service('/gazebo/set_model_state') set_model_prox = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) set_model_prox(turState) return
class MobileRobotGymEnv(gym.Env): def __init__(self): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.past_distance = 0. self.past_action = np.array([0.,0.]) obs_high = np.concatenate((np.array([3.5] * 10) ,np.array([0.22, 2.0]), np.array([10.]), np.array([10.]) )) obs_low = np.concatenate((np.array([0.] * 10) ,np.array([0.0, -2.0]), np.array([-10.]), np.array([-10.]) )) self.action_space = spaces.Box(low=np.array([0., -2.0]) ,high=np.array([0.22, 2.0]), dtype=np.float32) self.observation_space = spaces.Box(low=obs_low , high=obs_high, dtype=np.float32) self.count_collision, self.count_goal = 0,0 #Keys CTRL + c will stop script rospy.on_shutdown(self.shutdown) def stats(self): return self.count_collision, self.count_goal def shutdown(self): #you can stop turtlebot by publishing an empty Twist rospy.loginfo("Stopping TurtleBot") self.pub_cmd_vel.publish(Twist()) rospy.sleep(1) def getGoalDistace(self): goal_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) self.past_distance = goal_distance return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [orientation.x, orientation.y, orientation.z, orientation.w] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) #print 'yaw', yaw #print 'gA', goal_angle heading = goal_angle - yaw #print 'heading', heading if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 3) def getState(self, scan, type='normalized'): scan_range = [] heading = self.heading min_range = 0.15 done = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): if type=='normal':scan_range.append(3.5) if type=='normalized':scan_range.append(1) # 3.5 / 3.5 elif np.isnan(scan.ranges[i]): scan_range.append(0) else: if type=='normal':scan_range.append(scan.ranges[i]) if type=='normalized':scan_range.append(scan.ranges[i] / 3.5) if min_range > min(scan_range) > 0: done = True for pa in self.past_action: scan_range.append(pa) current_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y),2) if current_distance < 0.4: self.get_goalbox = True return scan_range + [heading, current_distance], done def setReward(self, state, done,type='scaled'): current_distance = state[-1] heading = state[-2] distance_rate = (self.past_distance - current_distance) if distance_rate > 0: if type=='unscaled': reward = 200.*distance_rate elif type=='scaled': reward = 0.2*distance_rate if distance_rate <= 0: if type=='unscaled': reward = -8. elif type=='scaled': reward = -0.01 self.past_distance = current_distance if done: rospy.loginfo("Collision!!") if type=='normal': reward = -500. elif type=='scaled': reward = -1. self.count_collision+=1 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") if type=='normal': reward = 500. elif type=='scaled': reward = 0.95 self.count_goal+=1 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition(True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action): linear_vel = action[0] ang_vel = action[1] vel_cmd = Twist() vel_cmd.linear.x = linear_vel vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) self.past_action = action reward = self.setReward(state, done) return np.asarray(state), reward, done ,{} def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False else: self.goal_x, self.goal_y = self.respawn_goal.getPosition(True, delete=True) self.goal_distance = self.getGoalDistace() state, done = self.getState(data) return np.asarray(state)
class Env(): def __init__(self, ): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.lastDistance = 0 #QinjieLin self.currentDistance = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) # self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) # self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) # self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.resetX = rospy.get_param('/x_pos', 0.0) self.resetY = rospy.get_param('/y_pos', 0.0) self.resetZ = rospy.get_param('/z_pos', 0.0) self.resetYaw = rospy.get_param('/yaw_angle', 0.0) self.resetQua = quaternion_from_euler(0.0, 0.0, self.resetYaw) self.startTime = time.time() self.endTime = 0 self.numSuccess = 0 self.dis_weight = rospy.get_param('/dis_weight', 0.0) self.lin_weight = rospy.get_param('/lin_weight', 0.0) self.obs_weight = rospy.get_param('/obs_weight', 0.0) self.ori_weight = rospy.get_param('/ori_weight', 0.0) self.col_weight = rospy.get_param('/col_weight', 0.0) self.goal_weight = rospy.get_param('/goal_weight', 0.0) self.rot_weight = rospy.get_param('/rot_weight', 0.0) self.laserStep = rospy.get_param('/laser_step', 60) self.stepTime = rospy.get_param('/step_time', 0.5) def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.2 done = False full_scan_range = [] range_dim = int(self.laserStep) num_dim = int(len(scan.ranges) / range_dim) for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): full_scan_range.append(3.5) elif np.isnan(scan.ranges[i]): full_scan_range.append(0) else: full_scan_range.append(scan.ranges[i]) for i in range(num_dim): begin = i * range_dim end = (i + 1) * range_dim - 1 if (end >= len(scan.ranges)): end = -1 scan_range.append(min(full_scan_range[begin:end])) obstacle_min_range = round(min(scan_range), 2) obstacle_angle = np.argmin(scan_range) if min_range > min(scan_range) > 0: done = True self.lastDistance = self.currentDistance #qinjielin current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) self.currentDistance = current_distance #qinjielin if current_distance < 0.2: self.get_goalbox = True return scan_range + [ heading, current_distance, obstacle_min_range, obstacle_angle ], done # return [heading, current_distance, obstacle_min_range, obstacle_angle], done def setReward(self, state, done, action): yaw_reward = [] current_distance = state[-3] heading = state[-4] obs_min_range = state[-2] forward_distance = self.lastDistance - current_distance distance_reward = forward_distance * 10 if (forward_distance <= 0): distance_reward -= 0.5 distance_reward = distance_reward * self.dis_weight rot_reward = 0 if (math.fabs(action[0]) > 0.7): rot_reward = -0.5 * math.fabs(action[0]) * self.rot_weight ori_reward = 0 ori_cos = math.cos(heading) if ((math.fabs(heading) < 1.0) and (forward_distance > 0)): ori_reward = ori_cos * 0.2 * self.ori_weight lin_reward = 0 if ((action[1] >= 0.4) and (forward_distance > 0)): lin_reward = action[1] * 0.5 * self.lin_weight obs_reward = 0 if ((obs_min_range > 0.2) and (obs_min_range < 2.0)): obs_reward -= (2.0 - obs_min_range) * 1 * self.obs_weight #0.3 reward = distance_reward + rot_reward + lin_reward + ori_reward + obs_reward # print("state:",np.round(state,2)) # print("choose action:",np.round(action,3)) # print("move:",round(distance_reward,3),"rot:",round(rot_reward,3), "ori:",round(ori_reward,2),"obs:",round(obs_reward,2),"total:",round(reward,3)) if done: rospy.loginfo("Collision!!") reward = -10 * self.col_weight #-15 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") self.numSuccess += 1 reward = 10 * self.goal_weight #15 self.pub_cmd_vel.publish(Twist()) self.reset() self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance self.get_goalbox = False self.endTime = time.time() exeTime = self.endTime - self.startTime self.startTime = time.time() print("exetime:", exeTime) return reward def step(self, action): max_angular_vel = 2.0 max_linear_vel = 0.6 ang_vel = ((action[0] / 2)) * max_angular_vel #0.15 linear_vel = ((action[1] + 2) / 4) * max_linear_vel #0.15 # ang_vel=action[0] # linear_vel = action[1] if (math.fabs(ang_vel) < 0.5): ang_vel = 0 # if(linear_vel<0.15): # linear_vel = 0 vel_cmd = Twist() vel_cmd.linear.x = linear_vel # change when learning vel_cmd.angular.z = ang_vel # change when learing tansAction = [ang_vel, linear_vel] self.pub_cmd_vel.publish(vel_cmd) # rospy.sleep(0.5) rospy.sleep(self.stepTime) #qinjielin vel_cmd = Twist() self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, tansAction) return np.asarray(state), reward, done def reset(self): # print("waiting service") # rospy.wait_for_service('gazebo/reset_simulation') # # print("got service") # try: # self.reset_proxy() # except (rospy.ServiceException) as e: # print("gazebo/reset_simulation service call failed") self.stop_turtlebot3() # print("getting scan") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: # print("getting scan pass") pass # print("got scan") if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False # print("init goal done ") self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance self.startTime = time.time() state, done = self.getState(data) # print("env rest done") return np.asarray(state) def reset_turtlebot3(self): turState = ModelState() turState.model_name = "turtlebot3_burger" turState.pose.position.x = self.resetX turState.pose.position.y = self.resetY turState.pose.position.z = self.resetZ turState.pose.orientation.x = self.resetQua[0] turState.pose.orientation.y = self.resetQua[1] turState.pose.orientation.z = self.resetQua[2] turState.pose.orientation.w = self.resetQua[3] rospy.wait_for_service('/gazebo/set_model_state') set_model_prox = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) set_model_prox(turState) return def stop_turtlebot3(self): self.pub_cmd_vel.publish(Twist())
class Env: """ Environment class that handles the connection to the Gazebo engine Also implements the necessary functions for a RL environment """ def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.current_goal_distance = None self.previous_goal_distance = None self.action_size = action_size self.initGoal = True self.goal_reached = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.get_odometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() self.start_goal_distance = 0 def get_goal(self): return [self.goal_x, self.goal_y] def get_position(self): return [self.position.x, self.position.y] def get_goal_distance(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def get_odometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) self.current_goal_distance = self.get_goal_distance() def get_state(self, scan, reset_if_wall_hit=False): done = False min_range = 0.13 scan_normalize_const = 3.5 goal_distance_normalize = 2 heading_normalize = math.pi scan_range = [] n_scan_ranges = len(scan.ranges) for i in range(n_scan_ranges): if scan.ranges[i] == float('Inf'): scan_range.append(1) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i] / scan_normalize_const) if reset_if_wall_hit and min_range / scan_normalize_const > min( scan_range) > 0: done = True if self.current_goal_distance < 0.2: self.goal_reached = True if not reset_if_wall_hit: done = True heading = self.heading / heading_normalize current_goal_distance = self.current_goal_distance / goal_distance_normalize obstacle_min_range = round(min(scan_range) / scan_normalize_const, 2) obstacle_angle = np.argmin(scan_range) / n_scan_ranges * 2 - 1 return scan_range + [ heading, current_goal_distance, obstacle_min_range, obstacle_angle ], done def get_reward(self, state, done, action): reward = reward_service.punish_no_sparse(self.goal_reached, self.get_goal_distance()) if done: rospy.loginfo("Collision!!") self.pub_cmd_vel.publish(Twist()) if self.goal_reached: rospy.loginfo("Goal!!") self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.get_position( True, delete=True) self.start_goal_distance = self.get_goal_distance() self.previous_goal_distance = self.start_goal_distance self.goal_reached = False return reward def step(self, action): max_angular_vel = 1.5 ang_vel = ((self.action_size - 1) / 2 - action) * max_angular_vel * 0.5 vel_cmd = Twist() if action == 0 or action == 4: vel_cmd.linear.x = 0 else: vel_cmd.linear.x = 0.15 vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.get_state(data) reward = self.get_reward(state, done, action) return np.asarray(state), reward, done def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except rospy.ServiceException as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.get_position() self.initGoal = False self.start_goal_distance = self.get_goal_distance() self.previous_goal_distance = self.start_goal_distance state, done = self.get_state(data) return np.asarray(state)
class Env(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.lastDistance = 0 #QinjieLin self.currentDistance = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): # scan_range = [] # heading = self.heading # min_range = 0.13 # done = False # # for i in range(len(scan.ranges)): # if scan.ranges[i] == float('Inf'): # scan_range.append(3.5) # elif np.isnan(scan.ranges[i]): # scan_range.append(0) # else: # scan_range.append(scan.ranges[i]) # # obstacle_min_range = round(min(scan_range), 2) # obstacle_angle = np.argmin(scan_range) # if min_range > min(scan_range) > 0: # done = True # # current_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) # if current_distance < 0.2: # self.get_goalbox = True scan_range = [] heading = self.heading min_range = 0.13 done = False full_scan_range = [] range_dim = 30 num_dim = int(len(scan.ranges) / range_dim) for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): full_scan_range.append(3.5) elif np.isnan(scan.ranges[i]): full_scan_range.append(0) else: full_scan_range.append(scan.ranges[i]) for i in range(num_dim): begin = i * range_dim end = (i + 1) * range_dim - 1 if (end >= len(scan.ranges)): end = -1 scan_range.append(min(full_scan_range[begin:end])) obstacle_min_range = round(min(scan_range), 2) obstacle_angle = np.argmin(scan_range) if min_range > min(scan_range) > 0: done = True current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) self.currentDistance = current_distance #qinjielin if current_distance < 0.2: self.get_goalbox = True return scan_range + [ heading, current_distance, obstacle_min_range, obstacle_angle ], done def setReward(self, state, done, action): yaw_reward = [] current_distance = state[-3] heading = state[-4] # for i in range(5): # angle = -pi / 4 + heading + (pi / 8 * i) + pi / 2 # tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0]) # yaw_reward.append(tr) distance_punish = 0 forward_distance = self.lastDistance - current_distance distance_reward = (self.goal_distance - current_distance) * 10 if (forward_distance) < 0: #qinjielin distance_punish = forward_distance * 5 distance_reward = distance_reward + distance_punish rot_reward = math.cos(heading) * 5 rot_punish = -1 * math.fabs(action) * 5 rot_reward = rot_reward + rot_punish reward = distance_reward + rot_reward print("move_reward:", distance_reward, "rot_reward:", rot_reward, "total_reward:", reward) # distance_rate = 2 ** (current_distance / self.goal_distance) # reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate) if done: rospy.loginfo("Collision!!") reward = -150 #-150 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") reward = 200 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance self.get_goalbox = False return reward def step(self, action): self.lastDistance = self.currentDistance max_angular_vel = 1.5 # ang_vel = ((self.action_size - 1)/2 - action) * max_angular_vel * 0.5 ang_vel = action vel_cmd = Twist() vel_cmd.linear.x = 0.15 vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) rospy.sleep(0.8) #qinjielin data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, action) return np.asarray(state), reward, done def reset(self): # print("waiting service") rospy.wait_for_service('gazebo/reset_simulation') # print("got service") try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") # print("getting scan") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: # print("getting scan pass") pass # print("got scan") if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False # print("init goal done ") self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance state, done = self.getState(data) # print("env rest done") return np.asarray(state)
class Env(): def __init__(self, action_size): self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.action_size = action_size self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5) self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry) self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty) self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty) self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty) self.respawn_goal = Respawn() def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.15 # changed from 0.13 to 0.15 done = False for i in range(len(scan.ranges)): if scan.ranges[i] == float('Inf'): scan_range.append(3.5) elif np.isnan(scan.ranges[i]): scan_range.append(0) else: scan_range.append(scan.ranges[i]) if min_range > min(scan_range) > 0: done = True current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) if current_distance < 0.2: self.get_goalbox = True return scan_range + [heading, current_distance], done def setReward(self, state, done, action): yaw_reward = [] current_distance = state[-1] heading = state[-2] for i in range(5): angle = -pi / 4 + heading + (pi / 8 * i) + pi / 2 tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0]) yaw_reward.append(tr) distance_rate = 2**(current_distance / self.goal_distance) reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate) ### # rospy.loginfo("------ Reward is: ["+str(reward)+"] ----------------") ### if done: rospy.loginfo("Collision!!") reward = -200 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") reward = 200 self.pub_cmd_vel.publish(Twist()) self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.get_goalbox = False return reward def step(self, action): max_angular_vel = 1.5 ang_vel = ((self.action_size - 1) / 2 - action) * max_angular_vel * 0.5 vel_cmd = Twist() vel_cmd.linear.x = 0.15 vel_cmd.angular.z = ang_vel self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, action) return np.asarray(state), reward, done def reset(self): rospy.wait_for_service('gazebo/reset_simulation') try: self.reset_proxy() except (rospy.ServiceException) as e: print("gazebo/reset_simulation service call failed") data = None while data is None: try: data = rospy.wait_for_message('scan', LaserScan, timeout=5) except: pass if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False self.goal_distance = self.getGoalDistace() state, done = self.getState(data) return np.asarray(state)
class Env(): def __init__(self, index=-1): if (index == -1): print("########## index == -1 ##########") self.cmd_topic = "/cmd_vel" self.odom_topic = "/odom" self.scan_topic = "/base_scan" self.crash_topic = "/is_crashed" else: self.cmd_topic = "robot_" + str(index) + "/cmd_vel" self.odom_topic = "robot_" + str(index) + "/odom" self.scan_topic = "robot_" + str(index) + "/base_scan" self.crash_topic = "robot_" + str(index) + "/is_crashed" self.reset_service = 'reset_positions' self.goal_x = 0 self.goal_y = 0 self.heading = 0 self.lastDistance = 0 self.currentDistance = 0 self.initGoal = True self.get_goalbox = False self.position = Pose() self.pub_cmd_vel = rospy.Publisher(self.cmd_topic, Twist, queue_size=5) self.sub_odom = rospy.Subscriber(self.odom_topic, Odometry, self.getOdometry) self.sub_crash = rospy.Subscriber(self.crash_topic, Int8, self.crash_callback) self.reset_stage = rospy.ServiceProxy(self.reset_service, Empty) self.respawn_goal = Respawn() self.resetX = rospy.get_param('/x_pos', 0.0) self.resetY = rospy.get_param('/y_pos', 0.0) self.resetZ = rospy.get_param('/z_pos', 0.0) self.resetYaw = rospy.get_param('/yaw_angle', 0.0) self.resetQua = quaternion_from_euler(0.0, 0.0, self.resetYaw) self.startTime = time.time() self.endTime = 0 self.numSuccess = 0 self.dis_weight = rospy.get_param('/dis_weight', 0.0) self.lin_weight = rospy.get_param('/lin_weight', 0.0) self.obs_weight = rospy.get_param('/obs_weight', 0.0) self.ori_weight = rospy.get_param('/ori_weight', 0.0) self.col_weight = rospy.get_param('/col_weight', 0.0) self.goal_weight = rospy.get_param('/goal_weight', 0.0) self.rot_weight = rospy.get_param('/rot_weight', 0.0) self.laserStep = rospy.get_param('/laser_step', 60) self.stepTime = rospy.get_param('/step_time', 0.5) self.realWorldFlag = rospy.get_param('/real_world') self.simScale = 500.0 if (self.realWorldFlag): self.simScale = 1.0 rospy.sleep(1.) def crash_callback(self, flag): self.is_crashed = flag.data def get_crash_state(self): return self.is_crashed def getGoalDistace(self): goal_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) return goal_distance def getOdometry(self, odom): self.position = odom.pose.pose.position orientation = odom.pose.pose.orientation orientation_list = [ orientation.x, orientation.y, orientation.z, orientation.w ] _, _, yaw = euler_from_quaternion(orientation_list) goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x) heading = goal_angle - yaw if heading > pi: heading -= 2 * pi elif heading < -pi: heading += 2 * pi self.heading = round(heading, 2) def getState(self, scan): scan_range = [] heading = self.heading min_range = 0.3 done = False full_scan_range = [] range_dim = int(self.laserStep) num_dim = int(len(scan.ranges) / range_dim) # transform because the laser is different from real world and gazebo tranformed_scan = range(360) tranformed_scan[0:180] = scan.ranges[180:360] tranformed_scan[180:360] = scan.ranges[0:180] for i in range(len(tranformed_scan)): if tranformed_scan[i] == float('Inf'): full_scan_range.append(3.5) elif tranformed_scan[i] == 0.0: full_scan_range.append(3.5) elif np.isnan(tranformed_scan[i]): full_scan_range.append(0) else: full_scan_range.append(tranformed_scan[i]) for i in range(num_dim): begin = i * range_dim end = (i + 1) * range_dim - 1 if (end >= len(tranformed_scan)): end = -1 scan_range.append(min(full_scan_range[begin:end])) obstacle_min_range = round(min(scan_range), 2) obstacle_angle = np.argmin(scan_range) is_crash = self.get_crash_state() if (is_crash == 1): done = True self.lastDistance = self.currentDistance #qinjielin current_distance = round( math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2) self.currentDistance = current_distance #qinjielin if current_distance < 0.2: self.get_goalbox = True return scan_range + [ heading, current_distance, obstacle_min_range, obstacle_angle ], done def setReward(self, state, done, action): yaw_reward = [] current_distance = state[-3] heading = state[-4] obs_min_range = state[-2] forward_distance = self.lastDistance - current_distance distance_reward = forward_distance * 10 if (forward_distance <= 0): distance_reward -= 0.5 distance_reward = distance_reward * self.dis_weight rot_reward = 0 if (math.fabs(action[0]) > 0.7): rot_reward = -0.5 * math.fabs(action[0]) * self.rot_weight ori_reward = 0 ori_cos = math.cos(heading) if ((math.fabs(heading) < 1.0) and (forward_distance > 0)): ori_reward = ori_cos * 0.2 * self.ori_weight lin_reward = 0 if ((action[1] >= 0.4) and (forward_distance > 0)): lin_reward = action[1] * 0.5 * self.lin_weight obs_reward = 0 if ((obs_min_range > 0.2) and (obs_min_range < 2.0)): obs_reward -= (2.0 - obs_min_range) * 1 * self.obs_weight #0.3 reward = distance_reward + rot_reward + lin_reward + ori_reward + obs_reward if done: rospy.loginfo("Collision!!") reward = -10 * self.col_weight #-15 self.pub_cmd_vel.publish(Twist()) if self.get_goalbox: rospy.loginfo("Goal!!") self.numSuccess += 1 reward = 10 * self.goal_weight #15 self.pub_cmd_vel.publish(Twist()) self.reset() self.goal_x, self.goal_y = self.respawn_goal.getPosition( True, delete=True) self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance self.get_goalbox = False self.endTime = time.time() exeTime = self.endTime - self.startTime self.startTime = time.time() print("exetime:", exeTime) return reward def step(self, action): max_angular_vel = 2.0 max_linear_vel = 0.6 scale = self.simScale ang_vel = ((action[0] / 2)) * max_angular_vel #0.15 linear_vel = ((action[1] + 2) / 4) * max_linear_vel #0.15 if (math.fabs(ang_vel) < 0.5): ang_vel = 0 vel_cmd = Twist() vel_cmd.linear.x = linear_vel #* scale # change when learning vel_cmd.angular.z = ang_vel #*scale # change when learing transAction = [linear_vel, ang_vel] self.pub_cmd_vel.publish(vel_cmd) # rospy.sleep(0.001) rospy.sleep(self.stepTime / scale) # time.sleep(self.stepTime) # vel_cmd = Twist() # if(not self.realWorldFlag): # self.pub_cmd_vel.publish(vel_cmd) data = None while data is None: try: data = rospy.wait_for_message(self.scan_topic, LaserScan, timeout=5) except: pass state, done = self.getState(data) reward = self.setReward(state, done, transAction) # print("state in env:",state) # print("action:",transAction) return np.asarray(state), reward, done def reset(self): # print("waiting service") # rospy.wait_for_service('gazebo/reset_simulation') # # print("got service") # try: # self.reset_proxy() # except (rospy.ServiceException) as e: # print("gazebo/reset_simulation service call failed") if (self.realWorldFlag): print("just stop turtlebot in reset!") self.stop_turtlebot3() else: print("reset turtlebot in reset!") self.reset_turtlebot3() # print("getting scan") data = None while data is None: try: data = rospy.wait_for_message(self.scan_topic, LaserScan, timeout=5) except: # print("getting scan pass") pass # print("got scan") if self.initGoal: self.goal_x, self.goal_y = self.respawn_goal.getPosition() self.initGoal = False # print("init goal done ") self.goal_distance = self.getGoalDistace() self.currentDistance = self.goal_distance #qinjielin self.lastDistance = self.currentDistance self.startTime = time.time() state, done = self.getState(data) return np.asarray(state) def reset_turtlebot3(self): self.reset_stage() rospy.sleep(0.5) return def stop_turtlebot3(self): self.pub_cmd_vel.publish(Twist())