def reward(prev_state, new_state):
    # We will primarily calculate initial reward
    # based on distance between leader and the flying bird
    # Calculate the previous distance
    old_learner_loc = prev_state[0]
    old_leader_loc = prev_state[1]
    distance_old = distance(old_learner_loc, old_leader_loc)
    # Calculate new distance
    new_learner_loc = new_state[0]
    new_leader_loc = new_state[1]
    distance_new = distance(new_learner_loc, new_leader_loc)
    #distance_new = distance(new_learner_loc, old_leader_loc)
    reward = 0
    # Penalize crashing into the leader, reward closing the gap,
    # and penalize falling further behind
    if distance_new < crashdistance:
        #reward = -600 * (1 / distance_new)
        reward = -600
        #print reward
    elif distance_old > distance_new:
        #reward = distance_new / float(8)
        reward = 10
        # reward += (1/distance_new)
    elif distance_old < distance_new:
        #reward = - distance_new / float(2)
        reward = -5
    return reward
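# The reward and isfollowing functions in this listing rely on a module-level
# distance() helper and a crashdistance constant that are not shown here.
# The sketch below is an illustrative assumption (Euclidean distance over
# (x, y, angle) state tuples and an arbitrary crash radius), not the
# project's actual definitions.
import math

crashdistance = 25  # assumed crash radius in world units


def distance(loc_a, loc_b):
    # Euclidean distance between two (x, y, ...) location tuples;
    # any extra entries (e.g. the heading angle) are ignored.
    return math.sqrt((loc_a[0] - loc_b[0]) ** 2 + (loc_a[1] - loc_b[1]) ** 2)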
def isfollowing(state, newState):
    # Define following as being within 20-30 units
    # See if it took a step toward the leader correctly
    oldDist = distance(state[0], state[1])
    newDist = distance(newState[0], newState[1])
    follow = 0
    if oldDist >= newDist:
        follow = 1
    # Check the alignment of the boid's heading with the leader's
    angleBoid = (newState[0][2] + 90) % 360
    if angleBoid > 180:
        angleBoid = 180 - (angleBoid % 180)
    angleLeader = newState[1][2]
    if angleLeader > 180:
        angleLeader = 180 - (angleLeader % 180)
    angleDif = math.fabs(angleBoid - angleLeader)
    # Check whether we hit the leader
    crash = 0
    if distance(newState[0], newState[1]) < crashdistance - 2:
        crash = 1
    # Check if we maintained a good follow distance
    # (i.e. stayed just past crashdistance, within crashdistance + 10)
    stay_follow = 0
    if distance(newState[0], newState[1]) > crashdistance - 2 and distance(
            newState[0], newState[1]) <= crashdistance + 10:
        stay_follow = 1
    return (follow, crash, stay_follow, angleDif)
def reward(prev_state, new_state):
    # We will primarily calculate initial reward
    # based on distance between leader and the flying bird
    # Calculate the previous distance
    old_learner_loc = prev_state[0]
    old_leader_loc = prev_state[1]
    distance_old = distance(old_learner_loc, old_leader_loc)
    # Calculate new distance
    new_learner_loc = new_state[0]
    new_leader_loc = new_state[1]
    distance_new = distance(new_learner_loc, new_leader_loc)
    reward = 0
    # Base reward on how the distance changes
    if distance_new < crashdistance:
        reward = -2 * (1 / distance_new)
    elif distance_old > distance_new:
        reward = distance_new / float(8)
        # reward += (1/distance_new)
    elif distance_old < distance_new:
        reward = -distance_new / float(2)
        # reward -= (1/distance_new)
    # Large penalty for leaving the valid flying area
    if not allGood(new_learner_loc[0], new_learner_loc[1]):
        return -1000
    return reward
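# The reward() variant above also calls an allGood(x, y) check that is not
# defined in this listing. A minimal sketch, assuming it simply verifies that
# the learner stays inside a WIDTH x HEIGHT world (both names and values are
# illustrative assumptions, not the original configuration):

WIDTH, HEIGHT = 800, 600  # assumed world dimensions


def allGood(x, y):
    # True while the position is still inside the world bounds.
    return 0 <= x <= WIDTH and 0 <= y <= HEIGHT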
def reward(prev_state, new_state):
    # Unpack the states
    boid, leader, birds, velocity = prev_state
    update_boid, update_leader, update_birds, velocity = new_state
    # Calculate the distance to the leader before the step
    dist_leader = distanceObj(boid, leader)
    # Get distance to the centroid of the flock
    dist = dist_leader
    avg_x = leader.x
    avg_y = leader.y
    for bird in birds:
        avg_x += bird.x
        avg_y += bird.y
    centroid = (avg_x / float(len(birds) + 1), avg_y / float(len(birds) + 1))
    # Distance to centroid before action
    dist_center = distance((boid.x, boid.y), centroid)
    # Updated distance to leader
    updated_distance = distanceObj(update_boid, update_leader)
    # Calculate birds that are too close
    # And re-calc centroid
    number_too_close = 0
    close_birds = []
    avg_x = update_leader.x
    avg_y = update_leader.y
    if updated_distance < 30:
        close_birds.append(updated_distance)
    bird_distances = []
    for bird in update_birds:
        new_dist = distanceObj(update_boid, bird)
        # See if we are too close
        if new_dist < 30:
            number_too_close += 1
            close_birds.append(new_dist)
        avg_x += bird.x
        avg_y += bird.y
    updated_centroid = (avg_x / float(len(birds) + 1), avg_y / float(len(birds) + 1))
    # Start calculating reward
    # Give a big negative reward if we are too close, and return early
    reward = 0
    if len(close_birds) > 0:
        #reward = -200 / float(close_birds[0])
        reward = -20
        # Two CLOSE birds!!!
        if len(close_birds) > 1:
            #reward -= 200 / float(close_birds[1])
            reward *= 2
        return reward
    # Move toward the centroid
    updated_dist_center = distance((update_boid.x, update_boid.y), updated_centroid)
    #updated_dist_center = distance((update_boid.x, update_boid.y), centroid)
    #if dist_leader < 50:
    if updated_dist_center <= dist_center:
        #reward += 1 + math.fabs(updated_dist_center - dist_center)
        reward += 2
    else:
        reward += 0
    # Move toward the leader with more importance than the center
    updated_distance = distanceObj(update_boid, update_leader)
    #updated_distance = distanceObj(update_boid, leader)
    #if dist_leader >= 50:
    if dist_leader >= updated_distance:
        #reward += 5 + 3 * math.fabs(updated_distance - dist_leader)
        #reward += 10
        #reward += 3 + math.fabs(updated_distance - dist_leader)
        reward += 5
        #reward += 100
    else:
        #reward -= 2 - 2 * math.fabs(updated_distance - dist_leader)
        #reward -= 1 + math.fabs(updated_distance - dist_leader)
        reward -= 1
        #reward -= 5
    return reward
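# The flock-based reward() above measures distances between bird objects via
# distanceObj(), which is not defined in this listing. A minimal sketch,
# assuming each bird exposes x and y attributes and reusing the tuple-based
# distance() helper sketched earlier:

def distanceObj(bird_a, bird_b):
    # Euclidean distance between two bird objects.
    return distance((bird_a.x, bird_a.y), (bird_b.x, bird_b.y))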
def isfollowing(state):
    # Define following as being within 20-30 units
    #if not(distance(state[0], state[1]) > 20 and distance(state[0], state[1]) <= 35):
    #    print distance(state[0], state[1])
    return distance(state[0], state[1]) > crashdistance - 5 and distance(
        state[0], state[1]) <= crashdistance + 15
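# Hypothetical usage sketch (not part of the original code): how one of the
# reward() variants and isfollowing() might be wired into a single learning
# step. The agent and env objects and their methods are illustrative
# assumptions only.

def training_step(agent, env, state):
    action = agent.choose_action(state)        # pick an action for the learner
    new_state = env.step(action)                # advance the simulation one tick
    r = reward(state, new_state)                # score the transition
    agent.update(state, action, r, new_state)   # learning update
    return new_state, isfollowing(new_state)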