def getReward(surface, agent, target, otherAgents, observation, action,
              prevtargetAng, prevtargetDist):
    """Reward for the agent's previous action (shaped, multi-term).

    Args:
        surface: draw surface (unused here; kept for interface compatibility).
        agent, target, otherAgents: robot dicts (need 'size'; positions read
            by dispUtils.getInterAgentDistace).
        observation: laser/goal feature vector; [0:9] are scan ranges,
            [-2] is the angle-to-goal term — assumed magnitudes, TODO confirm.
        action: 0=FORWARD, 1=LEFT, 2=RIGHT, 3=STOP.
        prevtargetAng, prevtargetDist: carried state from the previous step.

    Returns:
        (reward, prevtargetAng, prevtargetDist) with the carried state updated.
    """
    reward = -2  # small per-step cost to discourage dawdling
    if action == 3:  # STOP: only acceptable when an obstacle is close ahead
        if min(observation[3:6]) >= 1:
            reward -= 100
    # FORWARD/LEFT/RIGHT (0/1/2) add nothing — movement itself is neutral.

    # Reward for facing the goal, or for turning toward it since last step
    if observation[-2] < 2 or prevtargetAng > observation[-2]:
        reward += 2
    # NOTE(review): updated unconditionally, matching the disabled shaping
    # code in the other getReward variant — confirm intent.
    prevtargetAng = observation[-2]

    # Agent-to-agent shaping: don't get too close
    for other in otherAgents:
        dist = dispUtils.getInterAgentDistace(agent, other)  # hoisted: was recomputed 4x
        if dist < 200:
            reward -= 5
        if dist < 100:
            # FIX: was 5 + (100 - dist/100)*5, which gave ~497 regardless of
            # distance; intended ramp is 0..5 as dist goes 100 -> 0.
            reward -= 5 + ((100 - dist) / 100) * 5
        # Punish collision (closer than agent['size'] * 2)
        if dist < agent['size'] * 2:
            reward -= 1000

    # Goal and obstacle related reward
    goalDist = dispUtils.getInterAgentDistace(agent, target)
    if goalDist < 50:
        reward += 300   # at goal
    elif goalDist < prevtargetDist - 10:
        reward += 5     # measurably closer to goal
    elif dispUtils.checkCollision(observation[0:9], 55):  # truthiness, not "== True"
        reward -= 1000  # collided with obstacle
    else:
        reward -= 5     # no progress
    prevtargetDist = goalDist

    return reward, prevtargetAng, prevtargetDist
# NOTE(review): fragment of the main simulation loop — the enclosing event
# loop and epoch structure are outside this view; line breaks and nesting
# below are reconstructed from the collapsed paste, tokens unchanged.
sys.exit()  # presumably inside a quit-event handler — TODO confirm nesting

# Draw all the goals and obstacles
for g in range(len(goals)):
    dispUtils.drawGoal(windowSurface, goals[g], agentColours[g % len(agentColours)])
dispUtils.drawObstacles(windowSurface, [0, 0, 0], obstacles)
dispUtils.drawObstacles(windowSurface, map_data[2], map_data[0])

# Simulate each agent until every agent has hit goalPerEpoch+1 goals
if sum(goalsHit) < (goalPerEpoch + 1) * numberOfAgents:
    for i in range(numberOfAgents):
        if goalsHit[i] < goalPerEpoch + 1:
            # Generate the laser scan data for each agent and check for any
            # collisions (args: 180-degree arc, 500 range, 54 beams)
            laserScanData = dispUtils.laserScan(windowSurface, [100, 100, 100], robots[i], agentColours, 10, [-math.pi / 2, math.pi / 2], 500, 54)
            if dispUtils.checkCollision(laserScanData, 55) == True:
                done = True
                isFinal = True
            # print(laserScanData)
            # Create list of other agents (all robots except robots[i])
            others = robots[:i]
            others.extend(robots[(i + 1):])
            # Check for collisions with other robots (closer than size*2)
            for otherRobot in others:
                if dispUtils.getInterAgentDistace(robots[i], otherRobot) < robots[i]['size'] * 2:
                    done = True
                    isFinal = True
            # Draw the robot and create the new observation list
def is_done(self, observation):
    """Mark the episode done when the laser scan indicates a collision.

    Args:
        observation: feature vector whose first 10 entries are the scan
            ranges checked by dispUtils.checkCollision (threshold 55).

    Side effects:
        Sets self.done = True on collision; otherwise leaves it untouched.
    """
    # Truthiness instead of "== True" — assumes checkCollision returns a
    # plain bool, as its use elsewhere in this file suggests.
    # A max-step cutoff (self.curr_step >= self.max_steps) was considered
    # but is not active.
    if dispUtils.checkCollision(observation[0:10], 55):
        self.done = True
def getReward(surface, agent, target, otherAgents, observation, action,
              prevtargetAng, prevtargetDist):
    """Reward for the agent's previous action (second, tuned variant).

    Args:
        surface: draw surface (unused; kept for interface compatibility).
        agent: robot dict (needs 'size'; position read by dispUtils).
        target: goal object for this agent.
        otherAgents: the other robot dicts.
        observation: feature vector — [0:9] scan ranges; indices 12 and 16
            hold distances to the two other agents with relative angle
            ([i+1]) and heading ([i+3]) stored in DEGREES (math.radians is
            applied below); [-3] angle to goal in degrees; [-1] == 1 means
            the goal was reached. Shapes assumed from usage — TODO confirm.
        action: 0=FORWARD, 1=LEFT, 2=RIGHT, 3=STOP.
        prevtargetAng, prevtargetDist: carried state; returned unchanged
            here because the angle/distance shaping terms are disabled.

    Returns:
        (reward, prevtargetAng, prevtargetDist).
    """
    reward = 0
    # Movement itself is neutral; STOP is mildly punished unless something
    # is within 2 units on the scan.
    if action == 3:  # STOP
        if min(observation[0:9]) >= 2:
            reward -= 1

    # --- Is another agent blocking the straight path to the goal? ---
    # pathClear/blockingDist feed shaping terms that are currently disabled;
    # the computation is kept so re-enabling them is a one-line change.
    otherAgentDataIndex = [12, 16]
    pathClear = True
    blockingDist = 100
    for i in otherAgentDataIndex:
        # Half-angle subtended by a 25-unit-wide corridor at the goal distance
        correctAng = abs(math.atan(25 / (dispUtils.getInterAgentDistace(agent, target) + 1)))
        # Angle of the other agent relative to the goal direction, wrapped to [-pi, pi]
        relAngleDif = math.radians(observation[i + 1]) - math.radians(observation[-3])
        if relAngleDif > math.pi:
            relAngleDif = relAngleDif - 2 * math.pi
        elif relAngleDif < -math.pi:
            relAngleDif = relAngleDif + 2 * math.pi
        lateral = observation[i] * math.sin(relAngleDif)
        forward = observation[i] * math.cos(relAngleDif)
        # Within a +/-0.5-wide corridor and between us and the goal (goal
        # distance scaled by 100 to match observation units)
        if -0.5 < lateral < 0.5:
            if 0 < forward < dispUtils.getInterAgentDistace(agent, target) / 100:
                pathClear = False
                if forward < blockingDist:
                    blockingDist = forward

    # --- Agent-to-agent penalties ---
    # Punish collision (closer than agent['size'] * 2.5)
    for other in otherAgents:
        if dispUtils.getInterAgentDistace(agent, other) < agent['size'] * 2.5:
            reward -= 100

    for i in otherAgentDataIndex:
        # Punish proximity to other agents (ramped below distance 1)
        if observation[i] < 1.5:
            reward -= 5
        if observation[i] < 1:
            reward -= 5 + (1 - observation[i] / 1) * 5
        # Right-of-way shaping: only when both agents are moving
        # (observation[i+2] > 0 and observation[11] > 0) and the other is near
        if observation[i + 2] > 0 and observation[11] > 0 and observation[i] < 5:
            otherAgentHeading = math.radians(observation[i + 3])
            relativeAngToOtherAgent = math.radians(observation[i + 1])
            # Bearing of THIS agent as seen from the other agent
            relativeAngFromOtherAgent = relativeAngToOtherAgent - math.pi - otherAgentHeading
            # FIX: the wrap-around previously assigned the stale relAngleDif
            # from the path-clear loop above (copy-paste bug), producing a
            # wrong, unrelated angle here.
            if relativeAngFromOtherAgent > math.pi:
                relativeAngFromOtherAgent = relativeAngFromOtherAgent - 2 * math.pi
            elif relativeAngFromOtherAgent < -math.pi:
                relativeAngFromOtherAgent = relativeAngFromOtherAgent + 2 * math.pi
            if -math.pi / 4 <= otherAgentHeading <= math.pi / 4:
                if -3 * math.pi / 4 < relativeAngFromOtherAgent < -math.pi / 2:
                    reward -= 10
            elif otherAgentHeading <= -3 * math.pi / 4 or otherAgentHeading >= 3 * math.pi / 4:
                if -math.pi / 2 < relativeAngFromOtherAgent < 0:
                    reward -= 10

    # --- Goal and obstacle related reward ---
    if observation[-1] == 1:
        reward += 100   # at goal
    elif dispUtils.checkCollision(observation[0:9], 55):  # truthiness, not "== True"
        reward -= 100   # collided with obstacle

    return reward, prevtargetAng, prevtargetDist