Example #1
def getReward(surface, agent, target, otherAgents, observation, action,
              prevtargetAng, prevtargetDist):
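    """Compute the reward for the agent's previous action.

    The observation layout is not documented in the original source; from the
    index usage below it appears to hold laser-scan ranges (observation[0:9])
    and the relative angle to the target at observation[-2].
    Returns the reward plus the updated previous target angle and distance.
    """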
    #REWARD FUNCTION
    # Determine the reward for the previous action taken
    # Reward for movement
    reward = -2
    if action == 0:  # FORWARD
        reward += 0
    elif action == 1:  # LEFT
        reward += 0
    elif action == 2:  # RIGHT
        reward += 0
    elif action == 3:  # STOP
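        # Only penalise stopping when nothing is close in the forward scans
        # (observation[3:6] is assumed to hold the forward-facing beams)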
        if min(observation[3:6]) < 1:
            reward -= 0
        else:
            reward -= 100

    # Reward for facing the goal or moving toward facing the goal
    if observation[-2] < 2 or prevtargetAng > observation[-2]:
        reward += 2
    prevtargetAng = observation[-2]

    # Agent-to-agent reward
    # Don't get too close
    for oi in range(len(otherAgents)):
        otherDist = dispUtils.getInterAgentDistace(agent, otherAgents[oi])
        if otherDist < 200:
            reward -= 5
            if otherDist < 100:
                # Extra penalty scales up to 5 as the gap closes to zero
                reward -= 5 + ((100 - otherDist) / 100) * 5

        # Punish collision
        if otherDist < agent['size'] * 2:
            reward -= 1000

    # Goal and obstacle related reward
    targetDist = dispUtils.getInterAgentDistace(agent, target)
    if targetDist < 50:
        reward += 300  # at goal
    elif targetDist < prevtargetDist - 10:
        reward += 5  # closer to goal
    elif dispUtils.checkCollision(observation[0:9], 55):
        reward -= 1000  # collide with obstacle
    else:
        reward -= 5

    prevtargetDist = targetDist

    # Punish driving forward close to walls
    # for scan in observation[4:5]:
    #     if scan <= 1:
    #         reward-= 5 + ((1-scan)/1)*5

    return reward, prevtargetAng, prevtargetDist
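Example #2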
        sys.exit()

    # Draw all the goals and obstacles
    for g in range(len(goals)):
        dispUtils.drawGoal(windowSurface,goals[g],agentColours[g%len(agentColours)])
    dispUtils.drawObstacles(windowSurface, [0,0,0], obstacles)

    dispUtils.drawObstacles(windowSurface, map_data[2], map_data[0])

    # Simulate Each Agent
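    # Only step the agents while at least one of them still has goals left this epoch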
    if sum(goalsHit) < (goalPerEpoch+1)*numberOfAgents:
        for i in range(numberOfAgents):
            if goalsHit[i] < goalPerEpoch + 1:
                # Generate the laser scan data for each agent and check for any collisions
                laserScanData = dispUtils.laserScan(windowSurface, [100, 100, 100], robots[i], agentColours, 10, [-math.pi/2, math.pi/2], 500, 54)
                if dispUtils.checkCollision(laserScanData, 55):
                    done = True
                    isFinal = True
                #print(laserScanData)

                # Create list of other Agents
                others = robots[:i]
                others.extend(robots[(i+1):])

                # Check for collisions with other robots
                for otherRobot in others:
                    if dispUtils.getInterAgentDistace(robots[i], otherRobot) < robots[i]['size'] * 2:
                        done = True
                        isFinal = True

                # Draw the robot and create the new observation list
Example #3
def is_done(self, observation):
    # Episode ends when the laser scan reports a collision with an obstacle
    if dispUtils.checkCollision(observation[0:10], 55):  # or self.curr_step >= self.max_steps
        self.done = True
Example #4
def getReward(surface, agent, target, otherAgents, observation, action, prevtargetAng, prevtargetDist):
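    """Compute the reward for the agent's previous action.

    The observation layout is inferred from the index usage below and is not
    documented in the original source: laser-scan ranges in observation[0:9],
    other-agent data blocks starting at indices 12 and 16, the relative target
    angle (in degrees) at observation[-3] and an at-goal flag at observation[-1].
    Returns the reward plus the carried-over previous angle and distance.
    """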
    #REWARD FUNCTION
    # Determine the reward for the previous action taken
    # Reward for movement
    reward = 0
    if action == 0:  # FORWARD
        # if min(observation[2:7])<1.25:
        #     reward -= 10
        # else:
        #     reward += 1
        reward += 0
    elif action == 1:  # LEFT
        # if min(observation[0:3])<1.25:
        #     reward -= 10
        reward += 0
    elif action == 2:  # RIGHT
        # if min(observation[6:9])<1.25:
        #     reward -= 10
        reward += 0
    elif action == 3:  # STOP
        if min(observation[0:9])<2:
            reward -= 0
        else:
            reward -= 1

    # if min(observation[0:9])<1.5:
    #     reward -= 20

    otherAgentDataIndex = [12,16]
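    # Observation indices where the two other agents' data blocks start; the fields
    # at +1 and +3 are read below as relative angle and heading (layout inferred,
    # not documented in the original)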

    #check if another agent is in the way

    pathClear = True
    blockingDist = 100
    for i in otherAgentDataIndex:
        correctAng = abs(math.atan(25/(dispUtils.getInterAgentDistace(agent,target)+1)))
        relAngleDif = math.radians(observation[i+1]) - math.radians(observation[-3])
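        # relAngleDif: the other agent's bearing relative to the target's bearing,
        # wrapped into [-pi, pi] below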

        if relAngleDif > math.pi:
            relAngleDif = relAngleDif-2*math.pi
        elif relAngleDif < -math.pi:
            relAngleDif = relAngleDif+2*math.pi



        # if relAngleDif > -correctAng*2 and relAngleDif < correctAng*2:
        #     if observation[i] < dispUtils.getInterAgentDistace(agent,target):
        #         pathClear = False
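        # Decompose the other agent's position into a lateral offset from, and a distance
        # along, the line toward the goal; the /100 appears to convert the pixel distance
        # to the goal into the same scaled units as the observation ranges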
        lateral = observation[i] * math.sin(relAngleDif)
        longitudinal = observation[i] * math.cos(relAngleDif)
        if lateral > -0.5 and lateral < 0.5:
            if longitudinal > 0 and longitudinal < dispUtils.getInterAgentDistace(agent, target) / 100:
                pathClear = False
                if longitudinal < blockingDist:
                    blockingDist = longitudinal
                # print("path not Clear " + str(longitudinal) + " " + str(dispUtils.getInterAgentDistace(agent,target)/100))

    # if pathClear:
        # Reward for facing the goal or moving toward facing the goal
        # correctAng = abs(math.degrees(math.atan(25/(dispUtils.getInterAgentDistace(agent,target)+1))))
        # if correctAng < 2:
        #     correctAng = 2

        # if observation[-3] <= correctAng and observation[-3] >= -correctAng:
        #     if action != 3 and action != 2 and action != 1:
        #         reward+=3
        # else:
        #     if observation[-3] > 0 and prevtargetAng>=0:
        #         if prevtargetAng > observation[-3] or observation[-3] < correctAng:
        #             reward+=1
        #         else:
        #             reward-=3
        #     elif observation[-3] < 0 and prevtargetAng <= 0:
        #         if prevtargetAng < observation[-3] or observation[-3] > -correctAng:
        #             reward+=1
        #         else:
        #             reward-=3
        #     else:
        #         reward-=3
        # prevtargetAng = observation[-3]
    # else:
    #     correctAng = abs(math.degrees(math.atan(25/blockingDist)))
    #     if correctAng < 2:
    #         correctAng = 2
    #
    #
    #     if (observation[-3] >= correctAng and observation[-3] <= correctAng+45):
    #         if action != 3 and action != 2 and action != 1:
    #             reward+=2
    #     elif (observation[-3] <= -correctAng and observation[-3] >= -(correctAng+45)):
    #         if action != 3:
    #             reward-=0
    #     else:
    #         if observation[-3] > 0 and prevtargetAng >= 0:
    #             if observation[-3] < correctAng and prevtargetAng < observation[-3]:
    #                 reward+=1
    #             elif observation[-3] > correctAng+45 and prevtargetAng > observation[-3]:
    #                 reward+=1
    #             else:
    #                 reward-=3
    #         elif observation[-3] < 0 and prevtargetAng <= 0:
    #             if observation[-3] > -correctAng and prevtargetAng < observation[-3]:
    #                 reward+=1
    #             elif observation[-3] < -correctAng-45 and prevtargetAng < observation[-3]:
    #                 reward+=1
    #             else:
    #                 reward-=3
    #         else:
    #             reward-=3
    #
    #     prevtargetAng = observation[-3]


    # Agent-to-agent reward
    # Don't get too close
    for oi in range(len(otherAgents)):
        # Punish collision
        if dispUtils.getInterAgentDistace(agent, otherAgents[oi]) < agent['size'] * 2.5:
            reward -= 100

    for i in otherAgentDataIndex:
        # Punish proximity to other agents
        if observation[i] < 1.5:
            reward -= 5
            if observation[i] < 1:
                # Extra penalty grows linearly as the separation approaches zero
                reward -= 5 + (1 - observation[i]) * 5

        if observation[i+2] > 0 and observation[11] > 0 and observation[i] < 5:
            otherAgentHeading = math.radians(observation[i+3])
            relativeAngToOtherAgent = math.radians(observation[i+1])
            # Bearing of this agent as seen from the other agent's own heading frame
            # (convention assumed: 0 rad means directly ahead of the other agent)
            relativeAngFromOtherAgent = relativeAngToOtherAgent - math.pi - otherAgentHeading

            # Wrap the relative angle into [-pi, pi]
            if relativeAngFromOtherAgent > math.pi:
                relativeAngFromOtherAgent = relativeAngFromOtherAgent - 2*math.pi
            elif relativeAngFromOtherAgent < -math.pi:
                relativeAngFromOtherAgent = relativeAngFromOtherAgent + 2*math.pi

            # Penalise sectors that cut across the other agent's path
            # (appears to be a hand-tuned passing-side heuristic)
            if otherAgentHeading <= math.pi/4 and otherAgentHeading >= -math.pi/4:
                if relativeAngFromOtherAgent > -3*math.pi/4 and relativeAngFromOtherAgent < -math.pi/2:
                    reward -= 10
            # elif otherAgentHeading< 3 * math.pi/4 and otherAgentHeading> math.pi/4:
            #     if relativeAngFromOtherAgent < math.pi/4 and relativeAngFromOtherAgent > -math.pi/4:
            #         reward-=0.5
            # elif otherAgentHeading> -3 * math.pi/4 and otherAgentHeading< -math.pi/4:
            #     if relativeAngFromOtherAgent > -math.pi/4 and relativeAngFromOtherAgent < math.pi/4:
            #         reward-=0.5
            elif otherAgentHeading <= -3*math.pi/4 or otherAgentHeading >= 3*math.pi/4:
                if relativeAngFromOtherAgent > -math.pi/2 and relativeAngFromOtherAgent < 0:
                    reward -= 10

                # if (relativeAngToOtherAgent < 0 and relativeAngToOtherAgent > -math.pi/2) or (relativeAngToOtherAgent >= 0 and observation[i]*math.sin(relativeAngToOtherAgent)<0.7):
                #     reward-=1
        # else:
        #     if observation[i+3]<= math.pi/4 and observation[i+3]>= -math.pi/4 :
        #         if observation[i+1] > math.pi/4 and observation[i+1] < math.pi/2:
        #             reward-=1

    # Goal and obstacle related reward
    if observation[-1] == 1:
        reward += 100  # at goal
    elif dispUtils.checkCollision(observation[0:9], 55):
        reward -= 100  # collide with obstacle
    # elif dispUtils.getInterAgentDistace(agent,target) < 100:
    #     reward -= 0
    # elif dispUtils.getInterAgentDistace(agent,target) < prevtargetDist - 5:
    #     reward += 6 # Closer to goal
    # elif dispUtils.getInterAgentDistace(agent,target) < prevtargetDist + 5 and dispUtils.getInterAgentDistace(agent,target) > prevtargetDist - 5:
    #     reward -= 0
    # else:
    #     reward -= 6


    # prevtargetDist = dispUtils.getInterAgentDistace(agent,target)

    # Punish driving forward close to walls
    # for scan in observation[0:9]:
    #     if scan <= 1.5:
    #         reward-=0.5
    #         if scan <= 1:
    #             reward-= 0.2 + ((1-scan)/1)*0.2
    #             if scan <= 0.65:
    #                 reward-= 2

    return reward, prevtargetAng, prevtargetDist
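
Both reward functions wrap relative angles back into [-pi, pi] with repeated if/elif blocks. A small helper along the following lines could express that once; this is a sketch, not part of dispUtils or the original snippets, and it assumes the angles are in radians.

import math

def wrap_to_pi(angle):
    """Normalise an angle in radians to the interval [-pi, pi]."""
    return (angle + math.pi) % (2 * math.pi) - math.pi

# Example use with the quantities from the snippets above:
# relAngleDif = wrap_to_pi(math.radians(observation[i+1]) - math.radians(observation[-3]))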