Example No. 1
 def updateGhostsToMap(self, state):
     # Mark ghost positions in the value map: hostile ghosts get a large
     # negative value plus a penalty zone around them, edible ghosts only a
     # small one.
     ghost = api.ghosts(state)
     ghostStates = api.ghostStates(state)
     for i in range(len(ghost)):
         if ghostStates[i][1] != 1:
             # Ghost is not scared: strong penalty on its cell and on the
             # area around it.
             self.map.setValue(int(ghost[i][0]), int(ghost[i][1]),
                               (-self.initialFood, 0))
             self.aroundGhosts(int(ghost[i][0]), int(ghost[i][1]),
                               -self.initialFood, 0)
         else:
             # Scared (edible) ghost: only a mild penalty on its cell.
             self.map.setValue(int(ghost[i][0]), int(ghost[i][1]),
                               (-0.02, 0))
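
Every example on this page indexes the result of api.ghostStates(state) the same way: element [0] is the ghost's (x, y) position (possibly non-integer, hence the int() casts) and element [1] is a flag that is 1 while the ghost is scared/edible. The short sketch below works on plain sample data under that assumption; the coordinates are invented.

    # Assumed shape of api.ghostStates(state), inferred from how the examples
    # on this page index it; the sample values are invented.
    ghostStates = [((9.0, 7.0), 0), ((11.5, 7.0), 1)]

    hostile = [pos for (pos, scared) in ghostStates if scared != 1]
    edible = [pos for (pos, scared) in ghostStates if scared == 1]
    print(hostile)  # [(9.0, 7.0)]
    print(edible)   # [(11.5, 7.0)]
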
Example No. 2
    def updateMap(self, state):
        # Query the world state; only the wall positions are used below.
        ghost = api.ghosts(state)
        ghostStates = api.ghostStates(state)
        capsule = api.capsules(state)
        food = api.food(state)
        wall = api.walls(state)

        # Mark every wall cell in the map dictionary with 'X'.
        for node in self.mapDictionary:
            if node in wall:
                self.mapDictionary[node] = 'X'

        return self.mapDictionary
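
None of the examples show how a map dictionary like self.mapDictionary is first built. A minimal sketch under the same assumption Example No. 8 uses (the last entry of api.walls(state) is the top-right corner of the layout) might look like this; the wall list is sample data standing in for the real API call.

    # Hypothetical construction of a coordinate-keyed map dictionary.
    walls = [(0, 0), (1, 0), (2, 0), (0, 1), (2, 1), (0, 2), (1, 2), (2, 2)]
    width = walls[-1][0] + 1    # last wall coordinate gives the layout size
    height = walls[-1][1] + 1

    mapDictionary = {}
    for x in range(width):
        for y in range(height):
            mapDictionary[(x, y)] = 'X' if (x, y) in walls else ' '
    print(mapDictionary[(1, 1)] == ' ')  # True: (1, 1) is the only open cell here
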
Example No. 3
    def getGhostsAndStates(self, state, distance):
        ''' Get the ghosts within a certain distance; returns a list of
            (location, state) pairs, the state indicating whether each
            ghost is active or scared. '''

        # Ghosts within the given distance of Pacman.
        ghosts = api.distanceLimited(self.ghosts, state, distance)

        # (location, state) pairs for every ghost on the map.
        ghostStates = api.ghostStates(state)

        # Collect the state entry for each nearby ghost.
        dangerousGhosts = []
        for ghost in ghosts:
            for ghostState in ghostStates:
                if ghost == ghostState[0]:
                    dangerousGhosts.append(ghostState)

        return dangerousGhosts
    def rewardMapping(self, state):
        """ Populate a dictionary of reward values for each traversable square in the map """

        walls = self.walls
        food = set(api.food(state))
        ghostStates = api.ghostStates(state)
        capsules = set(api.capsules(state))

        # initialise all states to reward of -1, the default value for a blank square
        self.rewardDict = {key: -1 for key in self.grid if key not in walls}

        # initialise all states' utilities to 0 whilst we're at it
        self.utils = {key: 0 for key in self.grid if key not in walls}

        # update the reward dictionary with reward values at locations of food and capsules
        foodDict = {k: 10 for k in self.rewardDict if k in food}
        self.rewardDict.update(foodDict)
        capsuleDict = {k: 20 for k in self.rewardDict if k in capsules}
        self.rewardDict.update(capsuleDict)

        # loop through the ghost locations and their statuses indicating whether they're scared; if they
        # are scared, ignore them. Otherwise, assign negative rewards to both: a) their locations and b)
        # a radius of variable width around them.
        for j in ghostStates:
            if j[0] in self.rewardDict and j[1] == 0:
                self.rewardDict[j[0]] = -50
                # adjust the size of the exclusion zone around ghosts with the
                # 3rd argument below
                ghostNeighbours = self.ghostRadius(state, j[0], 2)

                ghostRadius = {k: -25 for k in self.rewardDict if k in ghostNeighbours}
                self.rewardDict.update(ghostRadius)
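
rewardMapping calls a ghostRadius helper that is not shown here. Purely as an illustration of what such a helper might return, here is a hypothetical stand-in that collects every square within a given Manhattan distance of a ghost; the real implementation may differ.

    # Hypothetical stand-in for self.ghostRadius(state, location, radius).
    def ghost_radius(grid, location, radius):
        gx, gy = int(location[0]), int(location[1])
        return [(x, y) for (x, y) in grid
                if 0 < abs(x - gx) + abs(y - gy) <= radius]

    # Sample usage on a 5x5 block of coordinates:
    grid = [(x, y) for x in range(5) for y in range(5)]
    print(ghost_radius(grid, (2, 2), 1))  # the four orthogonal neighbours of (2, 2)
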
Example No. 5
    def getAction(self, state):
        # Get the actions we can try, and remove "STOP" if that is one of them.
        legal = api.legalActions(state)
        pacloc = api.whereAmI(state)
        ghoststates = api.ghostStates(state)
        ghostcors = []
        if Directions.STOP in legal:
            legal.remove(Directions.STOP)

        self.updatefood(state)

        # Build a list of coordinates for ghosts that are still active (not
        # edible); these coordinates get a negative value during value iteration.
        for i in range(len(ghoststates)):
            # Convert the ghost coordinates to integers.
            ghostX = ghoststates[i][0][0]
            ghostY = ghoststates[i][0][1]
            ghostXY = (int(ghostX), int(ghostY))
            if ghostXY in self.pcMap and ghoststates[i][1] == 0:
                ghostcors.append(ghostXY)

                # Create a buffer around the ghost so Pacman will avoid it.
                eghost = (ghostXY[0] + 1, ghostXY[1])
                wghost = (ghostXY[0] - 1, ghostXY[1])
                nghost = (ghostXY[0], ghostXY[1] + 1)
                sghost = (ghostXY[0], ghostXY[1] - 1)
                ghostcors.append(eghost)
                ghostcors.append(wghost)
                ghostcors.append(nghost)
                ghostcors.append(sghost)

        # Run value iteration until the Bellman values stop changing from the
        # previous sweep (or for at most 100 sweeps).
        for i in range(100):
            stable = self.valueIteration(state, ghostcors)
            if stable == 0:
                #print "stable at ", i
                break

        # East Coordinates
        if Directions.EAST in legal:
            eastofpacman = (pacloc[0] + 1, pacloc[1])
        else:
            eastofpacman = pacloc

        # West Coordinates
        if Directions.WEST in legal:
            westofpacman = (pacloc[0] - 1, pacloc[1])
        else:
            westofpacman = pacloc

        # North Coordinates
        if Directions.NORTH in legal:
            northofpacman = (pacloc[0], pacloc[1] + 1)
        else:
            northofpacman = pacloc

        # South Coordinates
        if Directions.SOUTH in legal:
            southofpacman = (pacloc[0], pacloc[1] - 1)
        else:
            southofpacman = pacloc

        maxutil = -999
        makeMove = 'null'

        unorth = self.iterationDict[northofpacman]
        usouth = self.iterationDict[southofpacman]
        ueast = self.iterationDict[eastofpacman]
        uwest = self.iterationDict[westofpacman]

        # Get a movement policy for pacman using the
        # value iteration map that is updated at every move

        if Directions.NORTH in legal:
            movnorth = ((0.8 * unorth) + (0.1 * uwest) + (0.1 * ueast))
            if movnorth > maxutil:
                maxutil = movnorth
                makeMove = Directions.NORTH
        if Directions.EAST in legal:
            moveast = ((0.8 * ueast) + (0.1 * unorth) + (0.1 * usouth))
            if moveast > maxutil:
                maxutil = moveast
                makeMove = Directions.EAST
        if Directions.SOUTH in legal:
            movsouth = ((0.8 * usouth) + (0.1 * uwest) + (0.1 * ueast))
            if movsouth > maxutil:
                maxutil = movsouth
                makeMove = Directions.SOUTH
        if Directions.WEST in legal:
            movwest = ((0.8 * uwest) + (0.1 * unorth) + (0.1 * usouth))
            if movwest > maxutil:
                maxutil = movwest
                makeMove = Directions.WEST

        if makeMove != 'null':
            self.itrStart = 0
            return api.makeMove(makeMove, legal)
        else:
            print "where am I ?"
Example No. 6
 def __value_iteration(self, state, debug_mode, deep_debug_mode,
                       ghostbuster_mode):
     """initialize data structures for value iteration"""
     ghosts_states = api.ghostStates(state)
     edible_ghosts = []
     hostile_ghosts = []
     early_stopping_point = self.__NORMAL_EARLY_STOPPING_POINT
     for ghost_state in ghosts_states:
         if ghost_state[1] == 1:
             edible_ghosts.append(
                 (int(ghost_state[0][0]), int(ghost_state[0][1])))
         else:
             hostile_ghosts.append(ghost_state[0])
     if debug_mode:
         print("\tedible_ghosts=" + str(edible_ghosts))
         print("\thostile_ghosts=" + str(hostile_ghosts))
     # inactive ghostbuster mode
     self.__rewards, self.__utilities = self.__initialize_data_structures(
         self.__foods,
         self.__FOOD,
         self.__walls,
         self.__floors,
         hostile_ghosts,
         safety_distance=self.__SAFETY_DISTANCE,
         threat_decay_rate=self.__THREAT_DECAY_RATE)
     if len(self.__foods) < 10:
         early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
     # defensive ghostbuster mode
     if ghostbuster_mode == self.__DEFENSIVE_GHOSTBUSTER_MODE:
         if len(edible_ghosts) > 0:
             self.__rewards, self.__utilities = self.__initialize_data_structures(
                 edible_ghosts,
                 self.__EDIBLE,
                 self.__walls,
                 self.__floors,
                 hostile_ghosts,
                 safety_distance=self.__SAFETY_DISTANCE,
                 threat_decay_rate=self.__THREAT_DECAY_RATE)
             early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
     # offensive ghostbuster_mode
     if ghostbuster_mode == self.__OFFENSIVE_GHOSTBUSTER_MODE:
         if len(edible_ghosts) > 0:
             self.__rewards, self.__utilities = self.__initialize_data_structures(
                 edible_ghosts,
                 self.__EDIBLE,
                 self.__walls,
                 self.__floors,
                 hostile_ghosts,
                 safety_distance=self.__SAFETY_DISTANCE,
                 threat_decay_rate=self.__THREAT_DECAY_RATE)
             early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
         elif len(self.__capsules) > 0:
             self.__rewards, self.__utilities = self.__initialize_data_structures(
                 self.__capsules,
                 self.__CAPSULE,
                 self.__walls,
                 self.__floors,
                 hostile_ghosts,
                 safety_distance=self.__SAFETY_DISTANCE,
                 threat_decay_rate=self.__THREAT_DECAY_RATE)
             early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
     """use value iteration to update utilites until convergence or early stopping point"""
     stopping_point = None
     for i in range(early_stopping_point):
         stopping_point = i + 1
         self.__utilities, fully_convergent, total_entropy = self.__update_utilities(
             self.__walls,
             self.__neighbors,
             self.__rewards,
             self.__utilities,
             discount_factor=self.__DISCOUNT_FACTOR,
             convergence_tolerance=self.__CONVERGENCE_TOLERANCE,
             ignoring_walls=False,
             maximum_mode=True)
         if deep_debug_mode:
             self.__print_data_structure(self.__walls, self.__rewards)
             self.__print_data_structure(self.__walls, self.__utilities)
             print("\ttotal_entropy=" + "{:+10.3f}".format(total_entropy))
         if fully_convergent:
             break
     if debug_mode:
         self.__print_data_structure(self.__walls, self.__rewards)
         self.__print_data_structure(self.__walls, self.__utilities)
         print("\ttotal_entropy=" + "{:+10.3f}".format(total_entropy))
         print("\tstopping_point=" + str(stopping_point))
Example No. 7
    def compute(self, state, Dictionary):
        ghost = api.ghosts(state)
        ghostStates = api.ghostStates(state)
        capsule = api.capsules(state)
        food = api.food(state)
        wall = api.walls(state)

        # 1 ghost
        if len(ghost) == 1:
            # ghost co-ordinates and its states
            ghost1x = int(ghostStates[0][0][0])
            ghost1y = int(ghostStates[0][0][1])
            ghost1xy = (ghost1x, ghost1y)
            ghost1State = ghostStates[0][1]
            doNotEnter = []
            if ghost1State == 0:
                for i in range(1, 2):
                    if (ghost1x + i, ghost1y) in self.mapCoords:
                        doNotEnter.append((ghost1x + i, ghost1y))
                    if (ghost1x - i, ghost1y) in self.mapCoords:
                        doNotEnter.append((ghost1x - i, ghost1y))
                    if (ghost1x, ghost1y + i) in self.mapCoords:
                        doNotEnter.append((ghost1x, ghost1y + i))
                    if (ghost1x, ghost1y - i) in self.mapCoords:
                        doNotEnter.append((ghost1x, ghost1y - i))

            #rewards and penalty
            ghostPenalty = -500
            capsuleReward = 10
            foodReward = 10
            safeGhost = 10
            noReward = 0
            doNotEnterPenalty = -500
            discountFactor = 0.9
            iterations = 50
            while iterations > 0:
                # a copy of the map, values are stored between iterations
                copy = Dictionary.copy()
                for x, y in Dictionary:
                    #value iteration
                    if (x, y) in self.pathCoords and (x, y) == ghost1xy and ghost1State == 0:
                        Dictionary[(x, y)] = ghostPenalty + discountFactor * self.expectedUtitlity(copy, int(round(x)), int(round(y)))
                    elif (x, y) in self.pathCoords and (x, y) == ghost1xy and ghost1State == 1:
                        Dictionary[(x, y)] = safeGhost + discountFactor * self.expectedUtitlity(copy, int(round(x)), int(round(y)))
                    elif (x, y) in self.pathCoords and (x, y) in doNotEnter:
                        Dictionary[(x,y)] = doNotEnterPenalty + discountFactor * self.expectedUtitlity(copy, x, y)
                    elif (x, y) in self.pathCoords and (x, y) not in food and (x, y) not in ghost and (x, y) not in capsule:
                        Dictionary[(x, y)] = noReward + discountFactor * self.expectedUtitlity(copy, x, y)
                    elif (x, y) in self.pathCoords and (x, y) in capsule:
                        Dictionary[(x, y)] = capsuleReward + discountFactor * self.expectedUtitlity(copy, x, y)
                    elif (x, y) in self.pathCoords and (x, y) in food:
                        Dictionary[(x, y)] = foodReward + discountFactor * self.expectedUtitlity(copy, x, y)
                iterations -= 1

        # 2 ghosts
        if len(ghost) == 2:
            # ghost co-ordinates and its states
            ghost1x = int(ghostStates[0][0][0])
            ghost1y = int(ghostStates[0][0][1])
            ghost1xy = (ghost1x, ghost1y)
            ghost1State = ghostStates[0][1]
            ghost2x = int(ghostStates[1][0][0])
            ghost2y = int(ghostStates[1][0][1])
            ghost2xy = (ghost2x, ghost2y)
            ghost2State = ghostStates[1][1]
            doNotEnter = []
            if ghost1State == 0:
                for i in range (1, 3):
                    if (ghost1x + i, ghost1y) in self.pathCoords:
                        doNotEnter.append((ghost1x + i, ghost1y))
                    if (ghost1x - i, ghost1y) in self.pathCoords:
                        doNotEnter.append((ghost1x - i, ghost1y))
                    if (ghost1x, ghost1y + i) in self.pathCoords:
                        doNotEnter.append((ghost1x, ghost1y + i))
                    if (ghost1x, ghost1y - i) in self.pathCoords:
                        doNotEnter.append((ghost1x, ghost1y - i))

            #rewards and penalty
            ghostPenalty = -500
            capsuleReward = 10
            foodReward = 10
            safeGhost = 10
            noReward = 0
            doNotEnterPenalty = -500
            discountFactor = 0.9
            iterations = 50
            # value iteration
            while iterations > 0:
                copy = Dictionary.copy()
                for x, y in Dictionary:
                    if (x, y) in self.pathCoords and (x, y) == ghost1xy and ghost1State == 0:
                        Dictionary[(x, y)] = ghostPenalty + discountFactor * self.expectedUtitlity(copy, int(round(x)), int(round(y)))
                    elif (x, y) in self.pathCoords and (x, y) == ghost2xy and ghost2State == 0:
                        Dictionary[(x, y)] = ghostPenalty + discountFactor * self.expectedUtitlity(copy, int(round(x)), int(round(y)))
                    elif (x, y) in self.pathCoords and (x, y) == ghost1xy and ghost1State == 1:
                        Dictionary[(x, y)] = safeGhost + discountFactor * self.expectedUtitlity(copy, int(round(x)), int(round(y)))
                    elif (x, y) in self.pathCoords and (x, y) == ghost2xy and ghost2State == 1:
                        Dictionary[(x, y)] = safeGhost + discountFactor * self.expectedUtitlity(copy, int(round(x)), int(round(y)))
                    elif (x, y) in self.pathCoords and (x, y) in doNotEnter:
                        Dictionary[(x, y)] = doNotEnterPenalty + discountFactor * self.expectedUtitlity(copy, int(round(x)), int(round(y)))
                    elif (x, y) in self.pathCoords and (x, y) in capsule:
                        Dictionary[(x, y)] = capsuleReward + discountFactor * self.expectedUtitlity(copy, x, y)
                    elif (x, y) in self.pathCoords and (x, y) in food:
                        Dictionary[(x, y)] = foodReward + discountFactor * self.expectedUtitlity(copy, x, y)
                    elif (x, y) in self.pathCoords and (x, y) not in food and (x, y) not in capsule and (x, y) not in ghost:
                        Dictionary[(x, y)] = noReward + discountFactor * self.expectedUtitlity(copy, x, y)
                iterations -= 1
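
expectedUtitlity (the spelling is the original author's) is not shown. Assuming it returns the best directional expected utility from a square under the same 0.8/0.1/0.1 motion model used in Example No. 5, with missing squares treated as walls where the agent stays put, a hypothetical version might be:

    # Hypothetical single-cell expected-utility helper.
    def expected_utility(utilities, x, y):
        def u(px, py):
            return utilities.get((px, py), utilities[(x, y)])

        north = 0.8 * u(x, y + 1) + 0.1 * u(x - 1, y) + 0.1 * u(x + 1, y)
        south = 0.8 * u(x, y - 1) + 0.1 * u(x - 1, y) + 0.1 * u(x + 1, y)
        east = 0.8 * u(x + 1, y) + 0.1 * u(x, y + 1) + 0.1 * u(x, y - 1)
        west = 0.8 * u(x - 1, y) + 0.1 * u(x, y + 1) + 0.1 * u(x, y - 1)
        return max(north, south, east, west)

    # Sample usage with a tiny utility dictionary:
    utilities = {(1, 1): 0.0, (2, 1): 2.0, (1, 2): -1.0}
    print(expected_utility(utilities, 1, 1))  # 0.8 * 2.0 + 0.1 * (-1.0) + 0.1 * 0.0 = 1.5
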
Example No. 8
    def __init__(self, state):
        # Required information about the world
        pacmanOrg = api.whereAmI(state)
        self.walls = api.walls(state)
        self.caps = api.capsules(state)
        self.reward = api.food(state)
        self.loss = api.ghosts(state)
        if (len(self.loss) > 0):
            self.loss[0] = (int(self.loss[0][0]), int(self.loss[0][1]))

        # Ignore scared ghosts
        for ghost in api.ghostStates(state):
            if (ghost[0] in self.loss and ghost[1] == 1):
                self.loss.remove(ghost[0])

        # Grid dimensions based on the last wall co-ordinate
        self.x1 = self.walls[len(self.walls) - 1][0] + 1
        self.y1 = self.walls[len(self.walls) - 1][1] + 1

        self.grid = [[0 for y in range(self.y1)] for x in range(self.x1)]

        # Initialize default utilities; these will be used as rewards
        for x in range(self.x1):
            for y in range(self.y1):
                if (x, y) in self.walls:
                    self.grid[x][y] = None
                elif (x, y) in self.loss:
                    self.grid[x][y] = -20
                elif (x, y) in self.caps:
                    self.grid[x][y] = 10
                # Larger grids: food is worth 1, empty squares 0
                elif self.x1 > 7:
                    if (x, y) in self.reward:
                        self.grid[x][y] = 1
                    else:
                        self.grid[x][y] = 0
                # Smaller grids: while several pieces of food remain, food at
                # (1, 1) is weighted higher; the last remaining piece gets the
                # highest reward
                else:
                    if (x, y) in self.reward:
                        if len(self.reward) > 1:
                            if (x, y) == (1, 1):
                                self.grid[x][y] = 5
                            else:
                                self.grid[x][y] = 1
                        else:
                            self.grid[x][y] = 10
                    else:
                        self.grid[x][y] = 0
        # Override rewards for legal spaces near ghosts
        if (self.x1 > 7):
            for y in range(self.y1):
                for x in range(self.x1):
                    if (x, y) in self.loss:
                        if (x + 1, y) not in self.walls:
                            self.grid[x + 1][y] = -15
                            if (x + 2, y) not in self.walls and (x + 2) < self.x1:
                                self.grid[x + 2][y] = -10
                        if (x - 1, y) not in self.walls:
                            self.grid[x - 1][y] = -15
                            if (x - 2, y) not in self.walls and (x - 2) > 0:
                                self.grid[x - 2][y] = -10
                        if (x, y + 1) not in self.walls:
                            self.grid[x][y + 1] = -15
                            if (x, y + 2) not in self.walls and (y + 2) < self.y1:
                                self.grid[x][y + 2] = -10
                        if (x, y - 1) not in self.walls:
                            self.grid[x][y - 1] = -15
                            if (x, y - 2) not in self.walls and (y - 2) > 0:
                                self.grid[x][y - 2] = -10
                        if (x + 1, y + 1) not in self.walls:
                            self.grid[x + 1][y + 1] = -10
                        if (x + 1, y - 1) not in self.walls:
                            self.grid[x + 1][y - 1] = -10
                        if (x - 1, y + 1) not in self.walls:
                            self.grid[x - 1][y + 1] = -10
                        if (x - 1, y - 1) not in self.walls:
                            self.grid[x - 1][y - 1] = -10