def __init__(self, index, mdp, discountRate=0.9, iters=100, **kwargs):
    """
    Value-iteration agent: run `iters` sweeps of batch value iteration
    over `mdp` at construction time, storing state values in self.values.

    Args:
        index: agent index, forwarded to the base agent.
        mdp: the MDP to solve (must provide getStates, isTerminal,
            getPossibleActions, getTransitionStatesAndProbs, getReward).
        discountRate: discount factor gamma for future rewards.
        iters: number of full value-iteration sweeps.
    """
    # FIX: **kwargs was dropped; forward it like the sibling constructors do.
    super().__init__(index, **kwargs)

    self.mdp = mdp
    self.discountRate = discountRate
    self.iters = iters
    self.values = counter.Counter()  # A Counter is a dict with default 0.

    # Batch value iteration: each sweep reads only the previous sweep's
    # values (snapshot), so updates within a sweep don't interact.
    for _ in range(self.iters):
        # Values are plain numbers, so a shallow copy is sufficient
        # (the original used an unnecessary copy.deepcopy).
        snapshot = self.values.copy()

        for state in self.mdp.getStates():
            if self.mdp.isTerminal(state):
                continue

            qValues = counter.Counter()
            for action in self.mdp.getPossibleActions(state):
                for nextState, prob in self.mdp.getTransitionStatesAndProbs(state, action):
                    reward = self.mdp.getReward(state, action, nextState)
                    qValues[action] += prob * (reward + self.discountRate * snapshot[nextState])

            # argMax() on an empty Counter yields None, and qValues[None]
            # defaults to 0 — matching the original behavior.
            self.values[state] = qValues[qValues.argMax()]
def __init__(self, index, extractor='pacai.core.featureExtractors.IdentityExtractor', **kwargs):
    """
    Approximate Q-learning agent setup.

    Args:
        index: agent index, forwarded to the base agent.
        extractor: fully-qualified name of the feature extractor class.
    """
    super().__init__(index, **kwargs)

    # Resolve the extractor class from its dotted module path.
    self.featExtractor = reflection.qualifiedImport(extractor)

    # Feature weights; a Counter defaults every unseen feature to 0.
    self.weights = counter.Counter()
def getFeatures(self, gameState, action):
    """
    Offensive features: successor score, distance to the nearest capsule,
    and distance to the nearest food in the successor state.
    """
    features = counter.Counter()
    successor = self.getSuccessor(gameState, action)
    features['successorScore'] = self.getScore(successor)

    myPos = successor.getAgentState(self.index).getPosition()

    # Distance to the nearest capsule (0 when none remain).
    capsules = self.getCapsules(gameState)
    if len(capsules) > 0:
        features['capsuleDistance'] = min(
            self.getMazeDistance(myPos, capsule) for capsule in capsules)
    else:
        features['capsuleDistance'] = 0

    # Ghost / invader tracking is currently disabled:
    # enemies = [gameState.getAgentState(i) for i in self.getOpponents(gameState)]
    # chasers = [a for a in enemies if not a.isPacman() and a.getPosition() is not None]
    # invaders = [a for a in enemies if a.isPacman() and a.getPosition() is not None]

    # Distance to the nearest food.
    foodList = self.getFood(successor).asList()
    if len(foodList) > 0:
        features['DistanceToFoodTarget'] = min(
            self.getMazeDistance(myPos, food) for food in foodList)

    return features
def getFeatures(self, gameState, action):
    """
    Offensive features: successor score, distance to nearest food, count of
    adjacent ghosts, and whether it is safe to eat the food at our tile.
    """
    features = counter.Counter()
    successor = self.getSuccessor(gameState, action)
    features['successorScore'] = self.getScore(successor)

    food = self.getFood(successor)
    foodList = food.asList()
    if len(foodList) > 0:
        myPos = successor.getAgentState(self.index).getPosition()
        features['distanceToFood'] = min(
            self.getMazeDistance(myPos, f) for f in foodList)

    myPos = gameState.getAgentState(self.index).getPosition()

    # Count opponents within maze distance 2 of us.
    closeGhosts = 0
    for opponent in self.getOpponents(gameState):
        opponentPos = gameState.getAgentState(opponent).getPosition()
        # FIX: unobserved opponents report a None position; skip them
        # instead of crashing inside getMazeDistance.
        if opponentPos is not None and self.getMazeDistance(myPos, opponentPos) < 2:
            closeGhosts += 1
    features['closeGhosts'] = closeGhosts

    # Reward eating the food on our tile, but only when no ghost is adjacent.
    if not closeGhosts and food[int(myPos[0])][int(myPos[1])]:
        features['eatFood'] = 1.0

    return features
def getFeatures(self, state, action):
    """
    Classic Pacman feature extractor: bias, number of ghosts one step away,
    whether the move eats food, and (scaled) distance to the closest food.
    """
    # Extract the grid of food and wall locations and get the ghost locations.
    food = state.getFood()
    walls = state.getWalls()
    ghosts = state.getGhostPositions()

    features = counter.Counter()
    features["bias"] = 1.0

    # Compute the location of pacman after he takes the action.
    x, y = state.getPacmanPosition()
    dx, dy = Actions.directionToVector(action)
    next_x, next_y = int(x + dx), int(y + dy)

    # Count the number of ghosts 1-step away.
    features["#-of-ghosts-1-step-away"] = sum(
        (next_x, next_y) in Actions.getLegalNeighbors(g, walls) for g in ghosts)

    # If there is no danger of ghosts then add the food feature.
    if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
        features["eats-food"] = 1.0

    prob = AnyFoodSearchProblem(state, start=(next_x, next_y))
    path = search.bfs(prob)
    # FIX: the original called len() BEFORE checking for None, so the
    # guard `dist is not None` was always true and an unreachable food
    # crashed with TypeError. Check the path first.
    if path is not None:
        # Make the distance a number less than one,
        # otherwise the update will diverge wildly.
        features["closest-food"] = float(len(path)) / (walls.getWidth() * walls.getHeight())

    features.divideAll(10.0)
    return features
def __init__(self, index, mdp, discountRate=0.9, iters=100, **kwargs):
    """
    Value-iteration agent: precompute state values by repeatedly applying
    the Bellman backup through this agent's own getAction / getQValue.
    """
    super().__init__(index, **kwargs)

    self.mdp = mdp
    self.discountRate = discountRate
    self.iters = iters
    self.values = counter.Counter()  # A Counter is a dict with default 0.

    # Each sweep builds a fresh value table from the previous one;
    # states with no best action (e.g. terminals) keep the default 0.
    allStates = mdp.getStates()
    for _ in range(iters):
        sweep = counter.Counter()
        for state in allStates:
            bestAction = self.getAction(state)
            if bestAction:
                sweep[state] = self.getQValue(state, bestAction)
        self.values = sweep
def __init__(self, index, mdp, discountRate=0.9, iters=100, **kwargs):
    """
    Value-iteration agent: run `iters` sweeps of batch value iteration.

    Args:
        index: agent index, forwarded to the base agent.
        mdp: the MDP to solve.
        discountRate: discount factor gamma.
        iters: number of value-iteration sweeps.
    """
    # FIX: **kwargs was dropped; forward it like the sibling constructors do.
    super().__init__(index, **kwargs)

    self.mdp = mdp
    self.discountRate = discountRate
    self.iters = iters
    self.values = counter.Counter()  # A Counter is a dict with default 0.

    # Resolves the original FIXME: instead of getAction(state) (which
    # evaluates every Q-value) followed by a second getQValue call, take
    # the max over Q-values directly — same result, half the work.
    for _ in range(iters):
        newVals = counter.Counter()
        for state in mdp.getStates():
            actions = mdp.getPossibleActions(state)
            if actions:
                newVals[state] = max(self.getQValue(state, a) for a in actions)
        # newVals is already a fresh Counter; no defensive copy needed.
        self.values = newVals
def getFeatures(self, gameState, action):
    """
    Defensive features evaluated on the CURRENT state (`action` is unused):
    defense flag, invader count/distance, fallback food chasing when no
    invaders are visible, and spacing relative to our teammate.
    """
    features = counter.Counter()
    successor = gameState  # NOTE(review): evaluates gameState directly, not a successor.
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # Computes whether we're on defense (1) or offense (0).
    features['onDefense'] = 1
    if myState.isPacman():
        features['onDefense'] = 0
        features['successorScore'] = 0
        features['distanceToFood'] = 0

    # Computes distance to invaders we can see.
    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if a.isPacman() and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)

    if len(invaders) > 0:
        dists = [self.getMazeDistance(myPos, a.getPosition()) for a in invaders]
        features['invaderDistance'] = min(dists)
        features['onDefense'] = 1
    else:
        # No visible invaders: fall back to offense-style food chasing.
        features['onDefense'] = 0
        myPos = successor.getAgentState(self.index).getPosition()
        foodList = self.getFood(successor).asList()
        # FIX: min() on an empty food list raised ValueError; guard it
        # like the other feature extractors in this file do.
        if len(foodList) > 0:
            features['distanceToFood'] = min(
                self.getMazeDistance(myPos, f) for f in foodList)
        features['successorScore'] = self.getScore(successor)
        features['invaderDistance'] = 0

    # Penalize bunching up with our teammate (inverse distance).
    if successor.isOnBlueTeam(self.index):
        team = successor.getBlueTeamIndices()
    else:
        team = successor.getRedTeamIndices()

    teammate = -1
    for num in team:
        if num != self.index:
            teammate = num

    otherPos = successor.getAgentState(teammate).getPosition()
    teamDist = self.getMazeDistance(myPos, otherPos)
    if teamDist == 0:
        features['teammateDist'] = 10
    else:
        features['teammateDist'] = 1 / teamDist

    return features
def getFeatures(self, gameState, action):
    """
    Defensive features with cross-call state: tracks the last seen
    attacker via the module globals `test` and `assumedAttacker`, and
    steers toward a border tile from `midpointTiles` when no attacker
    is currently visible.

    NOTE(review): relies on module globals `test`, `assumedAttacker`, and
    `midpointTiles` being defined elsewhere in this module — if `test` is
    never initialized before the first call, the `if test == 1` check
    raises NameError. Confirm their initialization.
    """
    features = counter.Counter()
    successor = self.getSuccessor(gameState, action)
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # Computes whether we're on defense (1) or offense (0).
    features['onDefense'] = 1
    if (myState.isPacman()):
        features['onDefense'] = 0
    else:
        features['isGhost'] = 1

    # Computes distance to invaders we can see.
    enemies = [
        successor.getAgentState(i) for i in self.getOpponents(successor)
    ]
    invaders = [
        a for a in enemies if a.isPacman() and a.getPosition() is not None
    ]
    features['numInvaders'] = len(invaders)
    if (len(invaders) > 0):
        dists = [
            self.getMazeDistance(myPos, a.getPosition()) for a in invaders
        ]
        features['invaderDistance'] = min(dists)

        # Avoid suicidal close contact while we are scared.
        if min(dists) <= 1 and successor.getAgentState(
                self.index).isScared():
            features['suicide'] = 1

    # Classify opponents into attackers (Pacman) and defenders (ghosts),
    # remembering the most recent attacker index in module globals.
    enemies = self.getOpponents(gameState)
    defenders = []
    attackers = []
    d = 0
    for enemy in enemies:
        if gameState.getAgentState(enemy).isPacman():
            attackers.append(gameState.getAgentState(enemy))
            global assumedAttacker
            assumedAttacker = enemy
            global test
            test = 1
        else:
            defenders.append(gameState.getAgentState(enemy))
            d = enemy

    # Make defender not wait right on border:
    # when no attacker is visible, shadow the remembered (or last seen
    # defender's) row at the matching border tile.
    if test == 1 and len(attackers) == 0:
        attackerState = successor.getAgentState(assumedAttacker)
        attPos = attackerState.getPosition()
        targetDest = midpointTiles[int(attPos[1])]
        features['chaser'] = self.getMazeDistance(targetDest, myPos)
    if len(attackers) == 0 and test == 0:
        attackerState = successor.getAgentState(d)
        attPos = attackerState.getPosition()
        targetDest = midpointTiles[int(attPos[1])]
        features['chaser'] = self.getMazeDistance(targetDest, myPos)

    return features
def getPolicy(self, state):
    """
    Return the action with the highest Q-value in `state`,
    or None when the state is terminal.
    """
    if self.mdp.isTerminal(state):
        return None

    qValues = counter.Counter()
    for action in self.mdp.getPossibleActions(state):
        qValues[action] = self.getQValue(state, action)

    return qValues.argMax()
def displayValues(self, agent, currentState=None, message='Agent Values'):
    """
    Render the agent's state values and greedy policy on the gridworld,
    then pause briefly (scaled by the display speed).
    """
    values = counter.Counter()
    policy = {}
    for state in self.gridworld.getStates():
        values[state] = agent.getValue(state)
        policy[state] = agent.getPolicy(state)

    drawValues(self.gridworld, values, policy, currentState, message)
    utils.sleep(0.05 / self.speed)
def getFeatures(self, gameState, action):
    """
    Defensive features: defense flag, nearest visible ghost distance,
    invader count and distance, plus stop/reverse move penalties.
    """
    features = counter.Counter()
    successor = self.getSuccessor(gameState, action)
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # 1 while we are a ghost on our side; 0 once we cross over as Pacman.
    features['onDefense'] = 0 if myState.isPacman() else 1

    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]

    # Nearest visible ghost (before it starts invading).
    ghosts = [a for a in enemies if not a.isPacman() and a.getPosition() is not None]
    if ghosts:
        features['ghostDist'] = min(
            self.getMazeDistance(myPos, g.getPosition()) for g in ghosts)

    # Visible invaders on our side.
    invaders = [a for a in enemies if a.isPacman() and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)
    if invaders:
        features['invaderDistance'] = min(
            self.getMazeDistance(myPos, a.getPosition()) for a in invaders)

    # (A dead closest-invader-position block was removed here; it computed
    # values that were never used.)

    if action == Directions.STOP:
        features['stop'] = 1

    rev = Directions.REVERSE[gameState.getAgentState(self.index).getDirection()]
    if action == rev:
        features['reverse'] = 1

    return features
def __init__(self, index, mdp, discountRate=0.9, iters=100, **kwargs):
    """
    Abstract value-iteration agent constructor: stores the MDP and
    hyperparameters, then raises — subclasses must implement the actual
    value computation.

    Args:
        index: agent index, forwarded to the base agent.
        mdp: the MDP to solve.
        discountRate: discount factor gamma.
        iters: number of value-iteration sweeps.

    Raises:
        NotImplementedError: always; value computation is not implemented here.
    """
    super().__init__(index)

    self.mdp = mdp
    self.discountRate = discountRate
    self.iters = iters
    self.values = counter.Counter()  # A Counter is a dict with default 0

    # Compute the values here.
    raise NotImplementedError()
def displayNullValues(self, currentState=None, message=''):
    """
    Render the gridworld with no value coloring, then pause briefly.

    Args:
        currentState: the state to highlight, if any.
        message: caption text passed through to the renderer.
    """
    # FIX: the original passed a hard-coded '' here, silently discarding
    # any caller-supplied message. (It also built an all-zero `values`
    # Counter that was never used — the drawValues call it fed is
    # commented out below — so that dead work is removed.)
    drawNullValues(self.gridworld, currentState, message)
    # drawValues(self.gridworld, values, policy, currentState, message)

    utils.sleep(0.05 / self.speed)
def displayQValues(self, agent, currentState=None, message='Agent Q-Values'):
    """
    Render the agent's Q-values for every (state, action) pair on the
    gridworld, then pause briefly (scaled by the display speed).
    """
    qValues = counter.Counter()
    for state in self.gridworld.getStates():
        for action in self.gridworld.getPossibleActions(state):
            qValues[(state, action)] = agent.getQValue(state, action)

    drawQValues(self.gridworld, qValues, currentState, message)
    utils.sleep(0.05 / self.speed)
def getPolicy(self, state):
    """
    One-step lookahead policy: the legal action whose Q-value is highest.
    Returns None for terminal states.
    """
    if self.mdp.isTerminal(state):
        return None

    # Collect Q-values into a Counter so argMax handles tie-breaking
    # consistently (first action encountered wins).
    scores = counter.Counter()
    for candidate in self.mdp.getPossibleActions(state):
        scores[candidate] = self.getQValue(state, candidate)
    return scores.argMax()
def getFeatures(self, gameState, action):
    """
    Defensive features: patrol a random non-wall tile on the middle column
    when no invaders are visible; otherwise chase the nearest invader.
    Also penalizes stopping and reversing.
    """
    features = counter.Counter()
    successor = self.getSuccessor(gameState, action)
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # Pick a random non-wall tile on the board's middle column as a
    # patrol target (re-roll the row until it is not a wall).
    borders = successor.getWalls()
    midWidth = math.floor(borders.getWidth() / 2)
    yValue = random.randint(0, borders.getHeight() - 1)
    midPoint = (midWidth, yValue)
    while successor.hasWall(midWidth, yValue):
        yValue = random.randint(0, borders.getHeight() - 1)
        midPoint = (midWidth, yValue)

    # 1 while defending as a ghost, 0 once we cross over as Pacman.
    features['onDefense'] = 0 if myState.isPacman() else 1

    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if a.isPacman() and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)

    if len(invaders) == 0:
        # Nobody to chase: head for the patrol point.
        features['distanceToMiddle'] = self.getMazeDistance(myPos, midPoint)
    else:
        features['distanceToMiddle'] = 0
        features['invaderDistance'] = min(
            self.getMazeDistance(myPos, a.getPosition()) for a in invaders)

    if action == Directions.STOP:
        features['stop'] = 1

    rev = Directions.REVERSE[gameState.getAgentState(self.index).getDirection()]
    if action == rev:
        features['reverse'] = 1

    return features
def drawQValues(gridworld, qValues, currentState=None, message='State-Action Q-Values'):
    """
    Draw every grid square colored/labelled by its Q-values.

    Walls are drawn blank, exit squares show a single value, and ordinary
    squares show one value per legal action. A caption is drawn below the grid.

    Args:
        gridworld: the gridworld whose grid and actions are rendered.
        qValues: mapping from (state, action) to Q-value.
        currentState: the state to highlight, if any.
        message: caption text drawn under the grid.
    """
    grid = gridworld.grid
    blank()

    # Flatten all (state, action) pairs to find the global min/max Q-value
    # for color scaling. The trailing [0.0] keeps min/max defined even when
    # there are no (state, action) pairs at all.
    stateCrossActions = [[(state, action) for action in gridworld.getPossibleActions(state)]
            for state in gridworld.getStates()]
    qStates = functools.reduce(lambda x, y: x + y, stateCrossActions, [])
    qValueList = [qValues[(state, action)] for state, action in qStates] + [0.0]
    minValue = min(qValueList)
    maxValue = max(qValueList)

    for x in range(grid.width):
        for y in range(grid.height):
            state = (x, y)
            gridType = grid[x][y]
            # Exit squares hold a non-string payload (the exit reward),
            # so str(gridType) != gridType identifies them.
            isExit = (str(gridType) != gridType)
            isCurrent = (currentState == state)

            actions = gridworld.getPossibleActions(state)
            if (actions is None or len(actions) == 0):
                actions = [None]

            # Per-action Q-values and their formatted labels for this square.
            q = counter.Counter()
            valStrings = {}
            for action in actions:
                v = qValues[(state, action)]
                q[action] += v
                valStrings[action] = '%.2f' % v

            if gridType == '#':
                # Wall square: draw blank.
                drawSquare(x, y, 0, 0, 0, None, None, True, False, isCurrent)
            elif isExit:
                # Exit square: a single 'exit' action/value.
                action = 'exit'
                value = q[action]
                valString = '%.2f' % value
                drawSquare(x, y, value, minValue, maxValue, valString, action, False,
                        isExit, isCurrent)
            else:
                # Ordinary square: one value wedge per legal action.
                drawSquareQ(x, y, q, minValue, maxValue, valStrings, actions, isCurrent)

    # Caption centered below the grid.
    pos = to_screen(((grid.width - 1.0) / 2.0, -0.8))
    utils.text(pos, TEXT_COLOR, message, "Courier", -32, "bold", "c")
def getFeatures(self, gameState, action):
    """
    Basic offensive features: the successor's score and the maze distance
    to the closest remaining food.
    """
    features = counter.Counter()
    successor = self.getSuccessor(gameState, action)
    features['successorScore'] = self.getScore(successor)

    # Compute distance to the nearest food.
    foodList = self.getFood(successor).asList()
    if foodList:  # Should always be non-empty, but better safe than sorry.
        myPos = successor.getAgentState(self.index).getPosition()
        features['distanceToFood'] = min(
            self.getMazeDistance(myPos, food) for food in foodList)

    return features
def __init__(self, index, mdp, discountRate=0.9, iters=100, **kwargs):
    """
    Value-iteration agent: run `iters` sweeps, staging each sweep's
    updates in a working copy before committing them to self.values.

    Args:
        index: agent index, forwarded to the base agent.
        mdp: the MDP to solve.
        discountRate: discount factor gamma.
        iters: number of value-iteration sweeps.
    """
    # FIX: **kwargs was dropped; forward it like the sibling constructors do.
    super().__init__(index, **kwargs)

    self.mdp = mdp
    self.discountRate = discountRate
    self.iters = iters
    self.values = counter.Counter()  # A Counter is a dict with default 0.

    copyOfValues = self.values.copy()

    # The state list is loop-invariant; fetch it once instead of per sweep.
    states = self.mdp.getStates()
    for _ in range(self.iters):
        # Stage this sweep's updates; getQValue still reads self.values,
        # so the sweep is computed from the previous iteration's values.
        for state in states:
            if not self.mdp.isTerminal(state):
                move = self.getPolicy(state)
                copyOfValues[state] = self.getQValue(state, move)

        # Commit the staged values (terminal states stay at the default 0).
        for state in states:
            self.values[state] = copyOfValues[state]
def getFeatures(self, agent, gameState, action):
    """
    Offensive features with an attack mode: score, distance to nearest
    food, adjacent-ghost count, safe-to-eat flag, and — when ghosts are
    close — the distance to the nearest power capsule.

    NOTE(review): evaluates the CURRENT gameState; `agent` and `action`
    are unused here.
    """
    features = counter.Counter()
    successor = gameState
    features['successorScore'] = self.getScore(successor)

    food = self.getFood(successor)
    foodList = food.asList()
    if len(foodList) > 0:
        myPos = successor.getAgentState(self.index).getPosition()
        features['distanceToFood'] = min(
            self.getMazeDistance(myPos, f) for f in foodList)

    myPos = gameState.getAgentState(self.index).getPosition()

    # Count opponents within maze distance 2 of us.
    closeGhosts = 0
    for opponent in self.getOpponents(gameState):
        opponentPos = gameState.getAgentState(opponent).getPosition()
        # FIX: unobserved opponents report a None position; skip them
        # instead of crashing inside getMazeDistance.
        if opponentPos is not None and self.getMazeDistance(myPos, opponentPos) < 2:
            closeGhosts += 1
    features['closeGhosts'] = closeGhosts

    # Safe to eat the food on our tile only when no ghost is adjacent.
    if not closeGhosts and food[int(myPos[0])][int(myPos[1])]:
        features['eatFood'] = 1.0

    # ATTACK FEATURE: when ghosts are close, head for a power capsule
    # instead of food.
    capsules = self.getCapsules(gameState)
    powerPills = [self.getMazeDistance(myPos, pill) for pill in capsules]
    eatPill = min(powerPills) if powerPills else 0

    if closeGhosts > 0:
        features['eatPill'] = eatPill
        features['eatFood'] = 0
    else:
        features['eatPill'] = 0

    return features
def getDistribution(self, state):
    """
    Directional ghost policy: mostly move toward Pacman (or away when
    scared), with the remaining probability spread uniformly over all
    legal actions.
    """
    # Read variables from state.
    ghostState = state.getGhostState(self.index)
    legalActions = state.getLegalActions(self.index)
    pos = state.getGhostPosition(self.index)
    isScared = ghostState.isScared()

    # Scared ghosts move at half speed.
    speed = 0.5 if isScared else 1

    vectors = [Actions.directionToVector(a, speed) for a in legalActions]
    newPositions = [(pos[0] + dx, pos[1] + dy) for (dx, dy) in vectors]
    pacmanPosition = state.getPacmanPosition()

    # Score each action by the resulting manhattan distance to Pacman.
    distancesToPacman = [
        distance.manhattan(p, pacmanPosition) for p in newPositions
    ]

    if isScared:
        # Flee: the farther from Pacman, the better.
        bestScore = max(distancesToPacman)
        bestProb = self.prob_scaredFlee
    else:
        # Attack: the closer to Pacman, the better.
        bestScore = min(distancesToPacman)
        bestProb = self.prob_attack

    bestActions = [
        a for (a, d) in zip(legalActions, distancesToPacman) if d == bestScore
    ]

    # Construct the distribution: bestProb mass split among best actions,
    # the rest split uniformly among all legal actions.
    dist = counter.Counter()
    for a in bestActions:
        dist[a] = float(bestProb) / len(bestActions)
    for a in legalActions:
        dist[a] += float(1 - bestProb) / len(legalActions)
    dist.normalize()
    return dist
def getFeatures(self, state, action):
    """
    Defensive features: defense flag, invader count/distance, a
    'stay near the food front line' anchor when no invaders are visible,
    and stop/reverse move penalties.
    """
    features = counter.Counter()
    successor = self.getSuccessor(state, action)
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # Computes whether we're on defense (1) or offense (0).
    features['onDefense'] = 1
    if myState.isPacman():
        features['onDefense'] = 0

    # Computes distance to invaders we can see.
    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if a.isPacman() and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)

    if len(invaders) > 0:
        features['invaderDistance'] = min(
            self.getMazeDistance(myPos, a.getPosition()) for a in invaders)
    else:
        # No visible invaders: hover near our food front line.
        if state.isOnBlueTeam(self.index):
            foods = state.getBlueFood().asList()
            anchor = foods[0] if foods else None
        else:
            foods = state.getRedFood().asList()
            anchor = foods[-1] if foods else None

        # FIX: indexing foods[0] / foods[-1] raised IndexError once the
        # food list was empty (late game); guard it.
        if anchor is not None:
            features['stayFront'] = self.getMazeDistance(myPos, anchor)

    if action == Directions.STOP:
        features['stop'] = 1

    rev = Directions.REVERSE[state.getAgentState(self.index).getDirection()]
    if action == rev:
        features['reverse'] = 1

    return features
def getFeatures(self, state):
    """
    State-only offensive features: score, adjacent-ghost count,
    safe-to-eat flag, closest food distance, and an attack-mode capsule
    distance when ghosts are close.
    """
    food = self.getFood(state)
    capsules = self.getCapsules(state)

    features = counter.Counter()
    features['score'] = self.getScore(state)

    myPos = state.getAgentState(self.index).getPosition()

    # Count opponents within maze distance 2 of us.
    closeGhosts = 0
    for opponent in self.getOpponents(state):
        opponentPos = state.getAgentState(opponent).getPosition()
        # FIX: unobserved opponents report a None position; skip them
        # instead of crashing inside getMazeDistance.
        if opponentPos is not None and self.getMazeDistance(myPos, opponentPos) < 2:
            closeGhosts += 1
    features['#-of-ghosts-1-step-away'] = closeGhosts

    # Safe to eat the food on our tile only when no ghost is adjacent.
    if not closeGhosts and food[int(myPos[0])][int(myPos[1])]:
        features['eats-food'] = 10.0

    # Closest remaining food by maze distance.
    foodDist = []
    for x in range(food.getWidth()):
        for y in range(food.getHeight()):
            if food[x][y]:
                foodDist.append(self.getMazeDistance(myPos, (x, y)))
    # FIX: min() on an empty list raised ValueError once all food was eaten.
    if foodDist:
        features['closest-food'] = min(foodDist)

    # ATTACK FEATURE: when ghosts are close, head for a power capsule.
    powerPills = [self.getMazeDistance(myPos, pill) for pill in capsules]
    eatPill = min(powerPills) if powerPills else 0

    if closeGhosts > 0:
        features['eat-Pill'] = eatPill
        features['eats-food'] = 0
    else:
        features['eat-Pill'] = 0

    return features
def getFeatures(self, gameState, action):
    """
    Defensive features: defense flag, distance to the nearest threat
    (invaders preferred over other visible enemies), and a reverse-move
    penalty.
    """
    features = counter.Counter()
    successor = self.getSuccessor(gameState, action)
    agentState = successor.getAgentState(self.index)
    agentPos = agentState.getPosition()

    # Computes whether we're on defense (1) or offense (0).
    features['onDefense'] = 1
    if agentState.isPacman():
        features['onDefense'] = 0

    opponents = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    enemies = [o for o in opponents if o.getPosition() is not None]
    invaders = [e for e in enemies if e.isPacman()]

    # FIX: the original checked `if enemies: ... elif invaders: ...`, but
    # invaders is a subset of enemies, so the invader branch was dead code
    # and the distance was always taken over all visible enemies. Check
    # invaders first so actual invaders take priority.
    if len(invaders) > 0:
        features['invaderDistance'] = min(
            self.getMazeDistance(agentPos, a.getPosition()) for a in invaders)
    elif len(enemies) > 0:
        features['invaderDistance'] = min(
            self.getMazeDistance(agentPos, a.getPosition()) for a in enemies)

    rev = Directions.REVERSE[gameState.getAgentState(self.index).getDirection()]
    if action == rev:
        features['reverse'] = 1

    return features
def getFeatures(self, gameState):
    """
    State-only offensive features: current score, distance to the
    nearest food target, and distance to the nearest capsule.
    """
    features = counter.Counter()
    features['successorScore'] = self.getScore(gameState)

    # FIXME: extract more features here.
    myPos = gameState.getAgentState(self.index).getPosition()

    # Compute distance to the nearest food.
    foodList = self.getFood(gameState).asList()
    if foodList:
        features['DistanceToFoodTarget'] = min(
            self.getMazeDistance(myPos, food) for food in foodList)

    # Compute distance to the nearest capsule (0 when none remain).
    capsules = self.getCapsules(gameState)
    if capsules:
        features['capsuleDistance'] = min(
            self.getMazeDistance(myPos, capsule) for capsule in capsules)
    else:
        features['capsuleDistance'] = 0

    return features
def normalize(vectorOrCounter):
    """
    Normalize a vector or counter by dividing each value by the sum of all values.

    Counters with a zero total and vectors with a zero sum are returned
    unchanged; otherwise a new normalized counter (or list) is returned.
    """
    normalizedCounter = counter.Counter()
    if type(vectorOrCounter) == type(normalizedCounter):
        counterContainer = vectorOrCounter
        total = float(counterContainer.totalCount())
        if total == 0:
            return counterContainer

        # BUG FIX: the original iterated `counter.keys()` and indexed
        # `counter[key]` — `counter` is the imported MODULE, not the
        # counter instance — which raised on any non-empty counter.
        for key in list(counterContainer.keys()):
            value = counterContainer[key]
            normalizedCounter[key] = value / total
        return normalizedCounter
    else:
        vector = vectorOrCounter
        s = float(sum(vector))
        if s == 0:
            return vector
        return [el / s for el in vector]
def getDistribution(self, state):
    """Return a uniform distribution over this agent's legal actions."""
    dist = counter.Counter()
    legal = state.getLegalActions(self.index)
    for action in legal:
        dist[action] = 1.0

    dist.normalize()
    return dist
def evaluationFunction(self, agent, gameState, action):
    """
    Linear evaluation: the dot product of the feature weights and the
    extracted feature vector (Counter * Counter).
    """
    featureVector = self.getFeatures(agent, gameState, action)
    weightVector = counter.Counter(self.getWeights(gameState))
    return weightVector * featureVector
def getFeatures(self, agent, gameState, action):
    """
    Role-dependent features: offensive features (food, ghosts, capsules)
    when self.offense is set, otherwise defensive features (invaders,
    stop/reverse penalties).

    NOTE(review): both branches evaluate the CURRENT gameState — the
    getSuccessor calls are commented out, so `agent` and `action` only
    affect the stop/reverse checks.
    """
    features = counter.Counter()

    if self.offense:
        # --- Offense features ---
        # successor = self.getSuccessor(agent, gameState, action)
        successor = gameState
        features['successorScore'] = self.getScore(successor)

        food = self.getFood(successor)
        foodList = food.asList()
        if len(foodList) > 0:
            myPos = successor.getAgentState(self.index).getPosition()
            features['distanceToFood'] = min(
                self.getMazeDistance(myPos, f) for f in foodList)

        myPos = gameState.getAgentState(self.index).getPosition()

        # Count opponents within maze distance 2 of us.
        closeGhosts = 0
        for opponent in self.getOpponents(gameState):
            opponentPos = gameState.getAgentState(opponent).getPosition()
            # FIX: unobserved opponents report a None position; skip them
            # instead of crashing inside getMazeDistance.
            if opponentPos is not None and self.getMazeDistance(myPos, opponentPos) < 2:
                closeGhosts += 1
        features['closeGhosts'] = closeGhosts

        # Safe to eat the food on our tile only when no ghost is adjacent.
        if not closeGhosts and food[int(myPos[0])][int(myPos[1])]:
            features['eatFood'] = 1.0

        # ATTACK FEATURE: when ghosts are close, head for a power capsule.
        capsules = self.getCapsules(gameState)
        powerPills = [self.getMazeDistance(myPos, pill) for pill in capsules]
        eatPill = min(powerPills) if powerPills else 0

        if closeGhosts > 0:
            features['eatPill'] = eatPill
            features['eatFood'] = 0
        else:
            features['eatPill'] = 0

        return features

    # --- Defense features ---
    # successor = self.getSuccessor(agent, gameState, action)
    successor = gameState
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    features['onDefense'] = 1
    if myState.isPacman():
        features['onDefense'] = 0

    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if a.isPacman() and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)
    if len(invaders) > 0:
        features['invaderDistance'] = min(
            self.getMazeDistance(myPos, a.getPosition()) for a in invaders)

    if action == Directions.STOP:
        features['stop'] = 1

    rev = Directions.REVERSE[gameState.getAgentState(self.index).getDirection()]
    if action == rev:
        features['reverse'] = 1

    return features