def plot(self, x, y, weights=None, title='Linear Regression'):
    """
    Plot the input values x with their corresponding output values y
    (either true or predicted). Also plot the linear regression line
    if weights are given, assuming h_w(x) = weights[0]*x + weights[1].

    This will draw on the existing pacman window (clearing it first)
    or create a new one if no window exists.

    x: array or list of N scalar values.
    y: array or list of N scalar values.
    weights: array or list of 2 values (if there is just one value,
        the bias weight is assumed to be zero). If None, no line is
        drawn. Default: None
    """
    if np.array(x).size == 0:
        return

    if isinstance(x[0], np.ndarray):
        # Scrape the first element of each data point
        x = [data[0] for data in x]

    xmin = int(math.floor(min(x)))
    ymin = int(math.floor(min(y)))
    xmax = int(math.ceil(max(x)))
    ymax = int(math.ceil(max(y)))
    width = xmax - xmin + 3
    height = ymax - ymin + 3

    self.initPlot(xmin, ymin, width, height)

    gameState = self.blankGameState.deepCopy()
    gameState.agentStates = []

    # Put pacman in bottom left
    if self.addPacmanToLineStart is True:
        gameState.agentStates.append(
            AgentState(Configuration((1, 1), Directions.STOP), True))

    # Add a ghost at each point
    for (px, py) in zip(x, y):
        point = (px + self.xShift, py + self.yShift)
        gameState.agentStates.append(
            AgentState(Configuration(point, Directions.STOP), False))

    # self.initialize(gameState)
    graphicsUtils.clear_screen()
    self.infoPane = InfoPane(gameState.layout, self.gridSize)
    self.drawStaticObjects(gameState)
    self.drawAgentObjects(gameState)
    graphicsUtils.changeText(self.infoPane.scoreText, title)
    graphicsUtils.refresh()
    graphicsUtils.sleep(1)

    if weights is not None:
        self.setWeights(weights)
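# A minimal usage sketch for the regression plot above, assuming a
# PacmanPlot-style display object named `display` (a hypothetical name,
# not defined in this file) that exposes this plot() method:
#
#   xs = [1.0, 2.0, 3.0, 4.0]
#   ys = [2.1, 3.9, 6.2, 8.1]
#   display.plot(xs, ys, weights=[2.0, 0.0], title='h_w(x) = 2x')
#
# Each (x, y) pair is drawn as a ghost, and the line
# h_w(x) = weights[0]*x + weights[1] is rendered via setWeights().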
def fixState(gameState, index, enemyIndices):
    #self.gameState = gameState
    pos = gameState.getAgentPosition(index)
    positions = []
    for playerIndex in range(gameState.getNumAgents()):
        if playerIndex in enemyIndices:
            if gameState.getAgentPosition(playerIndex):
                positions.append(gameState.getAgentPosition(playerIndex))
            else:
                # Only a noisy distance reading is available; sample a
                # position consistent with the observed Manhattan distance
                dist = gameState.getAgentDistances()[playerIndex]
                enemyPositions = inverseManhattanDistance(pos, dist, gameState)
                # can put non-random choice here
                positions.append(random.choice(enemyPositions))
        else:
            positions.append(gameState.getAgentPosition(playerIndex))

    for i in range(gameState.getNumAgents()):
        if gameState.data.agentStates[i].configuration is None:
            # An agent is a pacman when it stands on the opposing team's side
            isPacman = ((i % 2 == 1 and gameState.isRed(positions[i]))
                        or (i % 2 == 0 and not gameState.isRed(positions[i])))
            gameState.data.agentStates[i] = AgentState(
                Configuration(positions[i], 'Stop'), isPacman)
    return gameState
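# inverseManhattanDistance is defined elsewhere in this codebase; the
# sketch below is an assumption about its behavior, not the actual
# implementation: enumerate every non-wall cell whose Manhattan distance
# from `pos` equals the noisy reading `dist`.
#
# def inverseManhattanDistance(pos, dist, gameState):
#     walls = gameState.getWalls()
#     candidates = []
#     for x in range(walls.width):
#         for y in range(walls.height):
#             if walls[x][y]:
#                 continue
#             if abs(x - pos[0]) + abs(y - pos[1]) == dist:
#                 candidates.append((x, y))
#     return candidates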
def placeGhost(ghostState, agentIndex):
    pos = (agentIndex * 2 - 1, 1)
    direction = Directions.STOP
    ghostState.configuration = Configuration(pos, direction)
def dumpFoodFromDeath(state, agentState, agentIndex):
    if not DUMP_FOOD_ON_DEATH:
        # this feature is not turned on
        return

    if not agentState.isPacman:
        raise Exception(
            "something is seriously wrong, this agent isn't a pacman!")

    # nothing to dump if the agent isn't carrying any food
    if agentState.numCarrying == 0:
        return

    # first, score changes!
    # we hack around that ugly bug by just determining if it's red based on
    # the first position to die...
    dummyConfig = Configuration(agentState.getPosition(), 'North')
    isRed = state.isRed(dummyConfig)

    # the score increases if red eats dots, so if we are refunding points,
    # the direction should be -1 if the red agent died, which means he died
    # on the blue side
    scoreDirection = (-1)**(int(isRed) + 1)
    #state.data.scoreChange += scoreDirection * agentState.numCarrying

    def onRightSide(state, x, y):
        dummyConfig = Configuration((x, y), 'North')
        return state.isRed(dummyConfig) == isRed

    # we have food to dump
    # -- expand out in BFS. Check:
    #   - that it's within the limits
    #   - that it's not a wall
    #   - that no other agents are there
    #   - that no power pellets are there
    #   - that it's on the right side of the grid
    def allGood(state, x, y):
        width, height = state.data.layout.width, state.data.layout.height
        food, walls = state.data.food, state.data.layout.walls

        # bounds check
        if x >= width or y >= height or x <= 0 or y <= 0:
            return False
        if walls[x][y]:
            return False
        if food[x][y]:
            return False
        # dots need to be on the side where this agent will be a pacman :P
        if not onRightSide(state, x, y):
            return False
        if (x, y) in state.data.capsules:
            return False

        # loop through agents
        agentPoses = [
            state.getAgentPosition(i) for i in range(state.getNumAgents())
        ]
        if (x, y) in agentPoses:
            return False

        return True

    numToDump = agentState.numCarrying
    state.data.food = state.data.food.copy()
    foodAdded = []

    def genSuccessors(x, y):
        DX = [-1, 0, 1]
        DY = [-1, 0, 1]
        return [(x + dx, y + dy) for dx in DX for dy in DY]

    # BFS graph search
    positionQueue = [agentState.getPosition()]
    seen = set()
    while numToDump > 0:
        if not len(positionQueue):
            raise Exception('Exhausted BFS! uh oh')
        # pop one off, graph check
        popped = positionQueue.pop(0)
        if popped in seen:
            continue
        seen.add(popped)

        x, y = int(popped[0]), int(popped[1])
        if allGood(state, x, y):
            state.data.food[x][y] = True
            foodAdded.append((x, y))
            numToDump -= 1

        # generate successors
        positionQueue = positionQueue + genSuccessors(x, y)

    state.data._foodAdded = foodAdded
    # now our agentState is no longer carrying food
    agentState.numCarrying = 0
def plot(self, x, y, weights=None, title='Linear Classification'):
    """
    Plot the 2D input points x[i], colored based on their corresponding
    labels (either true or predicted). Also plot the linear separator
    line if weights are given.

    This will draw on the existing pacman window (clearing it first)
    or create a new one if no window exists.

    x: list of 2D points, where each 2D point in the list is a
        2-element numpy.ndarray
    y: list of N labels, one for each point in x. Labels can be of any
        type that can be converted to a string.
    weights: array of 3 values; the first two are the weights on the
        data and the third is the bias weight term. If there are only
        2 values in weights, the bias term is assumed to be zero.
        If None, no line is drawn. Default: None
    """
    if np.array(x).size == 0:
        return

    # Process data, sorting by label
    possibleLabels = list(set(y))
    sortedX1 = {}
    sortedX2 = {}
    for label in possibleLabels:
        sortedX1[label] = []
        sortedX2[label] = []
    for i in range(len(x)):
        sortedX1[y[i]].append(x[i][0])
        sortedX2[y[i]].append(x[i][1])

    x1min = float("inf")
    x1max = float("-inf")
    for x1Values in sortedX1.values():
        x1min = min(min(x1Values), x1min)
        x1max = max(max(x1Values), x1max)

    x2min = float("inf")
    x2max = float("-inf")
    for x2Values in sortedX2.values():
        x2min = min(min(x2Values), x2min)
        x2max = max(max(x2Values), x2max)

    x1min = int(math.floor(x1min))
    x1max = int(math.ceil(x1max))
    x2min = int(math.floor(x2min))
    x2max = int(math.ceil(x2max))
    width = x1max - x1min + 3
    height = x2max - x2min + 3

    self.initPlot(x1min, x2min, width, height)

    gameState = self.blankGameState.deepCopy()
    gameState.agentStates = []

    # Add a ghost/pacman at each point
    for (labelIndex, label) in enumerate(possibleLabels):
        pointsX1 = sortedX1[label]
        pointsX2 = sortedX2[label]

        for (px, py) in zip(pointsX1, pointsX2):
            point = (px + self.xShift, py + self.yShift)
            agent = AgentState(Configuration(point, Directions.STOP), False)
            agent.isPacman = (labelIndex == 0)
            if labelIndex == 2:
                agent.scaredTimer = 1
            gameState.agentStates.append(agent)

    # self.initialize(gameState)
    graphicsUtils.clear_screen()
    self.infoPane = InfoPane(gameState.layout, self.gridSize)
    self.drawStaticObjects(gameState)
    self.drawAgentObjects(gameState)
    graphicsUtils.changeText(self.infoPane.scoreText, title)
    graphicsUtils.refresh()

    if weights is not None:
        self.setWeights(weights)
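# A minimal usage sketch for the classification plot above, again
# assuming a hypothetical display object named `display`:
#
#   points = [np.array([1.0, 2.0]), np.array([3.0, 1.0]),
#             np.array([2.0, 4.0]), np.array([4.0, 3.0])]
#   labels = [0, 0, 1, 1]
#   display.plot(points, labels, weights=[1.0, -1.0, 0.5],
#                title='Perceptron decision boundary')
#
# The first label is drawn as pacman and later labels as ghosts, and the
# separator weights[0]*x1 + weights[1]*x2 + weights[2] = 0 is rendered
# via setWeights().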
def plot(self, x, y, weights=None, title='Logistic Regression'):
    """
    Plot the 1D input points x[i], colored based on their corresponding
    labels (either true or predicted). Also plot the logistic function
    fit if weights are given.

    This will draw on the existing pacman window (clearing it first)
    or create a new one if no window exists.

    x: list of 1D points, where each 1D point in the list is a
        1-element numpy.ndarray
    y: list of N labels, one for each point in x. Labels can be of any
        type that can be converted to a string.
    weights: array of 2 values; the first is the weight on the data and
        the second is the bias weight term. If there is only 1 value in
        weights, the bias term is assumed to be zero. If None, no line
        is drawn. Default: None
    """
    if np.array(x).size == 0:
        return

    # Process data, sorting by label
    possibleLabels = list(set(y))
    sortedX = {}
    for label in possibleLabels:
        sortedX[label] = []
    for i in range(len(x)):
        sortedX[y[i]].append(x[i])

    xmin = int(math.floor(min(x)))
    xmax = int(math.ceil(max(x)))
    # Logistic outputs lie in [0, 1]; pad one unit below
    ymin = -1
    ymax = 1
    width = xmax - xmin + 3
    height = ymax - ymin + 3

    self.initPlot(xmin, ymin, width, height)

    gameState = self.blankGameState.deepCopy()
    gameState.agentStates = []

    # Put pacman in bottom left
    if self.addPacmanToLineStart is True:
        gameState.agentStates.append(
            AgentState(Configuration((1, 1), Directions.STOP), True))

    # Add a ghost at each point, stacking each label in its own row
    for (py, label) in enumerate(possibleLabels):
        pointsX = sortedX[label]
        for px in pointsX:
            point = (px + self.xShift, py + self.yShift)
            agent = AgentState(Configuration(point, Directions.STOP), False)
            # first label drawn as pacman, later labels as ghosts
            agent.isPacman = (py == 0)
            gameState.agentStates.append(agent)

    # self.initialize(gameState)
    graphicsUtils.clear_screen()
    self.infoPane = InfoPane(gameState.layout, self.gridSize)
    self.drawStaticObjects(gameState)
    self.drawAgentObjects(gameState)
    graphicsUtils.changeText(self.infoPane.scoreText, title)
    graphicsUtils.refresh()

    if weights is not None:
        self.setWeights(weights)
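# A minimal usage sketch for the logistic regression plot above,
# assuming the same hypothetical `display` object:
#
#   points = [np.array([-2.0]), np.array([-1.0]),
#             np.array([1.0]), np.array([2.0])]
#   labels = [0, 0, 1, 1]
#   display.plot(points, labels, weights=[1.5, 0.0], title='Sigmoid fit')
#
# Points are stacked in rows by label, and the logistic fit for
# weights[0]*x + weights[1] is rendered via setWeights().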
def chooseAction(self, gameState):
    global distancebeliefs, legalMoves, distanceToOpponentFood
    start = time.time()

    # Update last moves
    self.lastMoves.insert(0, gameState.getAgentPosition(self.index))
    if len(self.lastMoves) > self.lastMoveCount:
        self.lastMoves.pop()

    # Init starting food
    if not self.firstTurnComplete:
        self.firstTurnComplete = True
        self.startingFood = len(
            self.getFoodYouAreDefending(gameState).asList())
        self.theirStartingFood = len(self.getFood(gameState).asList())
        if legalMoves is None:
            legalMoves = self.getLegalMoves(gameState)

    opponents = self.getOpponents(gameState)

    self.log(1, '%s %d (%s):' % ({True: "Red", False: "Blue"}[self.red],
                                 self.index, self.weightStrategy.getName()))

    # Update opponent observations for Bayesian inference
    previousAgent = (self.index - 1) % gameState.getNumAgents()
    for opponent in opponents:
        if self.firstTurnComplete and opponent == previousAgent:
            self.elapseTime(gameState, opponent)
        self.observation(gameState, opponent)

    if self.debug >= 1:
        # Display belief distributions
        display = []
        for agentIndex in range(gameState.getNumAgents()):
            if agentIndex in distancebeliefs:
                display.append(distancebeliefs[agentIndex])
            else:
                display.append(None)
        self.displayDistributionsOverPositions(display)

    # Compute distance to the nearest food
    foodList = self.getFood(gameState).asList()
    if len(foodList) > 0:  # This should always be True, but better safe than sorry
        minDistance = min([
            self.getMazeDistance(gameState.getAgentPosition(self.index), food)
            for food in foodList
        ])
        distanceToOpponentFood[self] = minDistance

    # Sort agents in ascending order of distance to opponent food,
    # plus a small bias towards the current level of aggressiveness
    agentDist = sorted(
        distanceToOpponentFood.items(),
        key=lambda x: (x[0].weightStrategy.getAggressiveness() * -2 + x[1]))

    # Assign weight strategies, with the most aggressive given to the agent
    # closest to opponent food and the least aggressive to the furthest away
    for (agent, dist), weightStrategy in zip(agentDist, weightStrategies):
        if agent.weightStrategy.getName() != weightStrategy.getName():
            self.log(1, 'agent %d at distance %d now has strategy %s' %
                     (agent.index, dist, weightStrategy.getName()))
            agent.weightStrategy = weightStrategy

    # Pick among the actions with the highest Q(s,a)
    actions = gameState.getLegalActions(self.index)
    random.shuffle(actions)

    # Instead of dealing with belief distributions, make a fake game state
    # which just assigns the most likely positions for the opponents,
    # so we can do standard minimax
    beliefGameState = gameState.deepCopy()
    for agentIndex in range(beliefGameState.getNumAgents()):
        if agentIndex in distancebeliefs:
            pos = distancebeliefs[agentIndex].argMax()
            beliefGameState.getAgentState(agentIndex).configuration = \
                Configuration(pos, Directions.STOP)

    if self.debug >= 3:
        # Print the weights and features which are currently affecting the agent
        self.log(3, 'feature, value, weight, total')
        weights = self.getWeights(beliefGameState, Directions.STOP)
        for (feature, value) in self.getFeatures(beliefGameState,
                                                 Directions.STOP).items():
            weight = 0.0
            if feature in weights:
                weight = weights[feature]
            self.log(3, '%s, %f, %f, %f' %
                     (feature, value, weight, value * weight))

    if self.nonIterative:
        values = [
            self.evaluateMinimax(beliefGameState, a, self.maxPlys, self.index,
                                 float('-inf'), float('inf'), start)
            for a in actions
        ]
    else:
        # Minimax with iterative deepening and alpha-beta pruning
        depth = 0
        while True:
            try:
                # Start minimax for the given ply depth, with alpha=-inf and
                # beta=+inf, including the action start time so we can quit
                # before the timeout
                values = [
                    self.evaluateMinimax(beliefGameState, a, depth, self.index,
                                         float('-inf'), float('inf'), start)
                    for a in actions
                ]
                if self.debug >= 3:
                    # Log the action we would take if we stopped at this level
                    maxValue = max(values)
                    bestActions = [
                        a for a, v in zip(actions, values) if v == maxValue
                    ]
                    action = random.choice(bestActions)
                    self.log(3, 'eval time for agent %d at depth %d: %.4f (action %s)' %
                             (self.index, depth, time.time() - start, action))
                else:
                    self.log(2, 'eval time for agent %d at depth %d: %.4f' %
                             (self.index, depth, time.time() - start))
            except Exception:
                # We timed out; use the previous level's values rather than
                # incomplete data
                self.log(2, 'throttled agent %d at depth %d: %.4f' %
                         (self.index, depth, time.time() - start))
                break

            # We must go deeper
            depth = depth + 2
            if (self.maxPlys > -1) and depth > self.maxPlys:
                # We must not go deeper
                self.log(2, 'max plys reached for agent %d at depth %d: %.4f' %
                         (self.index, depth, time.time() - start))
                break

    # Determine the best possible Q() and all actions which achieve it,
    # and pick one of those at random
    maxValue = max(values)
    actionsValues = list(zip(actions, values))
    bestActions = [a for a, v in actionsValues if v == maxValue]
    action = random.choice(bestActions)

    self.log(1, actionsValues)
    self.log(1, 'agent %d selected action %s with value %s (time %.4f)' %
             (self.index, action, maxValue, time.time() - start))

    return action
def terminalEvaluation(self, gameState, foodLeft):
    try:
        foodLeft1 = self.getFood(gameState).asList()
        terminal = [foodLeft - len(foodLeft1), 'Stop']
        foodNearest = float("inf")
        redIndex = gameState.getRedTeamIndices()
        blueIndex = gameState.getBlueTeamIndices()

        # Count eaten pellets and compute the distance to the nearest pellet
        for a in foodLeft1:
            dist = self.getMazeDistance(a, gameState.getAgentPosition(self.index))
            if dist < foodNearest:
                foodNearest = dist
        if len(foodLeft1) == 0:
            foodNearest = 0

        # position, action
        configuration = Configuration(
            gameState.getAgentPosition(self.index), Directions.STOP)
        agentState = AgentState(configuration, True)
        State = agentState.copy()

        if self.isRed:
            # When on the RED team
            enemyDistance = 0
            distanceFromDefend = 0
            if gameState.getAgentPosition(blueIndex[0]) is not None:
                # Distance to the enemy
                enemyDistance1 = self.getMazeDistance(
                    gameState.getAgentPosition(self.index),
                    gameState.getAgentPosition(blueIndex[0]))
                if gameState.getAgentPosition(blueIndex[0])[0] >= 15 and enemyDistance1 <= 6:
                    enemyDistance -= (7 - enemyDistance1)**self.weight["EnemyBaseOppAgentDist"]
                if gameState.getAgentPosition(blueIndex[0])[0] <= 14 and enemyDistance1 <= 6:
                    enemyDistance += (7 - enemyDistance1) * self.weight["OurBaseOppAgentDist"]
            if gameState.getAgentPosition(blueIndex[1]) is not None:
                # Distance to the enemy
                enemyDistance2 = self.getMazeDistance(
                    gameState.getAgentPosition(self.index),
                    gameState.getAgentPosition(blueIndex[1]))
                if gameState.getAgentPosition(blueIndex[1])[0] >= 15 and enemyDistance2 <= 6:
                    enemyDistance -= (7 - enemyDistance2)**self.weight["EnemyBaseOppAgentDist"]
                if gameState.getAgentPosition(blueIndex[1])[0] <= 14 and enemyDistance2 <= 6:
                    enemyDistance += (7 - enemyDistance2) * self.weight["OurBaseOppAgentDist"]
            if gameState.getScore() > 0:
                distanceFromDefend -= (self.getMazeDistance(
                    gameState.getAgentPosition(self.index), (12, 10)))**2
        else:
            # When on the BLUE team
            enemyDistance = 0
            distanceFromDefend = 0
            if gameState.getAgentPosition(redIndex[0]) is not None:
                # Distance to the enemy
                enemyDistance1 = self.getMazeDistance(
                    gameState.getAgentPosition(self.index),
                    gameState.getAgentPosition(redIndex[0]))
                if gameState.getAgentPosition(redIndex[0])[0] <= 14 and enemyDistance1 <= 6:
                    enemyDistance -= (7 - enemyDistance1)**self.weight["EnemyBaseOppAgentDist"]
                if gameState.getAgentPosition(redIndex[0])[0] >= 15 and enemyDistance1 <= 6:
                    enemyDistance += (7 - enemyDistance1) * self.weight["OurBaseOppAgentDist"]
            if gameState.getAgentPosition(redIndex[1]) is not None:
                # Distance to the enemy
                enemyDistance2 = self.getMazeDistance(
                    gameState.getAgentPosition(self.index),
                    gameState.getAgentPosition(redIndex[1]))
                if gameState.getAgentPosition(redIndex[1])[0] <= 14 and enemyDistance2 <= 6:
                    enemyDistance -= (7 - enemyDistance2)**self.weight["EnemyBaseOppAgentDist"]
                if gameState.getAgentPosition(redIndex[1])[0] >= 15 and enemyDistance2 <= 6:
                    enemyDistance += (7 - enemyDistance2) * self.weight["OurBaseOppAgentDist"]
            if gameState.getScore() < 0:
                distanceFromDefend -= (self.weight["DefensePointDist"] * self.getMazeDistance(
                    gameState.getAgentPosition(self.index), (19, 5)))**2

        terminal[0] = (terminal[0] * self.weight["PalletNum"]
                       + foodNearest * self.weight["NearestPallet"]
                       + distanceFromDefend + enemyDistance)
        return terminal
    except Exception:
        # Fall back to a fixed value if any lookup above fails
        return 10