def _reward(self, gameState, action):
    """Compute the one-step reward for taking *action* in *gameState*.

    The action is simulated with generateSuccessor; the reward is the
    amount of opponent food eaten minus own food eaten across the
    transition, plus 100x CaptureAgent.getScore of the successor.

    Args:
        gameState: current game state (project type).
        action: index into self.actions selecting the move to simulate.

    Returns:
        int/float reward for the simulated transition.
    """
    # NOTE(review): the original also read gameState.data.score before and
    # after the move into unused locals; that dead code is removed here.
    successor = gameState.generateSuccessor(self.index, self.actions[action])
    # Food counts only shrink as pellets are eaten, so (initial - remaining)
    # is the number of pellets consumed since the counts were cached.
    redFoodEaten = self.numRedFood - len(successor.getRedFood().asList())
    blueFoodEaten = self.numBlueFood - len(successor.getBlueFood().asList())
    scoreTerm = CaptureAgent.getScore(self, successor) * 100
    if successor.isOnRedTeam(self.index):
        # Red team: reward eating blue food, penalize losing red food.
        return blueFoodEaten - redFoodEaten + scoreTerm
    return redFoodEaten - blueFoodEaten + scoreTerm
def getScore(self, state):
    """Return the base CaptureAgent score for *state*, minus one point
    per capsule still on the board (per self.getCapsules).

    Overrides CaptureAgent.getScore to fold capsule information into
    the score signal.
    """
    base_score = CaptureAgent.getScore(self, state)
    remaining_capsules = len(self.getCapsules(state))
    return base_score - remaining_capsules
def observationFunction(self, gameState):
    """Record the transition that led to *gameState* and return it.

    Called after our last action resolves. If a previous observation
    exists, computes the reward as the score change since that
    observation and feeds (prev, lastAction, gameState, reward) to
    self.observeTransition for learning.

    Fixes from the original:
      - `not CaptureAgent.getPreviousObservation is None` tested the
        method object itself (always true); now the method is *called*
        and its result checked.
      - `CaptureAgent.getScore(CaptureAgent, <method>)` passed the class
        as self and the unbound method as the state; now the previous
        observation instance is passed with this agent as self.
      - observeTransition received the method object instead of the
        previous state.
    """
    prev = self.getPreviousObservation()
    if prev is not None:
        # NOTE(review): mixing gameState.getScore() with
        # CaptureAgent.getScore(self, prev) mirrors the original intent;
        # confirm both score conventions agree for this framework.
        reward = gameState.getScore() - CaptureAgent.getScore(self, prev)
        self.observeTransition(prev, self.lastAction, gameState, reward)
    return gameState
def get_score_factor(self):
    """Return the node's team score difference scaled by 10000.

    Uses CaptureAgent.getScore with the root agent as the bound
    instance and this node's cached state.
    """
    score_difference = CaptureAgent.getScore(
        self.root_agent_object, self.node_state
    )
    return score_difference * 10000