Code example #1
def __init__(self, index):
    CaptureAgent.__init__(self, index)
    self.firstTurnComplete = False
    self.startingFood = 0
    self.theirStartingFood = 0
    self.discount = .9               # discount factor (gamma)
    self.alpha = 0.002               # learning rate
    self.featureHandler = FeatureHandler()
    self.agentType = 'basicQLearningAgent'
    self.weights = None              # loaded lazily in getWeights()
    self.explorationRate = 0.3       # epsilon for epsilon-greedy action selection
Code example #2
# Assumed context: the standard Berkeley Pacman capture-the-flag framework
# (captureAgents, util, random); FeatureHandler, ReflexCaptureAgent and
# highMutation are defined elsewhere in this project.
import random
import util
from captureAgents import CaptureAgent

class QLearningAgent(ReflexCaptureAgent):
    targetedLocations = []
    
    def __init__(self, index):
        CaptureAgent.__init__(self, index)
        self.firstTurnComplete = False
        self.startingFood = 0
        self.theirStartingFood = 0
        self.discount = .9               # discount factor (gamma)
        self.alpha = 0.002               # learning rate
        self.featureHandler = FeatureHandler()
        self.agentType = 'basicQLearningAgent'
        self.weights = None              # loaded lazily in getWeights()
        self.explorationRate = 0.3       # epsilon for epsilon-greedy action selection
    
    def registerInitialState(self, state):
        CaptureAgent.registerInitialState(self, state)
        self.startingFood = len(self.getFood(state).asList())
        self.theirStartingFood = len(self.getFoodYouAreDefending(state).asList())

                                
    
    def getFeatures(self, gameState, action):
        """
            Returns a counter of features for the state
            """
        
        successor = self.getSuccessor(gameState, action)
        position = self.getPosition(gameState)
        nextPosition = self.getPosition(successor)
        nextPositionAsInt = self.getPositionAsInt(successor)
        
        
        #oldFeatures = self.getMutationFeatures(gameState, action)
        
        #features['isAgentInEnemyTerritory'] = self.isPositionInEnemyTerritory(successor, position)
        #features['isAgentAPacman'] = self.isPacman(successor)
        
        features = util.Counter()
        #for oldFeature, value in oldFeatures.items():
            #features[oldFeature] = value
        
        #features['degreesOfFreedom'] = len(self.getLegalActions(successor))
        
    
        """avgFriendDist = 0.0
        for fpos in self.getTeamPositions(successor):
            avgFriendDist += self.getMazeDistance(position, fpos)
        
        avgFriendDist /= (len(self.getTeam(gameState)) - 1)
         """   
        features['avgFriendDist'] = sum([self.getMazeDistance(position, fpos) for fpos in self.getTeamPositions(successor)]) / (len(self.getTeam(gameState)) - 1)
        
        """features['numberOfEnemies'] = 0
            #for enemy in self.getOpponents(successor):
        features['numberOfEnemies'] += 1"""
          
        
       # defenseFoodDists = [self.getMazeDistance(nextPosition, foodPos) for foodPos in self.getFoodYouAreDefending(successor).asList()]        
        #features['numberOfYourFoodsRemaining'] = len(self.getFoodYouAreDefending(successor).asList())
        #features['distanceToClosestYouFood'] = min(defenseFoodDists)
        
        features['eatingFood'] = 1.0 if self.isPacman(successor) and gameState.hasFood(nextPositionAsInt[0], nextPositionAsInt[1]) else 0.0
        
        #features['notMoving'] = 1.0 if position == nextPosition else 0.0
        
        foodDists = [self.getMazeDistance(nextPosition, foodPos) for foodPos in self.getFood(successor).asList()]
        
        features['numberOfEnemyFoodsRemaining'] = len(self.getFood(successor).asList())
        features['distancetoClosestEnemyFood'] = 0 if features['eatingFood'] > 0 else min(foodDists)
        #features['distanceToClosestFoodSquared'] = min(foodDists) ** 2
        print "FEATURES: ", features
        return features
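    # getFeatures and getWeights together define a linear Q-function,
    #     Q(s, a) = sum_i w_i * f_i(s, a),
    # whose dot product is presumably computed by the inherited evaluate() method.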
    
    def getWeights(self, gameState, action):
        """
            Normally, weights do not depend on the gamestate.    They can be either
            a counter or a dictionary.
            """
        #self.weights['numberOfEnemyFoodsRemaining'] = -10
        #self.weights['distancetoClosestEnemyFood'] = -150
        #self.weights['degreesOfFreedom'] = 25
        
        if self.weights == None:
            self.weights = util.Counter()
                #for feature in self.getFeatures(gameState, 'Stop'):
            #self.weights[feature] = self.getStartingWeight(feature)
            self.weights = self.featureHandler.getFeatureWeights('basicQlearningAgent')
            print 'WEIGHTS'
            print self.weights
        
        return self.weights
    
    def getReward(self, state):
        prevState = self.getPreviousObservation()
        
        if prevState is None: return 0

        reward = 0
        # Reward eating a food pellet on the previous move.
        if self.getPosition(state) in self.getFood(prevState).asList():
            reward += 10

        #reward += (len(self.getFood(state).asList()) - len(self.getFood(prevState).asList()))

        # A jump of more than one step most likely means the agent was captured
        # and sent back to its start position, so penalize it heavily.
        prevPosition = self.getPosition(prevState)
        currPosition = self.getPosition(state)
        if self.getMazeDistance(prevPosition, currPosition) > 1:
            reward -= 40
        
        return reward
    
    
    def getValue(self, state):
        # V(s): the Q-value of the best action from this state.
        action = self.getBestAction(state)
        if action is None:
            return 0.0
        return self.evaluate(state, action)
    
    
    def getBestAction(self, state):
        return ReflexCaptureAgent.chooseAction(self, state)
    
    
    
    def chooseAction(self, state):
        """
        Picks among the actions with the highest Q(s,a); with probability
        explorationRate it picks a random legal action instead (epsilon-greedy).
        """
        #return random.choice( state.getLegalActions( self.index ) )
        if not self.firstTurnComplete:
            self.registerInitialState(state)
            self.firstTurnComplete = True

        actions = state.getLegalActions(self.index)

        if util.flipCoin(self.explorationRate):
            return random.choice(actions)

        # You can profile your evaluation time by uncommenting these lines
        # start = time.time()
        values = [(a, self.evaluate(state, a)) for a in actions]
        # print 'eval time for agent %d: %.4f' % (self.index, time.time() - start)

        #print 'VALUES: ' + str(values)
        maxValue = max(value for a, value in values)
        bestActions = [a for a, value in values if value == maxValue]

        action = random.choice(bestActions)

        self.update(state, action, self.getSuccessor(state, action))

        #print 'Features: ' + str(self.getFeatures(state, action))
        #print 'Weights: ' + str(self.weights)
        #print 'Action: ' + str(action) + ' - ' + str(self.getPosition(state)) + '--->' + str(self.getPosition(self.getSuccessor(state, action)))
        return action
    
    
    def dictNormalize(self, d):
        # Normalize the dictionary's values in place so they sum to 1.
        total = float(sum(d.values()))
        if total == 0: return d
        for key in d.keys():
            d[key] = d[key] / total

        return d
    
    def getStartingWeight(self, feature):
        if feature in highMutation.cautiousOWeightsDict:
            return highMutation.cautiousOWeightsDict[feature]
            
        if feature == 'degreesOfFreedom': return 0.5
        if feature == 'numberOfYourFoodsRemaining': return 0.25
        if feature == 'numberOfEnemyFoodsRemaining': return -0.25
        if feature == 'distancetoClosestEnemyFood': return -0.5
        if feature == 'distanceToClosestYouFood': return -0.4
        if feature == 'eatingFood': return 1
        if feature == 'notMoving': return -1
        if feature == 'avgFriendDist': return 0.25
        
        return 0
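    # The update below follows the standard approximate Q-learning rule:
    #     correction = r + gamma * V(s') - Q(s, a)
    #     w_i <- w_i + alpha * correction * f_i(s, a)
    # with r from getReward(), V(s') from getValue(), gamma = self.discount
    # and alpha = self.alpha.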
        
    
    def update(self, state, action, nextState):
        features = self.getFeatures(state, action)
        weights = self.getWeights(state, action)

        correction = (self.getReward(state) + self.discount * self.getValue(nextState)) - self.evaluate(state, action)
        #print 'CORRECTION: ' + str(correction)
        #print 'REWARD: ' + str(self.getReward(state))
        #print 'NEXT VALUE: ' + str(self.getValue(nextState))
        #print 'EVAL: ' + str(self.evaluate(state, action))

        for feature in features.keys():
            if feature in weights:
                # Scale the correction by the feature value, per the standard rule.
                weights[feature] = weights[feature] + self.alpha * correction * features[feature]
            else:
                # Unseen feature: start it at a hand-tuned prior weight.
                weights[feature] = self.getStartingWeight(feature)
        #weights = self.dictNormalize(weights)

        self.weights = weights
        self.featureHandler.updateFeatureWeights(weights, self.agentType)
    
    
    def getMutationFeatures(self, gameState, action):
        features = util.Counter()
        successor = self.getSuccessor(gameState, action)
        position = self.getPosition(gameState)
        
        
        
        distances = 0.0
        for tpos in self.getTeamPositions(successor):
            distances = distances + abs(tpos[0] - position[0])
        features['xRelativeToFriends'] = distances
    
        # Sum of the x-coordinates of all observable opponents.
        enemyX = 0.0
        for epos in self.getOpponentPositions(successor):
            if epos is not None:
                enemyX = enemyX + epos[0]
        features['avgEnemyX'] = enemyX
        
        foodLeft = len(self.getFoodYouAreDefending(successor).asList())
        features['percentOurFoodLeft'] = float(foodLeft) / self.startingFood

        foodLeft = len(self.getFood(successor).asList())
        features['percentTheirFoodLeft'] = float(foodLeft) / self.theirStartingFood
        
        features['IAmAScaredGhost'] = 1.0 if not self.isPacman(successor) and self.getScaredTimer(successor) > 0 else 0.0
        
        features['enemyPacmanNearMe'] = 0.0
        minOppDist = 10000
        minOppPos = (0, 0)
        for ep in self.getOpponentPositions(successor):
            # For a feature later on
            if ep is not None and self.getMazeDistance(ep, position) < minOppDist:
                minOppDist = self.getMazeDistance(ep, position)
                minOppPos = ep
            if ep is not None and self.getMazeDistance(ep, position) <= 1 and self.isPositionInTeamTerritory(successor, ep):
                features['enemyPacmanNearMe'] = 1.0
        
        # Count teammates that are in the same mode (Pacman vs. ghost) as this agent.
        features['numSameFriends'] = 0
        for friend in self.getTeam(successor):
            if successor.getAgentState(friend).isPacman == self.isPacman(successor):
                features['numSameFriends'] = features['numSameFriends'] + 1
        
        # Compute distance to the nearest food
        foodList = self.getFood(successor).asList()
        if len(foodList) > 0: # This should always be True, but better safe than sorry
            minDiffDistance = min([1000] + [self.getMazeDistance(position, food) - self.getMazeDistance(minOppPos, food)
                                            for food in foodList if minOppDist < 1000])
            features['blockableFood'] = 1.0 if minDiffDistance < 1.0 else 0.0
        return features
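
For context, here is a minimal sketch of how an agent like this is typically plugged into the Berkeley Pacman capture-the-flag framework through a createTeam factory (usually placed in myTeam.py). Using QLearningAgent for both team slots is an illustrative assumption, not part of the original code.

def createTeam(firstIndex, secondIndex, isRed,
               first='QLearningAgent', second='QLearningAgent'):
    # The framework calls this factory with the indices assigned to our two
    # agents and expects a list of two instantiated agent objects back.
    return [eval(first)(firstIndex), eval(second)(secondIndex)]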