Example no. 1
 def final(self, gameState):
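     # Log this agent's learned feature weights at game end.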
     print("Weights", self.index, ":")
     print(self.getWeights())
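     # Count the finished game and decay the learning rate, clamped at minAlpha.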
     self.gameNumber += 1
     self.alpha -= self.alphaDecay
     self.alpha = max(self.alpha, self.minAlpha)
     CaptureAgent.final(self, gameState)
Example no. 2
 def final(self, state):
     "Called at the end of each game."
     # call the super-class final method
     CaptureAgent.final(self, state)
     # if self.isTraining() and DEBUG:
     #     print("END WEIGHTS")
     #     print(self.weights)
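     # one more episode completed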
     self.episodesSoFar += 1
Example no. 3
 def final(self, gameState):
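     # Log the defensive agent's weights, then decay the learning rate as in Example no. 1.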
     print("\nWeights defensive")
     print(self.getWeights())
     print()
     self.gameNumber += 1
     self.alpha -= self.alphaDecay
     self.alpha = max(self.alpha, self.minAlpha)
     CaptureAgent.final(self, gameState)
Example no. 4
 def final(self, state):
     "Called at the end of each game."
     # call the super-class final method
     CaptureAgent.final(self, state)
     if self.isTraining() and DEBUG:
         print "END WEIGHTS"
         print self.weights
     self.episodesSoFar += 1
     if self.episodesSoFar == self.numTraining:
         print "FINISHED TRAINING"
Example no. 5
 def final(self, gameState):
     self.stopEpisode()
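     # While training, apply one final learning update before logging the score.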
     if self.isInTraining():
         self.makeUpdate()
         print('Training:{')
     else:
         print('Testing:{')
     print(self.getScore(self.getCurrentObservation()))
     print('}')
     CaptureAgent.final(self, gameState)
Example no. 6
    def final(self, gameState):
        # update on last movement
        rewardChange = (gameState.getScore() - self.lastState.getScore())
        self.observeTransition(self.lastState, self.lastAction, gameState,
                               rewardChange)

        # clear the observation history list
        CaptureAgent.final(self, gameState)

        # stop episode
        self.endOneEpisode()

        # dump the terminal game state for debugging
        print(gameState)
Example no. 7
    def final(self, gameState):
        """
        Receive final game state
        """
        CaptureAgent.final(self, gameState)

        if TRAIN:
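            # Save each layer's (weight, bias) pair of the three models as .npy
            # files (assumes numpy is imported as np at module level).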
            for i, (weight, bias) in enumerate(self._omodel):
                np.save("oweight%d" % i, weight)
                np.save("obias%d" % i, bias)
            for i, (weight, bias) in enumerate(self._dmodel):
                np.save("dweight%d" % i, weight)
                np.save("dbias%d" % i, bias)
            for i, (weight, bias) in enumerate(self._fmodel):
                np.save("fweight%d" % i, weight)
                np.save("fbias%d" % i, bias)
Example no. 8
    def final(self, state):
        "Called at the end of each game."

        self.observationFunction(state)

        CaptureAgent.final(self, state)
        self.currentEpisode += 1
        print("Episode number: %d" % (self.currentEpisode))
        print("Episode rewards: %d" % (self.episodeRewards))
        self.episodeRewards = 0
        print(self.weights)
        if self.currentEpisode == self.numTraining:
            print("FINISHED TRAINING")
            with open('weights' + str(self.index), 'wb') as fp:
                pickle.dump(self.weights, fp)
Example no. 9
    def final(self, state):
        """
      Called by Pacman game at the terminal state
    """
        CaptureAgent.final(self, state)
        deltaReward = state.getScore() - self.lastState.getScore()
        self.observeTransition(self.lastState, self.lastAction, state,
                               deltaReward)
        self.stopEpisode()

        # Make sure these attributes exist before using them
        if 'episodeStartTime' not in self.__dict__:
            self.episodeStartTime = time.time()
        if 'lastWindowAccumRewards' not in self.__dict__:
            self.lastWindowAccumRewards = 0.0
        self.lastWindowAccumRewards += state.getScore()

        NUM_EPS_UPDATE = 100
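        # Every NUM_EPS_UPDATE episodes, report a rolling summary of rewards.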
        if self.episodesSoFar % NUM_EPS_UPDATE == 0:
            print('Reinforcement Learning Status:')
            windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE)
            if self.episodesSoFar <= self.numTraining:
                trainAvg = self.accumTrainRewards / float(self.episodesSoFar)
                print('\tCompleted %d out of %d training episodes' % (
                    self.episodesSoFar, self.numTraining))
                print('\tAverage Rewards over all training: %.2f' % trainAvg)
            else:
                testAvg = float(self.accumTestRewards) / (self.episodesSoFar -
                                                          self.numTraining)
                print('\tCompleted %d test episodes' % (self.episodesSoFar -
                                                        self.numTraining))
                print('\tAverage Rewards over testing: %.2f' % testAvg)
            print('\tAverage Rewards for last %d episodes: %.2f' % (
                NUM_EPS_UPDATE, windowAvg))
            print('\tEpisode took %.2f seconds' % (time.time() -
                                                   self.episodeStartTime))
            self.lastWindowAccumRewards = 0.0
            self.episodeStartTime = time.time()

        if self.episodesSoFar == self.numTraining:
            msg = 'Training Done (turning off epsilon and alpha)'
            print('%s\n%s' % (msg, '-' * len(msg)))
            print(self.getWeights())
Example no. 10
  def final(self, state):
    """
      Called by Pacman game at the terminal state
    """
    CaptureAgent.final(self, state)
    deltaReward = state.getScore() - self.lastState.getScore()
    self.observeTransition(self.lastState, self.lastAction, state, deltaReward)
    self.stopEpisode()

    # Make sure these attributes exist before using them
    if 'episodeStartTime' not in self.__dict__:
        self.episodeStartTime = time.time()
    if 'lastWindowAccumRewards' not in self.__dict__:
        self.lastWindowAccumRewards = 0.0
    self.lastWindowAccumRewards += state.getScore()

    NUM_EPS_UPDATE = 100
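    # Every NUM_EPS_UPDATE episodes, report a rolling summary of rewards.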
    if self.episodesSoFar % NUM_EPS_UPDATE == 0:
        print('Reinforcement Learning Status:')
        windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE)
        if self.episodesSoFar <= self.numTraining:
            trainAvg = self.accumTrainRewards / float(self.episodesSoFar)
            print('\tCompleted %d out of %d training episodes' % (
                self.episodesSoFar, self.numTraining))
            print('\tAverage Rewards over all training: %.2f' % trainAvg)
        else:
            testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining)
            print('\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining))
            print('\tAverage Rewards over testing: %.2f' % testAvg)
        print('\tAverage Rewards for last %d episodes: %.2f' % (
            NUM_EPS_UPDATE, windowAvg))
        print('\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime))
        self.lastWindowAccumRewards = 0.0
        self.episodeStartTime = time.time()

    if self.episodesSoFar == self.numTraining:
        msg = 'Training Done (turning off epsilon and alpha)'
        print('%s\n%s' % (msg, '-' * len(msg)))
        print(self.getWeights())
Example no. 11
 def final(self, gameState):
     # print "Warnings count:", self.warnings
     CaptureAgent.final(self, gameState)
 def final(self, gameState):
     CaptureAgent.final(self, gameState)
 def final(self, state):
     "Called at the end of each game."
     # call the super-class final method
     CaptureAgent.final(self, state)
Example no. 14
 def final(self, state):
     CaptureAgent.final(self, state)
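     # one more episode finished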
     self.episodesCur += 1
Example no. 15
	def final(self, state):
		"Called at the end of each game."
		# call the super-class final method
		CaptureAgent.final(self, state)
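		# note: the episode counter only advances while training with DEBUG enabled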
		if self.isTraining() and DEBUG:
			self.episodesSoFar += 1