def final(self, gameState): print("Weights", self.index, ":") print(self.getWeights()) self.gameNumber += 1 self.alpha -= self.alphaDecay self.alpha = max(self.alpha, self.minAlpha) CaptureAgent.final(self, gameState)
def final(self, state): "Called at the end of each game." # call the super-class final method CaptureAgent.final(self, state) #if self.isTraining() and DEBUG: #print "END WEIGHTS" #print self.weights self.episodesSoFar += 1
def final(self, gameState): print("\nWeights defensive") print(self.getWeights()) print() self.gameNumber += 1 self.alpha -= self.alphaDecay self.alpha = max(self.alpha, self.minAlpha) CaptureAgent.final(self, gameState)
def final(self, state): "Called at the end of each game." # call the super-class final method CaptureAgent.final(self, state) if self.isTraining() and DEBUG: print "END WEIGHTS" print self.weights self.episodesSoFar += 1 if self.episodesSoFar == self.numTraining: print "FINISHED TRAINING"
def final(self, gameState):
    self.stopEpisode()
    if self.isInTraining():
        self.makeUpdate()
        print('Training:{')
    else:
        print('Testing:{')
    print(self.getScore(self.getCurrentObservation()))
    print('}')
    CaptureAgent.final(self, gameState)
def final(self, gameState):
    # update on the last transition of the game
    rewardChange = (gameState.getScore() - self.lastState.getScore())
    self.observeTransition(self.lastState, self.lastAction, gameState, rewardChange)
    # clear the observation history list
    CaptureAgent.final(self, gameState)
    # stop the episode
    self.endOneEpisode()
    print(gameState)
def final(self, gameState): """ Receive final game state """ CaptureAgent.final(self, gameState) if TRAIN: for i, (weight, bias) in enumerate(self._omodel): np.save("oweight%d" % i, weight) np.save("obias%d" % i, bias) for i, (weight, bias) in enumerate(self._dmodel): np.save("dweight%d" % i, weight) np.save("dbias%d" % i, bias) for i, (weight, bias) in enumerate(self._fmodel): np.save("fweight%d" % i, weight) np.save("fbias%d" % i, bias)
def final(self, state): "Called at the end of each game." self.observationFunction(state) CaptureAgent.final(self, state) self.currentEpisode += 1 print("Episode number: %d" % (self.currentEpisode)) print("Episode rewards: %d" % (self.episodeRewards)) self.episodeRewards = 0 print(self.weights) if self.currentEpisode == self.numTraining: print "FINISHED TRAINING" with open('weights' + str(self.index), 'wb') as fp: pickle.dump(self.weights, fp)
def final(self, state): """ Called by Pacman game at the terminal state """ CaptureAgent.final(self, state) deltaReward = state.getScore() - self.lastState.getScore() self.observeTransition(self.lastState, self.lastAction, state, deltaReward) self.stopEpisode() # Make sure we have this var if not 'episodeStartTime' in self.__dict__: self.episodeStartTime = time.time() if not 'lastWindowAccumRewards' in self.__dict__: self.lastWindowAccumRewards = 0.0 self.lastWindowAccumRewards += state.getScore() NUM_EPS_UPDATE = 100 if self.episodesSoFar % NUM_EPS_UPDATE == 0: print 'Reinforcement Learning Status:' windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE) if self.episodesSoFar <= self.numTraining: trainAvg = self.accumTrainRewards / float(self.episodesSoFar) print '\tCompleted %d out of %d training episodes' % ( self.episodesSoFar, self.numTraining) print '\tAverage Rewards over all training: %.2f' % (trainAvg) else: testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining) print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining) print '\tAverage Rewards over testing: %.2f' % testAvg print '\tAverage Rewards for last %d episodes: %.2f' % ( NUM_EPS_UPDATE, windowAvg) print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime) self.lastWindowAccumRewards = 0.0 self.episodeStartTime = time.time() if self.episodesSoFar == self.numTraining: msg = 'Training Done (turning off epsilon and alpha)' print '%s\n%s' % (msg, '-' * len(msg)) print self.getWeights()
def final(self, state): """ Called by Pacman game at the terminal state """ CaptureAgent.final(self, state) deltaReward = state.getScore() - self.lastState.getScore() self.observeTransition(self.lastState, self.lastAction, state, deltaReward) self.stopEpisode() # Make sure we have this var if not 'episodeStartTime' in self.__dict__: self.episodeStartTime = time.time() if not 'lastWindowAccumRewards' in self.__dict__: self.lastWindowAccumRewards = 0.0 self.lastWindowAccumRewards += state.getScore() NUM_EPS_UPDATE = 100 if self.episodesSoFar % NUM_EPS_UPDATE == 0: print 'Reinforcement Learning Status:' windowAvg = self.lastWindowAccumRewards / float(NUM_EPS_UPDATE) if self.episodesSoFar <= self.numTraining: trainAvg = self.accumTrainRewards / float(self.episodesSoFar) print '\tCompleted %d out of %d training episodes' % ( self.episodesSoFar,self.numTraining) print '\tAverage Rewards over all training: %.2f' % ( trainAvg) else: testAvg = float(self.accumTestRewards) / (self.episodesSoFar - self.numTraining) print '\tCompleted %d test episodes' % (self.episodesSoFar - self.numTraining) print '\tAverage Rewards over testing: %.2f' % testAvg print '\tAverage Rewards for last %d episodes: %.2f' % ( NUM_EPS_UPDATE,windowAvg) print '\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime) self.lastWindowAccumRewards = 0.0 self.episodeStartTime = time.time() if self.episodesSoFar == self.numTraining: msg = 'Training Done (turning off epsilon and alpha)' print '%s\n%s' % (msg,'-' * len(msg)) print self.getWeights()
def final(self, gameState): # print "Warnings count:", self.warnings CaptureAgent.final(self, gameState)
def final(self, gameState):
    CaptureAgent.final(self, gameState)
def final(self, state): "Called at the end of each game." # call the super-class final method CaptureAgent.final(self, state)
def final(self, state):
    CaptureAgent.final(self, state)
    self.episodesCur += 1
def final(self, state): "Called at the end of each game." # call the super-class final method CaptureAgent.final(self, state) if self.isTraining() and DEBUG: self.episodesSoFar += 1