def newEpisode(self):
    """Choose exploration parameters for the next episode, then start it.

    While ``self.learning`` is true, the episode that just ended is turned
    into one Gaussian-process training sample: the current exploration
    parameters as input, and the scaled sum of field 2 of the most recent
    history sequence as target.  The GP is refreshed and the exploration
    module receives the GP test point with the largest predictive variance
    (ties are broken at random).  If that test point is already present in
    the GP training inputs, unit-variance normal noise is added to it first.

    If the scaled target is exactly zero, no sample is added and random
    exploration weights are drawn instead.

    ``LearningAgent.newEpisode`` is always called last.

    NOTE(review): the nesting of this method was lost in the copy under
    review.  The ``else`` is attached to the ``target != 0.0`` test here
    because ``new_param`` only exists on that path -- confirm against the
    upstream source.
    """
    # Both constants were hard-coded in the original; their rationale is not
    # visible from this method.
    target_scale = 500
    max_samples = 20

    if self.learning:
        gp = self.gp
        params = ravel(self.explorationlayer.module.params)
        last_sequence = self.history.getSequence(
            self.history.getNumSequences() - 1)
        # Field 2 of a sequence is presumably the reward -- TODO confirm.
        target = ravel(sum(last_sequence[2]) / target_scale)

        if target != 0.0:
            gp.addSample(params, target)

            # Keep only the most recent samples in the training set.
            if len(gp.trainx) > max_samples:
                gp.trainx = gp.trainx[-max_samples:, :]
                gp.trainy = gp.trainy[-max_samples:]
                gp.noise = gp.noise[-max_samples:]

            gp._calculate()

            # Pick the test point whose predictive variance (diagonal of the
            # predictive covariance) is largest; break ties at random.
            variances = diag(gp.pred_cov)
            candidates = where(variances == variances.max())[0]
            pick = candidates[random.randint(len(candidates))]

            # Copy the row: indexing a NumPy array returns a view, so adding
            # noise in place (or handing the view to the module) would
            # silently modify the GP's own test grid.
            new_param = gp.testx[pick].copy()

            # Only perturb when the complete point already exists as a row of
            # the training inputs.  An element-wise match on a single
            # coordinate is not a duplicate sample.
            if (gp.trainx == new_param).all(axis=1).any():
                new_param = new_param + random.normal(0, 1, len(new_param))

            self.explorationlayer.module._setParameters(new_param)

        else:
            self.explorationlayer.drawRandomWeights()

    # don't call StateDependentAgent.newEpisode() because it randomizes the params
    LearningAgent.newEpisode(self)
def newEpisode(self):
    """Signal the start of a new episode in the training cycle.

    The ``LearningAgent`` episode hook runs first; afterwards
    ``self.learner.perturbate()`` is called.
    """
    # Base-class bookkeeping must happen before the learner is perturbed.
    LearningAgent.newEpisode(self)

    self.learner.perturbate()
def newEpisode(self):
    """Start a new episode and draw fresh exploration weights.

    The ``LearningAgent`` episode hook runs first; afterwards
    ``self.explorationlayer.drawRandomWeights()`` is called.
    """
    # Base-class bookkeeping first, then re-randomise the exploration layer.
    LearningAgent.newEpisode(self)

    self.explorationlayer.drawRandomWeights()