def __init__(self, module, learner=None):
    """ Initialise the parent agent, then attach a GaussianProcess whose
        input dimension is the parameter dimension of the exploration
        layer's module. The process' mean and hyper parameters are
        overridden with fixed values.
    """
    StateDependentAgent.__init__(self, module, learner)

    # gaussian process: configure it completely, then attach it to the agent
    paramdim = self.explorationlayer.module.paramdim
    process = GaussianProcess(paramdim, -2, 2, 1)
    process.mean = -1.5
    process.hyper = (2.0, 2.0, 0.1)
    self.gp = process
class StateDependentGPAgent(StateDependentAgent):
    """ StateDependentGPAgent is a learning agent that adds a GaussianLayer to
        its module and stores its deterministic inputs (mu) in the dataset.
        It also trains a GaussianProcess with the drawn exploration weights
        and the reward as target for choosing the next exploration sample.
    """

    # number of most recent samples kept in the gaussian process training set
    maxSamples = 20

    def __init__(self, module, learner=None):
        """ Initialise the parent agent and create the gaussian process over
            the parameter space of the exploration layer's module.
        """
        StateDependentAgent.__init__(self, module, learner)

        # gaussian process
        self.gp = GaussianProcess(self.explorationlayer.module.paramdim, -2, 2, 1)
        self.gp.mean = -1.5
        self.gp.hyper = (2.0, 2.0, 0.1)

    def newEpisode(self):
        """ Feed the last episode into the gaussian process and choose the
            exploration parameters for the next episode.

            While learning, the current exploration parameters and the scaled
            sum of element [2] of the most recent sequence in the history are
            added to the gaussian process as one sample. The next parameters
            are the test point with the largest predictive variance. If the
            target is zero, random weights are drawn instead.
        """
        if self.learning:
            params = ravel(self.explorationlayer.module.params)
            # NOTE(review): 500 looks like a fixed normalisation constant for
            # the summed episode signal - confirm against the task in use.
            target = ravel(sum(self.history.getSequence(self.history.getNumSequences()-1)[2]) / 500)

            if target != 0.0:
                self.gp.addSample(params, target)

                # only keep the most recent samples
                if len(self.gp.trainx) > self.maxSamples:
                    self.gp.trainx = self.gp.trainx[-self.maxSamples:, :]
                    self.gp.trainy = self.gp.trainy[-self.maxSamples:]
                    self.gp.noise = self.gp.noise[-self.maxSamples:]

                self.gp._calculate()

                # get new parameters where the predictive variance is highest,
                # breaking ties between equally uncertain points at random
                variances = diag(self.gp.pred_cov)
                indices = where(variances == variances.max())[0]
                pick = indices[random.randint(len(indices))]
                new_param = self.gp.testx[pick]

                # check if that exact point exists already in the gp training
                # set: a whole row has to match, not just a single coordinate
                if (self.gp.trainx == new_param).all(axis=1).any():
                    # add some normal noise to it. This deliberately builds a
                    # new array: an in-place += would write the noise back
                    # into the row of self.gp.testx that new_param refers to.
                    new_param = new_param + random.normal(0, 1, len(new_param))

                self.explorationlayer.module._setParameters(new_param)

            else:
                self.explorationlayer.drawRandomWeights()

        # don't call StateDependentAgent.newEpisode() because it randomizes the params
        LearningAgent.newEpisode(self)
def __init__(self, task, agent):
    """ Initialise the episodic experiment and set up the model: a
        sequential dataset plus one GaussianProcess per output dimension
        of the task.
    """
    EpisodicExperiment.__init__(self, task, agent)

    # training set for the model (action dimension + 1 for time)
    self.modelds = SequentialDataSet(self.task.indim + 1, 1)

    # one gaussian process for every output dimension of the task
    processes = []
    for _ in range(self.task.outdim):
        processes.append(
            GaussianProcess(
                indim=self.modelds.getDimension('input'),
                start=(-10, -10, 0),
                stop=(10, 10, 300),
                step=(5, 5, 100),
            )
        )
    self.model = processes

    # all gaussian processes share the same hyper parameters
    for process in self.model:
        process.hyper = (20, 2.0, 0.01)
#!/usr/bin/env python
""" A simple example on how to use the GaussianProcess class
in pybrain, for one and two dimensions. """

__author__ = "Thomas Rueckstiess, [email protected]"

from pybrain.auxiliary import GaussianProcess
from pybrain.datasets import SupervisedDataSet
from scipy import mgrid, sin, cos, array, ravel
from pylab import show, figure

# --- training data: a one-dimensional dataset holding two samples
ds = SupervisedDataSet(1, 1)
ds.addSample(-2.5, -1)
ds.addSample(-1.0, 3)

# --- gaussian process with a test grid on [-3, 3) and a zero mean
gp = GaussianProcess(indim=1, start=-3, stop=3, step=0.05)
gp.mean = 0

# the "autonoise" feature adds uncertainty to data depending on
# its distance to other points in the dataset. not tested much yet.
# gp.autonoise = True

# --- some curves sampled on a coarser grid
x = mgrid[-3:3:0.2]
y = 0.1*x**2 + x + 1
z = sin(x) + 0.5*cos(y)

# --- train on the dataset and plot the result into a new figure
figure()
gp.trainOnDataset(ds)
gp.plotCurves(showSamples=True)
#!/usr/bin/env python
""" A simple example on how to use the GaussianProcess class
in pybrain, for one and two dimensions. """

# The shebang has to be the very first line to have any effect, and the
# module docstring has to be the first statement to end up in __doc__.
# A __future__ import may follow the docstring, so it goes right here.
from __future__ import print_function

__author__ = "Thomas Rueckstiess, [email protected]"

from pybrain.auxiliary import GaussianProcess
from pybrain.datasets import SupervisedDataSet
from scipy import mgrid, sin, cos, array, ravel
from pylab import show, figure

# --- training data: a one-dimensional dataset holding two samples
ds = SupervisedDataSet(1, 1)
gp = GaussianProcess(indim=1, start=-3, stop=3, step=0.05)
figure()

# --- some curves sampled on a coarser grid
x = mgrid[-3:3:0.2]
y = 0.1*x**2 + x + 1
z = sin(x) + 0.5*cos(y)

ds.addSample(-2.5, -1)
ds.addSample(-1.0, 3)
gp.mean = 0

# new feature "autonoise" adds uncertainty to data depending on
# its distance to other points in the dataset. not tested much yet.
# gp.autonoise = True

# --- train on the dataset and plot the result
gp.trainOnDataset(ds)
gp.plotCurves(showSamples=True)