Example #1
0
 def __init__(self, net, task, valueNetwork=None, **args):
     """Set up the learner's datasets, supervised trainers and counters.

     :param net: network handed to the main ``BackpropTrainer``; it is
         trained on the importance-weighted dataset.
     :param task: task object; only its ``outdim`` and ``indim``
         attributes are read here, to size the datasets.
     :param valueNetwork: optional second network. When given, a separate
         ``BackpropTrainer`` (``self.vbp``) is created for it on
         ``self.valueDs``; when ``None``, ``self.vbp`` is not created.
     :param args: keyword settings forwarded unchanged to
         ``self.setArgs`` (presumably overriding attributes such as
         ``learningRate`` or ``momentum`` -- confirm against ``setArgs``).
     """
     self.net = net
     self.task = task
     self.setArgs(**args)

     # The value-network trainer falls back to the main trainer's
     # hyper-parameters when no dedicated ones were configured.
     # Identity tests are used so that None is detected reliably.
     if self.valueLearningRate is None:
         self.valueLearningRate = self.learningRate
     if self.valueMomentum is None:
         self.valueMomentum = self.momentum

     if self.supervisedPlotting:
         # Imported lazily so pylab is only required when plotting is on.
         from pylab import ion
         ion()

     # adaptive temperature:
     self.tau = 1.

     # prepare the datasets to be used
     # NOTE(review): the task's outdim is passed as the first dimension
     # and indim as the second -- presumably the task's output
     # (observation) is the network's input; confirm against the
     # dataset constructors.
     self.weightedDs = ImportanceDataSet(self.task.outdim, self.task.indim)
     self.rawDs = ReinforcementDataSet(self.task.outdim, self.task.indim)
     self.valueDs = SequentialDataSet(self.task.outdim, 1)

     # prepare the supervised trainers
     self.bp = BackpropTrainer(self.net, self.weightedDs, self.learningRate,
                               self.momentum, verbose=False,
                               batchlearning=True)

     # CHECKME: outsource
     self.vnet = valueNetwork
     # ``is not None`` avoids invoking any comparison operator the network
     # object may overload (``!= None`` would call its __ne__/__eq__).
     if valueNetwork is not None:
         self.vbp = BackpropTrainer(self.vnet, self.valueDs, self.valueLearningRate,
                                    self.valueMomentum, verbose=self.verbose)

     # keep information:
     self.totalSteps = 0
     self.totalEpisodes = 0