Ejemplo n.º 1
0
 def train(self):
     networkFile = config.get('networkDir') + config.get(
         'snapshotPrefix') + '_iter_' + config.get(
             'trainingIterationsPerBatch') + '.caffemodel'
     interactions = config.geti('trainInteractions')
     minEpsilon = config.getf('minTrainingEpsilon')
     epochSize = len(self.environment.imageList) / 1
     epsilon = 1.0
     self.controller.setEpsilonGreedy(epsilon,
                                      self.environment.sampleAction)
     epoch = 1
     exEpochs = config.geti('explorationEpochs')
     while epoch <= exEpochs:
         s = cu.tic()
         print 'Epoch', epoch, ': Exploration (epsilon=1.0)'
         self.runEpoch(interactions, len(self.environment.imageList))
         self.task.flushStats()
         s = cu.toc('Epoch done in ', s)
         epoch += 1
     self.learner = QLearning()
     self.agent.learner = self.learner
     egEpochs = config.geti('epsilonGreedyEpochs')
     while epoch <= egEpochs + exEpochs:
         s = cu.tic()
         epsilon = epsilon - (1.0 - minEpsilon) / float(egEpochs)
         if epsilon < minEpsilon: epsilon = minEpsilon
         self.controller.setEpsilonGreedy(epsilon,
                                          self.environment.sampleAction)
         print 'Epoch', epoch, '(epsilon-greedy:{:5.3f})'.format(epsilon)
         self.runEpoch(interactions, epochSize)
         self.task.flushStats()
         self.doValidation(epoch)
         s = cu.toc('Epoch done in ', s)
         epoch += 1
     maxEpochs = config.geti('exploitLearningEpochs') + exEpochs + egEpochs
     while epoch <= maxEpochs:
         s = cu.tic()
         print 'Epoch', epoch, '(exploitation mode: epsilon={:5.3f})'.format(
             epsilon)
         self.runEpoch(interactions, epochSize)
         self.task.flushStats()
         self.doValidation(epoch)
         s = cu.toc('Epoch done in ', s)
         shutil.copy(networkFile, networkFile + '.' + str(epoch))
         epoch += 1
 def train(self):
   networkFile = config.get('networkDir') + config.get('snapshotPrefix') + '_iter_' + config.get('trainingIterationsPerBatch') + '.caffemodel'
   interactions = config.geti('trainInteractions')
   minEpsilon = config.getf('minTrainingEpsilon')
   epochSize = len(self.environment.imageList)/1
   epsilon = 1.0
   self.controller.setEpsilonGreedy(epsilon, self.environment.sampleAction)
   epoch = 1
   exEpochs = config.geti('explorationEpochs')
   while epoch <= exEpochs:
     s = cu.tic()
     print 'Epoch',epoch,': Exploration (epsilon=1.0)'
     self.runEpoch(interactions, len(self.environment.imageList))
     self.task.flushStats()
     s = cu.toc('Epoch done in ',s)
     epoch += 1
   self.learner = QLearning()
   self.agent.learner = self.learner
   egEpochs = config.geti('epsilonGreedyEpochs')
   while epoch <= egEpochs + exEpochs:
     s = cu.tic()
     epsilon = epsilon - (1.0-minEpsilon)/float(egEpochs)
     if epsilon < minEpsilon: epsilon = minEpsilon
     self.controller.setEpsilonGreedy(epsilon, self.environment.sampleAction)
     print 'Epoch',epoch ,'(epsilon-greedy:{:5.3f})'.format(epsilon)
     self.runEpoch(interactions, epochSize)
     self.task.flushStats()
     self.doValidation(epoch)
     s = cu.toc('Epoch done in ',s)
     epoch += 1
   maxEpochs = config.geti('exploitLearningEpochs') + exEpochs + egEpochs
   while epoch <= maxEpochs:
     s = cu.tic()
     print 'Epoch',epoch,'(exploitation mode: epsilon={:5.3f})'.format(epsilon)
     self.runEpoch(interactions, epochSize)
     self.task.flushStats()
     self.doValidation(epoch)
     s = cu.toc('Epoch done in ',s)
     shutil.copy(networkFile, networkFile + '.' + str(epoch))
     epoch += 1
Ejemplo n.º 3
0
 def runEpoch(self, interactions, maxImgs):
     img = 0
     s = cu.tic()
     while img < maxImgs:
         k = 0
         while not self.environment.episodeDone and k < interactions:
             self.experiment._oneInteraction()
             k += 1
         self.agent.learn()
         self.agent.reset()
         self.environment.loadNextEpisode()
         img += 1
     s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)
 def runEpoch(self, interactions, maxImgs):
   img = 0
   s = cu.tic()
   while img < maxImgs:
     k = 0
     while not self.environment.episodeDone and k < interactions:
       self.experiment._oneInteraction()
       k += 1
     self.agent.learn()
     self.agent.reset()
     self.environment.loadNextEpisode()
     img += 1
   s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)