Example 1
File: fem.py Project: HKou/pybrain
 def _generateSample(self):
     """ generate a new sample from the current distribution. """
     if self.useCauchy:
         # Cauchy distribution
         chosenOne = drawIndex(self.alphas, True)
         return multivariateCauchy(self.mus[chosenOne], self.sigmas[chosenOne])
     else:
         # Normal distribution
         chosenOne = drawIndex(self.alphas, True)
         return multivariate_normal(self.mus[chosenOne], self.sigmas[chosenOne])
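All of the examples on this page use pybrain.utilities.drawIndex, which draws one index at random from a vector of probability weights; with tolerant=True the weights apparently may deviate slightly from summing to one and are renormalised. The sketch below is not pybrain's implementation, only a minimal numpy illustration of the same behaviour; the helper name draw_index and the renormalisation step are assumptions.

import numpy as np

def draw_index(probs, tolerant=False):
    """Illustrative stand-in for pybrain.utilities.drawIndex:
    sample one index according to the given probability weights."""
    probs = np.asarray(probs, dtype=float)
    if tolerant:
        # renormalise weights that do not sum exactly to one
        probs = probs / probs.sum()
    return int(np.random.choice(len(probs), p=probs))

# e.g. pick one of three mixture components by weight
chosen = draw_index([0.5, 0.3, 0.2], tolerant=True)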
Example 2
 def _generateSample(self):
     """ generate a new sample from the current distribution. """
     if self.useCauchy:
         # Cauchy distribution
         chosenOne = drawIndex(self.alphas, True)
         return multivariateCauchy(self.mus[chosenOne],
                                   self.sigmas[chosenOne])
     else:
         # Normal distribution
         chosenOne = drawIndex(self.alphas, True)
         return multivariate_normal(self.mus[chosenOne],
                                    self.sigmas[chosenOne])
Example 3
    def _generateOneOffspring(self, pop):
        """ produce one single offspring, given the population and the linkage matrix """
        # TODO: optimize?
        n = self.xdim
        # one gene is chosen directly
        initindex = choice(range(n))
        chosen = [(choice(range(len(pop))), initindex)]

        # the indices of the rest are shuffled
        indices = list(range(n))
        shuffle(indices)
        indices.remove(initindex)

        for index in indices:
            probs = zeros(len(pop))
            for parent in range(len(pop)):
                # determine the probability of drawing the i'th gene from parent p
                p1 = self._computeProbChosenGivenAq(len(pop), index, parent, chosen)
                p2 = self._computeProbChosenGivenAq(len(pop), index, parent, chosen, invertAq=True)
                probs[parent] = p1 / (p1 + (len(pop) - 1) * p2)
            # draw according to the probabilities
            chosen.append((drawIndex(probs, tolerant=True), index))

        child = zeros(self.xdim)
        crossovervector = zeros(self.xdim)
        for parent, index in chosen:
            child[index] = pop[parent][index]
            crossovervector[index] = parent
        return child, crossovervector
Example 4
 def _generateOneOffspring(self, pop):
     """ produce one single offspring, given the population and the linkage matrix """
     # TODO: optimize?
     n = self.xdim
     # one gene is chosen directly
     initindex = choice(range(n))
     chosen = [(choice(range(len(pop))), initindex)]
     
     # the indices of the rest are shuffled
     indices = list(range(n))
     shuffle(indices)
     indices.remove(initindex)
     
     for index in indices:
         probs = zeros(len(pop))
         for parent in range(len(pop)):
             # determine the probability of drawing the i'th gene from parent p
             p1 = self._computeProbChosenGivenAq(len(pop), index, parent, chosen)
             p2 = self._computeProbChosenGivenAq(len(pop), index, parent, chosen, invertAq = True)   
             probs[parent] = p1 / (p1 + (len(pop)-1)* p2)
         # draw according to the probabilities
         chosen.append((drawIndex(probs, tolerant = True), index))
         
     child = zeros(self.xdim)
     crossovervector = zeros(self.xdim)
     for parent, index in chosen:
         child[index] = pop[parent][index]   
         crossovervector[index] = parent       
     return child, crossovervector 
Example 5
    def _produceNewSample(self):
        """ returns a new sample, its fitness and its densities """
        chosenOne = drawIndex(self.alphas, True)
        mu = self.mus[chosenOne]

        if self.useAnticipatedMeanShift:
            if len(self.allsamples) % 2 == 1 and len(self.allsamples) > 1:
                if not (self.elitism and chosenOne == self.bestChosenCenter):
                    mu += self.meanShifts[chosenOne]

        if self.diagonalOnly:
            sample = normal(mu, self.sigmas[chosenOne])
        else:
            sample = multivariate_normal(mu, self.sigmas[chosenOne])
        if self.sampleElitism and len(
                self.allsamples) > self.windowSize and len(
                    self.allsamples) % self.windowSize == 0:
            sample = self.bestEvaluable.copy()
        fit = self._oneEvaluation(sample)

        if ((not self.minimize and fit >= self.bestEvaluation)
                or (self.minimize and fit <= self.bestEvaluation)
                or len(self.allsamples) == 0):
            # used to determine which center produced the current best
            self.bestChosenCenter = chosenOne
            self.bestSigma = self.sigmas[chosenOne].copy()
        if self.minimize:
            fit = -fit
        self.allfitnesses.append(fit)
        self.allsamples.append(sample)
        return sample, fit
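Examples 1, 2, 5 and 10 share the same mixture-sampling pattern: drawIndex first picks a mixture component according to its weight (alpha), then a multivariate Gaussian (or Cauchy) sample is drawn from that component's mean and covariance. Below is a minimal self-contained sketch of the idea with numpy; the names alphas, mus and sigmas mirror the examples, everything else is assumed for illustration.

import numpy as np

rng = np.random.default_rng()

alphas = np.array([0.6, 0.4])            # component weights
mus = [np.zeros(2), np.ones(2)]          # component means
sigmas = [np.eye(2), 0.5 * np.eye(2)]    # component covariances

chosen = rng.choice(len(alphas), p=alphas)                     # role of drawIndex(alphas, True)
sample = rng.multivariate_normal(mus[chosen], sigmas[chosen])  # sample from the chosen component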
Example 6
    def getAction(self):
        self.lastaction = drawIndex(self._actionProbs(self.lastobs), True)
        if self.learning and not self.learner.batchMode and self._oaro is not None:
            self.learner._updateWeights(*(self._oaro + [self.lastaction]))
            self._oaro = None
#         print "Agent " + str(self.indexOfAgent) + ": " + str(self.lastaction)
        return array([self.lastaction])
 def performAction(self, action):
     """ POMDP tasks, as they have discrete actions, can me used by providing either an index,
     or an array with a 1-in-n coding (which can be stochastic). """
     if type(action) == ndarray:
         action = drawIndex(action, tolerant = True)
     self.steps += 1
     EpisodicTask.performAction(self, action)
    def getAction(self):
        self.lastaction = drawIndex(self._actionProbs(self.lastobs), True)
        if self.learning and not self.learner.batchMode and self._oaro is not None:
            self.learner._updateWeights(*(self._oaro + [self.lastaction]))
            self._oaro = None
#         print "Agent " + str(self.indexOfAgent) + ": " + str(self.lastaction)
        return array([self.lastaction])
 def performAction(self, action):
     """ POMDP tasks, as they have discrete actions, can me used by providing either an index,
     or an array with a 1-in-n coding (which can be stochastic). """
     if type(action) == ndarray:
         action = drawIndex(action, tolerant=True)
     self.steps += 1
     EpisodicTask.performAction(self, action)
Example 10
    def _produceNewSample(self):
        """ returns a new sample, its fitness and its densities """
        chosenOne = drawIndex(self.alphas, True)
        mu = self.mus[chosenOne]

        if self.useAnticipatedMeanShift:
            if len(self.allsamples) % 2 == 1 and len(self.allsamples) > 1:
                if not(self.elitism and chosenOne == self.bestChosenCenter):
                    mu += self.meanShifts[chosenOne]

        if self.diagonalOnly:
            sample = normal(mu, self.sigmas[chosenOne])
        else:
            sample = multivariate_normal(mu, self.sigmas[chosenOne])
        if self.sampleElitism and len(self.allsamples) > self.windowSize and len(self.allsamples) % self.windowSize == 0:
            sample = self.bestEvaluable.copy()
        fit = self._oneEvaluation(sample)

        if ((not self.minimize and fit >= self.bestEvaluation)
            or (self.minimize and fit <= self.bestEvaluation)
            or len(self.allsamples) == 0):
            # used to determine which center produced the current best
            self.bestChosenCenter = chosenOne
            self.bestSigma = self.sigmas[chosenOne].copy()
        if self.minimize:
            fit = -fit
        self.allfitnesses.append(fit)
        self.allsamples.append(sample)
        return sample, fit
Example 11
File: nes.py Project: HKou/pybrain
 def computeChi(self, evals = 100):
     """ compute an estimate of the distance from the centers to the generated points """
     # CHECKME: correct handling of multiple centers?
     s = 0
     for dummy in range(evals):
         m = drawIndex(self.alpha, tolerant = True)
         z = mat(multivariate_normal(array(self.x[m]).flatten(), self.sigma[m])).T            
         s += norm(self.x[m] - z)
     return s/evals
Example 12
 def computeChi(self, evals=100):
     """ compute an estimate of the distance from the centers to the generated points """
     # CHECKME: correct handling of multiple centers?
     s = 0
     for dummy in range(evals):
         m = drawIndex(self.alpha, tolerant=True)
         z = mat(
             multivariate_normal(array(self.x[m]).flatten(),
                                 self.sigma[m])).T
         s += norm(self.x[m] - z)
     return s / evals
Example 13
 def oneSample(self, k):
     """ produce one new sample and update phi correspondingly """
     thesum = 0.0
     for i in range(self.mu):
         thesum += exp(self.basealpha[i])
     for i in range(self.mu):
         self.alpha[i] = exp(self.basealpha[i]) / thesum
     choosem = drawIndex(self.alpha, tolerant=True)
     self.chosenCenter[k] = choosem
     z = mat(
         multivariate_normal(
             array(self.x[choosem]).flatten(), self.sigma[choosem])).T
     self.zs[k] = z
     self.R[k] = self.evaluateAt(z)
     # TODO make for all mu
     if self.importanceSampling:
         self.rellhood[k] = multivariateNormalPdf(z, self.x[0],
                                                  self.sigma[0])
     logderivbasealpha = zeros((self.mu, 1))
     logderivx = zeros((self.mu, self.xdim))
     logderivfactorsigma = zeros((self.mu, self.xdim, self.xdim))
     for m in range(self.mu):
         self.sigma[m] = dot(self.factorSigma[m].T, self.factorSigma[m])
         if self.mu > 1:
             relresponsibility = (self.alpha[m] * multivariateNormalPdf(
                 ravel(z), ravel(self.x[m]), self.sigma[m]) / sum(
                     map(
                         lambda mm: self.alpha[mm] * multivariateNormalPdf(
                             ravel(z), ravel(self.x[mm]), self.sigma[mm]),
                         range(self.mu))))
         else:
             relresponsibility = 1.0
         if self.mu > 1:
             logderivbasealpha[m] = relresponsibility * (1.0 -
                                                         self.alpha[m])
         else:
             logderivbasealpha[m] = 0.0
         logderivx[m] = relresponsibility * (self.sigma[m].I *
                                             (z - self.x[m])).flatten()
         A = 0.5 * self.sigma[m].I * (z - self.x[m]) * (
             z - self.x[m]).T * self.sigma[m].I - 0.5 * self.sigma[m].I
         logderivsigma_m = self.blackmagic * relresponsibility * A  #0.5 * (A + diag(diag(A)))  #* 2.0
         logderivfactorsigma[m] = self.factorSigma[m] * (logderivsigma_m +
                                                         logderivsigma_m.T)
     #print 'logalpha', logderivbasealpha.flatten(), self.alpha, sum(logderivbasealpha)
     tmp = self.combineParams(logderivbasealpha, logderivx,
                              logderivfactorsigma)
     self.phi[k] = tmp
Example 14
    def _performAction(self, action, onlyavatar=False):
        """ Action is an index for the actionset.  """
        if action is None:
            return action

        # if actions are given as a vector, pick the argmax
        import numpy
        from scipy import argmax
        from pybrain.utilities import drawIndex
        if isinstance(action, numpy.ndarray):
            if abs(sum(action) - 1) < 1e-5:
                # vector represents probabilities
                action = drawIndex(action)
            else:
                action = argmax(action)

        # take action and compute consequences
        # replace the method that reads multiple action keys with a fn that just
        # returns the currently desired action
        self._avatar._readMultiActions = lambda *x: [self._actionset[action]]

        if self.visualize:
            self._game._clearAll(self.visualize)

        # update sprites
        if onlyavatar:
            self._avatar.update(self._game)
        else:
            for s in self._game:
                s.update(self._game)

        # handle collision effects
        self._game._eventHandling()
        if self.visualize:
            self._game._clearAll(self.visualize)

        # update screen
        if self.visualize:
            self._game._drawAll()
            pygame.display.update(VGDLSprite.dirtyrects)
            VGDLSprite.dirtyrects = []
            pygame.time.wait(self.actionDelay)

        if self.recordingEnabled:
            self._previous_state = self._last_state
            self._last_state = self.getState()
            self._allEvents.append(
                (self._previous_state, action, self._last_state))
Example 15
    def performAction(self, action, onlyavatar=False):
        """ Action is an index for the actionset.  """
        if action is None:
            return   
        # if actions are given as a vector, pick the argmax
        import numpy
        from scipy import argmax
        from pybrain.utilities import drawIndex
        if isinstance(action, numpy.ndarray):
            if abs(sum(action) - 1) < 1e-5:
                # vector represents probabilities
                action = drawIndex(action)
            else:
                action = argmax(action) 
    
        
        # take action and compute consequences
        self._avatar._readMultiActions = lambda *x: [self._actionset[action]]        
        self._game._clearAll(self.visualize)
        
        # update sprites 
        if onlyavatar:
            self._avatar.update(self._game)
        else:
            for s in self._game:
                s.update(self._game)
        
        # handle collision effects                
        self._game._updateCollisionDict()
        self._game._eventHandling()
        self._game._clearAll(self.visualize)
        
        # update screen
        if self.visualize:
            self._game._drawAll()                            
            pygame.display.update(VGDLSprite.dirtyrects)
            VGDLSprite.dirtyrects = []
            pygame.time.wait(self.actionDelay)       
                       

        if self.recordingEnabled:
            self._previous_state = self._last_state
            self._last_state = self.getState()
            self._allEvents.append((self._previous_state, action, self._last_state))
Example 16
File: nes.py Project: HKou/pybrain
 def oneSample(self, k):
     """ produce one new sample and update phi correspondingly """
     thesum = 0.0
     for i in range(self.mu):
         thesum += exp(self.basealpha[i])
     for i in range(self.mu):
         self.alpha[i] = exp(self.basealpha[i])/thesum
     choosem = drawIndex(self.alpha, tolerant = True)
     self.chosenCenter[k] = choosem
     z = mat(multivariate_normal(array(self.x[choosem]).flatten(), self.sigma[choosem])).T
     self.zs[k] = z
     self.R[k] = self.evaluateAt(z)
     # TODO make for all mu
     if self.importanceSampling:
         self.rellhood[k] = multivariateNormalPdf(z, self.x[0], self.sigma[0])
     logderivbasealpha = zeros((self.mu, 1))
     logderivx = zeros((self.mu, self.xdim))
     logderivfactorsigma = zeros((self.mu, self.xdim, self.xdim))
     for m in range(self.mu):
         self.sigma[m] = dot(self.factorSigma[m].T,self.factorSigma[m])
         if self.mu > 1:
             relresponsibility = (self.alpha[m] * multivariateNormalPdf(ravel(z), ravel(self.x[m]), self.sigma[m]) / 
                              sum(map(lambda mm: self.alpha[mm]*multivariateNormalPdf(ravel(z), ravel(self.x[mm]), self.sigma[mm]), range(self.mu))))
         else:
             relresponsibility = 1.0
         if self.mu > 1:
             logderivbasealpha[m] = relresponsibility * (1.0 - self.alpha[m])
         else:
             logderivbasealpha[m] = 0.0
         logderivx[m] = relresponsibility * (self.sigma[m].I * (z - self.x[m])).flatten()                
         A = 0.5 * self.sigma[m].I * (z - self.x[m]) * (z - self.x[m]).T * self.sigma[m].I - 0.5 * self.sigma[m].I
         logderivsigma_m = self.blackmagic * relresponsibility * A#0.5 * (A + diag(diag(A)))  #* 2.0
         logderivfactorsigma[m] = self.factorSigma[m]*(logderivsigma_m + logderivsigma_m.T)
     #print 'logalpha', logderivbasealpha.flatten(), self.alpha, sum(logderivbasealpha)
     tmp = self.combineParams(logderivbasealpha, logderivx, logderivfactorsigma)
     self.phi[k] = tmp
Example 17
 def getAction(self):
     return drawIndex(self.policy[self.stateIndexFun()])
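Here drawIndex samples an action from one row of a tabular stochastic policy, indexed by the current state. A short hypothetical sketch of that pattern follows; the policy table, state_index and draw_index names are illustrative assumptions, not pybrain API.

import numpy as np

# rows = states, columns = action probabilities (each row sums to one)
policy = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.8, 0.1]])

def state_index():
    # stand-in for stateIndexFun(): always report state 0 here
    return 0

def draw_index(probs):
    return int(np.random.choice(len(probs), p=probs))

action = draw_index(policy[state_index()])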
Example 18
    def learnOneBatch(self):
        # collect a batch of runs as experience
        r0s = []
        lens = []
        avgReward = 0.
        for dummy in range(self.batchSize):
            self.rawDs.newSequence()
            self.valueDs.newSequence()
            self.task.reset()
            self.net.reset()
            acts, obss, rewards = [], [], []
            while not self.task.isFinished():
                obs = self.task.getObservation()
                act = self.net.activate(obs)
                chosen = drawIndex(act)
                self.task.performAction(chosen)
                reward = self.task.getReward()
                obss.append(obs)
                y = zeros(len(act))
                y[chosen] = 1
                acts.append(y)
                rewards.append(reward)
            avgReward += sum(rewards) / float(len(rewards))

            # compute the returns from the list of rewards
            current = 0
            returns = []
            for r in reversed(rewards):
                current *= self.task.discount
                current += r
                returns.append(current)
            returns.reverse()
            for i in range(len(obss)):
                self.rawDs.addSample(obss[i], acts[i], returns[i])
                self.valueDs.addSample(obss[i], returns[i])
            r0s.append(returns[0])
            lens.append(len(returns))

        r0s = array(r0s)
        self.totalSteps += sum(lens)
        avgLen = sum(lens) / float(self.batchSize)
        avgR0 = mean(r0s)
        avgReward /= self.batchSize
        if self.verbose:
            print((
                '***',
                round(avgLen, 3),
                '***',
                '(avg init exp. return:',
                round(avgR0, 5),
                ')',
            ))
            print(('avg reward', round(avgReward,
                                       5), '(tau:', round(self.tau, 3), ')'))
            print(lens)
        # storage:
        self.rewardAvg.append(avgReward)
        self.lengthAvg.append(avgLen)
        self.initr0Avg.append(avgR0)

        #        if self.vnet == None:
        #            # case 1: no value estimator:

        # prepare the dataset for training the acting network
        shaped = self.shapingFunction(r0s)
        self.updateTau(r0s, shaped)
        shaped /= max(shaped)
        for i, seq in enumerate(self.rawDs):
            self.weightedDs.newSequence()
            for sample in seq:
                obs, act, dummy = sample
                self.weightedDs.addSample(obs, act, shaped[i])

#        else:
#            # case 2: value estimator:
#
#
#            # train the value estimating network
#            if self.verbose: print('Old value error:  ', self.vbp.testOnData())
#            self.vbp.trainEpochs(self.valueTrainEpochs)
#            if self.verbose: print('New value error:  ', self.vbp.testOnData())
#
#            # produce the values and analyze
#            rminusvs = []
#            sizes = []
#            for i, seq in enumerate(self.valueDs):
#                self.vnet.reset()
#                seq = list(seq)
#                for sample in seq:
#                    obs, ret = sample
#                    val = self.vnet.activate(obs)
#                    rminusvs.append(ret-val)
#                    sizes.append(len(seq))
#
#            rminusvs = array(rminusvs)
#            shapedRminusv = self.shapingFunction(rminusvs)
#            # CHECKME: here?
#            self.updateTau(rminusvs, shapedRminusv)
#            shapedRminusv /= array(sizes)
#            shapedRminusv /= max(shapedRminusv)
#
#            # prepare the dataset for training the acting network
#            rvindex = 0
#            for i, seq in enumerate(self.rawDs):
#                self.weightedDs.newSequence()
#                self.vnet.reset()
#                for sample in seq:
#                    obs, act, ret = sample
#                    self.weightedDs.addSample(obs, act, shapedRminusv[rvindex])
#                    rvindex += 1

        # train the acting network
        tmp1, tmp2 = self.bp.trainUntilConvergence(
            maxEpochs=self.maxEpochs,
            validationProportion=self.validationProportion,
            continueEpochs=self.continueEpochs,
            verbose=self.verbose)
        if self.supervisedPlotting:
            from pylab import plot, legend, figure, clf, draw
            figure(1)
            clf()
            plot(tmp1, label='train')
            plot(tmp2, label='valid')
            legend()
            draw()

        return avgLen, avgR0
Example 19
    def learnOneBatch(self):
        # collect a batch of runs as experience
        r0s = []
        lens = []
        avgReward = 0.
        for dummy in range(self.batchSize):
            self.rawDs.newSequence()
            self.valueDs.newSequence()
            self.task.reset()
            self.net.reset()
            acts, obss, rewards = [], [], []
            while not self.task.isFinished():
                obs = self.task.getObservation()
                act = self.net.activate(obs)
                chosen = drawIndex(act)
                self.task.performAction(chosen)
                reward = self.task.getReward()
                obss.append(obs)
                y = zeros(len(act))
                y[chosen] = 1
                acts.append(y)
                rewards.append(reward)
            avgReward += sum(rewards) / float(len(rewards))
            
            # compute the returns from the list of rewards
            current = 0        
            returns = []
            for r in reversed(rewards):
                current *= self.task.discount
                current += r
                returns.append(current)
            returns.reverse()
            for i in range(len(obss)):
                self.rawDs.addSample(obss[i], acts[i], returns[i])
                self.valueDs.addSample(obss[i], returns[i])
            r0s.append(returns[0])
            lens.append(len(returns))
            
        r0s = array(r0s)  
        self.totalSteps += sum(lens)
        avgLen = sum(lens) / float(self.batchSize)
        avgR0 = mean(r0s)
        avgReward /= self.batchSize
        if self.verbose:
            print('***', round(avgLen, 3), '***', '(avg init exp. return:', round(avgR0, 5), ')', end=' ')
            print('avg reward', round(avgReward, 5), '(tau:', round(self.tau, 3), ')')
            print(lens)
        # storage:
        self.rewardAvg.append(avgReward)
        self.lengthAvg.append(avgLen)
        self.initr0Avg.append(avgR0)
        
        
#        if self.vnet == None:
#            # case 1: no value estimator:
            
        # prepare the dataset for training the acting network  
        shaped = self.shapingFunction(r0s)
        self.updateTau(r0s, shaped)
        shaped /= max(shaped)
        for i, seq in enumerate(self.rawDs):
            self.weightedDs.newSequence()
            for sample in seq:
                obs, act, dummy = sample
                self.weightedDs.addSample(obs, act, shaped[i])
                    
#        else:
#            # case 2: value estimator:
#            
#            
#            # train the value estimating network
#            if self.verbose: print 'Old value error:  ', self.vbp.testOnData()
#            self.vbp.trainEpochs(self.valueTrainEpochs)
#            if self.verbose: print 'New value error:  ', self.vbp.testOnData()
#            
#            # produce the values and analyze
#            rminusvs = []
#            sizes = []
#            for i, seq in enumerate(self.valueDs):
#                self.vnet.reset()
#                seq = list(seq)
#                for sample in seq:
#                    obs, ret = sample
#                    val = self.vnet.activate(obs)
#                    rminusvs.append(ret-val)
#                    sizes.append(len(seq))
#                    
#            rminusvs = array(rminusvs)
#            shapedRminusv = self.shapingFunction(rminusvs)
#            # CHECKME: here?
#            self.updateTau(rminusvs, shapedRminusv)
#            shapedRminusv /= array(sizes)
#            shapedRminusv /= max(shapedRminusv)
#            
#            # prepare the dataset for training the acting network    
#            rvindex = 0
#            for i, seq in enumerate(self.rawDs):
#                self.weightedDs.newSequence()
#                self.vnet.reset()
#                for sample in seq:
#                    obs, act, ret = sample
#                    self.weightedDs.addSample(obs, act, shapedRminusv[rvindex])
#                    rvindex += 1
                    
        # train the acting network                
        tmp1, tmp2 = self.bp.trainUntilConvergence(maxEpochs=self.maxEpochs,
                                                   validationProportion=self.validationProportion,
                                                   continueEpochs=self.continueEpochs,
                                                   verbose=self.verbose)
        if self.supervisedPlotting:
            from pylab import plot, legend, figure, clf, draw
            figure(1)
            clf()
            plot(tmp1, label='train')
            plot(tmp2, label='valid')
            legend()
            draw()  
            
        return avgLen, avgR0                        
 def getAction(self):
     self.lastaction = drawIndex(self._actionProbs(self.lastobs), True)
     return array([self.lastaction])
Example 21
 def getAction(self):
     return drawIndex(self.policy[self.stateIndexFun()])