def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        """ Update the sigma derivatives and write the input error.

            ``inerr`` receives ``outbuf - inbuf`` and ``self._derivs`` receives
            the derivative terms built from ``expln(self.params)`` and
            ``explnPrime(self.params)``. ``outerr`` is not read.

            ``self.onesigma`` selects the algorithm:

            * true  -- a single term for all outputs, scaled by
              ``dot(self.state, self.state)`` and accumulated into
              ``self._derivs``.
            * false -- ``self.params`` is viewed as a
              ``(len(outbuf), len(self.state))`` matrix and one entry of
              ``self._derivs`` is written per matrix element.

            When ``self.autoalpha`` is false, both ``inerr`` and the
            derivatives are additionally divided by the state-weighted squared
            sigma term, provided that term is non-zero.
        """
        if self.onesigma:
            # algorithm for one global sigma for all mu's
            expln_params = expln(self.params)
            sumxsquared = dot(self.state, self.state)
            self._derivs += (
                sum((outbuf - inbuf)**2 - expln_params**2 * sumxsquared) /
                expln_params * explnPrime(self.params))
            inerr[:] = (outbuf - inbuf)

            if not self.autoalpha and sumxsquared != 0:
                inerr /= expln_params**2 * sumxsquared
                self._derivs /= expln_params**2 * sumxsquared
        else:
            # algorithm for a separate sigma for each mu
            n_out = len(outbuf)
            n_state = len(self.state)
            expln_params = expln(self.params).reshape(n_out, n_state)
            explnPrime_params = explnPrime(self.params).reshape(
                n_out, n_state)

            # NOTE(review): this branch overwrites self._derivs[idx] while the
            # one-sigma branch accumulates with += -- confirm this is intended.
            idx = 0
            for j in range(n_out):
                diff = outbuf[j] - inbuf[j]
                sigma_subst2 = dot(self.state**2, expln_params[j, :]**2)
                inerr[j] = diff

                if sigma_subst2 != 0:
                    scale = (diff ** 2 - sigma_subst2) / sigma_subst2
                    if not self.autoalpha:
                        # Same condition as the one-sigma branch: the extra
                        # division applies to inerr AND the derivatives only
                        # when autoalpha is switched off.
                        scale /= sigma_subst2
                        inerr[j] /= sigma_subst2
                else:
                    # A zero weighted sum would turn the division into NaN;
                    # write a zero derivative and leave inerr[j] unscaled.
                    scale = 0.

                for i in range(n_state):
                    self._derivs[idx] = scale * self.state[i] ** 2 * \
                        expln_params[j, i] * explnPrime_params[j, i]
                    idx += 1
Ejemplo n.º 2
0
 def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
     """ Accumulate the parameter derivatives and write the input error.

         Adds ``((outbuf - inbuf)**2 - s**2) / s * explnPrime(self.params)``
         to ``self._derivs``, where ``s = expln(self.params)``, and stores
         ``outbuf - inbuf`` in ``inerr``. Unless ``self.autoalpha`` is set,
         both ``inerr`` and ``self._derivs`` are then divided by ``s**2``.
         ``outerr`` is not read.
     """
     spread = expln(self.params)
     self._derivs += ((outbuf - inbuf)**2 - spread**2) / spread * explnPrime(self.params)
     inerr[:] = outbuf - inbuf

     if self.autoalpha:
         # autoalpha keeps the unnormalised values
         return

     spread_sq = spread**2
     inerr /= spread_sq
     self._derivs /= spread_sq
Ejemplo n.º 3
0
    def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        """ Write one derivative per (output, state) pair and the input error.

            ``self.params`` is viewed as a ``(len(outbuf), len(self.state))``
            matrix through ``expln`` / ``explnPrime``. Entry ``j * n + i`` of
            ``self._derivs`` (``n = len(self.state)``) is overwritten, and
            ``inerr[j]`` receives ``outbuf[j] - inbuf[j]``. ``outerr`` is not
            read.
        """
        rows = len(outbuf)
        cols = len(self.state)
        sigmas = expln(self.params).reshape(rows, cols)
        sigma_slopes = explnPrime(self.params).reshape(rows, cols)

        for j in range(rows):
            # squared state entries weighted by the squared sigmas of row j
            weighted_sq = dot(self.state**2, sigmas[j, :]**2)
            base = j * cols
            for i in range(cols):
                self._derivs[base + i] = (
                    ((outbuf[j] - inbuf[j]) ** 2 - weighted_sq) / weighted_sq
                    * self.state[i] ** 2 * sigmas[j, i] * sigma_slopes[j, i])
                # NOTE: an extra autoalpha-dependent division by the weighted
                # sum was disabled here in the original code.
            inerr[j] = outbuf[j] - inbuf[j]
Ejemplo n.º 4
0
    def __init__(self, statedim, actiondim, sigma=-2.):
        """ Set up the explorer for the given state and action sizes.

            Initialises the Explorer base with ``actiondim`` for both of its
            arguments and the ParameterContainer base with ``actiondim``
            parameters (``stdParams=0``). ``self.sigma`` is set to
            ``actiondim`` copies of ``sigma``, and a
            ``(statedim, actiondim)`` exploration matrix is drawn from a
            zero-mean normal with scale ``expln(self.sigma)``.
        """
        Explorer.__init__(self, actiondim, actiondim)
        self.statedim, self.actiondim = statedim, actiondim

        # every parameter starts out at the same sigma value
        ParameterContainer.__init__(self, actiondim, stdParams=0)
        self.sigma = [sigma] * actiondim

        # linear exploration function, stored as a matrix
        matrix_shape = (statedim, actiondim)
        self.explmatrix = random.normal(0., expln(self.sigma), matrix_shape)

        # no state has been seen yet
        self.state = None
Ejemplo n.º 5
0
 def _forwardImplementation(self, inbuf, outbuf):
     """ Copy ``inbuf`` to ``outbuf``, adding normal noise when enabled.

         With ``self.enabled`` false the input is passed through unchanged;
         otherwise each output is drawn from a normal centred on the input
         with scale ``expln(self.params)``.
     """
     if self.enabled:
         outbuf[:] = random.normal(inbuf, expln(self.params))
         return
     outbuf[:] = inbuf
Ejemplo n.º 6
0
 def newEpisode(self):
     """ Redraw the exploration matrix for one episode, keeping its shape.

         Values come from a zero-mean normal with scale
         ``expln(self.sigma)``.
     """
     spread = expln(self.sigma)
     shape = self.explmatrix.shape
     self.explmatrix = random.normal(0., spread, shape)
Ejemplo n.º 7
0
 def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
     """ Accumulate the sigma derivatives and write the input error.

         Adds ``((outbuf - inbuf)**2 - s**2) / s * explnPrime(self.sigma)``
         to ``self._derivs``, where ``s = expln(self.sigma)``, and stores
         ``outbuf - inbuf`` in ``inerr``. ``outerr`` is not read.
     """
     spread = expln(self.sigma)
     slope = ((outbuf - inbuf)**2 - spread**2) / spread * explnPrime(self.sigma)
     self._derivs += slope
     inerr[:] = outbuf - inbuf
Ejemplo n.º 8
0
 def _forwardImplementation(self, inbuf, outbuf):
     """ Fill ``outbuf`` with normal samples centred on ``inbuf``, using
         ``expln(self.sigma)`` as the scale. """
     spread = expln(self.sigma)
     outbuf[:] = random.normal(inbuf, spread)
 def drawRandomWeights(self):
     """ Hand the module a fresh set of parameters.

         Draws ``self.module.paramdim`` values from a zero-mean normal with
         scale ``expln(self.params)`` and passes them to
         ``self.module._setParameters``.
     """
     spread = expln(self.params)
     weights = random.normal(0, spread, self.module.paramdim)
     self.module._setParameters(weights)