コード例 #1
0
 def updateWeights(self):
     """Recompute ``self.weights`` from the stored samples.

     Only the first ``self.lstd_counter`` entries of ``self.samples`` are
     used.  The running matrix starts as an identity scaled by
     ``self.precond`` and is refreshed once per sample through
     ``matrix.SMInv``, while a vector accumulates ``s * r`` for every
     sample.  The product of the two becomes the new weights, reshaped to
     the existing weight shape.

     NOTE(review): ``matrix.SMInv`` is presumably a Sherman-Morrison
     rank-one inverse update -- confirm against the ``matrix`` module.
     """
     n_weights = self.weights.size
     inverse = numpy.eye(n_weights) * self.precond
     reward_sum = numpy.zeros(n_weights)

     for record in self.samples[:self.lstd_counter]:
         state, next_state, rew = self.extractSample(record)
         # Second vector of the update: current minus discounted successor.
         state_diff = state - self.lstd_gamma * next_state
         inverse = matrix.SMInv(inverse, state, state_diff, 1.0)
         reward_sum += state * rew

     solution = numpy.dot(inverse, reward_sum)
     self.weights = solution.reshape(self.weights.shape)
コード例 #2
0
    def update(self, phi_t, phi_tp, reward):
        """Apply one recursive step to the traces, ``self.A`` and the weights.

        Args:
            phi_t: array for the current step; added to the decayed traces.
            phi_tp: array for the following step; scaled by ``self.gamma``
                before being subtracted from ``phi_t``.
            reward: reward value used in the weight correction.

        NOTE(review): ``matrix.SMInv`` is presumably a Sherman-Morrison
        style inverse update with ``self.alpha`` as its scaling term --
        confirm against the ``matrix`` module.
        """
        # RLS-TD(lambda): decay the traces, then accumulate phi_t.
        self.traces *= self.lmbda * self.gamma
        self.traces += phi_t

        flat_traces = self.traces.flatten()
        feature_diff = (phi_t - self.gamma * phi_tp).flatten()

        # The gain is built from A *before* A itself is refreshed below.
        projected = numpy.dot(self.A, flat_traces)
        gain = projected / (self.alpha + numpy.dot(feature_diff, projected))

        # A update...
        self.A = matrix.SMInv(self.A, flat_traces, feature_diff, self.alpha)

        # Correct the weights by the prediction error along the gain.
        error = reward - numpy.dot(feature_diff, self.weights.flatten())
        self.weights += error * gain.reshape(self.weights.shape)
コード例 #3
0
 def rescale_update(self, phi_t, phi_tp, delta, reward, descent_direction):
     """Return ``descent_direction`` rescaled and multiplied by the step sizes.

     Every call increments ``self.adagrad_counter`` and refreshes ``self.h``
     through ``matrix.SMInv``, passing the flattened direction as both
     vectors.  While the counter is positive, the direction is multiplied
     by the real part of the matrix square root of ``self.h`` and by the
     square root of the counter.  The result is reshaped to the shape of
     ``self.step_sizes`` and multiplied element-wise by it.

     ``phi_t``, ``phi_tp``, ``delta`` and ``reward`` are accepted but not
     read by this method.

     NOTE(review): ``matrix.SMInv`` is presumably a Sherman-Morrison
     rank-one inverse update -- confirm against the ``matrix`` module.
     """
     self.adagrad_counter += 1

     flat_gradient = descent_direction.flatten()
     self.h = matrix.SMInv(self.h, flat_gradient, flat_gradient, 1.)

     rescaled = descent_direction
     if self.adagrad_counter > 0:
         # sqrtm may return a complex matrix; only the real part is kept.
         root_h = numpy.real(scipy.linalg.sqrtm(self.h))
         rescaled = numpy.dot(root_h, descent_direction.flatten())
         rescaled *= numpy.sqrt(self.adagrad_counter)

     steps = self.step_sizes
     return steps * rescaled.reshape(steps.shape)
コード例 #4
0
    def update(self, phi_t, phi_tp, reward):
        """Fold one transition into ``self.A`` and recompute the weights.

        Args:
            phi_t: array for the current step.
            phi_tp: array for the following step; scaled by
                ``self.lstd_gamma`` before being subtracted from ``phi_t``.
            reward: reward value that scales the traces added to
                ``self.step_sizes``.

        ``self.step_sizes`` is used here as a running accumulator of
        ``traces * reward``; the new weights are ``self.A`` applied to it,
        reshaped to the existing weight shape.

        NOTE(review): ``matrix.SMInv`` is presumably a Sherman-Morrison
        rank-one inverse update -- confirm against the ``matrix`` module.
        """
        feature_diff = phi_t.flatten() - self.lstd_gamma * phi_tp.flatten()
        flat_traces = self.traces.flatten()

        # Accumulate the reward-weighted traces.
        self.step_sizes += flat_traces * reward

        self.A = matrix.SMInv(self.A, flat_traces, feature_diff, 1.)

        solution = numpy.dot(self.A, self.step_sizes)
        self.weights = solution.reshape(self.weights.shape)