def updateWeights(self):
    """Recompute ``self.weights`` from the stored samples in one batch pass.

    Starts from an identity matrix scaled by ``self.precond`` and a zero
    vector, then folds in each of the first ``self.lstd_counter`` entries
    of ``self.samples``.  The matrix-vector product of the two accumulators
    is written back to ``self.weights`` in its existing shape.
    """
    n_weights = self.weights.size
    accum_matrix = numpy.eye(n_weights) * self.precond
    accum_vector = numpy.zeros(n_weights)

    active_samples = self.samples[:self.lstd_counter]
    for sample in active_samples:
        features, next_features, sample_reward = self.extractSample(sample)
        discounted_diff = features - self.lstd_gamma * next_features
        # NOTE(review): matrix.SMInv is presumably a Sherman-Morrison
        # rank-one inverse update -- confirm against the matrix module.
        accum_matrix = matrix.SMInv(accum_matrix, features,
                                    discounted_diff, 1.0)
        accum_vector += features * sample_reward

    solved = numpy.dot(accum_matrix, accum_vector)
    self.weights = solved.reshape(self.weights.shape)
def update(self, phi_t, phi_tp, reward):
    """Apply one RLS-TD(lambda) step for a single observed transition.

    Decays ``self.traces`` by ``self.lmbda * self.gamma`` and adds
    ``phi_t`` to it, refreshes ``self.A`` through ``matrix.SMInv``, and
    moves ``self.weights`` (in place) along a gain vector computed from
    the value of ``self.A`` before that refresh.

    phi_t   -- feature array for the current step
    phi_tp  -- feature array for the following step
    reward  -- reward observed on the transition
    """
    # RLS-TD(lambda)
    self.traces *= self.lmbda * self.gamma
    self.traces += phi_t

    trace_vec = self.traces.flatten()
    feature_diff = (phi_t - self.gamma * phi_tp).flatten()

    # A update...
    # The gain is computed before self.A is replaced, so it uses the old A.
    projected = numpy.dot(self.A, trace_vec)
    gain = projected / (self.alpha + numpy.dot(feature_diff, projected))
    self.A = matrix.SMInv(self.A, trace_vec, feature_diff, self.alpha)

    residual = reward - numpy.dot(feature_diff, self.weights.flatten())
    self.weights += residual * gain.reshape(self.weights.shape)
def rescale_update(self, phi_t, phi_tp, delta, reward, descent_direction):
    """Return the descent direction rescaled by ``self.h`` and step sizes.

    Increments ``self.adagrad_counter`` and folds the flattened descent
    direction into ``self.h`` via ``matrix.SMInv``.  While the counter is
    positive, the direction is multiplied by the real part of the matrix
    square root of ``self.h`` and by the square root of the counter.  The
    result is reshaped to ``self.step_sizes.shape`` and multiplied
    element-wise by ``self.step_sizes``.

    phi_t, phi_tp, delta and reward are accepted but not used here.
    """
    self.adagrad_counter += 1
    flat_direction = descent_direction.flatten()
    self.h = matrix.SMInv(self.h, flat_direction, flat_direction, 1.)

    updates_seen = self.adagrad_counter
    if updates_seen > 0:
        # sqrtm can return a complex matrix; only the real part is kept.
        h_root = numpy.real(scipy.linalg.sqrtm(self.h))
        descent_direction = numpy.dot(h_root, flat_direction)
        descent_direction *= numpy.sqrt(updates_seen)

    shaped = descent_direction.reshape(self.step_sizes.shape)
    return self.step_sizes * shaped
def update(self, phi_t, phi_tp, reward):
    """Fold one transition into the running solution and refresh weights.

    Adds the reward-scaled flattened traces to ``self.step_sizes`` (in
    place), updates ``self.A`` through ``matrix.SMInv``, and sets
    ``self.weights`` to the product of ``self.A`` and ``self.step_sizes``,
    reshaped to the existing weight shape.

    phi_t   -- feature array for the current step
    phi_tp  -- feature array for the following step
    reward  -- reward observed on the transition
    """
    feature_diff = phi_t.flatten() - self.lstd_gamma * phi_tp.flatten()
    trace_vec = self.traces.flatten()

    # self.step_sizes serves as the accumulated reward-weighted trace vector.
    self.step_sizes += trace_vec * reward
    self.A = matrix.SMInv(self.A, trace_vec, feature_diff, 1.)

    solution = numpy.dot(self.A, self.step_sizes)
    self.weights = solution.reshape(self.weights.shape)