def get_updates(self, params, loss):
    """Build the update pairs for one step, keeping two running averages per parameter.

    For every parameter a decayed average of squared gradients and a decayed
    average of squared steps are maintained in zero-initialised buffers
    created by ``shared_zeros``.

    Args:
        params: Sequence of parameters; each must expose ``get_value()`` so
            its shape can be read (presumably Theano shared variables --
            confirm against the caller).
        loss: Objective handed unchanged to ``self.get_gradients``.

    Returns:
        ``self.updates``: a list of ``(variable, new_expression)`` pairs.
        The first pair increments ``self.iterations``; after it, each
        parameter contributes three pairs in the order gradient average,
        parameter, step average.
    """
    def _zeros_like(param):
        # Zero buffer with the same shape as the parameter's current value.
        return shared_zeros(param.get_value().shape)

    grads = self.get_gradients(loss, params)
    sq_grad_avgs = [_zeros_like(p) for p in params]
    sq_step_avgs = [_zeros_like(p) for p in params]

    # `updates` aliases self.updates, so appends below land on the attribute.
    self.updates = updates = [(self.iterations, self.iterations + 1.)]

    for param, grad, sq_grad_avg, sq_step_avg in zip(
            params, grads, sq_grad_avgs, sq_step_avgs):
        # Decayed average of squared gradients.
        next_sq_grad_avg = self.rho * sq_grad_avg + (1 - self.rho) * grad**2
        updates.append((sq_grad_avg, next_sq_grad_avg))

        # The step deliberately pairs the *refreshed* gradient average with
        # the *previous* step average.
        numerator = grad * T.sqrt(sq_step_avg + self.epsilon)
        denominator = T.sqrt(next_sq_grad_avg + self.epsilon)
        step = numerator / denominator
        updates.append((param, param - self.lr * step))

        # Decayed average of squared steps, refreshed after the step is known.
        next_sq_step_avg = self.rho * sq_step_avg + (1 - self.rho) * step**2
        updates.append((sq_step_avg, next_sq_step_avg))

    return self.updates
def get_updates(self, params, constraints, loss):
    """Build the update pairs for one step, scaling by accumulated squared gradients.

    Each parameter gets a zero-initialised buffer (via ``shared_zeros``) that
    sums squared gradients without decay; the gradient is divided by the
    square root of that sum (plus ``self.epsilon``) before being applied.

    Args:
        params: Sequence of parameters; each must expose ``get_value()`` so
            its shape can be read (presumably Theano shared variables --
            confirm against the caller).
        constraints: Sequence of callables, one per parameter; each is
            called on the candidate new value and its result is what gets
            stored. ``zip`` stops at the shortest input, so a shorter
            ``constraints`` leaves trailing parameters without updates.
        loss: Objective handed unchanged to ``self.get_gradients``.

    Returns:
        ``self.updates``: a list of ``(variable, new_expression)`` pairs,
        two per parameter in the order accumulator, parameter.
    """
    def _zeros_like(param):
        # Zero buffer with the same shape as the parameter's current value.
        return shared_zeros(param.get_value().shape)

    grads = self.get_gradients(loss, params)
    sq_grad_sums = [_zeros_like(p) for p in params]

    # `updates` aliases self.updates, so appends below land on the attribute.
    self.updates = updates = []

    for param, grad, sq_grad_sum, constrain in zip(
            params, grads, sq_grad_sums, constraints):
        # Running (undecayed) sum of squared gradients.
        next_sq_grad_sum = sq_grad_sum + grad**2
        updates.append((sq_grad_sum, next_sq_grad_sum))

        scaled_grad = self.lr * grad
        step = scaled_grad / T.sqrt(next_sq_grad_sum + self.epsilon)
        candidate = param - step
        # The constraint sees the unconstrained candidate and returns the
        # value that is actually written back.
        updates.append((param, constrain(candidate)))

    return self.updates
def get_updates(self, params, loss):
    """Build the update pairs for one step, scaling by a decayed squared-gradient average.

    Each parameter gets a zero-initialised buffer (via ``shared_zeros``)
    holding a decayed average of squared gradients, weighted by
    ``self.rho``; the gradient is divided by the square root of that average
    (plus ``self.epsilon``) before being applied.

    Args:
        params: Sequence of parameters; each must expose ``get_value()`` so
            its shape can be read (presumably Theano shared variables --
            confirm against the caller).
        loss: Objective handed unchanged to ``self.get_gradients``.

    Returns:
        ``self.updates``: a list of ``(variable, new_expression)`` pairs.
        The first pair increments ``self.iterations``; after it, each
        parameter contributes two pairs in the order accumulator, parameter.
    """
    def _zeros_like(param):
        # Zero buffer with the same shape as the parameter's current value.
        return shared_zeros(param.get_value().shape)

    grads = self.get_gradients(loss, params)
    sq_grad_avgs = [_zeros_like(p) for p in params]

    # `updates` aliases self.updates, so appends below land on the attribute.
    self.updates = updates = [(self.iterations, self.iterations + 1.)]

    for param, grad, sq_grad_avg in zip(params, grads, sq_grad_avgs):
        # Decayed average of squared gradients.
        next_sq_grad_avg = self.rho * sq_grad_avg + (1 - self.rho) * grad**2
        updates.append((sq_grad_avg, next_sq_grad_avg))

        scaled_grad = self.lr * grad
        step = scaled_grad / T.sqrt(next_sq_grad_avg + self.epsilon)
        # The new parameter value is stored as-is; nothing post-processes it.
        updates.append((param, param - step))

    return self.updates
def get_updates(self, params, loss):
    """Build the update pairs for one momentum-based gradient step.

    The base rate ``self.lr`` is scaled by
    ``1 / (1 + self.decay * self.iterations)``. Each parameter gets a
    zero-initialised momentum buffer (via ``shared_zeros``) created on every
    call to this method.

    Args:
        params: Sequence of parameters; each must expose ``get_value()`` so
            its shape can be read (presumably Theano shared variables --
            confirm against the caller).
        loss: Objective handed unchanged to ``self.get_gradients``.

    Returns:
        ``self.updates``: a list of ``(variable, new_expression)`` pairs.
        The first pair increments ``self.iterations``; after it, each
        parameter contributes two pairs in the order momentum buffer,
        parameter.
    """
    grads = self.get_gradients(loss, params)

    decay_factor = 1.0 / (1.0 + self.decay * self.iterations)
    effective_lr = self.lr * decay_factor

    # `updates` aliases self.updates, so appends below land on the attribute.
    self.updates = updates = [(self.iterations, self.iterations + 1.)]

    for param, grad in zip(params, grads):
        momentum_buf = shared_zeros(param.get_value().shape)
        velocity = self.momentum * momentum_buf - effective_lr * grad
        updates.append((momentum_buf, velocity))

        # With self.nesterov set, the step applies the momentum-scaled new
        # velocity plus one more gradient step; otherwise just the velocity.
        next_param = (
            param + self.momentum * velocity - effective_lr * grad
            if self.nesterov
            else param + velocity
        )
        updates.append((param, next_param))

    return self.updates
def zero(shape):
    """Return whatever ``shared_zeros`` produces for ``shape``.

    This is a thin alias: the argument is forwarded untouched and the result
    is returned untouched.
    """
    result = shared_zeros(shape)
    return result