def get_updates(self, params, constraints, loss):
    """Build the (variable, new_value) update pairs for one training step.

    Two zero-initialised state variables are created per parameter: a
    ``rho``-decayed running average of squared gradients and a
    ``rho``-decayed running average of squared updates.  For every
    parameter three pairs are appended, in this order: gradient average,
    constrained parameter, update average.

    Args:
        params: parameter variables to update.
        constraints: callables paired positionally with ``params``; each is
            applied to the corresponding new parameter value.
        loss: loss expression passed to ``self.get_gradients``.

    Returns:
        The list stored in ``self.updates``.
    """
    grads = self.get_gradients(loss, params)

    def zero_state(param):
        # Backend variable of zeros with the same shape as the parameter value.
        return K.variable(np.zeros(K.get_value(param).shape))

    sq_grad_avgs = [zero_state(param) for param in params]
    sq_update_avgs = [zero_state(param) for param in params]

    self.updates = []
    per_param = zip(params, grads, sq_grad_avgs, sq_update_avgs, constraints)
    for param, grad, sq_grad_avg, sq_update_avg, constraint in per_param:
        # Decayed average of squared gradients.
        next_sq_grad_avg = (
            self.rho * sq_grad_avg + (1 - self.rho) * K.square(grad)
        )
        self.updates.append((sq_grad_avg, next_sq_grad_avg))

        # The step combines the *new* gradient average with the *old*
        # update average.
        rms_update = K.sqrt(sq_update_avg + self.epsilon)
        rms_grad = K.sqrt(next_sq_grad_avg + self.epsilon)
        step = grad * rms_update / rms_grad

        candidate = param - self.lr * step
        # The constraint is applied to the value that gets written back.
        self.updates.append((param, constraint(candidate)))

        # Decayed average of squared updates.
        next_sq_update_avg = (
            self.rho * sq_update_avg + (1 - self.rho) * K.square(step)
        )
        self.updates.append((sq_update_avg, next_sq_update_avg))

    return self.updates
def get_gradients(self, loss, params):
    """Return the gradients of ``loss`` w.r.t. ``params``, optionally clipped.

    If the instance has a positive ``clipnorm`` attribute, every gradient is
    passed through ``clip_norm`` together with the square root of the total
    sum of squared gradient entries.  If it has a positive ``clipvalue``
    attribute, every gradient is then clipped element-wise to
    ``[-clipvalue, clipvalue]``.  Both steps may apply, in that order.

    Args:
        loss: loss expression to differentiate.
        params: variables to differentiate with respect to.

    Returns:
        A list of gradient expressions, one per entry of ``params``.
    """
    grads = K.gradients(loss, params)

    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        # One norm over all gradients together, not one norm per gradient.
        squared_sums = [K.sum(K.square(grad)) for grad in grads]
        norm = K.sqrt(sum(squared_sums))
        grads = [clip_norm(grad, self.clipnorm, norm) for grad in grads]

    if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        upper = self.clipvalue
        lower = -self.clipvalue
        grads = [K.clip(grad, lower, upper) for grad in grads]

    return grads
def get_updates(self, params, constraints, loss):
    """Build the (variable, new_value) update pairs for one training step.

    One zero-initialised accumulator is created per parameter and holds the
    running sum of squared gradients.  For every parameter two pairs are
    appended, in this order: accumulator, constrained parameter.

    Args:
        params: parameter variables to update.
        constraints: callables paired positionally with ``params``; each is
            applied to the corresponding new parameter value.
        loss: loss expression passed to ``self.get_gradients``.

    Returns:
        The list stored in ``self.updates``.
    """
    grads = self.get_gradients(loss, params)

    def zero_state(param):
        # Backend variable of zeros with the same shape as the parameter value.
        return K.variable(np.zeros(K.get_value(param).shape))

    sq_grad_sums = [zero_state(param) for param in params]

    self.updates = []
    per_param = zip(params, grads, sq_grad_sums, constraints)
    for param, grad, sq_grad_sum, constraint in per_param:
        # Accumulate the squared gradient (no decay).
        next_sq_grad_sum = sq_grad_sum + K.square(grad)
        self.updates.append((sq_grad_sum, next_sq_grad_sum))

        # Scale the step by the root of the accumulated squared gradients.
        denom = K.sqrt(next_sq_grad_sum + self.epsilon)
        candidate = param - self.lr * grad / denom
        # The constraint is applied to the value that gets written back.
        self.updates.append((param, constraint(candidate)))

    return self.updates
def get_updates(self, params, constraints, loss):
    """Assemble the update pairs for a running-sum-of-squares step.

    Each parameter gets a zero-filled accumulator variable of matching
    shape.  The accumulator grows by the squared gradient, and the
    parameter moves against the gradient, divided by the square root of the
    new accumulator plus ``self.epsilon``.

    Args:
        params: variables being optimised.
        constraints: per-parameter callables (matched by position) applied
            to the proposed parameter value.
        loss: expression handed to ``self.get_gradients``.

    Returns:
        ``self.updates``: (accumulator, new) and (parameter, new) pairs,
        in that order for each parameter.
    """
    gradients = self.get_gradients(loss, params)
    history = [
        K.variable(np.zeros(K.get_value(variable).shape))
        for variable in params
    ]

    self.updates = []
    for variable, gradient, hist, constrain in zip(
            params, gradients, history, constraints):
        grown = hist + K.square(gradient)
        self.updates.append((hist, grown))

        scaled_gradient = self.lr * gradient / K.sqrt(grown + self.epsilon)
        proposal = variable - scaled_gradient
        # Constrain the proposal before it is written to the variable.
        self.updates.append((variable, constrain(proposal)))
    return self.updates
def get_updates(self, params, constraints, loss):
    """Assemble the update pairs for a step with two decayed averages.

    Each parameter gets two zero-filled state variables of matching shape:
    one tracks a ``self.rho``-weighted average of squared gradients, the
    other a ``self.rho``-weighted average of squared steps.

    Args:
        params: variables being optimised.
        constraints: per-parameter callables (matched by position) applied
            to the proposed parameter value.
        loss: expression handed to ``self.get_gradients``.

    Returns:
        ``self.updates``: for each parameter, the pairs for the gradient
        average, the parameter itself and the step average, in that order.
    """
    gradients = self.get_gradients(loss, params)

    # Two passes over params: all gradient averages are created first,
    # then all step averages.
    grad_history, step_history = (
        [K.variable(np.zeros(K.get_value(variable).shape))
         for variable in params]
        for _ in range(2)
    )

    self.updates = []
    for variable, gradient, grad_hist, step_hist, constrain in zip(
            params, gradients, grad_history, step_history, constraints):
        keep = self.rho * grad_hist
        blend = (1 - self.rho) * K.square(gradient)
        fresh_grad_hist = keep + blend
        self.updates.append((grad_hist, fresh_grad_hist))

        # Numerator uses the step average from *before* this update,
        # denominator the gradient average from *after* it.
        numerator = gradient * K.sqrt(step_hist + self.epsilon)
        step = numerator / K.sqrt(fresh_grad_hist + self.epsilon)

        proposal = variable - self.lr * step
        # Constrain the proposal before it is written to the variable.
        self.updates.append((variable, constrain(proposal)))

        fresh_step_hist = (
            self.rho * step_hist + (1 - self.rho) * K.square(step)
        )
        self.updates.append((step_hist, fresh_step_hist))
    return self.updates
def get_updates(self, params, constraints, loss):
    """Build the (variable, new_value) update pairs for one training step.

    The first pair increments ``self.iterations``.  For every parameter two
    zero-initialised state variables are created (an average of gradients
    weighted by ``beta_1`` and an average of squared gradients weighted by
    ``beta_2``), and three pairs are appended in this order: first average,
    second average, constrained parameter.

    Args:
        params: parameter variables to update.
        constraints: callables paired positionally with ``params``; each is
            applied to the corresponding new parameter value.
        loss: loss expression passed to ``self.get_gradients``.

    Returns:
        The list stored in ``self.updates``.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [(self.iterations, self.iterations+1.)]

    step = self.iterations + 1
    # Learning rate rescaled by the step-dependent beta powers.
    second_scale = K.sqrt(1 - K.pow(self.beta_2, step))
    first_scale = 1 - K.pow(self.beta_1, step)
    lr_t = self.lr * second_scale / first_scale

    def zero_state(param):
        # Backend variable of zeros with the same shape as the parameter value.
        return K.variable(np.zeros(K.get_value(param).shape))

    for param, grad, constraint in zip(params, grads, constraints):
        grad_avg = zero_state(param)
        sq_grad_avg = zero_state(param)

        next_grad_avg = (self.beta_1 * grad_avg) + (1 - self.beta_1) * grad
        next_sq_grad_avg = (
            (self.beta_2 * sq_grad_avg) + (1 - self.beta_2) * K.square(grad)
        )
        denom = K.sqrt(next_sq_grad_avg) + self.epsilon
        candidate = param - lr_t * next_grad_avg / denom

        self.updates.append((grad_avg, next_grad_avg))
        self.updates.append((sq_grad_avg, next_sq_grad_avg))
        # The constraint is applied to the value that gets written back.
        self.updates.append((param, constraint(candidate)))

    return self.updates
def get_updates(self, params, constraints, loss):
    """Assemble the update pairs for a step with two moment estimates.

    ``self.updates`` starts with the pair that advances ``self.iterations``
    by one.  Each parameter then gets two zero-filled state variables of
    matching shape, blended with the gradient and the squared gradient
    using ``self.beta_1`` and ``self.beta_2`` respectively.

    Args:
        params: variables being optimised.
        constraints: per-parameter callables (matched by position) applied
            to the proposed parameter value.
        loss: expression handed to ``self.get_gradients``.

    Returns:
        ``self.updates``: the iteration pair, followed for each parameter
        by the pairs for the first estimate, the second estimate and the
        parameter itself.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [(self.iterations, self.iterations + 1.)]

    t = self.iterations + 1
    numerator = self.lr * K.sqrt(1 - K.pow(self.beta_2, t))
    lr_t = numerator / (1 - K.pow(self.beta_1, t))

    for variable, gradient, constrain in zip(params, gradients, constraints):
        # Both estimates start at zero.
        moment = K.variable(np.zeros(K.get_value(variable).shape))
        velocity = K.variable(np.zeros(K.get_value(variable).shape))

        moment_t = (self.beta_1 * moment) + (1 - self.beta_1) * gradient
        velocity_t = (
            (self.beta_2 * velocity)
            + (1 - self.beta_2) * K.square(gradient)
        )
        scaled_moment = lr_t * moment_t / (K.sqrt(velocity_t) + self.epsilon)
        proposal = variable - scaled_moment

        self.updates.append((moment, moment_t))
        self.updates.append((velocity, velocity_t))
        # Constrain the proposal before it is written to the variable.
        self.updates.append((variable, constrain(proposal)))
    return self.updates
def mean_squared_logarithmic_error(y_true, y_pred):
    """Mean over the last axis of the squared difference of ``log(x + 1)``.

    Both inputs are clipped from below at ``K.epsilon()`` before one is
    added and the logarithm is taken.

    Args:
        y_true: target values.
        y_pred: predicted values.

    Returns:
        The backend expression for the mean squared log difference,
        reduced along ``axis=-1``.
    """
    def shifted_log(values):
        # Clip from below only; the upper bound is infinity.
        return K.log(K.clip(values, K.epsilon(), np.inf) + 1.)

    pred_log = shifted_log(y_pred)
    true_log = shifted_log(y_true)
    squared_gap = K.square(pred_log - true_log)
    return K.mean(squared_gap, axis=-1)
def root_mean_squared_error(y_true, y_pred):
    """Square root of the mean squared difference along the last axis.

    Args:
        y_true: target values.
        y_pred: predicted values.

    Returns:
        The backend expression ``sqrt(mean((y_pred - y_true) ** 2, axis=-1))``.
    """
    residual = y_pred - y_true
    mean_square = K.mean(K.square(residual), axis=-1)
    return K.sqrt(mean_square)
def __call__(self, p):
    """Rescale ``p`` so each slice along the last axis has unit L2 norm.

    The L2 norm is taken over ``axis=-1`` with ``keepdims=True`` so that it
    broadcasts against ``p`` in the division.

    Args:
        p: the tensor to normalise.

    Returns:
        ``p`` divided by its L2 norm along the last axis, plus
        ``K.epsilon()``.
    """
    norms = K.sqrt(K.sum(K.square(p), axis=-1, keepdims=True))
    # Guard the division: an all-zero slice has norm 0, and 0 / 0 would
    # otherwise yield NaN.  With the fuzz term such a slice stays at zero.
    return p / (K.epsilon() + norms)
def __call__(self, p):
    """Scale ``p`` so its L2 norms taken over axis 0 do not exceed ``self.m``.

    Norms already inside ``[0, self.m]`` give a scale factor of roughly one
    (slightly less, because of the ``1e-7`` term in the denominator).

    Args:
        p: the tensor to constrain.

    Returns:
        ``p`` multiplied by ``clip(norm, 0, self.m) / (1e-7 + norm)``.
    """
    l2_norms = K.sqrt(K.sum(K.square(p), axis=0))
    capped_norms = K.clip(l2_norms, 0, self.m)
    # The small constant keeps the ratio finite when a norm is zero.
    scale = capped_norms / (1e-7 + l2_norms)
    return p * scale
def squared_hinge(y_true, y_pred):
    """Mean over the last axis of ``max(1 - y_true * y_pred, 0) ** 2``.

    Args:
        y_true: target values.
        y_pred: predicted values.

    Returns:
        The backend expression for the squared hinge term, reduced along
        ``axis=-1``.
    """
    margin = 1. - y_true * y_pred
    hinge = K.maximum(margin, 0.)
    return K.mean(K.square(hinge), axis=-1)