def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    For every parameter this keeps a moving average of the gradient (`m`)
    and of the squared gradient (`v`), and moves the parameter by
    `lr_t * m_t / (sqrt(v_t) + epsilon)`, where `lr_t` is the (optionally
    decayed) learning rate scaled by
    `sqrt(1 - beta_2**t) / (1 - beta_1**t)`.

    # Arguments
        loss: Loss forwarded unchanged to `self.get_gradients`.
        params: List of variables to update.

    # Returns
        The list of update ops, which is also stored in `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # `lr` aliases `self.lr`, so build a new tensor instead of using
        # `lr *= ...`: an augmented assignment would call the variable's
        # in-place operator when it defines one, which either mutates the
        # stored learning rate or raises on variable types that reject
        # in-place arithmetic.
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    # Per-parameter state, created with the parameter's shape and dtype.
    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    For every parameter this keeps a moving average of the gradient (`m`)
    and a running value `u = max(beta_2 * u, |g|)`, and moves the parameter
    by `lr_t * m_t / (u_t + epsilon)`, where `lr_t` is the (optionally
    decayed) learning rate divided by `1 - beta_1**t`.

    # Arguments
        loss: Loss forwarded unchanged to `self.get_gradients`.
        params: List of variables to update.

    # Returns
        The list of update ops, which is also stored in `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # `lr` aliases `self.lr`, so build a new tensor instead of using
        # `lr *= ...`: an augmented assignment would call the variable's
        # in-place operator when it defines one, which either mutates the
        # stored learning rate or raises on variable types that reject
        # in-place arithmetic.
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr / (1. - K.pow(self.beta_1, t))

    shapes = [K.int_shape(p) for p in params]
    # zero init of 1st moment
    ms = [K.zeros(shape) for shape in shapes]
    # zero init of exponentially weighted infinity norm
    us = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + us

    for p, g, m, u in zip(params, grads, ms, us):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        u_t = K.maximum(self.beta_2 * u, K.abs(g))
        p_t = p - lr_t * m_t / (u_t + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(u, u_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    For every parameter this keeps two accumulators: a moving average of
    the squared gradient (`a`) and a moving average of the squared update
    (`d_a`). The step is
    `g * sqrt(d_a + epsilon) / sqrt(new_a + epsilon)`, scaled by the
    (optionally decayed) learning rate.

    # Arguments
        loss: Loss forwarded unchanged to `self.get_gradients`.
        params: List of variables to update.

    # Returns
        The list of update ops, which is also stored in `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    delta_accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators + delta_accumulators
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # `lr` aliases `self.lr`, so build a new tensor instead of using
        # `lr *= ...`: an augmented assignment would call the variable's
        # in-place operator when it defines one, which either mutates the
        # stored learning rate or raises on variable types that reject
        # in-place arithmetic.
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
        # update accumulator
        new_a = self.rho * a + (1. - self.rho) * K.square(g)
        self.updates.append(K.update(a, new_a))

        # use the new accumulator and the *old* delta_accumulator
        update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
        new_p = p - lr * update

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))

        # update delta_accumulator
        new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
        self.updates.append(K.update(d_a, new_d_a))
    return self.updates
def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    For every parameter this keeps a velocity `m`, updated as
    `v = momentum * m - lr * g`. The parameter moves by `v`, or by
    `momentum * v - lr * g` when `self.nesterov` is set.

    # Arguments
        loss: Loss forwarded unchanged to `self.get_gradients`.
        params: List of variables to update.

    # Returns
        The list of update ops, which is also stored in `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # `lr` aliases `self.lr`, so build a new tensor instead of using
        # `lr *= ...`: an augmented assignment would call the variable's
        # in-place operator when it defines one, which either mutates the
        # stored learning rate or raises on variable types that reject
        # in-place arithmetic.
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    # momentum
    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments

    for p, g, m in zip(params, grads, moments):
        v = self.momentum * m - lr * g  # velocity
        self.updates.append(K.update(m, v))

        if self.nesterov:
            new_p = p + self.momentum * v - lr * g
        else:
            new_p = p + v

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    For every parameter this keeps a moving average of the squared
    gradient (`a`) and moves the parameter by
    `lr * g / (sqrt(new_a) + epsilon)`, with `lr` optionally decayed by
    the iteration count.

    # Arguments
        loss: Loss forwarded unchanged to `self.get_gradients`.
        params: List of variables to update.

    # Returns
        The list of update ops, which is also stored in `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    # One accumulator per parameter, with the parameter's shape and dtype.
    accumulators = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
    ]
    self.weights = accumulators
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # `lr` aliases `self.lr`, so build a new tensor instead of using
        # `lr *= ...`: an augmented assignment would call the variable's
        # in-place operator when it defines one, which either mutates the
        # stored learning rate or raises on variable types that reject
        # in-place arithmetic.
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    for p, g, a in zip(params, grads, accumulators):
        # update accumulator
        new_a = self.rho * a + (1. - self.rho) * K.square(g)
        self.updates.append(K.update(a, new_a))
        new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def compute_mask(self, inputs, mask=None):
    """Combine the per-input masks into a single output mask.

    # Arguments
        inputs: List of input tensors.
        mask: List with one entry per input (a mask tensor or `None`),
            or `None` when there is no masking at all.

    # Returns
        `None` when `mask` is `None` or every entry in it is `None`;
        otherwise the masks concatenated along `self.axis` and reduced
        with a logical "all" over the last axis.

    # Raises
        ValueError: If `mask` or `inputs` is not a list, or if the two
            lists differ in length.
    """
    if mask is None:
        return None

    # Validate in this order so the first failing check decides the message.
    if not isinstance(mask, list):
        raise ValueError('`mask` should be a list.')
    if not isinstance(inputs, list):
        raise ValueError('`inputs` should be a list.')
    if len(mask) != len(inputs):
        raise ValueError('The lists `inputs` and `mask` '
                         'should have the same length.')

    if not any(entry is not None for entry in mask):
        return None

    # Bring every mask to the rank of its input before concatenating.
    aligned = []
    for tensor, tensor_mask in zip(inputs, mask):
        if tensor_mask is None:
            # Unmasked input: use an all-ones mask, cast to bool.
            aligned_mask = K.cast(K.ones_like(tensor), 'bool')
        elif K.ndim(tensor_mask) < K.ndim(tensor):
            # Mask has fewer axes than the input: append one.
            aligned_mask = K.expand_dims(tensor_mask)
        else:
            aligned_mask = tensor_mask
        aligned.append(aligned_mask)

    joined = K.concatenate(aligned, axis=self.axis)
    return K.all(joined, axis=-1, keepdims=False)
def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
    """Return `inputs` with randomly selected units replaced and rescaled.

    Units whose uniform random draw is below `rate` are replaced by
    `-alpha * scale`; the result is then passed through the affine map
    `a * x + b`. `alpha`, `scale` and `noise_shape` come from the
    enclosing scope.
    """
    # Replacement value for dropped units.
    alpha_p = -alpha * scale

    # 1.0 where the unit is kept, 0.0 where it is dropped.
    uniform_noise = K.random_uniform(noise_shape, seed=seed)
    keep_mask = K.cast(K.greater_equal(uniform_noise, rate), K.floatx())

    # Coefficients of the final affine transform.
    a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5
    b = -a * alpha_p * rate

    mixed = inputs * keep_mask + alpha_p * (1 - keep_mask)
    return a * mixed + b
def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
    """Apply the random replacement and affine rescaling to `inputs`.

    A unit is kept when its uniform random draw is `>= rate`; otherwise it
    is set to `alpha_p = -alpha * scale`. The mixed tensor is returned as
    `a * x + b`. `alpha`, `scale` and `noise_shape` are read from the
    enclosing scope.
    """
    alpha_p = -alpha * scale

    draws = K.random_uniform(noise_shape, seed=seed)
    kept = K.greater_equal(draws, rate)
    # Float mask so it can be used arithmetically below.
    kept = K.cast(kept, K.floatx())
    dropped = 1 - kept

    a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5
    b = -a * alpha_p * rate

    return a * (inputs * kept + alpha_p * dropped) + b
def get_updates(self, loss, params):
    """Build the update ops for one step using a scheduled momentum.

    The momentum coefficient for the current and the next step is derived
    from `beta_1`, the constant 0.96 and `self.schedule_decay`; their
    running product is kept in `self.m_schedule`. Each parameter has a
    moving average of the gradient and of the squared gradient.

    # Arguments
        loss: Loss forwarded unchanged to `self.get_gradients`.
        params: List of variables to update.

    # Returns
        The list of updates, which is also stored in `self.updates`.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    step = K.cast(self.iterations, K.floatx()) + 1

    # Due to the recommendations in [2], i.e. warming momentum schedule
    mu_now = self.beta_1 * (
        1. - 0.5 * (K.pow(K.cast_to_floatx(0.96),
                          step * self.schedule_decay)))
    mu_next = self.beta_1 * (
        1. - 0.5 * (K.pow(K.cast_to_floatx(0.96),
                          (step + 1) * self.schedule_decay)))
    schedule_now = self.m_schedule * mu_now
    schedule_next = self.m_schedule * mu_now * mu_next
    self.updates.append((self.m_schedule, schedule_now))

    # All first-moment slots are created before any second-moment slot.
    first_moments = [K.zeros(K.int_shape(p)) for p in params]
    second_moments = [K.zeros(K.int_shape(p)) for p in params]
    self.weights = [self.iterations] + first_moments + second_moments

    slots = zip(params, gradients, first_moments, second_moments)
    for param, grad, m_prev, v_prev in slots:
        # the following equations given in [1]
        grad_hat = grad / (1. - schedule_now)
        m_new = self.beta_1 * m_prev + (1. - self.beta_1) * grad
        m_hat = m_new / (1. - schedule_next)
        v_new = self.beta_2 * v_prev + (1. - self.beta_2) * K.square(grad)
        v_hat = v_new / (1. - K.pow(self.beta_2, step))
        m_bar = (1. - mu_now) * grad_hat + mu_next * m_hat

        self.updates.append(K.update(m_prev, m_new))
        self.updates.append(K.update(v_prev, v_new))

        updated = param - self.lr * m_bar / (K.sqrt(v_hat) + self.epsilon)

        # Apply constraints.
        if getattr(param, 'constraint', None) is not None:
            updated = param.constraint(updated)

        self.updates.append(K.update(param, updated))
    return self.updates
def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
    """Mean of whether each integer label is among the top `k` predictions.

    # Arguments
        y_true: Tensor of integer class labels, either with a trailing
            axis that is reduced away (e.g. `(num_samples, 1)`) or already
            one rank lower than `y_pred` (e.g. `(num_samples,)`).
        y_pred: Tensor of per-class scores.
        k: Number of top-scoring classes to look at.

    # Returns
        `K.mean(..., axis=-1)` of the `K.in_top_k` result.
    """
    true_rank = K.ndim(y_true)
    pred_rank = K.ndim(y_pred)
    labels_are_flat = (true_rank is not None and
                       pred_rank is not None and
                       true_rank == pred_rank - 1)
    if not labels_are_flat:
        # Drop the trailing label axis. Skipped for already-flat labels,
        # where the reduction would collapse every label into one scalar.
        y_true = K.max(y_true, axis=-1)
    return K.mean(K.in_top_k(y_pred, K.cast(y_true, 'int32'), k), axis=-1)
def sparse_categorical_accuracy(y_true, y_pred):
    """Per-sample match between integer labels and the arg-max prediction.

    # Arguments
        y_true: Tensor of class labels, either with a trailing axis that
            is reduced away (e.g. `(num_samples, 1)`) or already one rank
            lower than `y_pred` (e.g. `(num_samples,)`).
        y_pred: Tensor of per-class scores; the predicted class is the
            arg-max over its last axis.

    # Returns
        Tensor of dtype `K.floatx()` that is 1 where the label equals the
        predicted class and 0 elsewhere.
    """
    true_rank = K.ndim(y_true)
    pred_rank = K.ndim(y_pred)
    labels_are_flat = (true_rank is not None and
                       pred_rank is not None and
                       true_rank == pred_rank - 1)
    if not labels_are_flat:
        # Drop the trailing label axis. Skipped for already-flat labels,
        # where the reduction would collapse every label into one scalar.
        y_true = K.max(y_true, axis=-1)

    predicted = K.cast(K.argmax(y_pred, axis=-1), K.floatx())
    return K.cast(K.equal(y_true, predicted), K.floatx())
def call(self, inputs):
    """Gather the rows of `self.embeddings` selected by `inputs`.

    `inputs` is cast to `'int32'` first unless it already has that dtype.
    """
    if K.dtype(inputs) == 'int32':
        indices = inputs
    else:
        indices = K.cast(inputs, 'int32')
    return K.gather(self.embeddings, indices)
def sparse_categorical_accuracy(y_true, y_pred):
    """Per-sample match between integer labels and the arg-max prediction.

    # Arguments
        y_true: Tensor of class labels, either with a trailing axis that
            is reduced away (e.g. `(num_samples, 1)`) or already one rank
            lower than `y_pred` (e.g. `(num_samples,)`).
        y_pred: Tensor of per-class scores; the predicted class is the
            arg-max over its last axis.

    # Returns
        The result of `K.equal` between the labels and the predicted
        classes (not cast to float).
    """
    true_rank = K.ndim(y_true)
    pred_rank = K.ndim(y_pred)
    labels_are_flat = (true_rank is not None and
                       pred_rank is not None and
                       true_rank == pred_rank - 1)
    if not labels_are_flat:
        # Drop the trailing label axis. Skipped for already-flat labels,
        # where the reduction would collapse every label into one scalar.
        y_true = K.max(y_true, axis=-1)

    predicted = K.cast(K.argmax(y_pred, axis=-1), K.floatx())
    return K.equal(y_true, predicted)
def __call__(self, w):
    """Return `w` with every negative entry replaced by zero.

    # Arguments
        w: Tensor to constrain.

    # Returns
        A new tensor equal to `w` where `w >= 0` and zero elsewhere.
    """
    nonneg_mask = K.cast(w >= 0., K.floatx())
    # Return a product instead of `w *= mask`: the augmented form would
    # modify the caller's object in place when its type supports that, or
    # raise on variable types that reject in-place arithmetic.
    return w * nonneg_mask
def call(self, inputs):
    """Zero out every slice along the last axis that equals the mask value.

    A slice is kept when at least one of its entries differs from
    `self.mask_value`; otherwise it is multiplied by zero.
    """
    differs = K.not_equal(inputs, self.mask_value)
    keep = K.any(differs, axis=-1, keepdims=True)
    return inputs * K.cast(keep, K.floatx())
def iou_accuracy(y_true, y_pred):
    """Ratio of cumulative "intersection" to cumulative "union" terms.

    The intersection term is `y_true * round(y_pred)` and the union term is
    `y_true + round(y_pred)`, both clipped below at 0. Each is accumulated
    with `K.cumsum` and the two running sums are divided element-wise.

    NOTE(review): where both `y_true` and `round(y_pred)` are 1 the union
    term is 2, and `K.cumsum` yields running sums rather than one total —
    confirm both are intended. The ratio is undefined wherever the running
    union sum is 0.
    """
    rounded_pred = K.round(y_pred)
    overlap = K.maximum(y_true * rounded_pred, 0.)
    combined = K.maximum(y_true + rounded_pred, 0.)

    i = K.cast(K.cumsum(overlap), K.floatx())
    u = K.cast(K.cumsum(combined), K.floatx())
    return i / u
def call(self, inputs, mask=None):
    """Pass through entries strictly greater than `self.theta`; zero the rest.

    `mask` is accepted but not used.
    """
    above_threshold = K.cast(inputs > self.theta, K.floatx())
    return inputs * above_threshold