Example #1
  def weighted(y_true, y_pred, weights, mask=None):
    """Compute the masked, sample-weighted mean of `fn(y_true, y_pred)`.

    Arguments:
        y_true: `y_true` argument of `fn`.
        y_pred: `y_pred` argument of `fn`.
        weights: Weights tensor, or `None` to skip sample weighting.
        mask: Mask tensor, or `None` to skip masking.

    Returns:
        Scalar tensor.
    """
    # score_array has ndim >= 2
    score_array = fn(y_true, y_pred)
    if mask is not None:
      # Cast the mask to floatX to avoid float64 upcasting in theano
      mask = K.cast(mask, K.floatx())
      # mask should have the same shape as score_array
      score_array *= mask
      #  the loss per batch should be proportional
      #  to the number of unmasked samples.
      score_array /= K.mean(mask)

    # apply sample weighting
    if weights is not None:
      # reduce score_array to same ndim as weight array
      ndim = K.ndim(score_array)
      weight_ndim = K.ndim(weights)
      score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))
      score_array *= weights
      # Normalize by the fraction of samples with a non-zero weight.
      score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx()))
    return K.mean(score_array)
  def weighted(y_true, y_pred, weights, mask=None):
    """Compute the masked, sample-weighted mean of `fn(y_true, y_pred)`.

    Arguments:
        y_true: `y_true` argument of `fn`.
        y_pred: `y_pred` argument of `fn`.
        weights: Weights tensor, or `None` to skip sample weighting.
        mask: Mask tensor, or `None` to skip masking.

    Returns:
        Scalar tensor.
    """
    # score_array has ndim >= 2
    score_array = fn(y_true, y_pred)
    if mask is not None:
      # Cast the mask to floatX to avoid float64 upcasting in theano
      mask = K.cast(mask, K.floatx())
      # mask should have the same shape as score_array
      score_array *= mask
      #  the loss per batch should be proportional
      #  to the number of unmasked samples.
      score_array /= K.mean(mask)

    # apply sample weighting
    if weights is not None:
      # reduce score_array to same ndim as weight array
      ndim = K.ndim(score_array)
      weight_ndim = K.ndim(weights)
      score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))
      score_array *= weights
      # Normalize by the fraction of samples with a non-zero weight.
      score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx()))
    return K.mean(score_array)
Example #3
  def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    For every parameter this keeps a first-moment estimate and an
    exponentially weighted infinity norm, and divides the step by that norm.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment followed by the
        moment, norm and parameter updates of each variable.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      # Shrink the rate as the iteration counter grows.
      iteration_count = K.cast(self.iterations, K.dtype(self.decay))
      lr *= 1. / (1. + self.decay * iteration_count)

    step = K.cast(self.iterations, K.floatx()) + 1
    scaled_lr = lr / (1. - K.pow(self.beta_1, step))

    param_shapes = [K.int_shape(p) for p in params]
    # Both state lists start at zero.
    first_moments = [K.zeros(shape) for shape in param_shapes]
    inf_norms = [K.zeros(shape) for shape in param_shapes]
    self.weights = [self.iterations] + first_moments + inf_norms

    for param, grad, moment, norm in zip(params, gradients, first_moments,
                                         inf_norms):
      new_moment = (self.beta_1 * moment) + (1. - self.beta_1) * grad
      new_norm = K.maximum(self.beta_2 * norm, K.abs(grad))
      new_param = param - scaled_lr * new_moment / (new_norm + self.epsilon)

      self.updates.append(K.update(moment, new_moment))
      self.updates.append(K.update(norm, new_norm))

      # Apply constraints.
      constraint = getattr(param, 'constraint', None)
      if constraint is not None:
        new_param = constraint(new_param)

      self.updates.append(K.update(param, new_param))
    return self.updates
Example #4
    def get_updates(self, loss, params):
        """Build the update ops for one optimization step.

        Tracks running averages of the gradient and of the squared gradient
        for every parameter and uses them to scale the parameter step.

        Arguments:
            loss: Loss tensor passed to `self.get_gradients`.
            params: List of variables to optimize.

        Returns:
            `self.updates`: the iteration-counter increment followed by the
            two moment updates and the parameter update of each variable.
        """
        gradients = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Shrink the rate as the iteration counter grows.
            iteration_count = K.cast(self.iterations, K.dtype(self.decay))
            lr *= 1. / (1. + self.decay * iteration_count)

        step = K.cast(self.iterations, K.floatx()) + 1
        numerator = K.sqrt(1. - K.pow(self.beta_2, step))
        denominator = 1. - K.pow(self.beta_1, step)
        scaled_lr = lr * (numerator / denominator)

        # State variables match each parameter's shape and dtype.
        first_moments = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        second_moments = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        self.weights = [self.iterations] + first_moments + second_moments

        for param, grad, m, v in zip(params, gradients, first_moments,
                                     second_moments):
            new_m = (self.beta_1 * m) + (1. - self.beta_1) * grad
            new_v = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad)
            new_param = param - scaled_lr * new_m / (
                K.sqrt(new_v) + self.epsilon)

            self.updates.append(K.update(m, new_m))
            self.updates.append(K.update(v, new_v))

            # Apply constraints.
            constraint = getattr(param, 'constraint', None)
            if constraint is not None:
                new_param = constraint(new_param)

            self.updates.append(K.update(param, new_param))
        return self.updates
Example #5
 def compute_mask(self, inputs, mask=None):
     """Combine the per-input masks into a single mask.

     Arguments:
         inputs: List of input tensors.
         mask: List of masks, one per input (entries may be `None`),
             or `None`.

     Returns:
         `None` when no mask is given or every entry is `None`; otherwise
         the masks concatenated along `self.axis` and reduced with `K.all`
         over the last axis.

     Raises:
         ValueError: If `mask` or `inputs` is not a list, or if the two
             lists differ in length.
     """
     if mask is None:
         return None
     if not isinstance(mask, list):
         raise ValueError('`mask` should be a list.')
     if not isinstance(inputs, list):
         raise ValueError('`inputs` should be a list.')
     if len(mask) != len(inputs):
         raise ValueError('The lists `inputs` and `mask` '
                          'should have the same length.')
     if all(m is None for m in mask):
         return None
     # Make a list of masks while making sure
     # the dimensionality of each mask
     # is the same as the corresponding input.
     masks = []
     for input_i, mask_i in zip(inputs, mask):
         if mask_i is None:
             # Input is unmasked. Append all 1s to masks,
             # but cast it to bool first
             masks.append(K.cast(K.ones_like(input_i), 'bool'))
         elif K.ndim(mask_i) < K.ndim(input_i):
             # Mask is smaller than the input, expand it
             # NOTE(review): this statement and the `else` branch below were
             # missing from the snippet; restored from the upstream layer,
             # confirm against the project history.
             masks.append(K.expand_dims(mask_i))
         else:
             masks.append(mask_i)
     concatenated = K.concatenate(masks, axis=self.axis)
     return K.all(concatenated, axis=-1, keepdims=False)
Example #6
  def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    Keeps a running average of squared gradients and a running average of
    squared updates for every parameter; the ratio of their square roots
    rescales each gradient.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment followed by the
        accumulator, parameter and delta-accumulator updates per variable.
    """
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    delta_accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators + delta_accumulators
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      # Shrink the rate as the iteration counter grows.
      lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
                 (1. + self.decay * K.cast(self.iterations,
                                           K.dtype(self.decay))))

    for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
      # update accumulator
      new_a = self.rho * a + (1. - self.rho) * K.square(g)
      self.updates.append(K.update(a, new_a))

      # use the new accumulator and the *old* delta_accumulator
      update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
      new_p = p - lr * update

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(K.update(p, new_p))

      # update delta_accumulator
      new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
      self.updates.append(K.update(d_a, new_d_a))
    return self.updates
Example #7
    def get_updates(self, loss, params):
        """Build the update ops for one optimization step.

        Keeps a running average of squared gradients per parameter and
        divides each gradient by the square root of that average.

        Arguments:
            loss: Loss tensor passed to `self.get_gradients`.
            params: List of variables to optimize.

        Returns:
            `self.updates`: the iteration-counter increment followed by the
            accumulator and parameter updates of each variable.
        """
        grads = self.get_gradients(loss, params)
        # One accumulator per parameter, matching its shape and dtype.
        accumulators = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        self.weights = accumulators
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Shrink the rate as the iteration counter grows.
            lr *= (1. /
                   (1. +
                    self.decay * K.cast(self.iterations, K.dtype(self.decay))))

        for p, g, a in zip(params, grads, accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))
            new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #8
  def get_updates(self, loss, params):
    """Build the update ops for one momentum gradient-descent step.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment followed by the
        velocity and parameter updates of each variable.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      # Shrink the rate as the iteration counter grows.
      lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
                 (1. + self.decay * K.cast(self.iterations,
                                           K.dtype(self.decay))))
    # momentum
    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments
    for p, g, m in zip(params, grads, moments):
      v = self.momentum * m - lr * g  # velocity
      self.updates.append(K.update(m, v))

      # The two branches are alternatives: the look-ahead step must not be
      # overwritten by the plain momentum step.
      if self.nesterov:
        new_p = p + self.momentum * v - lr * g
      else:
        new_p = p + v

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(K.update(p, new_p))
    return self.updates
Example #9
 def compute_mask(self, inputs, mask=None):
   """Combine the per-input masks into a single mask.

   Arguments:
       inputs: List of input tensors.
       mask: List of masks, one per input (entries may be `None`),
           or `None`.

   Returns:
       `None` when no mask is given or every entry is `None`; otherwise
       the masks concatenated along `self.axis` and reduced with `K.all`
       over the last axis.

   Raises:
       ValueError: If `mask` or `inputs` is not a list, or if the two
           lists differ in length.
   """
   if mask is None:
     return None
   if not isinstance(mask, list):
     raise ValueError('`mask` should be a list.')
   if not isinstance(inputs, list):
     raise ValueError('`inputs` should be a list.')
   if len(mask) != len(inputs):
     raise ValueError('The lists `inputs` and `mask` '
                      'should have the same length.')
   if all(m is None for m in mask):
     return None
   # Make a list of masks while making sure
   # the dimensionality of each mask
   # is the same as the corresponding input.
   masks = []
   for input_i, mask_i in zip(inputs, mask):
     if mask_i is None:
       # Input is unmasked. Append all 1s to masks,
       # but cast it to bool first
       masks.append(K.cast(K.ones_like(input_i), 'bool'))
     elif K.ndim(mask_i) < K.ndim(input_i):
       # Mask is smaller than the input, expand it
       # NOTE(review): this statement and the `else` branch below were
       # missing from the snippet; restored from the upstream layer,
       # confirm against the project history.
       masks.append(K.expand_dims(mask_i))
     else:
       masks.append(mask_i)
   concatenated = K.concatenate(masks, axis=self.axis)
   return K.all(concatenated, axis=-1, keepdims=False)
    def get_updates(self, loss, params):
        """Build the update ops for one momentum gradient-descent step.

        Arguments:
            loss: Loss tensor passed to `self.get_gradients`.
            params: List of variables to optimize.

        Returns:
            `self.updates`: the iteration-counter increment followed by the
            velocity and parameter updates of each variable.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Shrink the rate as the iteration counter grows.
            lr = lr * (
                1. /  # pylint: disable=g-no-augmented-assignment
                (1. +
                 self.decay * K.cast(self.iterations, K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            v = self.momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            # The two branches are alternatives: without the `else`, the
            # look-ahead step was overwritten and `new_p` was undefined
            # whenever `self.nesterov` was false.
            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
    def get_updates(self, loss, params):
        """Build the update ops for one optimization step.

        Accumulates the sum of squared gradients per parameter and divides
        each gradient by the square root of that sum.

        Arguments:
            loss: Loss tensor passed to `self.get_gradients`.
            params: List of variables to optimize.

        Returns:
            `self.updates`: the iteration-counter increment followed by the
            accumulator and parameter updates of each variable.
        """
        gradients = self.get_gradients(loss, params)
        param_shapes = [K.int_shape(p) for p in params]
        squared_sums = [K.zeros(shape) for shape in param_shapes]
        self.weights = squared_sums
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Shrink the rate as the iteration counter grows.
            iteration_count = K.cast(self.iterations, K.dtype(self.decay))
            decay_factor = 1. / (1. + self.decay * iteration_count)
            lr = lr * decay_factor  # pylint: disable=g-no-augmented-assignment

        for param, grad, squared_sum in zip(params, gradients, squared_sums):
            # update accumulator
            new_sum = squared_sum + K.square(grad)
            self.updates.append(K.update(squared_sum, new_sum))
            new_param = param - lr * grad / (K.sqrt(new_sum) + self.epsilon)

            # Apply constraints.
            constraint = getattr(param, 'constraint', None)
            if constraint is not None:
                new_param = constraint(new_param)

            self.updates.append(K.update(param, new_param))
        return self.updates
  def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    Keeps a running average of squared gradients and a running average of
    squared updates for every parameter; the ratio of their square roots
    rescales each gradient.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment followed by the
        accumulator, parameter and delta-accumulator updates per variable.
    """
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    delta_accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators + delta_accumulators
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      # Shrink the rate as the iteration counter grows.
      lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
                 (1. + self.decay * K.cast(self.iterations,
                                           K.dtype(self.decay))))

    for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
      # update accumulator
      new_a = self.rho * a + (1. - self.rho) * K.square(g)
      self.updates.append(K.update(a, new_a))

      # use the new accumulator and the *old* delta_accumulator
      update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
      new_p = p - lr * update

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(K.update(p, new_p))

      # update delta_accumulator
      new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
      self.updates.append(K.update(d_a, new_d_a))
    return self.updates
Example #13
 def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
   """Replace a random subset of `inputs` by a constant and rescale.

   Elements whose uniform random draw is below `rate` are set to
   `alpha_p`; the result is then passed through the affine map `a * x + b`.
   `noise_shape` is taken from the enclosing scope.
   """
   # NOTE(review): the `alpha`/`scale` definitions were missing from this
   # snippet; the values are those used by the other copies of this helper.
   alpha = 1.6732632423543772848170429916717
   scale = 1.0507009873554804934193349852946
   alpha_p = -alpha * scale

   kept_idx = K.greater_equal(K.random_uniform(noise_shape, seed=seed), rate)
   kept_idx = K.cast(kept_idx, K.floatx())

   # Get affine transformation params
   a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5
   b = -a * alpha_p * rate

   # Apply mask
   x = inputs * kept_idx + alpha_p * (1 - kept_idx)

   # Do affine transformation
   return a * x + b
Example #14
 def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
   """Replace a random subset of `inputs` by a constant and rescale.

   Elements whose uniform random draw is below `rate` are set to
   `alpha_p`; the result is then passed through the affine map `a * x + b`.
   `noise_shape` is taken from the enclosing scope.
   """
   # NOTE(review): the `alpha`/`scale` definitions were missing from this
   # snippet; the values are those used by the other copies of this helper.
   alpha = 1.6732632423543772848170429916717
   scale = 1.0507009873554804934193349852946
   alpha_p = -alpha * scale

   kept_idx = K.greater_equal(K.random_uniform(noise_shape, seed=seed), rate)
   kept_idx = K.cast(kept_idx, K.floatx())

   # Get affine transformation params
   a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5
   b = -a * alpha_p * rate

   # Apply mask
   x = inputs * kept_idx + alpha_p * (1 - kept_idx)

   # Do affine transformation
   return a * x + b
Example #15
def _preprocess_symbolic_input(x, data_format, mode):
    """Preprocesses a tensor encoding a batch of images.

    Arguments:
        x: Input tensor, 3D or 4D.
        data_format: Data format of the image tensor.
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset,
                without scaling.
            - tf: will scale pixels between -1 and 1,
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.

    Returns:
        Preprocessed tensor.
    """
    global _IMAGENET_MEAN

    if mode == 'tf':
        x /= 127.5
        x -= 1.
        return x

    if mode == 'torch':
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # Any other mode is handled as "caffe": flip the channel axis.
        if data_format == 'channels_first':
            # 'RGB'->'BGR'
            if K.ndim(x) == 3:
                x = x[::-1, ...]
            else:
                x = x[:, ::-1, ...]
        else:
            # 'RGB'->'BGR'
            x = x[..., ::-1]
        mean = [103.939, 116.779, 123.68]
        std = None

    # NOTE(review): the cached constant is built from whichever mode's mean
    # is requested first, so mixing 'caffe' and 'torch' calls in one process
    # reuses the first mean -- confirm whether that is intended.
    if _IMAGENET_MEAN is None:
        _IMAGENET_MEAN = K.constant(-np.array(mean))

    # Zero-center by mean pixel (the constant holds the negated mean, so it
    # is added exactly once).
    if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
        x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
    else:
        x = K.bias_add(x, _IMAGENET_MEAN, data_format)
    if std is not None:
        x /= std
    return x
Example #16
def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a tensor encoding a batch of images.

  Arguments:
      x: Input tensor, 3D or 4D.
      data_format: Data format of the image tensor.
      mode: One of "caffe", "tf" or "torch".
          - caffe: will convert the images from RGB to BGR,
              then will zero-center each color channel with
              respect to the ImageNet dataset,
              without scaling.
          - tf: will scale pixels between -1 and 1,
          - torch: will scale pixels between 0 and 1 and then
              will normalize each channel with respect to the
              ImageNet dataset.

  Returns:
      Preprocessed tensor.
  """
  # Declared global because the cache below is assigned in this function;
  # without it the `is None` check raises UnboundLocalError.
  global _IMAGENET_MEAN

  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  if mode == 'torch':
    x /= 255.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
  else:
    # Any other mode is handled as "caffe": flip the channel axis.
    if data_format == 'channels_first':
      # 'RGB'->'BGR'
      if K.ndim(x) == 3:
        x = x[::-1, ...]
      else:
        x = x[:, ::-1, ...]
    else:
      # 'RGB'->'BGR'
      x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None

  # NOTE(review): the cached constant is built from whichever mode's mean
  # is requested first, so mixing 'caffe' and 'torch' calls in one process
  # reuses the first mean -- confirm whether that is intended.
  if _IMAGENET_MEAN is None:
    _IMAGENET_MEAN = K.constant(-np.array(mean))

  # Zero-center by mean pixel (the constant holds the negated mean, so it
  # is added exactly once).
  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
    x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
  else:
    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
  if std is not None:
    x /= std
  return x
Example #17
  def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    Tracks running averages of the gradient and of the squared gradient for
    every parameter. When `self.amsgrad` is set, the running maximum of the
    squared-gradient average is tracked too and used in the denominator.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment followed by the
        state and parameter updates of each variable.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      # Shrink the rate as the iteration counter grows.
      lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
                 (1. + self.decay * K.cast(self.iterations,
                                           K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (
        K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsgrad:
      vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
      # Size-1 placeholders so `zip` below and `self.weights` keep one entry
      # per parameter; they are never updated.
      vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
      m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
      v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
      if self.amsgrad:
        vhat_t = K.maximum(vhat, v_t)
        p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
        self.updates.append(K.update(vhat, vhat_t))
      else:
        p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

      self.updates.append(K.update(m, m_t))
      self.updates.append(K.update(v, v_t))
      new_p = p_t

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(K.update(p, new_p))
    return self.updates
    def get_updates(self, loss, params):
        """Build the update ops for one optimization step.

        For every parameter this keeps a first-moment estimate and an
        exponentially weighted infinity norm, and divides the step by that
        norm.

        Arguments:
            loss: Loss tensor passed to `self.get_gradients`.
            params: List of variables to optimize.

        Returns:
            `self.updates`: the iteration-counter increment followed by the
            moment, norm and parameter updates of each variable.
        """
        gradients = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Shrink the rate as the iteration counter grows.
            iteration_count = K.cast(self.iterations, K.dtype(self.decay))
            decay_factor = 1. / (1. + self.decay * iteration_count)
            lr = lr * decay_factor  # pylint: disable=g-no-augmented-assignment

        step = K.cast(self.iterations, K.floatx()) + 1
        scaled_lr = lr / (1. - K.pow(self.beta_1, step))

        param_shapes = [K.int_shape(p) for p in params]
        # Both state lists start at zero.
        first_moments = [K.zeros(shape) for shape in param_shapes]
        inf_norms = [K.zeros(shape) for shape in param_shapes]
        self.weights = [self.iterations] + first_moments + inf_norms

        for param, grad, moment, norm in zip(params, gradients,
                                             first_moments, inf_norms):
            new_moment = (self.beta_1 * moment) + (1. - self.beta_1) * grad
            new_norm = K.maximum(self.beta_2 * norm, K.abs(grad))
            new_param = param - scaled_lr * new_moment / (
                new_norm + self.epsilon)

            self.updates.append(K.update(moment, new_moment))
            self.updates.append(K.update(norm, new_norm))

            # Apply constraints.
            constraint = getattr(param, 'constraint', None)
            if constraint is not None:
                new_param = constraint(new_param)

            self.updates.append(K.update(param, new_param))
        return self.updates
    def get_updates(self, loss, params):
        """Build the update ops for one optimization step.

        Combines running averages of the gradient and squared gradient with
        a momentum coefficient that follows a schedule driven by the
        iteration counter and `self.schedule_decay`.

        Arguments:
            loss: Loss tensor passed to `self.get_gradients`.
            params: List of variables to optimize.

        Returns:
            `self.updates`: the iteration-counter increment, the
            `(self.m_schedule, new_value)` pair, and the moment and
            parameter updates of each variable.
        """
        gradients = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        step = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        warmup_now = K.pow(K.cast_to_floatx(0.96), step * self.schedule_decay)
        momentum_now = self.beta_1 * (1. - 0.5 * warmup_now)
        warmup_next = K.pow(K.cast_to_floatx(0.96),
                            (step + 1) * self.schedule_decay)
        momentum_next = self.beta_1 * (1. - 0.5 * warmup_next)
        schedule_now = self.m_schedule * momentum_now
        schedule_next = self.m_schedule * momentum_now * momentum_next
        self.updates.append((self.m_schedule, schedule_now))

        param_shapes = [K.int_shape(p) for p in params]
        first_moments = [K.zeros(shape) for shape in param_shapes]
        second_moments = [K.zeros(shape) for shape in param_shapes]

        self.weights = [self.iterations] + first_moments + second_moments

        for param, grad, m, v in zip(params, gradients, first_moments,
                                     second_moments):
            # the following equations given in [1]
            grad_hat = grad / (1. - schedule_now)
            new_m = self.beta_1 * m + (1. - self.beta_1) * grad
            m_hat = new_m / (1. - schedule_next)
            new_v = self.beta_2 * v + (1. - self.beta_2) * K.square(grad)
            v_hat = new_v / (1. - K.pow(self.beta_2, step))
            m_bar = (1. - momentum_now) * grad_hat + momentum_next * m_hat

            self.updates.append(K.update(m, new_m))
            self.updates.append(K.update(v, new_v))

            new_param = param - self.lr * m_bar / (
                K.sqrt(v_hat) + self.epsilon)

            # Apply constraints.
            constraint = getattr(param, 'constraint', None)
            if constraint is not None:
                new_param = constraint(new_param)

            self.updates.append(K.update(param, new_param))
        return self.updates
Example #20
      def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
        """Replace a random subset of `inputs` by a constant and rescale.

        Elements whose uniform random draw is below `rate` are set to a
        fixed negative value; the result then goes through an affine map.
        `noise_shape` is taken from the enclosing scope.
        """
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        dropped_value = -alpha * scale

        # 1.0 where the element is kept, 0.0 where it is replaced.
        random_draw = K.random_uniform(noise_shape, seed=seed)
        keep_mask = K.cast(K.greater_equal(random_draw, rate), K.floatx())

        # Coefficients of the affine transformation.
        slope = ((1 - rate) * (1 + rate * dropped_value**2))**-0.5
        offset = -slope * dropped_value * rate

        # Apply the mask, then the affine transformation.
        masked = inputs * keep_mask + dropped_value * (1 - keep_mask)
        return slope * masked + offset
Example #21
      def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
        """Replace a random subset of `inputs` by a constant and rescale.

        Elements whose uniform random draw is below `rate` are set to a
        fixed negative value; the result then goes through an affine map.
        `noise_shape` is taken from the enclosing scope.
        """
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        dropped_value = -alpha * scale

        # 1.0 where the element is kept, 0.0 where it is replaced.
        random_draw = K.random_uniform(noise_shape, seed=seed)
        keep_mask = K.cast(K.greater_equal(random_draw, rate), K.floatx())

        # Coefficients of the affine transformation.
        slope = ((1 - rate) * (1 + rate * dropped_value**2))**-0.5
        offset = -slope * dropped_value * rate

        # Apply the mask, then the affine transformation.
        masked = inputs * keep_mask + dropped_value * (1 - keep_mask)
        return slope * masked + offset
Example #22
  def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    Combines running averages of the gradient and squared gradient with a
    momentum coefficient that follows a schedule driven by the iteration
    counter and `self.schedule_decay`.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment, the
        `(self.m_schedule, new_value)` pair, and the moment and parameter
        updates of each variable.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    step = K.cast(self.iterations, K.floatx()) + 1

    # Due to the recommendations in [2], i.e. warming momentum schedule
    warmup_now = K.pow(K.cast_to_floatx(0.96), step * self.schedule_decay)
    momentum_now = self.beta_1 * (1. - 0.5 * warmup_now)
    warmup_next = K.pow(K.cast_to_floatx(0.96),
                        (step + 1) * self.schedule_decay)
    momentum_next = self.beta_1 * (1. - 0.5 * warmup_next)
    schedule_now = self.m_schedule * momentum_now
    schedule_next = self.m_schedule * momentum_now * momentum_next
    self.updates.append((self.m_schedule, schedule_now))

    param_shapes = [K.int_shape(p) for p in params]
    first_moments = [K.zeros(shape) for shape in param_shapes]
    second_moments = [K.zeros(shape) for shape in param_shapes]

    self.weights = [self.iterations] + first_moments + second_moments

    for param, grad, m, v in zip(params, gradients, first_moments,
                                 second_moments):
      # the following equations given in [1]
      grad_hat = grad / (1. - schedule_now)
      new_m = self.beta_1 * m + (1. - self.beta_1) * grad
      m_hat = new_m / (1. - schedule_next)
      new_v = self.beta_2 * v + (1. - self.beta_2) * K.square(grad)
      v_hat = new_v / (1. - K.pow(self.beta_2, step))
      m_bar = (1. - momentum_now) * grad_hat + momentum_next * m_hat

      self.updates.append(K.update(m, new_m))
      self.updates.append(K.update(v, new_v))

      new_param = param - self.lr * m_bar / (K.sqrt(v_hat) + self.epsilon)

      # Apply constraints.
      constraint = getattr(param, 'constraint', None)
      if constraint is not None:
        new_param = constraint(new_param)

      self.updates.append(K.update(param, new_param))
    return self.updates
Example #23
def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a symbolic image tensor.

  Arguments:
      x: symbolic tensor, 3D or 4D.
      data_format: data format of the image tensor.
      mode: One of "caffe", "tf".
          - caffe: will convert the images from RGB to BGR,
              then will zero-center each color channel with
              respect to the ImageNet dataset,
              without scaling.
          - tf: will scale pixels between -1 and 1,

  Returns:
      Preprocessed tensor.
  """
  # Declared global because the cache below is assigned in this function;
  # without it the `is None` check raises UnboundLocalError.
  global _IMAGENET_MEAN

  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  # Any other mode is handled as "caffe": flip the channel axis.
  if data_format == 'channels_first':
    # 'RGB'->'BGR'
    if K.ndim(x) == 3:
      x = x[::-1, ...]
    else:
      x = x[:, ::-1, ...]
  else:
    # 'RGB'->'BGR'
    x = x[..., ::-1]

  if _IMAGENET_MEAN is None:
    _IMAGENET_MEAN = K.constant(-np.array([103.939, 116.779, 123.68]))
  # Zero-center by mean pixel (the constant holds the negated mean, so it
  # is added exactly once).
  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
    x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
  else:
    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
  return x
Example #24
  def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    Accumulates the sum of squared gradients per parameter and divides each
    gradient by the square root of that sum.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment followed by the
        accumulator and parameter updates of each variable.
    """
    gradients = self.get_gradients(loss, params)
    param_shapes = [K.int_shape(p) for p in params]
    squared_sums = [K.zeros(shape) for shape in param_shapes]
    self.weights = squared_sums
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      # Shrink the rate as the iteration counter grows.
      iteration_count = K.cast(self.iterations, K.dtype(self.decay))
      lr *= 1. / (1. + self.decay * iteration_count)

    for param, grad, squared_sum in zip(params, gradients, squared_sums):
      # update accumulator
      new_sum = squared_sum + K.square(grad)
      self.updates.append(K.update(squared_sum, new_sum))
      new_param = param - lr * grad / (K.sqrt(new_sum) + self.epsilon)

      # Apply constraints.
      constraint = getattr(param, 'constraint', None)
      if constraint is not None:
        new_param = constraint(new_param)

      self.updates.append(K.update(param, new_param))
    return self.updates
Example #25
  def get_updates(self, loss, params):
    """Build the update ops for one optimization step.

    Keeps a running average of squared gradients per parameter and divides
    each gradient by the square root of that average.

    Arguments:
        loss: Loss tensor passed to `self.get_gradients`.
        params: List of variables to optimize.

    Returns:
        `self.updates`: the iteration-counter increment followed by the
        accumulator and parameter updates of each variable.
    """
    grads = self.get_gradients(loss, params)
    accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = accumulators
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      # Shrink the rate as the iteration counter grows.
      lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
                 (1. + self.decay * K.cast(self.iterations,
                                           K.dtype(self.decay))))

    for p, g, a in zip(params, grads, accumulators):
      # update accumulator
      new_a = self.rho * a + (1. - self.rho) * K.square(g)
      self.updates.append(K.update(a, new_a))
      new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(K.update(p, new_p))
    return self.updates
Example #26
 def __call__(self, w):
   """Multiply `w` by the `w >= 0` comparison cast to floatx; return `w`."""
   non_negative = K.cast(w >= 0., K.floatx())
   w *= non_negative
   return w
Example #27
 def __call__(self, w):
     """Multiply `w` by the `w >= 0` comparison cast to floatx; return `w`."""
     non_negative = K.cast(w >= 0., K.floatx())
     w *= non_negative
     return w
Example #28
 def _current_learning_rate(y_true, y_pred):
     """Return the decayed learning rate of the enclosing `optimizer`.

     Both arguments are ignored; the signature matches a metric function.
     """
     step = K.cast(optimizer.iterations, K.dtype(optimizer.decay))
     return optimizer.lr / (1. + optimizer.decay * step)
Example #29
 def __call__(self, w):
     """Multiply `w` by `K.greater_equal(w, 0)` cast to floatx; return `w`."""
     non_negative = K.cast(K.greater_equal(w, 0.), K.floatx())
     w *= non_negative
     return w
Example #30
 def call(self, inputs):
   """Gather the rows of `self.embeddings` selected by `inputs`.

   `inputs` is cast to int32 first when it has a different dtype.
   """
   indices = inputs if K.dtype(inputs) == 'int32' else K.cast(inputs, 'int32')
   return K.gather(self.embeddings, indices)
Example #31
 def __call__(self, w):
   """Return `w` times `K.greater_equal(w, 0)` cast to floatx."""
   non_negative = K.cast(K.greater_equal(w, 0.), K.floatx())
   return w * non_negative
Example #32
def sparse_categorical_accuracy(y_true, y_pred):
  """Compare the target values with the argmax of the predictions.

  Arguments:
      y_true: Target tensor; reduced with `K.max` over its last axis.
      y_pred: Prediction tensor; reduced with `K.argmax` over its last axis.

  Returns:
      The elementwise equality of the two reductions, cast to floatx.
  """
  return K.cast(
      K.equal(
          K.max(y_true, axis=-1),
          K.cast(K.argmax(y_pred, axis=-1), K.floatx())), K.floatx())
Example #33
def sparse_categorical_accuracy(y_true, y_pred):
    """Compare the target values with the argmax of the predictions.

    Arguments:
        y_true: Target tensor; reduced with `K.max` over its last axis.
        y_pred: Prediction tensor; reduced with `K.argmax` over its last
            axis.

    Returns:
        The elementwise equality of the two reductions, cast to floatx.
    """
    true_labels = K.max(y_true, axis=-1)
    predicted_labels = K.cast(K.argmax(y_pred, axis=-1), K.floatx())
    matches = K.equal(true_labels, predicted_labels)
    return K.cast(matches, K.floatx())
 def call(self, inputs, mask=None):
   """Return `inputs` times the `inputs > self.theta` test cast to floatx.

   `mask` is accepted but not used.
   """
   above_threshold = K.cast(inputs > self.theta, K.floatx())
   return inputs * above_threshold
Example #35
def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
    """Mean of `K.in_top_k` between the predictions and integer targets.

    Arguments:
        y_true: Target tensor; reduced with `K.max` over its last axis and
            cast to int32.
        y_pred: Prediction tensor.
        k: Number of top entries passed to `K.in_top_k`.

    Returns:
        `K.mean` of the `K.in_top_k` result over the last axis.
    """
    return K.mean(K.in_top_k(y_pred, K.cast(K.max(y_true, axis=-1), 'int32'),
                             k), axis=-1)
Example #36
 def call(self, inputs, mask=None):
     """Return `inputs` times the `inputs > self.theta` test cast to floatx.

     `mask` is accepted but not used.
     """
     above_threshold = K.cast(inputs > self.theta, K.floatx())
     return inputs * above_threshold
 def call(self, inputs, mask=None):
     """Return `inputs` times `K.greater(inputs, self.theta)` cast to floatx.

     `mask` is accepted but not used.
     """
     above_threshold = K.cast(K.greater(inputs, self.theta), K.floatx())
     return inputs * above_threshold
def _model_loss(model, inputs, targets):
    """Calculates the loss for a given model.

    Arguments:
        model: The model on which metrics are being calculated.
        inputs: The inputs of the given model. This is typically the mini
            batch of data that is fed to the model.
        targets: The predictions or targets of the given model.

    Returns:
        Returns the model output, total loss and loss value calculated using
        the specified loss function. The total loss includes regularization
        losses and applies masking and sample weighting to the loss value.
    """
    total_loss = 0
    outs = model(inputs)
    if not isinstance(outs, list):
        outs = [outs]

    if not isinstance(targets, list):
        targets = [targets]

    # NOTE(review): nothing in the visible code appends to this list, so it
    # is returned empty -- an append may have been lost from the snippet.
    loss_metrics = []
    with K.name_scope('loss'):
        for i, loss_fn in enumerate(model.loss_functions):
            # compute the loss
            # NOTE(review): the last argument was cut off in the snippet;
            # `model.output_names[i]` is presumably what upstream passes --
            # confirm against `_eager_loss_fn`'s signature.
            output_loss = _eager_loss_fn(outs[i], targets[i], loss_fn,
                                         model.output_names[i])

            mask = outs[i]._keras_mask
            # adapted from weighted_loss_fn
            if mask is not None:
                # mask should have the same shape as output_loss
                output_loss *= mask
                #  the loss per batch should be proportional
                #  to the number of unmasked samples.
                output_loss /= K.mean(mask)

            # adapted from weighted_loss_fn
            # apply sample weighting
            if model.sample_weights:
                # reduce score_array to same ndim as weight array
                ndim = K.ndim(output_loss)
                weight_ndim = K.ndim(model.sample_weights)
                output_loss = K.mean(output_loss,
                                     axis=list(range(weight_ndim, ndim)))
                output_loss *= model.sample_weights
                output_loss /= K.mean(
                    K.cast(K.not_equal(model.sample_weights, 0), K.floatx()))
                output_loss = K.mean(output_loss)

            loss_weight = model.loss_weights_list[i]
            # The two assignments are alternatives; `total_loss` starts at 0,
            # so in practice the accumulating branch always runs.
            if total_loss is None:
                total_loss = loss_weight * output_loss
            else:
                total_loss += loss_weight * output_loss

        total_loss = K.mean(total_loss)
        # Add regularization losses
        custom_losses = []
        for layer in model.layers:
            if layer.losses:
                custom_losses += layer.losses

        if custom_losses:
            total_loss += sum(custom_losses)

    return outs, total_loss, loss_metrics
Example #39
 def call(self, inputs):
   """Multiply `inputs` by a mask built from `self.mask_value`.

   The mask is `K.any(inputs != self.mask_value)` over the last axis (kept
   as a size-1 axis), cast to the dtype of `inputs`.
   """
   differs = K.not_equal(inputs, self.mask_value)
   keep = K.any(differs, axis=-1, keepdims=True)
   return inputs * K.cast(keep, inputs.dtype)
def _model_loss(model, inputs, targets):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: The inputs of the given model. This is typically the mini
          batch of data that is fed to the model.
      targets: The predictions or targets of the given model.

  Returns:
      Returns the model output, total loss and loss value calculated using
      the specified loss function. The total loss includes regularization
      losses and applies masking and sample weighting to the loss value.
  """
  total_loss = 0
  # A single-element input list is unwrapped before calling the model.
  if len(inputs) == 1:
    outs = model.call(inputs[0])
  else:
    outs = model.call(inputs)
  if not isinstance(outs, list):
    outs = [outs]

  if not isinstance(targets, list):
    targets = [targets]

  # NOTE(review): nothing in the visible code appends to this list, so it
  # is returned empty -- an append may have been lost from the snippet.
  loss_metrics = []
  with K.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      # compute the loss
      # NOTE(review): the last argument was cut off in the snippet;
      # `model.output_names[i]` is presumably what upstream passes --
      # confirm against `_eager_loss_fn`'s signature.
      output_loss = _eager_loss_fn(outs[i], targets[i], loss_fn,
                                   model.output_names[i])

      mask = outs[i]._keras_mask
      # adapted from weighted_loss_fn
      if mask is not None:
        # mask should have the same shape as output_loss
        output_loss *= mask
        #  the loss per batch should be proportional
        #  to the number of unmasked samples.
        output_loss /= K.mean(mask)

      # adapted from weighted_loss_fn
      # apply sample weighting
      if model.sample_weights:
        # reduce score_array to same ndim as weight array
        ndim = K.ndim(output_loss)
        weight_ndim = K.ndim(model.sample_weights)
        output_loss = K.mean(output_loss, axis=list(range(weight_ndim, ndim)))
        output_loss *= model.sample_weights
        output_loss /= K.mean(K.cast(K.not_equal(model.sample_weights, 0),
                                     K.floatx()))
        output_loss = K.mean(output_loss)

      loss_weight = model.loss_weights_list[i]
      # The two assignments are alternatives; `total_loss` starts at 0, so
      # in practice the accumulating branch always runs.
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = K.mean(total_loss)
    # Add regularization losses
    custom_losses = []
    for layer in model.layers:
      if layer.losses:
        custom_losses += layer.losses

    if custom_losses:
      total_loss += sum(custom_losses)

  return outs, total_loss, loss_metrics
Example #41
 def call(self, inputs):
     """Gather the rows of `self.embeddings` selected by `inputs`.

     `inputs` is cast to int32 first when it has a different dtype.
     """
     indices = (inputs if K.dtype(inputs) == 'int32'
                else K.cast(inputs, 'int32'))
     return K.gather(self.embeddings, indices)
 def call(self, inputs):
     """Multiply `inputs` by a mask built from `self.mask_value`.

     The mask is `K.any(inputs != self.mask_value)` over the last axis (kept
     as a size-1 axis), cast to the dtype of `inputs`.
     """
     boolean_mask = K.any(K.not_equal(inputs, self.mask_value),
                          axis=-1, keepdims=True)
     return inputs * K.cast(boolean_mask, inputs.dtype)
Example #43
def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
  """Mean of `K.in_top_k` between the predictions and integer targets.

  Arguments:
      y_true: Target tensor; reduced with `K.max` over its last axis and
          cast to int32.
      y_pred: Prediction tensor.
      k: Number of top entries passed to `K.in_top_k`.

  Returns:
      `K.mean` of the `K.in_top_k` result over the last axis.
  """
  target_indices = K.cast(K.max(y_true, axis=-1), 'int32')
  hits = K.in_top_k(y_pred, target_indices, k)
  return K.mean(hits, axis=-1)
 def call(self, inputs, mask=None):
   """Return `inputs` times `K.greater(inputs, self.theta)` cast to floatx.

   `mask` is accepted but not used.
   """
   above_threshold = K.cast(K.greater(inputs, self.theta), K.floatx())
   return inputs * above_threshold