Example #1
  def compute_output_shape(self, input_shape):
    input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list())

    if self._output_shape is None:
      if context.executing_eagerly():
        raise NotImplementedError
      x = K.placeholder(shape=input_shape)
      x = self.call(x)
      if isinstance(x, list):
        return [tensor_shape.TensorShape(K.int_shape(x_elem)) for x_elem in x]
      else:
        return tensor_shape.TensorShape(K.int_shape(x))
    elif isinstance(self._output_shape, (tuple, list)):
      if isinstance(input_shape, list):
        num_samples = input_shape[0][0]
      else:
        num_samples = input_shape[0] if input_shape else None
      return tensor_shape.TensorShape((num_samples,) +
                                      tuple(self._output_shape))
    else:
      shape = self._output_shape(input_shape)
      if not isinstance(shape, (list, tuple)):
        raise ValueError(
            '`output_shape` function must return a tuple or a list of tuples.')
      if isinstance(shape, list):
        if isinstance(shape[0], int) or shape[0] is None:
          shape = tuple(shape)
      return tensor_shape.TensorShape(shape)
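
The three branches above can be summarized in plain Python: with no `output_shape`, the layer infers the shape by running its function on a dummy input; a tuple or list is treated as the per-sample shape with the batch dimension prepended; a callable is applied to the input shape directly. A minimal sketch, assuming plain tuples stand in for `TensorShape` and numpy for the placeholder:

import numpy as np

def infer_output_shape(fn, output_shape, input_shape):
    # No output_shape: run fn on a dummy batch and read the result's shape.
    if output_shape is None:
        dummy = np.ones((1,) + tuple(input_shape[1:]))
        return fn(dummy).shape
    # Tuple/list: per-sample shape, so prepend the batch dimension.
    if isinstance(output_shape, (tuple, list)):
        return (input_shape[0],) + tuple(output_shape)
    # Callable: delegate the computation to the user-supplied function.
    return output_shape(input_shape)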
Example #2
  def set_model(self, model):
    """Sets Keras model and creates summary ops."""

    self.model = model
    self.sess = K.get_session()
    # Only make the histogram summary op if it hasn't already been made.
    if self.histogram_freq and self.merged is None:
      for layer in self.model.layers:
        for weight in layer.weights:
          mapped_weight_name = weight.name.replace(':', '_')
          tf_summary.histogram(mapped_weight_name, weight)
          if self.write_images:
            w_img = array_ops.squeeze(weight)
            shape = K.int_shape(w_img)
            if len(shape) == 2:  # dense layer kernel case
              if shape[0] > shape[1]:
                w_img = array_ops.transpose(w_img)
                shape = K.int_shape(w_img)
              w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
            elif len(shape) == 3:  # convnet case
              if K.image_data_format() == 'channels_last':
                # switch to channels_first to display
                # every kernel as a separate image
                w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
                shape = K.int_shape(w_img)
              w_img = array_ops.reshape(w_img,
                                        [shape[0], shape[1], shape[2], 1])
            elif len(shape) == 1:  # bias case
              w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
            else:
              # not possible to handle 3D convnets etc.
              continue

            shape = K.int_shape(w_img)
            assert len(shape) == 4 and shape[-1] in [1, 3, 4]
            tf_summary.image(mapped_weight_name, w_img)

        if self.write_grads:
          for weight in layer.trainable_weights:
            mapped_weight_name = weight.name.replace(':', '_')
            grads = model.optimizer.get_gradients(model.total_loss, weight)

            def is_indexed_slices(grad):
              return type(grad).__name__ == 'IndexedSlices'

            grads = [grad.values if is_indexed_slices(grad) else grad
                     for grad in grads]
            tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)

        if hasattr(layer, 'output'):
          tf_summary.histogram('{}_out'.format(layer.name), layer.output)
    self.merged = tf_summary.merge_all()

    if self.write_graph:
      self.writer = self._writer_class(self.log_dir, self.sess.graph)
    else:
      self.writer = self._writer_class(self.log_dir)
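
The `write_images` branch reshapes every weight into a 4-D `[batch, height, width, channels]` tensor that `tf_summary.image` accepts. A numpy sketch of the same reshaping rules, with toy shapes:

import numpy as np

def weight_to_image(w):
    w = np.squeeze(w)
    if w.ndim == 2:                # dense kernel: one grayscale image
        if w.shape[0] > w.shape[1]:
            w = w.T                # put the longer axis horizontally
        return w.reshape(1, w.shape[0], w.shape[1], 1)
    if w.ndim == 1:                # bias: a one-pixel-high strip
        return w.reshape(1, w.shape[0], 1, 1)
    raise ValueError('cannot render rank-%d weights' % w.ndim)

assert weight_to_image(np.zeros((784, 10))).shape == (1, 10, 784, 1)
assert weight_to_image(np.zeros(10)).shape == (1, 10, 1, 1)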
Example #3
def sparse_categorical_accuracy(y_true, y_pred):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
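
The metric reduces to a per-sample argmax comparison; a numpy sketch with hypothetical values:

import numpy as np

y_true = np.array([1, 0, 2])                  # integer class ids
y_pred = np.array([[0.1, 0.8, 0.1],           # argmax 1 -> hit
                   [0.9, 0.05, 0.05],         # argmax 0 -> hit
                   [0.6, 0.3, 0.1]])          # argmax 0 -> miss
per_sample = (np.argmax(y_pred, axis=-1) == y_true).astype(np.float32)
print(per_sample.mean())                      # 0.6666667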
Example #4
def sparse_categorical_accuracy(y_true, y_pred):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the predicted output and actual output types don't match, force cast them
  # to match.
  if K.dtype(y_pred) != K.dtype(y_true):
    y_pred = math_ops.cast(y_pred, K.dtype(y_true))

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
Example #5
  def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    inputs, initial_state, constants = _standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if initial_state is None and constants is None:
      return super(ConvRNN2D, self).__call__(inputs, **kwargs)

    # If any of `initial_state` or `constants` are specified and are Keras
    # tensors, then add them to the inputs and temporarily modify the
    # input_spec to include them.

    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
      kwargs['initial_state'] = initial_state
      additional_inputs += initial_state
      self.state_spec = []
      for state in initial_state:
        shape = K.int_shape(state)
        self.state_spec.append(InputSpec(shape=shape))

      additional_specs += self.state_spec
    if constants is not None:
      kwargs['constants'] = constants
      additional_inputs += constants
      self.constants_spec = [InputSpec(shape=K.int_shape(constant))
                             for constant in constants]
      self._num_constants = len(constants)
      additional_specs += self.constants_spec
    # at this point additional_inputs cannot be empty
    for tensor in additional_inputs:
      if K.is_keras_tensor(tensor) != K.is_keras_tensor(additional_inputs[0]):
        raise ValueError('The initial state or constants of an RNN'
                         ' layer cannot be specified with a mix of'
                         ' Keras tensors and non-Keras tensors')

    if K.is_keras_tensor(additional_inputs[0]):
      # Compute the full input spec, including state and constants
      full_input = [inputs] + additional_inputs
      full_input_spec = self.input_spec + additional_specs
      # Perform the call with temporarily replaced input_spec
      original_input_spec = self.input_spec
      self.input_spec = full_input_spec
      output = super(ConvRNN2D, self).__call__(full_input, **kwargs)
      self.input_spec = original_input_spec
      return output
    else:
      return super(ConvRNN2D, self).__call__(inputs, **kwargs)
Example #6
 def compute_output_shape(self, input_shape):
   if self._output_shape is None:
     if context.executing_eagerly():
       # Make use of existing autocomputation for Eager mode but provide
       # Lambda-specific error message.
       try:
         return super(Lambda, self).compute_output_shape(input_shape)
       except NotImplementedError:
         raise NotImplementedError('We could not automatically infer '
                                   'the static shape of the Lambda\'s output.'
                                   ' Please specify the `output_shape` for'
                                   ' this Lambda.')
     if isinstance(input_shape, list):
       x = [K.placeholder(shape=shape) for shape in input_shape]
     else:
       x = K.placeholder(shape=input_shape)
     x = self.call(x)
     if isinstance(x, list):
       return [tensor_shape.TensorShape(K.int_shape(x_elem)) for x_elem in x]
     else:
       return tensor_shape.TensorShape(K.int_shape(x))
   elif isinstance(self._output_shape, (tuple, list)):
     if isinstance(input_shape, list):
       num_samples = input_shape[0][0]
     else:
       num_samples = input_shape[0] if input_shape else None
     # List here represents multiple outputs.
     if isinstance(self._output_shape, list):
       return [
           tensor_shape.TensorShape((num_samples,) + tuple(single_shape))
           for single_shape in self._output_shape
       ]
     return tensor_shape.TensorShape((num_samples,) + self._output_shape)
   else:
     shape = self._output_shape(input_shape)
     if not isinstance(shape, (list, tuple)):
       raise ValueError(
           '`output_shape` function must return a tuple or a list of tuples.')
     # List here can represent multiple outputs or single output.
     if isinstance(shape, list):
       # Convert list representing single output into a tuple.
       if isinstance(shape[0], (int, type(None))):
         shape = tuple(shape)
       else:
         return [
             tensor_shape.TensorShape(single_shape) for single_shape in shape
         ]
     return tensor_shape.TensorShape(shape)
Example #7
  def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      lr = lr * (  # pylint: disable=g-no-augmented-assignment
          1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                K.dtype(self.decay))))
    # momentum
    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments
    for p, g, m in zip(params, grads, moments):
      v = self.momentum * m - lr * g  # velocity
      self.updates.append(state_ops.assign(m, v))

      if self.nesterov:
        new_p = p + self.momentum * v - lr * g
      else:
        new_p = p + v

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(state_ops.assign(p, new_p))
    return self.updates
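
One step of the update loop above, written with numpy and toy values; the Nesterov branch applies the momentum lookahead on top of the velocity:

import numpy as np

lr, momentum = 0.1, 0.9
p = np.array([1.0, -2.0])               # parameter
v = np.zeros(2)                         # moment buffer (m in the loop above)
g = np.array([0.5, -0.5])               # gradient

v = momentum * v - lr * g               # velocity
p_plain = p + v                         # standard momentum step
p_nesterov = p + momentum * v - lr * g  # Nesterov variant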
Example #8
  def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    delta_accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators + delta_accumulators
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      lr = lr * (  # pylint: disable=g-no-augmented-assignment
          1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                K.dtype(self.decay))))

    for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
      # update accumulator
      new_a = self.rho * a + (1. - self.rho) * math_ops.square(g)
      self.updates.append(state_ops.assign(a, new_a))

      # use the new accumulator and the *old* delta_accumulator
      update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
      new_p = p - lr * update

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(state_ops.assign(p, new_p))

      # update delta_accumulator
      new_d_a = self.rho * d_a + (1 - self.rho) * math_ops.square(update)
      self.updates.append(state_ops.assign(d_a, new_d_a))
    return self.updates
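
The loop implements the standard Adadelta recurrences; one step in numpy with toy values (note the update uses the *old* delta accumulator, exactly as the comment says):

import numpy as np

rho, eps, lr = 0.95, 1e-7, 1.0    # Adadelta's lr conventionally stays at 1.0
p, g = np.array([1.0]), np.array([0.5])
a, d_a = np.zeros(1), np.zeros(1)          # squared-grad / squared-update EMAs

a = rho * a + (1. - rho) * g**2            # new accumulator
update = g * np.sqrt(d_a + eps) / np.sqrt(a + eps)
p = p - lr * update
d_a = rho * d_a + (1. - rho) * update**2   # delta accumulator, updated last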
Example #9
    def opt_variable(value, dtype=None, name=None, constraint=None):
      """Instantiates a variable and returns it."""
      if dtype is None:
        dtype = backend.floatx()

      variables = []
      for i in range(num_replicas):
        # Keras holds the variables in the optimizer class instance, so the name
        # does not matter here. ResourceVariable constructor will find a unique
        # name (including name=None) for each replica.
        with ops.device("device:TPU:{}".format(i)):
          v = resource_variable_ops.ResourceVariable(
              value,
              dtype=dtypes_module.as_dtype(dtype),
              name=name,
              constraint=constraint)
          variables.append(v)
      name = "replicate_{}_{}".format("variable" if name is None else name,
                                      ops.uid())
      v = ReplicatedVariable(name, variables)

      # pylint: disable=protected-access

      if isinstance(value, np.ndarray):
        v._keras_shape = value.shape
      elif hasattr(value, "shape"):
        v._keras_shape = backend.int_shape(value)
      v._uses_learning_phase = False
      backend.track_variable(v)
      return v
Example #10
  def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
      lr = lr * (  # pylint: disable=g-no-augmented-assignment
          1. /
          (1. +
           self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

    for p, g, a in zip(params, grads, accumulators):
      new_a = a + math_ops.square(g)  # update accumulator
      self.updates.append(state_ops.assign(a, new_a))
      new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(state_ops.assign(p, new_p))
    return self.updates
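
The accumulator is a running sum of squared gradients, so each parameter's effective step size only ever shrinks. One Adagrad step in numpy with toy values:

import numpy as np

lr, eps = 0.01, 1e-7
p, g = np.array([1.0]), np.array([0.5])
a = np.zeros(1)                      # accumulator

a = a + g**2
p = p - lr * g / (np.sqrt(a) + eps)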
Example #11
  def _get_shape_tuple(self, init_tuple, tensor, start_idx, int_shape=None):
    """Finds non-specific dimensions in the static shapes.

    The static shapes are replaced with the corresponding dynamic shapes of the
    tensor.

    Arguments:
      init_tuple: a tuple, the first part of the output shape
      tensor: the tensor from which to get the (static and dynamic) shapes
        as the last part of the output shape
      start_idx: int, which indicates the first dimension to take from
        the static shape of the tensor
      int_shape: an alternative static shape to take as the last part
        of the output shape

    Returns:
      The new int_shape with the first part from init_tuple
      and the last part from either `int_shape` (if provided)
      or `tensor.shape`, where every `None` is replaced by
      the corresponding dimension from `tf.shape(tensor)`.
    """
    # replace all None in int_shape by K.shape
    if int_shape is None:
      int_shape = K.int_shape(tensor)[start_idx:]
    if not any(not s for s in int_shape):
      return init_tuple + tuple(int_shape)
    shape = K.shape(tensor)
    int_shape = list(int_shape)
    for i, s in enumerate(int_shape):
      if not s:
        int_shape[i] = shape[start_idx + i]
    return init_tuple + tuple(int_shape)
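
The helper keeps static dimensions where they are known and falls back to the dynamic shape for every `None`. A sketch with plain tuples, where `dynamic_shape` stands in for `tf.shape(tensor)`:

def get_shape_tuple(init_tuple, static_shape, dynamic_shape, start_idx):
    tail = list(static_shape[start_idx:])
    for i, s in enumerate(tail):
        if not s:                          # None (or 0): use the dynamic dim
            tail[i] = dynamic_shape[start_idx + i]
    return init_tuple + tuple(tail)

# Static (None, 10, None, 3) with runtime shape (32, 10, 28, 3):
assert get_shape_tuple((-1,), (None, 10, None, 3), (32, 10, 28, 3), 2) == (-1, 28, 3)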
Example #12
  def call(self, inputs, training=None, mask=None):
    kwargs = {}
    if generic_utils.has_arg(self.layer.call, 'training'):
      kwargs['training'] = training
    uses_learning_phase = False  # pylint: disable=redefined-outer-name

    input_shape = K.int_shape(inputs)
    if input_shape[0]:
      # batch size matters, use rnn-based implementation
      def step(x, _):
        global uses_learning_phase  # pylint: disable=global-variable-undefined
        output = self.layer.call(x, **kwargs)
        if hasattr(output, '_uses_learning_phase'):
          uses_learning_phase = (output._uses_learning_phase or
                                 uses_learning_phase)
        return output, []

      _, outputs, _ = K.rnn(
          step,
          inputs,
          initial_states=[],
          input_length=input_shape[1],
          unroll=False)
      y = outputs
    else:
      # No batch size specified, therefore the layer will be able
      # to process batches of any size.
      # We can go with reshape-based implementation for performance.
      input_length = input_shape[1]
      if not input_length:
        input_length = array_ops.shape(inputs)[1]
      inner_input_shape = self._get_shape_tuple((-1,), inputs, 2)
      # Shape: (num_samples * timesteps, ...). And track the
      # transformation in self._input_map.
      input_uid = generic_utils.object_list_uid(inputs)
      inputs = array_ops.reshape(inputs, inner_input_shape)
      self._input_map[input_uid] = inputs
      # (num_samples * timesteps, ...)
      if generic_utils.has_arg(self.layer.call, 'mask') and mask is not None:
        inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
        kwargs['mask'] = K.reshape(mask, inner_mask_shape)
      y = self.layer.call(inputs, **kwargs)
      if hasattr(y, '_uses_learning_phase'):
        uses_learning_phase = y._uses_learning_phase
      # Shape: (num_samples, timesteps, ...)
      output_shape = self.compute_output_shape(input_shape).as_list()
      output_shape = self._get_shape_tuple(
          (-1, input_length), y, 1, output_shape[2:])
      y = array_ops.reshape(y, output_shape)

    # Apply activity regularizer if any:
    if (hasattr(self.layer, 'activity_regularizer') and
        self.layer.activity_regularizer is not None):
      regularization_loss = self.layer.activity_regularizer(y)
      self.add_loss(regularization_loss, inputs)

    if uses_learning_phase:
      y._uses_learning_phase = True
    return y
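
The reshape-based path folds the time axis into the batch axis, applies the wrapped layer once, and unfolds the result. In numpy, with an elementwise stand-in for `layer.call`:

import numpy as np

batch, timesteps, features = 4, 5, 3
x = np.random.rand(batch, timesteps, features)

inner = lambda a: a * 2.0                  # stand-in for self.layer.call
y = inner(x.reshape(-1, features))         # (batch * timesteps, features)
y = y.reshape(batch, timesteps, -1)        # back to (batch, timesteps, ...)
assert y.shape == x.shape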
Example #13
  def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
      lr = lr * (  # pylint: disable=g-no-augmented-assignment
          1. /
          (1. +
           self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

    with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
      t = math_ops.cast(self.iterations, K.floatx())
    lr_t = lr * (
        K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
        (1. - math_ops.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsgrad:
      vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
      vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
      m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
      v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
      if self.amsgrad:
        vhat_t = math_ops.maximum(vhat, v_t)
        p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
        self.updates.append(state_ops.assign(vhat, vhat_t))
      else:
        p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

      self.updates.append(state_ops.assign(m, m_t))
      self.updates.append(state_ops.assign(v, v_t))
      new_p = p_t

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(state_ops.assign(p, new_p))
    return self.updates
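
One bias-corrected Adam step in numpy, matching the non-amsgrad branch above (t starts at 1 because of the incremented iteration counter):

import numpy as np

lr, b1, b2, eps, t = 0.001, 0.9, 0.999, 1e-7, 1.0
p, g = np.array([1.0]), np.array([0.5])
m, v = np.zeros(1), np.zeros(1)

lr_t = lr * np.sqrt(1. - b2**t) / (1. - b1**t)
m = b1 * m + (1. - b1) * g
v = b2 * v + (1. - b2) * g**2
p = p - lr_t * m / (np.sqrt(v) + eps)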
Example #14
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # GRU does not support constants. Ignore them during processing.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self.could_use_cudnn:
      # CuDNN does not support masking, fall back to use the normal GRU.
      kwargs = {'training': training}

      def step(cell_inputs, cell_states):
        return self.cell.call(cell_inputs, cell_states, **kwargs)

      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      # This is a dummy tensor for testing purposes.
      runtime = _runtime('unknown')
    else:
      last_output, outputs, runtime, states = self._defun_gru_call(
          inputs, initial_state, training, mask)

    if self.stateful:
      updates = [state_ops.assign(self.states[0], states[0])]
      self.add_update(updates, inputs)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self._return_runtime:
      return output, runtime
    else:
      return output
Example #15
  def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = []

    with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
      t = math_ops.cast(self.iterations, K.floatx())

    # Following the recommendation in [2], i.e. a warming momentum schedule.
    momentum_cache_t = self.beta_1 * (
        1. - 0.5 *
        (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
    momentum_cache_t_1 = self.beta_1 * (
        1. - 0.5 *
        (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
    m_schedule_new = self.m_schedule * momentum_cache_t
    m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
    self.updates.append((self.m_schedule, m_schedule_new))

    shapes = [K.int_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]

    self.weights = [self.iterations, self.m_schedule] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
      # The following equations are given in [1].
      g_prime = g / (1. - m_schedule_new)
      m_t = self.beta_1 * m + (1. - self.beta_1) * g
      m_t_prime = m_t / (1. - m_schedule_next)
      v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
      v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
      m_t_bar = (1. -
                 momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

      self.updates.append(state_ops.assign(m, m_t))
      self.updates.append(state_ops.assign(v, v_t))

      p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
      new_p = p_t

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(state_ops.assign(p, new_p))
    return self.updates
Example #16
def transition_block(x, reduction, name):
  """A transition block.

  Arguments:
      x: input tensor.
      reduction: float, compression rate at transition layers.
      name: string, block label.

  Returns:
      output tensor for the block.
  """
  bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
  x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_bn')(x)
  x = Activation('relu', name=name + '_relu')(x)
  x = Conv2D(
      int(K.int_shape(x)[bn_axis] * reduction),
      1,
      use_bias=False,
      name=name + '_conv')(
          x)
  x = AveragePooling2D(2, strides=2, name=name + '_pool')(x)
  return x
Example #17
  def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
      lr = lr * (  # pylint: disable=g-no-augmented-assignment
          1. /
          (1. +
           self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

    with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
      t = math_ops.cast(self.iterations, K.floatx())
    lr_t = lr / (1. - math_ops.pow(self.beta_1, t))

    shapes = [K.int_shape(p) for p in params]
    # zero init of 1st moment
    ms = [K.zeros(shape) for shape in shapes]
    # zero init of exponentially weighted infinity norm
    us = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + us

    for p, g, m, u in zip(params, grads, ms, us):

      m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
      u_t = math_ops.maximum(self.beta_2 * u, math_ops.abs(g))
      p_t = p - lr_t * m_t / (u_t + self.epsilon)

      self.updates.append(state_ops.assign(m, m_t))
      self.updates.append(state_ops.assign(u, u_t))
      new_p = p_t

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(state_ops.assign(p, new_p))
    return self.updates
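
Adamax replaces the second-moment estimate with an exponentially weighted infinity norm, so no square root appears in the denominator. One step in numpy:

import numpy as np

lr, b1, b2, eps, t = 0.002, 0.9, 0.999, 1e-7, 1.0
p, g = np.array([1.0]), np.array([0.5])
m, u = np.zeros(1), np.zeros(1)

lr_t = lr / (1. - b1**t)             # only the first moment is bias-corrected
m = b1 * m + (1. - b1) * g
u = np.maximum(b2 * u, np.abs(g))    # exponentially weighted infinity norm
p = p - lr_t * m / (u + eps)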
Example #18
def normal_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, units,
                activation, recurrent_activation):
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[1]

  def step(cell_inputs, cell_states):
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    # Only use the second half of the bias weights.
    _, real_bias = array_ops.split(bias, 2)

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, real_bias)

    z0 = z[:, :units]
    z1 = z[:, units:2 * units]
    z2 = z[:, 2 * units:3 * units]
    z3 = z[:, 3 * units:]

    i = recurrent_activation(z0)
    f = recurrent_activation(z1)
    c = f * c_tm1 + i * activation(z2)
    o = recurrent_activation(z3)

    h = o * activation(c)
    return h, [h, c]

  _, outputs, new_states = K.rnn(
      step,
      inputs, [init_h, init_c],
      constants=None,
      unroll=False,
      input_length=timesteps)
  return outputs, new_states, constant_op.constant(
      'cpu', dtype=dtypes.string, name='runtime')
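
The fused matmul produces all four gate pre-activations at once; they are then split into the input, forget, candidate, and output gates in that order. A single step in numpy with random toy weights:

import numpy as np

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

units, batch, features = 4, 2, 3
rng = np.random.default_rng(0)
x = rng.standard_normal((batch, features))
h = np.zeros((batch, units))
c = np.zeros((batch, units))
kernel = rng.standard_normal((features, 4 * units))
recurrent_kernel = rng.standard_normal((units, 4 * units))
bias = np.zeros(4 * units)

z = x @ kernel + h @ recurrent_kernel + bias
zi, zf, zc, zo = np.split(z, 4, axis=-1)        # i, f, candidate, o
c = sigmoid(zf) * c + sigmoid(zi) * np.tanh(zc)
h = sigmoid(zo) * np.tanh(c)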
Example #19
def _eager_metrics_fn(model, outputs, targets):
  """Calculates the metrics for each output of the given model.

  Arguments:
      model: The model on which metrics are being calculated.
      outputs: The outputs of the given model.
      targets: The predictions or targets of the given model.

  Returns:
      Returns the metric names and metric results for each output of the model.
  """
  metric_names = []
  metric_results = []
  if not isinstance(outputs, list):
    outputs = [outputs]

  if not isinstance(targets, list):
    targets = [targets]

  for i in range(len(model.outputs)):
    output_metrics = model.nested_metrics[i]
    for nested_output_metric in output_metrics:
      metric_name, metric_fn = _get_metrics_info(
          nested_output_metric, backend.int_shape(model.outputs[i]),
          model.loss_functions[i])

      if len(model.output_names) > 1:
        metric_name = model.output_names[i] + '_' + metric_name
        if metric_name not in model.metrics_names:
          model.metrics_names.append(metric_name)

      with backend.name_scope(metric_name):
        metric_result = metric_fn(targets[i], outputs[i])
        metric_names.append(metric_name)
        metric_results.append(backend.mean(metric_result))

  return metric_results
Example #20
    def call(self, inputs, training=None):
        input_shape = K.int_shape(inputs)
        reduction_axes = list(range(0, len(input_shape)))

        if self.axis is not None:
            del reduction_axes[self.axis]

        del reduction_axes[0]

        mean = K.mean(inputs, reduction_axes, keepdims=True)
        stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
        normed = (inputs - mean) / stddev

        broadcast_shape = [1] * len(input_shape)
        if self.axis is not None:
            broadcast_shape[self.axis] = input_shape[self.axis]

        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            normed = normed * broadcast_gamma
        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            normed = normed + broadcast_beta
        return normed
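
The normalization itself is a mean/std reduction over every axis except batch (and the parameter axis), followed by a rescale and shift. In numpy for the 2-D case:

import numpy as np

eps = 1e-5
x = np.random.rand(2, 5)                       # (batch, features)
gamma, beta = np.ones(5), np.zeros(5)

mean = x.mean(axis=-1, keepdims=True)
std = x.std(axis=-1, keepdims=True) + eps
normed = (x - mean) / std * gamma + beta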
Example #21
def test_get_complex_value_gradients(input_layer, batch_size,
                                     conjugate_gradients):
    with DEFAULT_TF_GRAPH.as_default():
        machine = Linear(input_layer)
        model = Model(inputs=[input_layer], outputs=machine.predictions)
        if tensorflow.__version__ >= '1.14':
            optimizer = ComplexValuesOptimizer(model,
                                               machine.predictions_jacobian,
                                               name='optimizer')
        else:
            optimizer = ComplexValuesOptimizer(model,
                                               machine.predictions_jacobian)
        loss = Multiply()([machine.predictions, machine.predictions])
        manual_gradients_layer = Lambda(
            lambda x: tensorflow.reshape(tensorflow.reduce_sum(2.0 * x[0] * x[1], axis=0),
                                         machine.dense_layer.kernel.shape)) \
            ([machine.predictions, machine.manual_jacobian])
        if conjugate_gradients:
            manual_gradients_layer = Lambda(lambda x: tensorflow.conj(x))(
                manual_gradients_layer)
        manual_gradients_function = K.function(
            inputs=[input_layer], outputs=[manual_gradients_layer])
        complex_value_gradients_layer = Lambda(
            lambda x: optimizer.get_model_parameters_complex_value_gradients(
                tensorflow.real(x), conjugate_gradients=conjugate_gradients))(
                    loss)
        complex_value_gradients_function = K.function(
            inputs=[input_layer], outputs=[complex_value_gradients_layer])
        sample = numpy.random.choice(
            2, (batch_size, ) + K.int_shape(input_layer)[1:]) * 2 - 1
        complex_value_gradients = complex_value_gradients_function([sample])[0]
        manual_gradients = manual_gradients_function([sample])[0]
        diff_norm = numpy.linalg.norm(complex_value_gradients -
                                      manual_gradients)
        gradients_norm = numpy.linalg.norm(manual_gradients)
        assert (diff_norm / gradients_norm) < 1e-5
Example #22
def normal_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, units):
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[1]

  def step(cell_inputs, cell_states):
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    # Only use the second half of the bias weights.
    _, real_bias = array_ops.split(bias, 2)

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, real_bias)

    z0 = z[:, :units]
    z1 = z[:, units:2 * units]
    z2 = z[:, 2 * units:3 * units]
    z3 = z[:, 3 * units:]

    i = activations.get('hard_sigmoid')(z0)
    f = activations.get('hard_sigmoid')(z1)
    c = f * c_tm1 + i * activations.get('tanh')(z2)
    o = activations.get('hard_sigmoid')(z3)

    h = o * activations.get('tanh')(c)
    return h, [h, c]

  _, outputs, new_states = K.rnn(
      step,
      inputs, [init_h, init_c],
      constants=None,
      unroll=False,
      input_length=timesteps)
  return outputs, new_states, constant_op.constant(
      'cpu', dtype=dtypes.string, name='runtime')
Example #23
def transition_block(x, reduction, name):
  """A transition block.

  Arguments:
    x: input tensor.
    reduction: float, compression rate at transition layers.
    name: string, block label.

  Returns:
    output tensor for the block.
  """
  bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
  x = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_bn')(
          x)
  x = layers.Activation('relu', name=name + '_relu')(x)
  x = layers.Conv2D(
      int(backend.int_shape(x)[bn_axis] * reduction),
      1,
      use_bias=False,
      name=name + '_conv')(
          x)
  x = layers.AveragePooling2D(2, strides=2, name=name + '_pool')(x)
  return x
Example #24
    def call(self, inputs):
        if self.conv_singular:
            sigma, u_bar = max_singular_val_for_convolution(
                self.kernel,
                self.u,
                fully_differentiable=self.fully_diff_spectral,
                ip=self.spectral_iterations,
                padding=self.padding,
                strides=self.strides,
                data_format=self.data_format)
            kernel_sn = self.kernel / sigma
            self.add_update(K.update(self.u, u_bar))
        else:
            kernel_shape = K.int_shape(self.kernel)
            w = K.reshape(self.kernel,
                          (kernel_shape[0] * kernel_shape[1] * kernel_shape[2],
                           kernel_shape[3]))

            sigma, u_bar = max_singular_val(
                w,
                self.u,
                fully_differentiable=self.fully_diff_spectral,
                ip=self.spectral_iterations)

            w_sn = w / sigma

            kernel_sn = K.reshape(w_sn, kernel_shape)

            self.add_update(K.update(self.u, u_bar))

        kernel = self.kernel
        self.kernel = kernel_sn
        outputs = super(SNConv2D, self).call(inputs)
        self.kernel = kernel

        return outputs
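
`max_singular_val` is not shown here; a usual implementation is power iteration on the reshaped kernel. A hedged numpy sketch of that estimate (the function body below is illustrative, not the source's):

import numpy as np

def max_singular_val(w, u, ip=1):
    # Power iteration: alternately project through w.T and w, normalizing.
    for _ in range(ip):
        v = w.T @ u
        v /= np.linalg.norm(v)
        u = w @ v
        u /= np.linalg.norm(u)
    sigma = u @ w @ v                 # estimate of the largest singular value
    return sigma, u

w = np.random.rand(3 * 3 * 16, 8)     # (kh * kw * c_in, filters)
u = np.random.rand(3 * 3 * 16)
sigma, u = max_singular_val(w, u, ip=5)
w_sn = w / sigma                      # spectrally normalized kernel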
Example #25
def transition_block(x, reduction, name):
    """A transition block.

    # Arguments
        x: input tensor.
        reduction: float, compression rate at transition layers.
        name: string, block label.

    # Returns
        output tensor for the block.
    """
    bn_axis = 3
    x = layers.BatchNormalization(axis=bn_axis,
                                  epsilon=1.001e-5,
                                  name=name + '_bn')(x)
    x = layers.Activation('relu', name=name + '_relu')(x)
    x = layers.Conv2D(int(backend.int_shape(x)[bn_axis] * reduction),
                      1,
                      use_bias=False,
                      kernel_regularizer=regularizers.l2(l2_reg),
                      name=name + '_conv')(x)
    conv_before_pool = x
    x = layers.AveragePooling2D(2, strides=2, name=name + '_pool')(x)
    return conv_before_pool, x
Example #26
    def call(self, inputs, **kwargs):
        outputs = inputs
        for i in range(self.depth):
            if self.use_bias:
                outputs = outputs + self.conv_biases[i]
            outputs = self.conv_layers[i](outputs)

            if i < (self.depth - 1):
                if self.activation is not None:
                    if self.use_bias:
                        outputs = outputs + self.activation_biases[i]
                    outputs = self.activation(outputs)

        if self.use_projection(backend.int_shape(inputs)):
            inputs = self.projection_layer(inputs)

        # x_k+1 = x_k + a*f(x_k) + b
        outputs = outputs * self.residual_multiplier
        if self.use_bias:
            outputs = outputs + self.residual_bias
        outputs = inputs + outputs

        outputs = self.activation(outputs)
        return outputs
Example #27
    def call(self, x):
        """Feedforward
        :param x:  [inputs, targets]
        :return: hidden tensor
        """
        inputs = x[0]
        targets = x[1]
        pred_g = self.g_network_decoder(self.g_network_encoder(inputs))

        # residual
        r = Lambda((lambda x: x[1] - x[0]))([pred_g, targets])
        out_dim = K.int_shape(self.phi_network_conv(r))  # shape=(?, 64, 7, 7)
        z = self.phi_network_fc(
            K.reshape(
                self.phi_network_conv(r),
                (self.opt.batch_size, out_dim[1] * out_dim[2] * out_dim[3])))
        z = K.reshape(z, (self.opt.batch_size, self.opt.n_latent))
        z_emb = self.encoder_latent(z)
        z_emb = K.reshape(z_emb,
                          (self.opt.batch_size, self.opt.nfeature, 1, 1))
        s = self.f_network_encoder(inputs)
        return Lambda(
            (lambda x: tf.math.add(x[0], x[1])))([s, z_emb
                                                  ])  # tf.math.add : broadcast
Example #28
def bregnet_module(X, num_filters, block_name, module_name, alpha=1.0):
    main = Conv2D(filters=num_filters,
                  kernel_size=(3, 3),
                  padding='same',
                  kernel_initializer='he_normal',
                  name='conv_{}_{}_1'.format(block_name, module_name))(X)
    main = ELU(alpha=alpha, name='elu_{}_{}_1'.format(block_name,
                                                      module_name))(main)
    main = Conv2D(filters=num_filters,
                  kernel_size=(3, 3),
                  padding='same',
                  kernel_initializer='he_normal',
                  name='conv_{}_{}_2'.format(block_name, module_name))(main)
    main = BatchNormalization(
        name='bn_{}_{}'.format(block_name, module_name))(main)
    if K.int_shape(X)[-1] != num_filters:
        X = Conv2D(filters=num_filters,
                   kernel_size=(1, 1),
                   padding='same',
                   kernel_initializer='he_normal',
                   name='conv_{}_{}_skip'.format(block_name, module_name))(X)
    shortcut = Lambda(residual_connection)(X)
    out = Add()([main, shortcut])
    return out
Example #29
    def call(self, inputs):
        data_format = conv_utils.convert_data_format(self.data_format, self.rank + 2)
        inputs, tf_data_format = K._preprocess_conv2d_input(inputs, self.data_format)

        inputs = tf.image.extract_patches(
            inputs,
            sizes=(1,) + K.int_shape(self.kernel)[:2] + (1,),
            strides=(1,) + self.strides + (1,),
            rates=(1,) + self.dilation_rate + (1,),
            padding=self.padding.upper(),
        )

        kernel = K.reshape(self.kernel, (-1, self.filters))
        outputs = self.kernel_function([inputs, kernel])

        if self.data_format == 'channels_first':
            outputs = K.permute_dimensions(outputs, (0, 3, 1, 2))

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias, data_format=data_format)

        if self.activation is not None:
            outputs = self.activation(outputs)
        return outputs
Example #30
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        shapes = [K.int_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        self.weights = accumulators
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (  # pylint: disable=g-no-augmented-assignment
                1. / (1. + self.decay *
                      math_ops.cast(self.iterations, K.dtype(self.decay))))

        for p, g, a in zip(params, grads, accumulators):
            new_a = a + math_ops.square(g)  # update accumulator
            self.updates.append(state_ops.assign(a, new_a))
            new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #31
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (  # pylint: disable=g-no-augmented-assignment
                1. / (1. + self.decay *
                      math_ops.cast(self.iterations, K.dtype(self.decay))))

        t = math_ops.cast(self.iterations, K.floatx()) + 1
        lr_t = lr / (1. - math_ops.pow(self.beta_1, t))

        shapes = [K.int_shape(p) for p in params]
        # zero init of 1st moment
        ms = [K.zeros(shape) for shape in shapes]
        # zero init of exponentially weighted infinity norm
        us = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + us

        for p, g, m, u in zip(params, grads, ms, us):

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            u_t = math_ops.maximum(self.beta_2 * u, math_ops.abs(g))
            p_t = p - lr_t * m_t / (u_t + self.epsilon)

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(u, u_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #32
    def call(self, x, training=None):
        def outputs_inference():
            # Apply truncation trick according to cutoff.
            num_layers = K.int_shape(x)[1]

            if self.cutoff is not None:
                beta = Ke.where(
                    np.arange(num_layers)[np.newaxis, :,
                                          np.newaxis] < self.cutoff,
                    self.psi *
                    np.ones(shape=(1, num_layers, 1), dtype=np.float32),
                    np.ones(shape=(1, num_layers, 1), dtype=np.float32))  #?
            else:
                beta = np.ones(shape=(1, num_layers, 1), dtype=np.float32)

            return self.moving_mean + (x - self.moving_mean) * beta  #?

        # Update moving average.
        mean = K.mean(x[:, 0], axis=0)  #?
        x_moving_mean = K.moving_average_update(self.moving_mean, mean,
                                                self.momentum)  #? add_update?

        # Apply truncation trick according to cutoff.
        num_layers = K.int_shape(x)[1]

        if self.cutoff is not None:
            beta = Ke.where(
                np.arange(num_layers)[np.newaxis, :, np.newaxis] < self.cutoff,
                self.psi * np.ones(shape=(1, num_layers, 1), dtype=np.float32),
                np.ones(shape=(1, num_layers, 1), dtype=np.float32))  #?
        else:
            beta = np.ones(shape=(1, num_layers, 1), dtype=np.float32)

        outputs = x_moving_mean + (x - self.moving_mean) * beta  #?

        return K.in_train_phase(outputs, outputs_inference, training=training)
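
The truncation trick interpolates the per-layer style vectors toward the moving mean, but only for layers below `cutoff`. A numpy sketch with toy shapes:

import numpy as np

num_layers, dim, psi, cutoff = 6, 4, 0.7, 4
x = np.random.rand(1, num_layers, dim)
moving_mean = np.zeros((1, num_layers, dim))

# beta is psi for layers below the cutoff, 1.0 (no change) above it.
beta = np.where(np.arange(num_layers)[None, :, None] < cutoff, psi, 1.0)
out = moving_mean + (x - moving_mean) * beta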
Example #33
    def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
        """`Bidirectional.__call__` implements the same API as the wrapped `RNN`."""
        inputs, initial_state, constants = _standardize_args(
            inputs, initial_state, constants, self._num_constants)

        if isinstance(inputs, list):
            if len(inputs) > 1:
                initial_state = inputs[1:]
            inputs = inputs[0]

        if initial_state is None and constants is None:
            return super(Bidirectional, self).__call__(inputs, **kwargs)

        # Applies the same workaround as in `RNN.__call__`
        additional_inputs = []
        additional_specs = []
        if initial_state is not None:
            # Check if `initial_state` can be split in half
            num_states = len(initial_state)
            if num_states % 2 > 0:
                raise ValueError(
                    'When passing `initial_state` to a Bidirectional RNN, '
                    'the state should be a list containing the states of '
                    'the underlying RNNs. '
                    'Found: ' + str(initial_state))

            kwargs['initial_state'] = initial_state
            additional_inputs += initial_state
            state_specs = [
                InputSpec(shape=K.int_shape(state)) for state in initial_state
            ]
            self.forward_layer.state_spec = state_specs[:num_states // 2]
            self.backward_layer.state_spec = state_specs[num_states // 2:]
            additional_specs += state_specs
        if constants is not None:
            kwargs['constants'] = constants
            additional_inputs += constants
            constants_spec = [
                InputSpec(shape=K.int_shape(constant))
                for constant in constants
            ]
            self.forward_layer.constants_spec = constants_spec
            self.backward_layer.constants_spec = constants_spec
            additional_specs += constants_spec

            self._num_constants = len(constants)
            self.forward_layer._num_constants = self._num_constants
            self.backward_layer._num_constants = self._num_constants

        is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
        for tensor in additional_inputs:
            if K.is_keras_tensor(tensor) != is_keras_tensor:
                raise ValueError('The initial state of a Bidirectional'
                                 ' layer cannot be specified with a mix of'
                                 ' Keras tensors and non-Keras tensors'
                                 ' (a "Keras tensor" is a tensor that was'
                                 ' returned by a Keras layer, or by `Input`)')

        if is_keras_tensor:
            # Compute the full input spec, including state
            full_input = [inputs] + additional_inputs
            full_input_spec = self.input_spec + additional_specs

            # Perform the call with temporarily replaced input_spec
            original_input_spec = self.input_spec
            self.input_spec = full_input_spec
            output = super(Bidirectional, self).__call__(full_input, **kwargs)
            self.input_spec = original_input_spec
            return output
        else:
            return super(Bidirectional, self).__call__(inputs, **kwargs)
Example #34
def preprocess_weights_for_loading(layer,
                                   weights,
                                   original_keras_version=None,
                                   original_backend=None):
  """Preprocess layer weights between different Keras formats.

  Converts layer weights from the Keras 1 format to Keras 2, and also weights of
  CuDNN layers in Keras 2.

  Arguments:
      layer: Layer instance.
      weights: List of weights values (Numpy arrays).
      original_keras_version: Keras version for the weights, as a string.
      original_backend: Keras backend the weights were trained with,
          as a string.

  Returns:
      A list of weights values (Numpy arrays).
  """
  def convert_nested_bidirectional(weights):
    """Converts layers nested in `Bidirectional` wrapper.

    This function uses `preprocess_weights_for_loading()` for converting
    layers.

    Arguments:
        weights: List of weights values (Numpy arrays).

    Returns:
        A list of weights values (Numpy arrays).
    """
    num_weights_per_layer = len(weights) // 2
    forward_weights = preprocess_weights_for_loading(
        layer.forward_layer, weights[:num_weights_per_layer],
        original_keras_version, original_backend)
    backward_weights = preprocess_weights_for_loading(
        layer.backward_layer, weights[num_weights_per_layer:],
        original_keras_version, original_backend)
    return forward_weights + backward_weights

  def convert_nested_time_distributed(weights):
    """Converts layers nested in `TimeDistributed` wrapper.

    This function uses `preprocess_weights_for_loading()` for converting nested
    layers.

    Arguments:
        weights: List of weights values (Numpy arrays).

    Returns:
        A list of weights values (Numpy arrays).
    """
    return preprocess_weights_for_loading(
        layer.layer, weights, original_keras_version, original_backend)

  def convert_nested_model(weights):
    """Converts layers nested in `Model` or `Sequential`.

    This function uses `preprocess_weights_for_loading()` for converting nested
    layers.

    Arguments:
        weights: List of weights values (Numpy arrays).

    Returns:
        A list of weights values (Numpy arrays).
    """
    new_weights = []
    # trainable weights
    for sublayer in layer.layers:
      num_weights = len(sublayer.trainable_weights)
      if num_weights > 0:
        new_weights.extend(preprocess_weights_for_loading(
            layer=sublayer,
            weights=weights[:num_weights],
            original_keras_version=original_keras_version,
            original_backend=original_backend))
        weights = weights[num_weights:]

    # non-trainable weights
    for sublayer in layer.layers:
      num_weights = len([l for l in sublayer.weights
                         if l not in sublayer.trainable_weights])
      if num_weights > 0:
        new_weights.extend(preprocess_weights_for_loading(
            layer=sublayer,
            weights=weights[:num_weights],
            original_keras_version=original_keras_version,
            original_backend=original_backend))
        weights = weights[num_weights:]
    return new_weights

  # Convert layers nested in Bidirectional/Model/Sequential.
  # Both transformations should be run for both the Keras 1->2 conversion
  # and for the conversion of CuDNN layers.
  if layer.__class__.__name__ == 'Bidirectional':
    weights = convert_nested_bidirectional(weights)
  if layer.__class__.__name__ == 'TimeDistributed':
    weights = convert_nested_time_distributed(weights)
  elif layer.__class__.__name__ in ['Model', 'Sequential']:
    weights = convert_nested_model(weights)

  if original_keras_version == '1':
    if layer.__class__.__name__ == 'TimeDistributed':
      weights = preprocess_weights_for_loading(
          layer.layer, weights, original_keras_version, original_backend)

    if layer.__class__.__name__ == 'Conv1D':
      shape = weights[0].shape
      # Handle Keras 1.1 format
      if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters:
        # Legacy shape:
        # (filters, input_dim, filter_length, 1)
        assert shape[0] == layer.filters and shape[2:] == (layer.kernel_size[0],
                                                           1)
        weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
      weights[0] = weights[0][:, 0, :, :]

    if layer.__class__.__name__ == 'Conv2D':
      if layer.data_format == 'channels_first':
        # old: (filters, stack_size, kernel_rows, kernel_cols)
        # new: (kernel_rows, kernel_cols, stack_size, filters)
        weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

    if layer.__class__.__name__ == 'Conv2DTranspose':
      if layer.data_format == 'channels_last':
        # old: (kernel_rows, kernel_cols, stack_size, filters)
        # new: (kernel_rows, kernel_cols, filters, stack_size)
        weights[0] = np.transpose(weights[0], (0, 1, 3, 2))
      if layer.data_format == 'channels_first':
        # old: (filters, stack_size, kernel_rows, kernel_cols)
        # new: (kernel_rows, kernel_cols, filters, stack_size)
        weights[0] = np.transpose(weights[0], (2, 3, 0, 1))

    if layer.__class__.__name__ == 'Conv3D':
      if layer.data_format == 'channels_first':
        # old: (filters, stack_size, ...)
        # new: (..., stack_size, filters)
        weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))

    if layer.__class__.__name__ == 'GRU':
      if len(weights) == 9:
        kernel = np.concatenate([weights[0], weights[3], weights[6]], axis=-1)
        recurrent_kernel = np.concatenate(
            [weights[1], weights[4], weights[7]], axis=-1)
        bias = np.concatenate([weights[2], weights[5], weights[8]], axis=-1)
        weights = [kernel, recurrent_kernel, bias]

    if layer.__class__.__name__ == 'LSTM':
      if len(weights) == 12:
        # old: i, c, f, o
        # new: i, f, c, o
        kernel = np.concatenate(
            [weights[0], weights[6], weights[3], weights[9]], axis=-1)
        recurrent_kernel = np.concatenate(
            [weights[1], weights[7], weights[4], weights[10]], axis=-1)
        bias = np.concatenate(
            [weights[2], weights[8], weights[5], weights[11]], axis=-1)
        weights = [kernel, recurrent_kernel, bias]

    if layer.__class__.__name__ == 'ConvLSTM2D':
      if len(weights) == 12:
        kernel = np.concatenate(
            [weights[0], weights[6], weights[3], weights[9]], axis=-1)
        recurrent_kernel = np.concatenate(
            [weights[1], weights[7], weights[4], weights[10]], axis=-1)
        bias = np.concatenate(
            [weights[2], weights[8], weights[5], weights[11]], axis=-1)
        if layer.data_format == 'channels_first':
          # old: (filters, stack_size, kernel_rows, kernel_cols)
          # new: (kernel_rows, kernel_cols, stack_size, filters)
          kernel = np.transpose(kernel, (2, 3, 1, 0))
          recurrent_kernel = np.transpose(recurrent_kernel, (2, 3, 1, 0))
        weights = [kernel, recurrent_kernel, bias]

  conv_layers = ['Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D']
  if layer.__class__.__name__ in conv_layers:
    if original_backend == 'theano':
      weights[0] = conv_utils.convert_kernel(weights[0])
      if layer.__class__.__name__ == 'ConvLSTM2D':
        weights[1] = conv_utils.convert_kernel(weights[1])
    if K.int_shape(layer.weights[0]) != weights[0].shape:
      weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
      if layer.__class__.__name__ == 'ConvLSTM2D':
        weights[1] = np.transpose(weights[1], (3, 2, 0, 1))

  # convert CuDNN layers
  return _convert_rnn_weights(layer, weights)
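
The Keras 1 LSTM branch is the subtle one: twelve separate per-gate arrays stored in i, c, f, o order are fused into three arrays in i, f, c, o order. A numpy sketch of just that reordering, with stand-in arrays (real bias slots would be 1-D; only the kernel slots are used here):

import numpy as np

units, input_dim = 3, 2
# Keras 1 layout: [W_i, U_i, b_i, W_c, U_c, b_c, W_f, U_f, b_f, W_o, U_o, b_o]
w = [np.full((input_dim, units), k, dtype=np.float32) for k in range(12)]

kernel = np.concatenate([w[0], w[6], w[3], w[9]], axis=-1)   # i, f, c, o
assert kernel.shape == (input_dim, 4 * units)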
Example #35
def preprocess_weights_for_loading(layer,
                                   weights,
                                   original_keras_version=None,
                                   original_backend=None):
    """Preprocess layer weights between different Keras formats.

  Converts layer weights from the Keras 1 format to Keras 2, and also weights of
  CuDNN layers in Keras 2.

  Arguments:
      layer: Layer instance.
      weights: List of weights values (Numpy arrays).
      original_keras_version: Keras version for the weights, as a string.
      original_backend: Keras backend the weights were trained with,
          as a string.

  Returns:
      A list of weights values (Numpy arrays).
  """
    def convert_nested_bidirectional(weights):
        """Converts layers nested in `Bidirectional` wrapper.

    This function uses `preprocess_weights_for_loading()` for converting
    layers.

    Arguments:
        weights: List of weights values (Numpy arrays).

    Returns:
        A list of weights values (Numpy arrays).
    """
        num_weights_per_layer = len(weights) // 2
        forward_weights = preprocess_weights_for_loading(
            layer.forward_layer, weights[:num_weights_per_layer],
            original_keras_version, original_backend)
        backward_weights = preprocess_weights_for_loading(
            layer.backward_layer, weights[num_weights_per_layer:],
            original_keras_version, original_backend)
        return forward_weights + backward_weights

    def convert_nested_time_distributed(weights):
        """Converts layers nested in `TimeDistributed` wrapper.

    This function uses `preprocess_weights_for_loading()` for converting nested
    layers.

    Arguments:
        weights: List of weights values (Numpy arrays).

    Returns:
        A list of weights values (Numpy arrays).
    """
        return preprocess_weights_for_loading(layer.layer, weights,
                                              original_keras_version,
                                              original_backend)

    def convert_nested_model(weights):
        """Converts layers nested in `Model` or `Sequential`.

    This function uses `preprocess_weights_for_loading()` for converting nested
    layers.

    Arguments:
        weights: List of weights values (Numpy arrays).

    Returns:
        A list of weights values (Numpy arrays).
    """
        trainable_weights = weights[:len(layer.trainable_weights)]
        non_trainable_weights = weights[len(layer.trainable_weights):]

        new_trainable_weights = []
        new_non_trainable_weights = []

        for sublayer in layer.layers:
            num_trainable_weights = len(sublayer.trainable_weights)
            num_non_trainable_weights = len(sublayer.non_trainable_weights)
            if sublayer.weights:
                preprocessed = preprocess_weights_for_loading(
                    layer=sublayer,
                    weights=(
                        trainable_weights[:num_trainable_weights] +
                        non_trainable_weights[:num_non_trainable_weights]),
                    original_keras_version=original_keras_version,
                    original_backend=original_backend)
                new_trainable_weights.extend(
                    preprocessed[:num_trainable_weights])
                new_non_trainable_weights.extend(
                    preprocessed[num_trainable_weights:])

                trainable_weights = trainable_weights[num_trainable_weights:]
                non_trainable_weights = non_trainable_weights[
                    num_non_trainable_weights:]

        return new_trainable_weights + new_non_trainable_weights

    # Convert layers nested in Bidirectional/Model/Sequential.
    # Both transformations should be run for both the Keras 1->2 conversion
    # and for the conversion of CuDNN layers.
    if layer.__class__.__name__ == 'Bidirectional':
        weights = convert_nested_bidirectional(weights)
    if layer.__class__.__name__ == 'TimeDistributed':
        weights = convert_nested_time_distributed(weights)
    elif layer.__class__.__name__ in ['Model', 'Sequential']:
        weights = convert_nested_model(weights)

    if original_keras_version == '1':
        if layer.__class__.__name__ == 'TimeDistributed':
            weights = preprocess_weights_for_loading(layer.layer, weights,
                                                     original_keras_version,
                                                     original_backend)

        if layer.__class__.__name__ == 'Conv1D':
            shape = weights[0].shape
            # Handle Keras 1.1 format
            if shape[:2] != (layer.kernel_size[0],
                             1) or shape[3] != layer.filters:
                # Legacy shape:
                # (filters, input_dim, filter_length, 1)
                assert shape[0] == layer.filters and shape[2:] == (
                    layer.kernel_size[0], 1)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
            weights[0] = weights[0][:, 0, :, :]

        if layer.__class__.__name__ == 'Conv2D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

        if layer.__class__.__name__ == 'Conv2DTranspose':
            if layer.data_format == 'channels_last':
                # old: (kernel_rows, kernel_cols, stack_size, filters)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (0, 1, 3, 2))
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (2, 3, 0, 1))

        if layer.__class__.__name__ == 'Conv3D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, ...)
                # new: (..., stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))

        if layer.__class__.__name__ == 'GRU':
            if len(weights) == 9:
                kernel = np.concatenate([weights[0], weights[3], weights[6]],
                                        axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[4], weights[7]], axis=-1)
                bias = np.concatenate([weights[2], weights[5], weights[8]],
                                      axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'LSTM':
            if len(weights) == 12:
                # old: i, c, f, o
                # new: i, f, c, o
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'ConvLSTM2D':
            if len(weights) == 12:
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                if layer.data_format == 'channels_first':
                    # old: (filters, stack_size, kernel_rows, kernel_cols)
                    # new: (kernel_rows, kernel_cols, stack_size, filters)
                    kernel = np.transpose(kernel, (2, 3, 1, 0))
                    recurrent_kernel = np.transpose(recurrent_kernel,
                                                    (2, 3, 1, 0))
                weights = [kernel, recurrent_kernel, bias]

    conv_layers = [
        'Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D'
    ]
    if layer.__class__.__name__ in conv_layers:
        if original_backend == 'theano':
            weights[0] = conv_utils.convert_kernel(weights[0])
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = conv_utils.convert_kernel(weights[1])
        if K.int_shape(layer.weights[0]) != weights[0].shape:
            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))

    # convert CuDNN layers
    return _convert_rnn_weights(layer, weights)
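
A quick check of the Keras 1 GRU branch above: a minimal NumPy sketch (the shapes are illustrative assumptions, not from the source) of how the nine per-gate arrays are fused into the three arrays that Keras 2 expects.

# Minimal NumPy sketch of the Keras 1 -> 2 GRU weight fusion above.
# input_dim and units are assumed values for illustration only.
import numpy as np

input_dim, units = 8, 4
# Keras 1 stores (kernel, recurrent_kernel, bias) once per gate (z, r, h):
old = [np.zeros((input_dim, units)),
       np.zeros((units, units)),
       np.zeros((units,))] * 3
kernel = np.concatenate([old[0], old[3], old[6]], axis=-1)
recurrent_kernel = np.concatenate([old[1], old[4], old[7]], axis=-1)
bias = np.concatenate([old[2], old[5], old[8]], axis=-1)
assert kernel.shape == (input_dim, 3 * units)
assert recurrent_kernel.shape == (units, 3 * units)
assert bias.shape == (3 * units,)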
Esempio n. 36
0
    def call(self, inputs, training=None, mask=None):
        kwargs = {}
        if generic_utils.has_arg(self.layer.call, 'training'):
            kwargs['training'] = training

        input_shape = K.int_shape(inputs)
        if input_shape[0] and not self._always_use_reshape:
            inputs, row_lengths = K.convert_inputs_if_ragged(inputs)
            is_ragged_input = row_lengths is not None

            # batch size matters, use rnn-based implementation
            def step(x, _):
                output = self.layer(x, **kwargs)
                return output, []

            _, outputs, _ = K.rnn(step,
                                  inputs,
                                  initial_states=[],
                                  input_length=row_lengths[0]
                                  if is_ragged_input else input_shape[1],
                                  mask=mask,
                                  unroll=False)
            y = K.maybe_convert_to_ragged(is_ragged_input, outputs,
                                          row_lengths)
        else:
            # No batch size specified, therefore the layer will be able
            # to process batches of any size.
            # We can go with the reshape-based implementation for performance.
            if isinstance(inputs, ragged_tensor.RaggedTensor):
                y = self.layer(inputs.values, **kwargs)
                y = ragged_tensor.RaggedTensor.from_row_lengths(
                    y,
                    inputs.nested_row_lengths()[0])
            else:
                input_length = input_shape[1]
                if not input_length:
                    input_length = array_ops.shape(inputs)[1]
                inner_input_shape = self._get_shape_tuple((-1, ), inputs, 2)
                # Shape: (num_samples * timesteps, ...). And track the
                # transformation in self._input_map.
                inputs = array_ops.reshape(inputs, inner_input_shape)
                # (num_samples * timesteps, ...)
                if generic_utils.has_arg(self.layer.call,
                                         'mask') and mask is not None:
                    inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
                    kwargs['mask'] = K.reshape(mask, inner_mask_shape)

                y = self.layer(inputs, **kwargs)

                # Shape: (num_samples, timesteps, ...)
                output_shape = self.compute_output_shape(input_shape).as_list()
                output_shape = self._get_shape_tuple((-1, input_length), y, 1,
                                                     output_shape[2:])
                y = array_ops.reshape(y, output_shape)
                if not context.executing_eagerly():
                    # Set the static shape for the result since it might be
                    # lost during the array_ops reshape, e.g., some `None` dim
                    # in the result could be inferred.
                    y.set_shape(self.compute_output_shape(input_shape))

        return y
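
The reshape-based branch above folds time into the batch axis so the inner layer runs only once over all timesteps. A minimal NumPy sketch of the idea, using a Dense-like inner layer and assumed shapes:

import numpy as np

batch, timesteps, feat, units = 2, 5, 3, 4
x = np.random.rand(batch, timesteps, feat)
w = np.random.rand(feat, units)
# (batch, timesteps, feat) -> (batch * timesteps, feat) -> apply -> unfold
y = (x.reshape(-1, feat) @ w).reshape(batch, timesteps, units)
assert y.shape == (batch, timesteps, units)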
Esempio n. 37
0
def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])

  return K.mean(nn.in_top_k(y_pred, math_ops.cast(y_true, 'int32'), k), axis=-1)
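
A minimal NumPy sketch of the metric's semantics (assuming `k=2`; the real function evaluates `nn.in_top_k` in the graph):

import numpy as np

y_true = np.array([2, 1])               # sparse integer labels
y_pred = np.array([[0.1, 0.2, 0.7],
                   [0.3, 0.4, 0.3]])
k = 2
top_k = np.argsort(y_pred, axis=-1)[:, -k:]   # indices of the k largest scores
acc = np.mean([t in row for t, row in zip(y_true, top_k)])   # -> 1.0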
Esempio n. 38
0
def standard_gru(inputs, init_h, kernel, recurrent_kernel, bias, activation,
                 recurrent_activation, mask, time_major, go_backwards):
  """GRU with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and makes them function
  parameters. It has the same number of tensor input params as the CuDNN
  counterpart. The RNN step logic has been simplified, e.g., dropout and
  masking have been removed since the CuDNN implementation does not support
  them.

  Arguments:
    inputs: Input tensor of GRU layer.
    init_h: Initial state tensor for the cell output.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. The bias contains the
      combined input_bias and recurrent_bias.
    activation: Activation function to use for output.
    recurrent_activation: Activation function to use for hidden recurrent state.
    mask: Binary tensor of shape `(samples, timesteps)` indicating whether
      a given timestep should be masked.
    time_major: Boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has same shape as init_h.
    runtime: constant string tensor which indicates the real runtime hardware.
      This value is for testing purposes and should not be used by users.
  """
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[0] if time_major else input_shape[1]

  input_bias, recurrent_bias = array_ops.unstack(bias)

  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]

    # inputs projected by all gate matrices at once
    matrix_x = K.dot(cell_inputs, kernel)
    matrix_x = K.bias_add(matrix_x, input_bias)

    x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)

    # hidden state projected by all gate matrices at once
    matrix_inner = K.dot(h_tm1, recurrent_kernel)
    matrix_inner = K.bias_add(matrix_inner, recurrent_bias)

    recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner, 3,
                                                            axis=1)
    z = recurrent_activation(x_z + recurrent_z)
    r = recurrent_activation(x_r + recurrent_r)
    hh = activation(x_h + r * recurrent_h)

    # previous and candidate state mixed by update gate
    h = z * h_tm1 + (1 - z) * hh
    return h, [h]

  last_output, outputs, new_states = K.rnn(
      step,
      inputs, [init_h],
      constants=None,
      unroll=False,
      time_major=time_major,
      mask=mask,
      go_backwards=go_backwards,
      input_length=timesteps)
  return last_output, outputs, new_states[0], _runtime('cpu')
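
To make the gate arithmetic in `step` above concrete, here is a single GRU step in plain NumPy. The shapes and the sigmoid/tanh choices are assumptions standing in for `recurrent_activation` and `activation`:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, input_dim, units = 2, 3, 4
rng = np.random.default_rng(0)
x = rng.normal(size=(batch, input_dim))
h_tm1 = np.zeros((batch, units))
kernel = rng.normal(size=(input_dim, 3 * units))
recurrent_kernel = rng.normal(size=(units, 3 * units))
input_bias = np.zeros(3 * units)
recurrent_bias = np.zeros(3 * units)

# Inputs and hidden state projected by all gate matrices at once:
x_z, x_r, x_h = np.split(x @ kernel + input_bias, 3, axis=1)
r_z, r_r, r_h = np.split(h_tm1 @ recurrent_kernel + recurrent_bias, 3, axis=1)
z = sigmoid(x_z + r_z)                 # update gate
r = sigmoid(x_r + r_r)                 # reset gate
hh = np.tanh(x_h + r * r_h)            # candidate state
h = z * h_tm1 + (1 - z) * hh           # mix previous and candidate state
assert h.shape == (batch, units)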
Esempio n. 39
0
  def call(self, inputs, mask=None, training=None, initial_state=None):
    # LSTM does not support constants. Ignore them during processing.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

    if not self.could_use_cudnn:
      # Fall back to use the normal LSTM.
      kwargs = {'training': training}

      def step(inputs, states):
        return self.cell.call(inputs, states, **kwargs)

      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=timesteps,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      runtime = _runtime('unknown')
    else:
      # Use the new defun approach for backend implementation swap.
      # Note that the different implementations need to have the same function
      # signature, e.g., the tensor parameters need to have the same shapes
      # and dtypes. Since CuDNN has an extra set of biases, those biases will
      # be passed to both the normal and CuDNN implementations.
      self.reset_dropout_mask()
      dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
      if dropout_mask is not None:
        inputs *= dropout_mask[0]
      cudnn_lstm_kwargs = {
          'inputs': inputs,
          'init_h': initial_state[0],
          'init_c': initial_state[1],
          'kernel': self.cell.kernel,
          'recurrent_kernel': self.cell.recurrent_kernel,
          'bias': self.cell.bias,
          'mask': mask,
          'time_major': self.time_major,
          'go_backwards': self.go_backwards
      }
      normal_lstm_kwargs = cudnn_lstm_kwargs.copy()
      normal_lstm_kwargs.update({
          'activation': self.activation,
          'recurrent_activation': self.recurrent_activation
      })

      if context.executing_eagerly():
        device_type = _get_context_device_type()
        can_use_gpu = (
            # Either user specified GPU or unspecified but GPU is available.
            (device_type == _GPU_DEVICE_NAME
             or (device_type is None and context.num_gpus() > 0))
            and
            (mask is None or is_sequence_right_padded(mask, self.time_major)))
        # Under eager context, check the device placement and prefer the
        # GPU implementation when GPU is available.
        if can_use_gpu:
          last_output, outputs, new_h, new_c, runtime = cudnn_lstm(
              **cudnn_lstm_kwargs)
        else:
          last_output, outputs, new_h, new_c, runtime = standard_lstm(
              **normal_lstm_kwargs)
      else:
        # Each time a `tf.function` is called, we will give it a unique
        # identifiable API name, so that Grappler won't get confused when it
        # sees multiple LSTM layers added into same graph, and it will be able
        # to pair up the different implementations across them.
        api_name = 'lstm_' + str(uuid.uuid4())
        defun_standard_lstm = _generate_defun_backend(
            api_name, _CPU_DEVICE_NAME, standard_lstm)
        defun_cudnn_lstm = _generate_defun_backend(
            api_name, _GPU_DEVICE_NAME, cudnn_lstm)

        # Call the normal LSTM impl and register the CuDNN impl function. The
        # grappler will kick in during session execution to optimize the graph.
        last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(
            **normal_lstm_kwargs)

        def register_cudnn_defun():
          function.register(defun_cudnn_lstm, **cudnn_lstm_kwargs)
          # Return a dummy value since tf.cond requires a return value.
          return 0
        if mask is None:
          register_cudnn_defun()
        else:
          # The CuDNN kernel can only support masking properly when the
          # sequence is right-padded.
          control_flow_ops.cond(is_sequence_right_padded(mask, self.time_major),
                                true_fn=register_cudnn_defun,
                                false_fn=lambda: 0)
      states = [new_h, new_c]

    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(state_ops.assign(self.states[i], states[i]))
      self.add_update(updates, inputs)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    elif self.return_runtime:
      return output, runtime
    else:
      return output
Esempio n. 40
0
    def compute_mask(self, inputs, mask=None):
        """Computes an output mask tensor for Embedding layer.

    This is based on the inputs, mask, and the inner layer.
    If batch size is specified:
    Simply return the input `mask`. (An rnn-based implementation with
    more than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise we call `compute_mask` of the inner layer at each time step.
    If the output mask at each time step is not `None`:
    (E.g., inner layer is Masking or RNN)
    Concatenate all of them and return the concatenation.
    If the output mask at each time step is `None` and the input mask is not
    `None`:(E.g., inner layer is Dense)
    Reduce the input_mask to 2 dimensions and return it.
    Otherwise (both the output mask and the input mask are `None`):
    (E.g., `mask` is not used at all)
    Return `None`.

    Args:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
        input to TimeDistributed. If static shape information is available for
        "batch size", `mask` is returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
        input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either None (no masking), or a [batch size, timesteps, ...] Tensor with
      an output mask for the TimeDistributed layer with the shape beyond the
      second dimension being the value of the input mask shape(if the computed
      output mask is none), an output mask with the shape beyond the first
      dimension being the value of the mask shape(if mask is not None) or
      output mask with the shape beyond the first dimension being the
      value of the computed output shape.

    """
        # Cases that need to call layer.compute_mask even when input_mask is
        # None: a Masking layer, or an Embedding layer with mask_zero=True.
        input_shape = nest.map_structure(
            lambda x: tensor_shape.TensorShape(backend.int_shape(x)), inputs)
        input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
        batch_size = tf_utils.convert_shapes(input_shape)
        batch_size = nest.flatten(batch_size)[0]
        is_ragged_input = nest.map_structure(
            lambda x: isinstance(x, ragged_tensor.RaggedTensor), inputs)
        is_ragged_input = generic_utils.to_list(nest.flatten(is_ragged_input))
        if batch_size and not self._always_use_reshape or any(is_ragged_input):
            # The batch size matters and we currently do not handle the mask
            # explicitly in that case, or the layer always uses the reshape
            # approach, or the input is a ragged tensor.
            return mask
        inner_mask = mask
        if inner_mask is not None:
            inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
            inner_mask = backend.reshape(inner_mask, inner_mask_shape)
        inner_input_shape = nest.map_structure(
            lambda tensor: self._get_shape_tuple((-1, ), tensor, 2), inputs)
        inner_inputs = nest.map_structure_up_to(inputs, array_ops.reshape,
                                                inputs, inner_input_shape)
        output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
        if output_mask is None:
            if mask is None:
                return None
            # input_mask is not None, and output_mask is None:
            # we should return a not-None mask
            output_mask = mask
            for _ in range(2, len(backend.int_shape(mask))):
                output_mask = backend.any(output_mask, axis=-1)
        else:
            # output_mask is not None. We need to reshape it
            input_length = tf_utils.convert_shapes(input_shape)
            input_length = nest.flatten(input_length)[1]
            if not input_length:
                input_length = nest.map_structure(
                    lambda x: backend.shape(x)[1], inputs)
                input_length = nest.flatten(input_length)[0]
            output_mask_int_shape = backend.int_shape(output_mask)
            if output_mask_int_shape is None:
                # if the output_mask does not have a static shape,
                # its shape must be the same as mask's
                if mask is not None:
                    output_mask_int_shape = backend.int_shape(mask)
                else:
                    input_shape = generic_utils.to_list(
                        nest.flatten(input_shape))[0]
                    output_mask_int_shape = backend.compute_output_shape(
                        input_shape)[:-1]
            output_mask_shape = self._get_shape_tuple(
                (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
            output_mask = backend.reshape(output_mask, output_mask_shape)
        return output_mask
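
The `backend.any` loop above reduces a higher-rank input mask down to `(batch size, timesteps)`. A NumPy sketch of the same reduction, assuming a rank-3 mask:

import numpy as np

mask = np.random.rand(2, 5, 7) > 0.5   # rank-3 input mask
output_mask = mask
for _ in range(2, mask.ndim):
    output_mask = output_mask.any(axis=-1)   # collapse trailing axes
assert output_mask.shape == (2, 5)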
Esempio n. 41
0
def _adjust_block(p, ip, filters, block_id=None):
  """Adjusts the input `previous path` to match the shape of the `input`.

  Used in situations where the output number of filters needs to be changed.

  Arguments:
      p: Input tensor which needs to be modified
      ip: Input tensor whose shape needs to be matched
      filters: Number of output filters to be matched
      block_id: String block_id

  Returns:
      Adjusted Keras tensor
  """
  channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
  img_dim = 2 if K.image_data_format() == 'channels_first' else -2

  ip_shape = K.int_shape(ip)

  if p is not None:
    p_shape = K.int_shape(p)

  with K.name_scope('adjust_block'):
    if p is None:
      p = ip

    elif p_shape[img_dim] != ip_shape[img_dim]:
      with K.name_scope('adjust_reduction_block_%s' % block_id):
        p = Activation('relu', name='adjust_relu_1_%s' % block_id)(p)

        p1 = AveragePooling2D(
            (1, 1),
            strides=(2, 2),
            padding='valid',
            name='adjust_avg_pool_1_%s' % block_id)(
                p)
        p1 = Conv2D(
            filters // 2, (1, 1),
            padding='same',
            use_bias=False,
            name='adjust_conv_1_%s' % block_id,
            kernel_initializer='he_normal')(
                p1)

        p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
        p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2)
        p2 = AveragePooling2D(
            (1, 1),
            strides=(2, 2),
            padding='valid',
            name='adjust_avg_pool_2_%s' % block_id)(
                p2)
        p2 = Conv2D(
            filters // 2, (1, 1),
            padding='same',
            use_bias=False,
            name='adjust_conv_2_%s' % block_id,
            kernel_initializer='he_normal')(
                p2)

        p = concatenate([p1, p2], axis=channel_dim)
        p = BatchNormalization(
            axis=channel_dim,
            momentum=0.9997,
            epsilon=1e-3,
            name='adjust_bn_%s' % block_id)(
                p)

    elif p_shape[channel_dim] != filters:
      with K.name_scope('adjust_projection_block_%s' % block_id):
        p = Activation('relu')(p)
        p = Conv2D(
            filters, (1, 1),
            strides=(1, 1),
            padding='same',
            name='adjust_conv_projection_%s' % block_id,
            use_bias=False,
            kernel_initializer='he_normal')(
                p)
        p = BatchNormalization(
            axis=channel_dim,
            momentum=0.9997,
            epsilon=1e-3,
            name='adjust_bn_%s' % block_id)(
                p)
  return p
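
The `p2` path above pads bottom/right and then crops top/left, shifting the feature map by one pixel so the second stride-2 pooling samples the complementary grid. A NumPy sketch of the shift on a single channel:

import numpy as np

fm = np.arange(16.0).reshape(4, 4)
padded = np.pad(fm, ((0, 1), (0, 1)))   # ZeroPadding2D(((0, 1), (0, 1)))
shifted = padded[1:, 1:]                # Cropping2D(((1, 0), (1, 0)))
assert shifted.shape == fm.shape        # same size, shifted up-left by one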
Esempio n. 42
0
  def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    """`Bidirectional.__call__` implements the same API as the wrapped `RNN`."""
    inputs, initial_state, constants = _standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if isinstance(inputs, list):
      if len(inputs) > 1:
        initial_state = inputs[1:]
      inputs = inputs[0]

    if initial_state is None and constants is None:
      return super(Bidirectional, self).__call__(inputs, **kwargs)

    # Applies the same workaround as in `RNN.__call__`
    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
      # Check if `initial_state` can be split in half
      num_states = len(initial_state)
      if num_states % 2 > 0:
        raise ValueError(
            'When passing `initial_state` to a Bidirectional RNN, '
            'the state should be a list containing the states of '
            'the underlying RNNs. '
            'Found: ' + str(initial_state))

      kwargs['initial_state'] = initial_state
      additional_inputs += initial_state
      state_specs = [InputSpec(shape=K.int_shape(state))
                     for state in initial_state]
      self.forward_layer.state_spec = state_specs[:num_states // 2]
      self.backward_layer.state_spec = state_specs[num_states // 2:]
      additional_specs += state_specs
    if constants is not None:
      kwargs['constants'] = constants
      additional_inputs += constants
      constants_spec = [InputSpec(shape=K.int_shape(constant))
                        for constant in constants]
      self.forward_layer.constants_spec = constants_spec
      self.backward_layer.constants_spec = constants_spec
      additional_specs += constants_spec

      self._num_constants = len(constants)
      self.forward_layer._num_constants = self._num_constants
      self.backward_layer._num_constants = self._num_constants

    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
      if K.is_keras_tensor(tensor) != is_keras_tensor:
        raise ValueError('The initial state of a Bidirectional'
                         ' layer cannot be specified with a mix of'
                         ' Keras tensors and non-Keras tensors'
                         ' (a "Keras tensor" is a tensor that was'
                         ' returned by a Keras layer, or by `Input`)')

    if is_keras_tensor:
      # Compute the full input spec, including state
      full_input = [inputs] + additional_inputs
      # The original input_spec is None since there could be a nested tensor
      # input. Update the input_spec to match the inputs.
      full_input_spec = [None for _ in range(len(nest.flatten(inputs)))
                        ] + additional_specs

      # Perform the call with temporarily replaced input_spec
      original_input_spec = self.input_spec
      self.input_spec = full_input_spec
      output = super(Bidirectional, self).__call__(full_input, **kwargs)
      self.input_spec = original_input_spec
      return output
    else:
      return super(Bidirectional, self).__call__(inputs, **kwargs)
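
A hedged usage sketch of the `initial_state` contract enforced above: the list must split evenly between the forward and backward RNNs. This uses the standard tf.keras API with assumed shapes:

import tensorflow as tf

x = tf.random.normal((2, 5, 3))
fwd_h = tf.zeros((2, 4))   # state for the forward RNN
bwd_h = tf.zeros((2, 4))   # state for the backward RNN
bidir = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(4))
y = bidir(x, initial_state=[fwd_h, bwd_h])   # -> shape (2, 8)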
Esempio n. 43
0
  def compute_mask(self, inputs, mask=None):
    """Computes an output mask tensor for Embedding layer.

    This is based on the inputs, mask, and the inner layer.
    If batch size is specified:
    Simply return the input `mask`. (An rnn-based implementation with
    more than one rnn inputs is required but not supported in tf.keras yet.)
    Otherwise we call `compute_mask` of the inner layer at each time step.
    If the output mask at each time step is not `None`:
    (E.g., inner layer is Masking or RNN)
    Concatenate all of them and return the concatenation.
    If the output mask at each time step is `None` and the input mask is not
    `None`:(E.g., inner layer is Dense)
    Reduce the input_mask to 2 dimensions and return it.
    Otherwise (both the output mask and the input mask are `None`):
    (E.g., `mask` is not used at all)
    Return `None`.

    Arguments:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
        input to TimeDistributed. If static shape information is available for
        "batch size", `mask` is returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
        input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either None (no masking), or a [batch size, timesteps, ...] Tensor with
      an output mask for the TimeDistributed layer, with the shape beyond the
      second dimension being the value of the input mask shape (if the
      computed output mask is None), an output mask with the shape beyond the
      first dimension being the value of the mask shape (if mask is not None),
      or an output mask with the shape beyond the first dimension being the
      value of the computed output shape.

    """
    # Cases that need to call layer.compute_mask even when input_mask is None:
    # a Masking layer, or an Embedding layer with mask_zero=True.
    input_shape = K.int_shape(inputs)
    if input_shape[0]:
      # Batch size matters; we currently do not handle the mask explicitly.
      return mask
    inner_mask = mask
    if inner_mask is not None:
      inner_mask_shape = self._get_shape_tuple((-1,), mask, 2)
      inner_mask = K.reshape(inner_mask, inner_mask_shape)
    input_uid = generic_utils.object_list_uid(inputs)
    inner_inputs = self._input_map.get(input_uid, inputs)
    output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
    if output_mask is None:
      if mask is None:
        return None
      # input_mask is not None, and output_mask is None:
      # we should return a not-None mask
      output_mask = mask
      for _ in range(2, len(K.int_shape(mask))):
        output_mask = K.any(output_mask, axis=-1)
    else:
      # output_mask is not None. We need to reshape it
      input_length = input_shape[1]
      if not input_length:
        input_length = K.shape(inputs)[1]
      output_mask_int_shape = K.int_shape(output_mask)
      if output_mask_int_shape is None:
        # if the output_mask does not have a static shape,
        # its shape must be the same as mask's
        if mask is not None:
          output_mask_int_shape = K.int_shape(mask)
        else:
          output_mask_int_shape = K.compute_output_shape(input_shape)[:-1]
      output_mask_shape = self._get_shape_tuple(
          (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
      output_mask = K.reshape(output_mask, output_mask_shape)
    return output_mask
Esempio n. 44
0
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio,
                        activation, block_id):
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  shortcut = x
  prefix = 'expanded_conv/'
  infilters = backend.int_shape(x)[channel_axis]
  if block_id:
    # Expand
    prefix = 'expanded_conv_{}/'.format(block_id)
    x = layers.Conv2D(
        _depth(infilters * expansion),
        kernel_size=1,
        padding='same',
        use_bias=False,
        name=prefix + 'expand')(
            x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        epsilon=1e-3,
        momentum=0.999,
        name=prefix + 'expand/BatchNorm')(
            x)
    x = activation(x)

  if stride == 2:
    x = layers.ZeroPadding2D(
        padding=imagenet_utils.correct_pad(x, kernel_size),
        name=prefix + 'depthwise/pad')(
            x)
  x = layers.DepthwiseConv2D(
      kernel_size,
      strides=stride,
      padding='same' if stride == 1 else 'valid',
      use_bias=False,
      name=prefix + 'depthwise')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'depthwise/BatchNorm')(
          x)
  x = activation(x)

  if se_ratio:
    x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

  x = layers.Conv2D(
      filters,
      kernel_size=1,
      padding='same',
      use_bias=False,
      name=prefix + 'project')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'project/BatchNorm')(
          x)

  if stride == 1 and infilters == filters:
    x = layers.Add(name=prefix + 'Add')([shortcut, x])
  return x
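
`_depth` is not included in this snippet. In the reference MobileNet code it rounds a channel count to the nearest multiple of a divisor (8 by default) while never dropping more than 10%; a sketch under that assumption:

def _depth(v, divisor=8, min_value=None):
    # Round `v` to the nearest multiple of `divisor`, at least `min_value`.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not drop more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v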
Esempio n. 45
0
def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias,
                  activation, recurrent_activation, mask, time_major,
                  go_backwards):
  """LSTM with standard kernel implementation.

  This implementation can be run on all types of hardware.

  This implementation lifts out all the layer weights and makes them function
  parameters. It has the same number of tensor input params as the CuDNN
  counterpart. The RNN step logic has been simplified, e.g., dropout and
  masking have been removed since the CuDNN implementation does not support
  them.

  Note that the first half of the bias tensor should be ignored by this impl.
  The CuDNN impl needs an extra set of input gate biases. In order to make
  both functions take parameters of the same shape, that extra set of biases
  is also fed here.

  Args:
    inputs: input tensor of LSTM layer.
    init_h: initial state tensor for the cell output.
    init_c: initial state tensor for the cell hidden state.
    kernel: weights for cell kernel.
    recurrent_kernel: weights for cell recurrent kernel.
    bias: weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    activation: Activation function to use for output.
    recurrent_activation: Activation function to use for hidden recurrent state.
    mask: Boolean tensor used to mask out steps within the sequence.
    time_major: boolean, whether the inputs are in the format of
      [time, batch, feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.

  Returns:
    last_output: output tensor for the last timestep, which has shape
      [batch, units].
    outputs: output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: the cell output, which has same shape as init_h.
    state_1: the cell hidden state, which has same shape as init_c.
    runtime: constant string tensor which indicates the real runtime hardware.
      This value is for testing purposes and should not be used by users.
  """
  input_shape = K.int_shape(inputs)
  timesteps = input_shape[0] if time_major else input_shape[1]

  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    z = K.dot(cell_inputs, kernel)
    z += K.dot(h_tm1, recurrent_kernel)
    z = K.bias_add(z, bias)

    z0, z1, z2, z3 = array_ops.split(z, 4, axis=1)

    i = recurrent_activation(z0)
    f = recurrent_activation(z1)
    c = f * c_tm1 + i * activation(z2)
    o = recurrent_activation(z3)

    h = o * activation(c)
    return h, [h, c]

  last_output, outputs, new_states = K.rnn(
      step,
      inputs, [init_h, init_c],
      constants=None,
      unroll=False,
      time_major=time_major,
      mask=mask,
      go_backwards=go_backwards,
      input_length=timesteps)
  return last_output, outputs, new_states[0], new_states[1], _runtime('cpu')
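
To make the gate arithmetic in `step` above concrete, here is a single LSTM step in plain NumPy. The shapes and the sigmoid/tanh choices are assumptions standing in for `recurrent_activation` and `activation`:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, input_dim, units = 2, 3, 4
rng = np.random.default_rng(0)
x = rng.normal(size=(batch, input_dim))
h_tm1 = np.zeros((batch, units))          # previous memory state
c_tm1 = np.zeros((batch, units))          # previous carry state
kernel = rng.normal(size=(input_dim, 4 * units))
recurrent_kernel = rng.normal(size=(units, 4 * units))
bias = np.zeros(4 * units)

z = x @ kernel + h_tm1 @ recurrent_kernel + bias
z0, z1, z2, z3 = np.split(z, 4, axis=1)   # gate order: i, f, c, o
i, f, o = sigmoid(z0), sigmoid(z1), sigmoid(z3)
c = f * c_tm1 + i * np.tanh(z2)
h = o * np.tanh(c)
assert h.shape == (batch, units) and c.shape == (batch, units)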
Esempio n. 46
0
def _main(args):
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(
        shape=(3, ), dtype='int32', buffer=weights_file.read(12))
    if (major*10+minor)>=2 and major<1000 and minor<1000:
        seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0
    out_index = []
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            print('conv2d', 'bn'
                  if batch_normalize else '  ', activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride>1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer)
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    pool_size=(size, size),
                    strides=(stride, stride),
                    padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            out_index.append(len(all_layers)-1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    if len(out_index)==0: out_index.append(len(all_layers)-1)
    model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
    print(model.summary())
    if args.weights_only:
        model.save_weights('{}'.format(output_path))
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
Esempio n. 47
0
    # Convert the image into a NumPy array
    img = np.array(img)
    # Normalize the pixel values to [0, 1]
    img = img / 255.0
    # Convert a 2-dim grayscale array into a 3-dim RGB array
    if (len(img.shape) == 2):
        img = np.repeat(img[:, :, np.newaxis], 3, axis=2)
    return img


image_model = VGG16(include_top=True, weights='imagenet')  # Load the pre-trained VGG16 model
transfer_layer = image_model.get_layer('fc2')  # Use VGG16's fc2 layer as the transfer layer, dropping the layers after it
image_model_transfer = Model(
    inputs=image_model.input,  # Re-create the model without the final classification layer, keeping the transfer layer as output
    outputs=transfer_layer.output)
img_size = K.int_shape(image_model.input)[1:3]  # Size of the input images
transfer_values_size = K.int_shape(transfer_layer.output)[1]  # Size of the output transfer-value vector


def print_progress(count, max_count):  # Print processing progress
    # Percentage complete
    pct_complete = count / max_count

    # Print the progress
    msg = "\r- Progress: {0:.1%}".format(pct_complete)
    sys.stdout.write(msg)
    sys.stdout.flush()


def process_images(data_dir, filenames, batch_size=32):  # Process a batch of images
Esempio n. 48
0
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. /
                       (1. + self.decay *
                        math_ops.cast(self.iterations, K.dtype(self.decay))))

        with ops.control_dependencies(
            [state_ops.assign_add(self.iterations, 1)]):
            t = math_ops.cast(self.iterations, K.floatx())
        lr_bc = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (
            1. - math_ops.pow(self.beta_1, t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        lams = [K.zeros(1, dtype=K.dtype(p)) for p in params]
        conds = [K.variable(False, dtype='bool') for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats + lams + conds

        for p, g, m, v, vhat, lam, cond in zip(params, grads, ms, vs, vhats,
                                               lams, conds):
            beta_g = m_switch(cond, 1.0, 1.0 - self.beta_1)
            m_t = (self.beta_1 * m) + beta_g * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
            if self.amsgrad:
                vhat_t = math_ops.maximum(vhat, v_t)
                p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(vhat_t) +
                                         self.epsilon)
                self.updates.append(state_ops.assign(vhat, vhat_t))
            else:
                p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(v_t) + self.epsilon)
            gamma_den = math_ops.reduce_sum(p_t_ada * g)
            gamma = math_ops.reduce_sum(gen_math_ops.square(p_t_ada)) / (
                math_ops.abs(gamma_den) +
                self.epsilon) * (gen_math_ops.sign(gamma_den) + self.epsilon)
            lam_t = (self.beta_2 * lam) + (1. - self.beta_2) * gamma
            lam_prime = lam / (1. - math_ops.pow(self.beta_2, t))
            lam_t_prime = lam_t / (1. - math_ops.pow(self.beta_2, t))
            lg_err = math_ops.abs(lam_t_prime - gamma)
            cond_update = gen_math_ops.logical_or(
                gen_math_ops.logical_and(
                    gen_math_ops.logical_and(self.iterations > 1,
                                             lg_err < 1e-5), lam_t > 0),
                cond)[0]
            lam_update = m_switch(cond_update, lam, lam_t)
            self.updates.append(state_ops.assign(lam, lam_update))
            self.updates.append(state_ops.assign(cond, cond_update))

            p_t_sgd = (1. - self.beta_1) * lam_prime * m_t

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))

            new_p = m_switch(cond, p - lr * p_t_sgd, p - lr * p_t_ada)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
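
`m_switch` is not defined in this snippet; from its usage it behaves like a conditional select between two expressions. A sketch assuming the Keras backend's `switch`:

from tensorflow.python.keras import backend as K

def m_switch(condition, then_expression, else_expression):
    # Select between two expressions based on a boolean scalar/tensor.
    return K.switch(condition, then_expression, else_expression)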
Esempio n. 49
0
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
  """Adds an Inception-ResNet block.

  This function builds 3 types of Inception-ResNet blocks mentioned
  in the paper, controlled by the `block_type` argument (which is the
  block name used in the official TF-slim implementation):
      - Inception-ResNet-A: `block_type='block35'`
      - Inception-ResNet-B: `block_type='block17'`
      - Inception-ResNet-C: `block_type='block8'`

  Arguments:
      x: input tensor.
      scale: scaling factor to scale the residuals (i.e., the output of
          passing `x` through an inception module) before adding them to the
          shortcut branch. Let `r` be the output from the residual branch;
          the output of this block will be `x + scale * r`.
      block_type: `'block35'`, `'block17'` or `'block8'`, determines
          the network structure in the residual branch.
      block_idx: an `int` used for generating layer names. The
          Inception-ResNet blocks are repeated many times in this network.
          We use `block_idx` to identify each of the repetitions. For example,
          the first Inception-ResNet-A block will have `block_type='block35',
          block_idx=0`, and the layer names will have a common prefix
          `'block35_0'`.
      activation: activation function to use at the end of the block.
          When `activation=None`, no activation is applied
          (i.e., "linear" activation: `a(x) = x`).

  Returns:
      Output tensor for the block.

  Raises:
      ValueError: if `block_type` is not one of `'block35'`,
          `'block17'` or `'block8'`.
  """
  if block_type == 'block35':
    branch_0 = conv2d_bn(x, 32, 1)
    branch_1 = conv2d_bn(x, 32, 1)
    branch_1 = conv2d_bn(branch_1, 32, 3)
    branch_2 = conv2d_bn(x, 32, 1)
    branch_2 = conv2d_bn(branch_2, 48, 3)
    branch_2 = conv2d_bn(branch_2, 64, 3)
    branches = [branch_0, branch_1, branch_2]
  elif block_type == 'block17':
    branch_0 = conv2d_bn(x, 192, 1)
    branch_1 = conv2d_bn(x, 128, 1)
    branch_1 = conv2d_bn(branch_1, 160, [1, 7])
    branch_1 = conv2d_bn(branch_1, 192, [7, 1])
    branches = [branch_0, branch_1]
  elif block_type == 'block8':
    branch_0 = conv2d_bn(x, 192, 1)
    branch_1 = conv2d_bn(x, 192, 1)
    branch_1 = conv2d_bn(branch_1, 224, [1, 3])
    branch_1 = conv2d_bn(branch_1, 256, [3, 1])
    branches = [branch_0, branch_1]
  else:
    raise ValueError('Unknown Inception-ResNet block type. '
                     'Expects "block35", "block17" or "block8", '
                     'but got: ' + str(block_type))

  block_name = block_type + '_' + str(block_idx)
  channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
  mixed = Concatenate(axis=channel_axis, name=block_name + '_mixed')(branches)
  up = conv2d_bn(
      mixed,
      K.int_shape(x)[channel_axis],
      1,
      activation=None,
      use_bias=True,
      name=block_name + '_conv')

  x = Lambda(
      lambda inputs, scale: inputs[0] + inputs[1] * scale,
      output_shape=K.int_shape(x)[1:],
      arguments={'scale': scale},
      name=block_name)([x, up])
  if activation is not None:
    x = Activation(activation, name=block_name + '_ac')(x)
  return x
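
`conv2d_bn` is not included in this snippet; a sketch consistent with the calls above (a convolution followed by batch normalization and an optional activation, modeled on the reference Inception-ResNet helper):

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Activation, BatchNormalization, Conv2D

def conv2d_bn(x, filters, kernel_size, strides=1, padding='same',
              activation='relu', use_bias=False, name=None):
    # Conv2D + (optional) BatchNormalization + (optional) activation.
    x = Conv2D(filters, kernel_size, strides=strides, padding=padding,
               use_bias=use_bias, name=name)(x)
    if not use_bias:
        bn_axis = 1 if K.image_data_format() == 'channels_first' else 3
        bn_name = None if name is None else name + '_bn'
        x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
    if activation is not None:
        ac_name = None if name is None else name + '_ac'
        x = Activation(activation, name=ac_name)(x)
    return x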
Esempio n. 50
0
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. /
                       (1. + self.decay *
                        math_ops.cast(self.iterations, K.dtype(self.decay))))

        t = math_ops.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e., the warming momentum schedule
        momentum_cache_t = self.beta_1 * (
            1. - 0.5 *
            (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (
            1. - 0.5 * (math_ops.pow(K.cast_to_floatx(0.96),
                                     (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # The following equations are given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + self.beta_g * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
            if self.amsgrad:
                vhat_t = math_ops.maximum(vhat, v_t)
                self.updates.append(state_ops.assign(vhat, vhat_t))
                v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t))
            else:
                v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
            m_t_bar = (self.beta_g / (1. - self.beta_1)) * (
                1. -
                momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))

            p_t_ada = p - lr * m_t_bar / (gen_math_ops.sqrt(v_t_prime) +
                                          self.epsilon)
            p_t_sgd = p - self.lr_boost * lr * m_t_bar

            new_p = m_switch(self.switch_flag, p_t_sgd, p_t_ada)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Esempio n. 51
0
def _adjust_block(p, ip, filters, block_id=None):
    """Adjusts the input `previous path` to match the shape of the `input`.

  Used in situations where the output number of filters needs to be changed.

  Arguments:
      p: Input tensor which needs to be modified
      ip: Input tensor whose shape needs to be matched
      filters: Number of output filters to be matched
      block_id: String block_id

  Returns:
      Adjusted Keras tensor
  """
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1
    img_dim = 2 if backend.image_data_format() == 'channels_first' else -2

    ip_shape = backend.int_shape(ip)

    if p is not None:
        p_shape = backend.int_shape(p)

    with backend.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p_shape[img_dim] != ip_shape[img_dim]:
            with backend.name_scope('adjust_reduction_block_%s' % block_id):
                p = layers.Activation('relu',
                                      name='adjust_relu_1_%s' % block_id)(p)
                p1 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_1_%s' % block_id)(p)
                p1 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   use_bias=False,
                                   name='adjust_conv_1_%s' % block_id,
                                   kernel_initializer='he_normal')(p1)

                p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_2_%s' % block_id)(p2)
                p2 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   use_bias=False,
                                   name='adjust_conv_2_%s' % block_id,
                                   kernel_initializer='he_normal')(p2)

                p = layers.concatenate([p1, p2], axis=channel_dim)
                p = layers.BatchNormalization(axis=channel_dim,
                                              momentum=0.9997,
                                              epsilon=1e-3,
                                              name='adjust_bn_%s' %
                                              block_id)(p)

        elif p_shape[channel_dim] != filters:
            with backend.name_scope('adjust_projection_block_%s' % block_id):
                p = layers.Activation('relu')(p)
                p = layers.Conv2D(filters, (1, 1),
                                  strides=(1, 1),
                                  padding='same',
                                  name='adjust_conv_projection_%s' % block_id,
                                  use_bias=False,
                                  kernel_initializer='he_normal')(p)
                p = layers.BatchNormalization(axis=channel_dim,
                                              momentum=0.9997,
                                              epsilon=1e-3,
                                              name='adjust_bn_%s' %
                                              block_id)(p)
    return p
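A minimal usage sketch, assuming the NASNet module's `layers`/`backend` imports (`tensorflow.keras`) are in scope; the shapes are illustrative:

from tensorflow.keras import layers

# Previous path at 32x32, current input at 16x16: the spatial mismatch
# triggers the factorized-reduction branch, which halves the resolution
# of `p` and projects it to `filters` channels.
p = layers.Input(shape=(32, 32, 32))
ip = layers.Input(shape=(16, 16, 64))
out = _adjust_block(p, ip, filters=64, block_id='demo')
print(out.shape)  # (None, 16, 16, 64)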
Example n. 52
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                classifier_activation='softmax'):
    if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    layer_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not a valid input tensor')
        if is_input_t_tensor:
            if backend.image_data_format() == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
                input_shape = (3, cols, rows)
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]
                input_shape = (cols, rows, 3)
    # If both input_shape and input_tensor are None, use a standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')
    if weights == 'imagenet':
        if (not minimalistic and alpha not in [0.75, 1.0]
                or minimalistic and alpha != 1.0):
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            logging.warning('`input_shape` is undefined or non-square, '
                            'or `rows` is not 224.'
                            ' Weights for input shape (224, 224) will be'
                            ' loaded as the default.')

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    x = img_input
    x = layers.Rescaling(scale=1. / 127.5, offset=-1.)(x)
    x = layers.Conv2D(16,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name='Conv')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv/BatchNorm')(x)
    x = activation(x)

    x = stack_fn(x, kernel, activation, se_ratio)

    last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)
    x = layers.Conv2D(last_conv_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1/BatchNorm')(x)
    x = activation(x)
    x = layers.GlobalAveragePooling2D(keepdims=True)(x)
    x = layers.Conv2D(last_point_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=True,
                      name='Conv_2')(x)
    x = activation(x)

    if include_top:
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate)(x)
        x = layers.Conv2D(classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits')(x)
        x = layers.Flatten()(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(activation=classifier_activation,
                              name='Predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = '{}{}_224_{}_float'.format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHT_PATH + file_name,
                                           cache_subdir='models',
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
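A minimal sketch of how the `stack_fn` argument plugs in, assuming the module-level helpers `_inverted_res_block`, `_depth`, and `relu` defined elsewhere in the same file (their signatures are an assumption here); the single-block body is illustrative only:

def tiny_stack_fn(x, kernel, activation, se_ratio):
    # One inverted-residual block; the real 'small'/'large' variants stack
    # many of these with varying expansion factors and strides.
    return _inverted_res_block(x, 1, _depth(16), 3, 2, se_ratio, relu, 0)

model = MobileNetV3(tiny_stack_fn, last_point_ch=1024,
                    input_shape=(224, 224, 3), weights=None)
model.summary()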
Example n. 53
    def compute_mask(self, inputs, mask=None):
        """Computes an output mask tensor for Embedding layer.

    This is based on the inputs, mask, and the inner layer.
    If the batch size is specified:
        Simply return the input `mask`. (An rnn-based implementation with
        more than one rnn input is required but not supported in tf.keras
        yet.)
    Otherwise, call `compute_mask` of the inner layer at each time step.
    If the output mask at each time step is not `None`
    (e.g., the inner layer is Masking or RNN):
        Concatenate all of them and return the concatenation.
    If the output mask at each time step is `None` and the input mask is
    not `None` (e.g., the inner layer is Dense):
        Reduce the input mask to 2 dimensions and return it.
    Otherwise (both the output mask and the input mask are `None`, e.g.,
    `mask` is not used at all):
        Return `None`.

    Arguments:
      inputs: Tensor with shape [batch size, timesteps, ...] indicating the
          input to TimeDistributed. If static shape information is available for
          "batch size", `mask` is returned unmodified.
      mask: Either None (indicating no masking) or a Tensor indicating the
          input mask for TimeDistributed. The shape can be static or dynamic.

    Returns:
      Either `None` (no masking), or a [batch size, timesteps, ...] tensor
      carrying an output mask for the TimeDistributed layer. Beyond the
      second dimension, its shape follows the input mask shape (if the
      computed output mask is `None`), the shape of `mask` (if `mask` is
      not `None`), or the computed output shape otherwise.

    """
        # cases need to call the layer.compute_mask when input_mask is None:
        # Masking layer and Embedding layer with mask_zero
        input_shape = K.int_shape(inputs)
        if input_shape[0]:
            # batch size matters, we currently do not handle mask explicitly
            return mask
        inner_mask = mask
        if inner_mask is not None:
            inner_mask_shape = self._get_shape_tuple((-1, ), mask, 2)
            inner_mask = K.reshape(inner_mask, inner_mask_shape)
        input_uid = generic_utils.object_list_uid(inputs)
        inner_inputs = self._input_map.get(input_uid, inputs)
        output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
        if output_mask is None:
            if mask is None:
                return None
            # input_mask is not None, and output_mask is None:
            # we should return a not-None mask
            output_mask = mask
            for _ in range(2, len(K.int_shape(mask))):
                output_mask = K.any(output_mask, axis=-1)
        else:
            # output_mask is not None. We need to reshape it
            input_length = input_shape[1]
            if not input_length:
                input_length = K.shape(inputs)[1]
            output_mask_int_shape = K.int_shape(output_mask)
            if output_mask_int_shape is None:
                # if the output_mask does not have a static shape,
                # its shape must be the same as mask's
                if mask is not None:
                    output_mask_int_shape = K.int_shape(mask)
                else:
                    output_mask_int_shape = K.compute_output_shape(
                        input_shape)[:-1]
            output_mask_shape = self._get_shape_tuple(
                (-1, input_length), output_mask, 1, output_mask_int_shape[1:])
            output_mask = K.reshape(output_mask, output_mask_shape)
        return output_mask
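This path is exercised when a masked sequence flows through `TimeDistributed`; a minimal sketch with stock Keras layers, independent of the snippet's internals:

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# mask_zero=True makes the Embedding emit a (batch, timesteps) mask that
# TimeDistributed must propagate or reduce for its inner Dense layer.
inp = layers.Input(shape=(4,), dtype='int32')
x = layers.Embedding(input_dim=10, output_dim=8, mask_zero=True)(inp)
out = layers.TimeDistributed(layers.Dense(2))(x)
model = tf.keras.Model(inp, out)

y = model(np.array([[1, 2, 0, 0]]))
print(y._keras_mask)  # [[ True  True False False]]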
Example n. 54
 def _create_all_weights(self, params):
     shapes = [backend.int_shape(p) for p in params]
     moments = [backend.zeros(shape) for shape in shapes]
     self.weights = [self.iterations] + moments
     return moments
Example n. 55
def load_weights_from_hdf5_group_by_name(f, layers, skip_mismatch=False):
    """Implements name-based weight loading.

  (instead of topological weight loading).

  Layers that have no matching name are skipped.

  Arguments:
      f: A pointer to a HDF5 group.
      layers: a list of target layers.
      skip_mismatch: Boolean, whether to skip loading of layers
          where there is a mismatch in the number of weights,
          or a mismatch in the shape of the weights.

  Raises:
      ValueError: in case of mismatch between provided layers
          and weights file and skip_mismatch=False.
  """
    if 'keras_version' in f.attrs:
        original_keras_version = f.attrs['keras_version'].decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in f.attrs:
        original_backend = f.attrs['backend'].decode('utf8')
    else:
        original_backend = None

    # New file format.
    layer_names = load_attributes_from_hdf5_group(f, 'layer_names')

    # Reverse index of layer name to list of layers with name.
    index = {}
    for layer in layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
        weight_values = [
            np.asarray(g[weight_name]) for weight_name in weight_names
        ]

        for layer in index.get(name, []):
            symbolic_weights = _legacy_weights(layer)
            weight_values = preprocess_weights_for_loading(
                layer, weight_values, original_keras_version, original_backend)
            if len(weight_values) != len(symbolic_weights):
                if skip_mismatch:
                    logging.warning(
                        'Skipping loading of weights for '
                        'layer {}'.format(layer.name) + ' due to mismatch '
                        'in number of weights ({} vs {}).'.format(
                            len(symbolic_weights), len(weight_values)))
                    continue
                raise ValueError('Layer #' + str(k) + ' (named "' +
                                 layer.name + '") expects ' +
                                 str(len(symbolic_weights)) +
                                 ' weight(s), but the saved weights' +
                                 ' have ' + str(len(weight_values)) +
                                 ' element(s).')
            # Set values.
            for i in range(len(weight_values)):
                if K.int_shape(symbolic_weights[i]) != weight_values[i].shape:
                    if skip_mismatch:
                        logging.warning('Skipping loading of weights for '
                                        'layer {}'.format(layer.name) +
                                        ' due to '
                                        'mismatch in shape ({} vs {}).'.format(
                                            symbolic_weights[i].shape,
                                            weight_values[i].shape))
                        continue
                    raise ValueError('Layer #' + str(k) + ' (named "' +
                                     layer.name + '"), weight ' +
                                     str(symbolic_weights[i]) +
                                     ' has shape {}'.format(
                                         K.int_shape(symbolic_weights[i])) +
                                     ', but the saved weight has shape ' +
                                     str(weight_values[i].shape) + '.')

                else:
                    weight_value_tuples.append(
                        (symbolic_weights[i], weight_values[i]))
    K.batch_set_value(weight_value_tuples)
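A minimal usage sketch, assuming an HDF5 checkpoint written by `model.save_weights` and a `model` already in scope; the filename is illustrative:

import h5py

# Load by layer name, tolerating renamed or reshaped layers instead of
# raising, thanks to skip_mismatch=True.
with h5py.File('checkpoint.h5', 'r') as f:
    load_weights_from_hdf5_group_by_name(f, model.layers, skip_mismatch=True)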
Example n. 56
 def _create_all_weights(self, params):
     shapes = [backend.int_shape(p) for p in params]
     accumulators = [backend.zeros(shape) for shape in shapes]
     delta_accumulators = [backend.zeros(shape) for shape in shapes]
     self.weights = accumulators + delta_accumulators
     return accumulators, delta_accumulators
Example n. 57
def load_weights_from_hdf5_group_by_name(f, layers):
  """Implements name-based weight loading.

  (instead of topological weight loading).

  Layers that have no matching name are skipped.

  Arguments:
      f: A pointer to a HDF5 group.
      layers: a list of target layers.

  Raises:
      ValueError: in case of mismatch between provided layers
          and weights file.
  """
  if 'keras_version' in f.attrs:
    original_keras_version = f.attrs['keras_version'].decode('utf8')
  else:
    original_keras_version = '1'
  if 'backend' in f.attrs:
    original_backend = f.attrs['backend'].decode('utf8')
  else:
    original_backend = None

  # New file format.
  layer_names = load_attributes_from_hdf5_group(f, 'layer_names')

  # Reverse index of layer name to list of layers with name.
  index = {}
  for layer in layers:
    if layer.name:
      index.setdefault(layer.name, []).append(layer)

  # We batch weight value assignments in a single backend call
  # which provides a speedup in TensorFlow.
  weight_value_tuples = []
  for k, name in enumerate(layer_names):
    g = f[name]
    weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
    weight_values = [np.asarray(g[weight_name]) for weight_name in weight_names]

    for layer in index.get(name, []):
      symbolic_weights = layer.weights
      weight_values = preprocess_weights_for_loading(
          layer, weight_values, original_keras_version, original_backend)
      if len(weight_values) != len(symbolic_weights):
        raise ValueError('Layer #' + str(k) + ' (named "' + layer.name +
                         '") expects ' + str(len(symbolic_weights)) +
                         ' weight(s), but the saved weights' + ' have ' +
                         str(len(weight_values)) + ' element(s).')
      # Set values.
      for i in range(len(weight_values)):
        if K.int_shape(symbolic_weights[i]) != weight_values[i].shape:
          raise ValueError('Layer #' + str(k) +' (named "' + layer.name +
                           '"), weight ' + str(symbolic_weights[i]) +
                           ' has shape {}'.format(K.int_shape(
                               symbolic_weights[i])) +
                           ', but the saved weight has shape ' +
                           str(weight_values[i].shape) + '.')

        else:
          weight_value_tuples.append((symbolic_weights[i], weight_values[i]))
  K.batch_set_value(weight_value_tuples)
Example n. 58
def preprocess_weights_for_loading(layer,
                                   weights,
                                   original_keras_version=None,
                                   original_backend=None):
    """Converts layers weights from Keras 1 format to Keras 2.

  Arguments:
      layer: Layer instance.
      weights: List of weights values (Numpy arrays).
      original_keras_version: Keras version for the weights, as a string.
      original_backend: Keras backend the weights were trained with,
          as a string.

  Returns:
      A list of weights values (Numpy arrays).
  """
    if layer.__class__.__name__ == 'Bidirectional':
        num_weights_per_layer = len(weights) // 2
        forward_weights = preprocess_weights_for_loading(
            layer.forward_layer, weights[:num_weights_per_layer],
            original_keras_version, original_backend)
        backward_weights = preprocess_weights_for_loading(
            layer.backward_layer, weights[num_weights_per_layer:],
            original_keras_version, original_backend)
        weights = forward_weights + backward_weights

    if original_keras_version == '1':
        if layer.__class__.__name__ == 'TimeDistributed':
            weights = preprocess_weights_for_loading(layer.layer, weights,
                                                     original_keras_version,
                                                     original_backend)

        if layer.__class__.__name__ == 'Conv1D':
            shape = weights[0].shape
            # Handle Keras 1.1 format
            if shape[:2] != (layer.kernel_size[0],
                             1) or shape[3] != layer.filters:
                # Legacy shape:
                # (filters, input_dim, filter_length, 1)
                assert shape[0] == layer.filters and shape[2:] == (
                    layer.kernel_size[0], 1)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))
            weights[0] = weights[0][:, 0, :, :]

        if layer.__class__.__name__ == 'Conv2D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 1, 0))

        if layer.__class__.__name__ == 'Conv2DTranspose':
            if layer.data_format == 'channels_last':
                # old: (kernel_rows, kernel_cols, stack_size, filters)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (0, 1, 3, 2))
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, kernel_rows, kernel_cols)
                # new: (kernel_rows, kernel_cols, filters, stack_size)
                weights[0] = np.transpose(weights[0], (2, 3, 0, 1))

        if layer.__class__.__name__ == 'Conv3D':
            if layer.data_format == 'channels_first':
                # old: (filters, stack_size, ...)
                # new: (..., stack_size, filters)
                weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0))

        if layer.__class__.__name__ == 'GRU':
            if len(weights) == 9:
                kernel = np.concatenate([weights[0], weights[3], weights[6]],
                                        axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[4], weights[7]], axis=-1)
                bias = np.concatenate([weights[2], weights[5], weights[8]],
                                      axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'LSTM':
            if len(weights) == 12:
                # old: i, c, f, o
                # new: i, f, c, o
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ == 'ConvLSTM2D':
            if len(weights) == 12:
                kernel = np.concatenate(
                    [weights[0], weights[6], weights[3], weights[9]], axis=-1)
                recurrent_kernel = np.concatenate(
                    [weights[1], weights[7], weights[4], weights[10]], axis=-1)
                bias = np.concatenate(
                    [weights[2], weights[8], weights[5], weights[11]], axis=-1)
                if layer.data_format == 'channels_first':
                    # old: (filters, stack_size, kernel_rows, kernel_cols)
                    # new: (kernel_rows, kernel_cols, stack_size, filters)
                    kernel = np.transpose(kernel, (2, 3, 1, 0))
                    recurrent_kernel = np.transpose(recurrent_kernel,
                                                    (2, 3, 1, 0))
                weights = [kernel, recurrent_kernel, bias]

        if layer.__class__.__name__ in ['Model', 'Sequential']:
            new_weights = []
            # trainable weights
            for sublayer in layer.layers:
                num_weights = len(sublayer.trainable_weights)
                if num_weights > 0:
                    new_weights.extend(
                        preprocess_weights_for_loading(
                            layer=sublayer,
                            weights=weights[:num_weights],
                            original_keras_version=original_keras_version,
                            original_backend=original_backend))
                    weights = weights[num_weights:]

            # non-trainable weights
            for sublayer in layer.layers:
                num_weights = len([
                    l for l in sublayer.weights
                    if l not in sublayer.trainable_weights
                ])
                if num_weights > 0:
                    new_weights.extend(
                        preprocess_weights_for_loading(
                            layer=sublayer,
                            weights=weights[:num_weights],
                            original_keras_version=original_keras_version,
                            original_backend=original_backend))
                    weights = weights[num_weights:]
            weights = new_weights

    conv_layers = [
        'Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D'
    ]
    if layer.__class__.__name__ in conv_layers:
        if original_backend == 'theano':
            weights[0] = conv_utils.convert_kernel(weights[0])
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = conv_utils.convert_kernel(weights[1])
        if K.int_shape(layer.weights[0]) != weights[0].shape:
            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
            if layer.__class__.__name__ == 'ConvLSTM2D':
                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))

    return _convert_rnn_weights(layer, weights)
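The Conv2D branch above is a pure axis permutation; a self-contained check of that transpose using only NumPy (the shapes are illustrative):

import numpy as np

# Keras 1 channels_first kernel: (filters, stack_size, rows, cols).
old = np.zeros((64, 3, 7, 7))
# Keras 2 layout: (rows, cols, stack_size, filters).
new = np.transpose(old, (2, 3, 1, 0))
print(new.shape)  # (7, 7, 3, 64)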
Example n. 59
def block3(x,
           filters,
           kernel_size=3,
           stride=1,
           groups=32,
           conv_shortcut=True,
           name=None):
    """A residual block.
  Arguments:
    x: input tensor.
    filters: integer, filters of the bottleneck layer.
    kernel_size: default 3, kernel size of the bottleneck layer.
    stride: default 1, stride of the first layer.
    groups: default 32, group size for grouped convolution.
    conv_shortcut: default True, use convolution shortcut if True,
        otherwise identity shortcut.
    name: string, block label.
  Returns:
    Output tensor for the residual block.
  """
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    if conv_shortcut:
        shortcut = layers.Conv2D((64 // groups) * filters,
                                 1,
                                 strides=stride,
                                 use_bias=False,
                                 name=name + '_0_conv')(x)
        shortcut = tf.keras.layers.experimental.SyncBatchNormalization(
            axis=bn_axis, name=name + '_0_bn')(shortcut)
    else:
        shortcut = x

    x = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x)
    x = tf.keras.layers.experimental.SyncBatchNormalization(axis=bn_axis,
                                                            name=name +
                                                            '_1_bn')(x)
    x = layers.Activation('relu', name=name + '_1_relu')(x)

    c = filters // groups
    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
    x = layers.DepthwiseConv2D(kernel_size,
                               strides=stride,
                               depth_multiplier=c,
                               use_bias=False,
                               name=name + '_2_conv')(x)
    x_shape = backend.int_shape(x)[1:-1]
    x = layers.Reshape(x_shape + (groups, c, c))(x)
    x = layers.Lambda(lambda x: sum(x[:, :, :, :, i] for i in range(c)),
                      name=name + '_2_reduce')(x)
    x = layers.Reshape(x_shape + (filters, ))(x)
    x = tf.keras.layers.experimental.SyncBatchNormalization(axis=bn_axis,
                                                            name=name +
                                                            '_2_bn')(x)
    x = layers.Activation('relu', name=name + '_2_relu')(x)

    x = layers.Conv2D((64 // groups) * filters,
                      1,
                      use_bias=False,
                      name=name + '_3_conv')(x)
    x = tf.keras.layers.experimental.SyncBatchNormalization(axis=bn_axis,
                                                            name=name +
                                                            '_3_bn')(x)

    x = layers.Add(name=name + '_add')([shortcut, x])
    x = layers.Activation('relu', name=name + '_out')(x)
    return x
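A minimal usage sketch, assuming `layers` is `tensorflow.keras.layers` and a TF version that provides `SyncBatchNormalization`; the input shape is illustrative:

from tensorflow.keras import layers

inp = layers.Input(shape=(56, 56, 128))
# Grouped 3x3 bottleneck with stride 2; the conv shortcut projects the
# input to (64 // groups) * filters = 256 channels.
out = block3(inp, filters=128, stride=2, name='demo')
print(out.shape)  # (None, 28, 28, 256)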
Example n. 60
  def call(self,
           inputs,
           mask=None,
           training=None,
           initial_state=None,
           constants=None):
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if isinstance(inputs, list):
      inputs = inputs[0]
    if initial_state is not None:
      pass
    elif self.stateful:
      initial_state = self.states
    else:
      initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
      mask = mask[0]

    if len(initial_state) != len(self.states):
      raise ValueError('Layer has ' + str(len(self.states)) +
                       ' states but was passed ' +
                       str(len(initial_state)) +
                       ' initial states.')
    timesteps = K.int_shape(inputs)[1]

    kwargs = {}
    if generic_utils.has_arg(self.cell.call, 'training'):
      kwargs['training'] = training

    if constants:
      if not generic_utils.has_arg(self.cell.call, 'constants'):
        raise ValueError('RNN cell does not support constants')

      def step(inputs, states):
        constants = states[-self._num_constants:]
        states = states[:-self._num_constants]
        return self.cell.call(inputs, states, constants=constants,
                              **kwargs)
    else:
      def step(inputs, states):
        return self.cell.call(inputs, states, **kwargs)

    last_output, outputs, states = K.rnn(step,
                                         inputs,
                                         initial_state,
                                         constants=constants,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         input_length=timesteps)
    if self.stateful:
      updates = []
      for i in range(len(states)):
        updates.append(K.update(self.states[i], states[i]))
      self.add_update(updates, inputs=True)

    if self.return_sequences:
      output = outputs
    else:
      output = last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
      output._uses_learning_phase = True

    if self.return_state:
      if not isinstance(states, (list, tuple)):
        states = [states]
      else:
        states = list(states)
      return [output] + states
    else:
      return output
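The tail of `call` mirrors the standard Keras RNN options; a minimal sketch with a stock layer showing the `[output] + states` return shape:

import numpy as np
from tensorflow.keras import layers

x = np.random.rand(2, 5, 3).astype('float32')
rnn = layers.LSTM(4, return_sequences=True, return_state=True)

# output: (2, 5, 4); state_h and state_c: (2, 4) each.
output, state_h, state_c = rnn(x)
print(output.shape, state_h.shape, state_c.shape)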