def get_data_shape():
  # input image dimensions
  img_rows, img_cols = 28, 28
  if backend.image_data_format() == 'channels_first':
    return 1, img_rows, img_cols
  else:
    return img_rows, img_cols, 1
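A minimal usage sketch (hypothetical data; assumes the same `backend` import the snippet relies on). Because get_data_shape() already places the channel axis for the active data format, one reshape covers both layouts:

import numpy as np

x = np.zeros((100, 28, 28), dtype='float32')      # e.g. 100 MNIST images
x = x.reshape((x.shape[0],) + get_data_shape())   # (100, 28, 28, 1) or (100, 1, 28, 28)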
Example 2
 def __init__(self, x, y, image_data_generator,
              batch_size=32,
              shuffle=False,
              sample_weight=None,
              seed=None,
              data_format=None,
              save_to_dir=None,
              save_prefix='',
              save_format='png',
              subset=None,
              dtype=None):
   if data_format is None:
     data_format = backend.image_data_format()
   kwargs = {}
   if 'dtype' in tf_inspect.getfullargspec(
       image.NumpyArrayIterator.__init__)[0]:
     if dtype is None:
       dtype = backend.floatx()
     kwargs['dtype'] = dtype
   super(NumpyArrayIterator, self).__init__(
       x, y, image_data_generator,
       batch_size=batch_size,
       shuffle=shuffle,
       sample_weight=sample_weight,
       seed=seed,
       data_format=data_format,
       save_to_dir=save_to_dir,
       save_prefix=save_prefix,
       save_format=save_format,
       subset=subset,
       **kwargs)
Example 3
def save_img(path,
             x,
             data_format=None,
             file_format=None,
             scale=True,
             **kwargs):
  """Saves an image stored as a Numpy array to a path or file object.

  Arguments:
      path: Path or file object.
      x: Numpy array.
      data_format: Image data format,
          either "channels_first" or "channels_last".
      file_format: Optional file format override. If omitted, the
          format to use is determined from the filename extension.
          If a file object was used instead of a filename, this
          parameter should always be used.
      scale: Whether to rescale image values to be within `[0, 255]`.
      **kwargs: Additional keyword arguments passed to `PIL.Image.save()`.
  """
  if data_format is None:
    data_format = backend.image_data_format()
  image.save_img(path,
                 x,
                 data_format=data_format,
                 file_format=file_format,
                 scale=scale, **kwargs)
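A hedged usage sketch via the public tf.keras API, which this wrapper forwards to (assumes Pillow is installed; 'demo.png' is an arbitrary output path):

import numpy as np
from tensorflow.keras.preprocessing.image import save_img

img = np.random.randint(0, 256, size=(64, 64, 3)).astype('float32')
save_img('demo.png', img, data_format='channels_last', scale=True)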
Example 4
def array_to_img(x, data_format=None, scale=True, dtype=None):
  """Converts a 3D Numpy array to a PIL Image instance.

  Arguments:
      x: Input Numpy array.
      data_format: Image data format,
          either "channels_first" or "channels_last".
      scale: Whether to rescale image values
          to be within `[0, 255]`.
      dtype: Dtype to use.

  Returns:
      A PIL Image instance.

  Raises:
      ImportError: if PIL is not available.
      ValueError: if invalid `x` or `data_format` is passed.
  """

  if data_format is None:
    data_format = backend.image_data_format()
  kwargs = {}
  if 'dtype' in tf_inspect.getfullargspec(image.array_to_img)[0]:
    if dtype is None:
      dtype = backend.floatx()
    kwargs['dtype'] = dtype
  return image.array_to_img(x, data_format=data_format, scale=scale, **kwargs)
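Quick usage sketch through the public API (assumes Pillow): with the default scale=True, a float array in [0, 1] is rescaled to [0, 255] before conversion.

import numpy as np
from tensorflow.keras.preprocessing.image import array_to_img

pil_img = array_to_img(np.random.rand(32, 32, 3))  # returns a PIL Image
print(pil_img.size)                                # (32, 32)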
Example 5
def conv_block(x, growth_rate, name):
  """A building block for a dense block.

  Arguments:
      x: input tensor.
      growth_rate: float, growth rate at dense layers.
      name: string, block label.

  Returns:
      output tensor for the block.
  """
  bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
  x1 = BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')(
          x)
  x1 = Activation('relu', name=name + '_0_relu')(x1)
  x1 = Conv2D(4 * growth_rate, 1, use_bias=False, name=name + '_1_conv')(x1)
  x1 = BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(
          x1)
  x1 = Activation('relu', name=name + '_1_relu')(x1)
  x1 = Conv2D(
      growth_rate, 3, padding='same', use_bias=False, name=name + '_2_conv')(
          x1)
  x = Concatenate(axis=bn_axis, name=name + '_concat')([x, x1])
  return x
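A channel-bookkeeping sketch (assumes the Keras layer imports the snippet uses are in scope): each conv_block emits growth_rate new channels and concatenates them onto its input, so C_out = C_in + growth_rate.

from tensorflow.keras.layers import Input

inp = Input(shape=(32, 32, 64))                       # 64 input channels
out = conv_block(inp, growth_rate=32, name='block1')
print(out.shape)                                      # (None, 32, 32, 96) with channels_last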
Example 6
def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4):
    ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout
    Args:
        ip: Input keras tensor
        nb_filter: number of filters
        bottleneck: add bottleneck block
        dropout_rate: dropout rate
        weight_decay: weight decay factor
    Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck)
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip)
    x = Activation('relu')(x)

    if bottleneck:
        inter_channel = nb_filter * 4  # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua

        x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
                   kernel_regularizer=l2(weight_decay))(x)
        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
        x = Activation('relu')(x)

    x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x
Example 7
def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1e-4,
                  grow_nb_filters=True, return_concat_list=False):
    ''' Build a dense_block where the output of each conv_block is fed to subsequent ones
    Args:
        x: keras tensor
        nb_layers: the number of layers of conv_block to append to the model.
        nb_filter: number of filters
        growth_rate: growth rate
        bottleneck: bottleneck block
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        grow_nb_filters: flag to decide to allow number of filters to grow
        return_concat_list: return the list of feature maps along with the actual output
    Returns: keras tensor with nb_layers of conv_block appended
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    x_list = [x]

    for i in range(nb_layers):
        cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay)
        x_list.append(cb)

        x = concatenate([x, cb], axis=concat_axis)

        if grow_nb_filters:
            nb_filter += growth_rate

    if return_concat_list:
        return x, nb_filter, x_list
    else:
        return x, nb_filter
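A worked check of the filter arithmetic (plain Python, not library code): with grow_nb_filters=True the block returns nb_filter + nb_layers * growth_rate.

nb_filter, nb_layers, growth_rate = 64, 6, 32
print(nb_filter + nb_layers * growth_rate)  # 256 feature maps after the block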
Example 8
def load_data(label_mode='fine'):
  """Loads CIFAR100 dataset.

  Arguments:
      label_mode: one of "fine", "coarse".

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

  Raises:
      ValueError: in case of invalid `label_mode`.
  """
  if label_mode not in ['fine', 'coarse']:
    raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

  dirname = 'cifar-100-python'
  origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
  path = get_file(dirname, origin=origin, untar=True)

  fpath = os.path.join(path, 'train')
  x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')

  fpath = os.path.join(path, 'test')
  x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')

  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  return (x_train, y_train), (x_test, y_test)
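A hedged usage sketch via the public tf.keras API, which this function mirrors:

from tensorflow.keras.datasets import cifar100

(x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')
print(x_train.shape, y_train.shape)  # (50000, 32, 32, 3) (50000, 1)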
Example 9
 def prepare_simple_model(input_tensor, loss_name, target):
   axis = 1 if K.image_data_format() == 'channels_first' else -1
   loss = None
   num_channels = None
   activation = None
   if loss_name == 'sparse_categorical_crossentropy':
     loss = lambda y_true, y_pred: K.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
         y_true, y_pred, axis=axis)
     num_channels = np.amax(target) + 1
     activation = 'softmax'
   elif loss_name == 'categorical_crossentropy':
     loss = lambda y_true, y_pred: K.categorical_crossentropy(  # pylint: disable=g-long-lambda
         y_true, y_pred, axis=axis)
     num_channels = target.shape[axis]
     activation = 'softmax'
   elif loss_name == 'binary_crossentropy':
     loss = lambda y_true, y_pred: K.binary_crossentropy(y_true, y_pred)  # pylint: disable=unnecessary-lambda
     num_channels = target.shape[axis]
     activation = 'sigmoid'
   predictions = Conv2D(num_channels,
                        1,
                        activation=activation,
                        kernel_initializer='ones',
                        bias_initializer='ones')(input_tensor)
   simple_model = keras.models.Model(inputs=input_tensor,
                                     outputs=predictions)
   simple_model.compile(optimizer='rmsprop', loss=loss)
   return simple_model
Example 10
def load_data():
  """Loads CIFAR10 dataset.

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
  """
  dirname = 'cifar-10-batches-py'
  origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
  path = get_file(dirname, origin=origin, untar=True)

  num_train_samples = 50000

  x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
  y_train = np.empty((num_train_samples,), dtype='uint8')

  for i in range(1, 6):
    fpath = os.path.join(path, 'data_batch_' + str(i))
    (x_train[(i - 1) * 10000:i * 10000, :, :, :],
     y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath)

  fpath = os.path.join(path, 'test_batch')
  x_test, y_test = load_batch(fpath)

  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  return (x_train, y_train), (x_test, y_test)
Example 11
def preprocess_input(x, data_format=None, mode='caffe'):
  """Preprocesses a tensor or Numpy array encoding a batch of images.

  Arguments:
      x: Input Numpy or symbolic tensor, 3D or 4D.
      data_format: Data format of the image tensor/array.
      mode: One of "caffe", "tf".
          - caffe: will convert the images from RGB to BGR,
              then will zero-center each color channel with
              respect to the ImageNet dataset,
              without scaling.
          - tf: will scale pixels between -1 and 1,
              sample-wise.

  Returns:
      Preprocessed tensor or Numpy array.

  Raises:
      ValueError: In case of unknown `data_format` argument.
  """
  if data_format is None:
    data_format = K.image_data_format()
  if data_format not in {'channels_first', 'channels_last'}:
    raise ValueError('Unknown data_format ' + str(data_format))

  if isinstance(x, np.ndarray):
    return _preprocess_numpy_input(x, data_format=data_format, mode=mode)
  else:
    return _preprocess_symbolic_input(x, data_format=data_format, mode=mode)
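A worked check of what mode='tf' computes, per the docstring (plain NumPy; [0, 255] maps to [-1, 1] sample-wise):

import numpy as np

x = np.array([0., 127.5, 255.])
print(x / 127.5 - 1.0)  # [-1.  0.  1.]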
Example 12
def convert(in_path, out_path):
    """Convert any Keras model to the frugally-deep model format."""

    assert K.backend() == "tensorflow"
    assert K.floatx() == "float32"
    assert K.image_data_format() == 'channels_last'

    print('loading {}'.format(in_path))
    model = load_model(in_path)

    # Force creation of underlying functional model.
    # see: https://github.com/fchollet/keras/issues/8136
    # Loss and optimizer type do not matter, since we don't train the model.
    model.compile(loss='mse', optimizer='sgd')

    model = convert_sequential_to_model(model)
    test_data = gen_test_data(model)

    json_output = {}
    json_output['architecture'] = json.loads(model.to_json())

    json_output['image_data_format'] = K.image_data_format()
    for depth in range(1, 3, 1):
        json_output['conv2d_valid_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_conv2d_eval, 'valid')
        json_output['conv2d_same_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_conv2d_eval, 'same')
        json_output['separable_conv2d_valid_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_sep_conv2d_eval, 'valid')
        json_output['separable_conv2d_same_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_sep_conv2d_eval, 'same')
    json_output['max_pooling_2d_valid_offset'] =\
        check_operation_offset(1, conv2d_offset_max_pool_eval, 'valid')
    json_output['max_pooling_2d_same_offset'] =\
        check_operation_offset(1, conv2d_offset_max_pool_eval, 'same')
    json_output['average_pooling_2d_valid_offset'] =\
        check_operation_offset(1, conv2d_offset_average_pool_eval, 'valid')
    json_output['average_pooling_2d_same_offset'] =\
        check_operation_offset(1, conv2d_offset_average_pool_eval, 'same')
    json_output['input_shapes'] = get_shapes(test_data['inputs'])
    json_output['output_shapes'] = get_shapes(test_data['outputs'])
    json_output['tests'] = [test_data]
    json_output['trainable_params'] = get_all_weights(model)

    print('writing {}'.format(out_path))
    write_text_file(out_path, json.dumps(
        json_output, allow_nan=False, indent=2, sort_keys=True))
Example 13
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
  """Adds an initial convolution layer (with batch normalization and relu6).

  Arguments:
      inputs: Input tensor of shape `(rows, cols, 3)`
          (with `channels_last` data format) or
          (3, rows, cols) (with `channels_first` data format).
          It should have exactly 3 input channels,
          and width and height should be no smaller than 32.
          E.g. `(224, 224, 3)` would be one valid value.
      filters: Integer, the dimensionality of the output space
          (i.e. the number of output filters in the convolution).
      alpha: controls the width of the network.
          - If `alpha` < 1.0, proportionally decreases the number
              of filters in each layer.
          - If `alpha` > 1.0, proportionally increases the number
              of filters in each layer.
          - If `alpha` = 1, default number of filters from the paper
               are used at each layer.
      kernel: An integer or tuple/list of 2 integers, specifying the
          width and height of the 2D convolution window.
          Can be a single integer to specify the same value for
          all spatial dimensions.
      strides: An integer or tuple/list of 2 integers,
          specifying the strides of the convolution along the width and height.
          Can be a single integer to specify the same value for
          all spatial dimensions.
          Specifying any stride value != 1 is incompatible with specifying
          any `dilation_rate` value != 1.

  Input shape:
      4D tensor with shape:
      `(samples, channels, rows, cols)` if data_format='channels_first'
      or 4D tensor with shape:
      `(samples, rows, cols, channels)` if data_format='channels_last'.

  Output shape:
      4D tensor with shape:
      `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
      or 4D tensor with shape:
      `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
      `rows` and `cols` values might have changed due to stride.

  Returns:
      Output tensor of block.
  """
  channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
  filters = int(filters * alpha)
  x = ZeroPadding2D(padding=(1, 1), name='conv1_pad')(inputs)
  x = Conv2D(
      filters,
      kernel,
      padding='valid',
      use_bias=False,
      strides=strides,
      name='conv1')(x)
  x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
  return Activation(relu6, name='conv1_relu')(x)
Example 14
  def set_model(self, model):
    """Sets Keras model and creates summary ops."""

    self.model = model
    self.sess = K.get_session()
    # only make histogram summary op if it hasn't already been made
    if self.histogram_freq and self.merged is None:
      for layer in self.model.layers:
        for weight in layer.weights:
          mapped_weight_name = weight.name.replace(':', '_')
          tf_summary.histogram(mapped_weight_name, weight)
          if self.write_images:
            w_img = array_ops.squeeze(weight)
            shape = K.int_shape(w_img)
            if len(shape) == 2:  # dense layer kernel case
              if shape[0] > shape[1]:
                w_img = array_ops.transpose(w_img)
                shape = K.int_shape(w_img)
              w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
            elif len(shape) == 3:  # convnet case
              if K.image_data_format() == 'channels_last':
                # switch to channels_first to display
                # every kernel as a separate image
                w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
                shape = K.int_shape(w_img)
              w_img = array_ops.reshape(w_img,
                                        [shape[0], shape[1], shape[2], 1])
            elif len(shape) == 1:  # bias case
              w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
            else:
              # not possible to handle 3D convnets etc.
              continue

            shape = K.int_shape(w_img)
            assert len(shape) == 4 and shape[-1] in [1, 3, 4]
            tf_summary.image(mapped_weight_name, w_img)

        if self.write_grads:
          for weight in layer.trainable_weights:
            mapped_weight_name = weight.name.replace(':', '_')
            grads = model.optimizer.get_gradients(model.total_loss, weight)

            def is_indexed_slices(grad):
              return type(grad).__name__ == 'IndexedSlices'

            grads = [grad.values if is_indexed_slices(grad) else grad
                     for grad in grads]
            tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)

        if hasattr(layer, 'output'):
          tf_summary.histogram('{}_out'.format(layer.name), layer.output)
    self.merged = tf_summary.merge_all()

    if self.write_graph:
      self.writer = self._writer_class(self.log_dir, self.sess.graph)
    else:
      self.writer = self._writer_class(self.log_dir)
Example 15
def normalize_data_format(value):
  if value is None:
    value = backend.image_data_format()
  data_format = value.lower()
  if data_format not in {'channels_first', 'channels_last'}:
    raise ValueError('The `data_format` argument must be one of '
                     '"channels_first", "channels_last". Received: ' +
                     str(value))
  return data_format
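A behavioural sketch (assumes the same `backend` import the snippet uses): the value is lower-cased and validated, and None falls back to the global setting.

print(normalize_data_format('Channels_First'))  # 'channels_first'
print(normalize_data_format(None))              # backend.image_data_format()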
Example 16
 def __init__(self, rate, data_format=None, **kwargs):
   super(SpatialDropout3D, self).__init__(rate, **kwargs)
   if data_format is None:
     data_format = K.image_data_format()
   if data_format not in {'channels_last', 'channels_first'}:
     raise ValueError('data_format must be in '
                      '{"channels_last", "channels_first"}')
   self.data_format = data_format
   self.input_spec = InputSpec(ndim=5)
Example 17
def identity_block(input_tensor, kernel_size, filters, stage, block):
  """The identity block is the block that has no conv layer at shortcut.

  # Arguments
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of
          middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names

  # Returns
      Output tensor for the block.
  """
  filters1, filters2, filters3 = filters
  if backend.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  x = layers.Conv2D(filters1, (1, 1), use_bias=False,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2a')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters2, kernel_size,
                    padding='same', use_bias=False,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2b')(x)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(filters3, (1, 1), use_bias=False,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name=conv_name_base + '2c')(x)
  x = layers.BatchNormalization(axis=bn_axis,
                                momentum=BATCH_NORM_DECAY,
                                epsilon=BATCH_NORM_EPSILON,
                                name=bn_name_base + '2c')(x)

  x = layers.add([x, input_tensor])
  x = layers.Activation('relu')(x)
  return x
Example 18
 def __init__(self,
              featurewise_center=False,
              samplewise_center=False,
              featurewise_std_normalization=False,
              samplewise_std_normalization=False,
              zca_whitening=False,
              zca_epsilon=1e-6,
              rotation_range=0,
              width_shift_range=0.,
              height_shift_range=0.,
              brightness_range=None,
              shear_range=0.,
              zoom_range=0.,
              channel_shift_range=0.,
              fill_mode='nearest',
              cval=0.,
              horizontal_flip=False,
              vertical_flip=False,
              rescale=None,
              preprocessing_function=None,
              data_format=None,
              validation_split=0.0,
              dtype=None):
   if data_format is None:
     data_format = backend.image_data_format()
   kwargs = {}
   if 'dtype' in tf_inspect.getfullargspec(
       image.ImageDataGenerator.__init__)[0]:
     if dtype is None:
       dtype = backend.floatx()
     kwargs['dtype'] = dtype
   super(ImageDataGenerator, self).__init__(
       featurewise_center=featurewise_center,
       samplewise_center=samplewise_center,
       featurewise_std_normalization=featurewise_std_normalization,
       samplewise_std_normalization=samplewise_std_normalization,
       zca_whitening=zca_whitening,
       zca_epsilon=zca_epsilon,
       rotation_range=rotation_range,
       width_shift_range=width_shift_range,
       height_shift_range=height_shift_range,
       brightness_range=brightness_range,
       shear_range=shear_range,
       zoom_range=zoom_range,
       channel_shift_range=channel_shift_range,
       fill_mode=fill_mode,
       cval=cval,
       horizontal_flip=horizontal_flip,
       vertical_flip=vertical_flip,
       rescale=rescale,
       preprocessing_function=preprocessing_function,
       data_format=data_format,
       validation_split=validation_split,
       **kwargs)
Example 19
def _separable_conv_block(ip,
                          filters,
                          kernel_size=(3, 3),
                          strides=(1, 1),
                          block_id=None):
  """Adds 2 blocks of [relu-separable conv-batchnorm].

  Arguments:
      ip: Input tensor
      filters: Number of output filters per layer
      kernel_size: Kernel size of separable convolutions
      strides: Strided convolution for downsampling
      block_id: String block_id

  Returns:
      A Keras tensor
  """
  channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

  with K.name_scope('separable_conv_block_%s' % block_id):
    x = Activation('relu')(ip)
    x = SeparableConv2D(
        filters,
        kernel_size,
        strides=strides,
        name='separable_conv_1_%s' % block_id,
        padding='same',
        use_bias=False,
        kernel_initializer='he_normal')(
            x)
    x = BatchNormalization(
        axis=channel_dim,
        momentum=0.9997,
        epsilon=1e-3,
        name='separable_conv_1_bn_%s' % (block_id))(
            x)
    x = Activation('relu')(x)
    x = SeparableConv2D(
        filters,
        kernel_size,
        name='separable_conv_2_%s' % block_id,
        padding='same',
        use_bias=False,
        kernel_initializer='he_normal')(
            x)
    x = BatchNormalization(
        axis=channel_dim,
        momentum=0.9997,
        epsilon=1e-3,
        name='separable_conv_2_bn_%s' % (block_id))(
            x)
  return x
Example 20
def conv_block(input_tensor, kernel_size, filters, stage, block,
               strides=(2, 2)):
  """A block that has a conv layer at shortcut.

  Arguments:
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names
      strides: Strides for the first conv layer in the block.

  Returns:
      Output tensor for the block.

  Note that from stage 3,
  the first conv layer at main path is with strides=(2, 2)
  And the shortcut should have strides=(2, 2) as well
  """
  filters1, filters2, filters3 = filters
  if K.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  x = Conv2D(
      filters1, (1, 1), strides=strides, name=conv_name_base + '2a')(
          input_tensor)
  x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
  x = Activation('relu')(x)

  x = Conv2D(
      filters2, kernel_size, padding='same', name=conv_name_base + '2b')(
          x)
  x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
  x = Activation('relu')(x)

  x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
  x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

  shortcut = Conv2D(
      filters3, (1, 1), strides=strides, name=conv_name_base + '1')(
          input_tensor)
  shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)

  x = layers.add([x, shortcut])
  x = Activation('relu')(x)
  return x
Example 21
def get_input_datasets(use_bfloat16=False):
  """Downloads the MNIST dataset and creates train and eval dataset objects.

  Args:
    use_bfloat16: Boolean to determine if input should be cast to bfloat16

  Returns:
    Train dataset and eval dataset, each batched with batch size 64.

  """
  cast_dtype = dtypes.bfloat16 if use_bfloat16 else dtypes.float32

  # the data, split between train and test sets
  (x_train, y_train), (x_test, y_test) = mnist.load_data()

  train_data_shape = (x_train.shape[0],) + get_data_shape()
  test_data_shape = (x_test.shape[0],) + get_data_shape()
  # get_data_shape() already matches the backend's data format, so the same
  # reshape applies either way.
  x_train = x_train.reshape(train_data_shape)
  x_test = x_test.reshape(test_data_shape)

  x_train = x_train.astype('float32')
  x_test = x_test.astype('float32')
  x_train /= 255
  x_test /= 255

  # convert class vectors to binary class matrices
  y_train = utils.to_categorical(y_train, NUM_CLASSES)
  y_test = utils.to_categorical(y_test, NUM_CLASSES)

  # train dataset
  train_ds = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
  # TODO(rchao): Remove maybe_shard_dataset() once auto-sharding is done.
  train_ds = maybe_shard_dataset(train_ds)
  train_ds = train_ds.repeat()
  train_ds = train_ds.map(lambda x, y: (math_ops.cast(x, cast_dtype), y))
  train_ds = train_ds.batch(64, drop_remainder=True)

  # eval dataset
  eval_ds = dataset_ops.Dataset.from_tensor_slices((x_test, y_test))
  # TODO(rchao): Remove maybe_shard_dataset() once auto-sharding is done.
  eval_ds = maybe_shard_dataset(eval_ds)
  eval_ds = eval_ds.repeat()
  eval_ds = eval_ds.map(lambda x, y: (math_ops.cast(x, cast_dtype), y))
  eval_ds = eval_ds.batch(64, drop_remainder=True)

  return train_ds, eval_ds
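A minimal tf.data sketch of the same pipeline shape (toy arrays; the sharding helper and dtype cast are hypothetical simplifications):

import numpy as np
import tensorflow as tf

x = np.zeros((100, 28, 28, 1), dtype='float32')
y = np.zeros((100, 10), dtype='float32')
ds = tf.data.Dataset.from_tensor_slices((x, y)).repeat()
ds = ds.map(lambda a, b: (tf.cast(a, tf.float32), b))
ds = ds.batch(64, drop_remainder=True)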
Example 22
 def __init__(self, pool_function, pool_size, strides,
              padding='valid', data_format='channels_last',
              name=None, **kwargs):
   super(Pooling1D, self).__init__(name=name, **kwargs)
   if data_format is None:
     data_format = backend.image_data_format()
   if strides is None:
     strides = pool_size
   self.pool_function = pool_function
   self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size')
   self.strides = conv_utils.normalize_tuple(strides, 1, 'strides')
   self.padding = conv_utils.normalize_padding(padding)
   self.data_format = conv_utils.normalize_data_format(data_format)
   self.input_spec = InputSpec(ndim=3)
Example 23
def conv2d_bn(x,
              filters,
              num_row,
              num_col,
              padding='same',
              strides=(1, 1),
              name=None):
  """Utility function to apply conv + BN.

  Arguments:
      x: input tensor.
      filters: filters in `Conv2D`.
      num_row: height of the convolution kernel.
      num_col: width of the convolution kernel.
      padding: padding mode in `Conv2D`.
      strides: strides in `Conv2D`.
      name: name of the ops; will become `name + '_conv'`
          for the convolution and `name + '_bn'` for the
          batch norm layer.

  Returns:
      Output tensor after applying `Conv2D` and `BatchNormalization`.
  """
  if name is not None:
    bn_name = name + '_bn'
    conv_name = name + '_conv'
  else:
    bn_name = None
    conv_name = None
  if K.image_data_format() == 'channels_first':
    bn_axis = 1
  else:
    bn_axis = 3
  x = Conv2D(
      filters, (num_row, num_col),
      strides=strides,
      padding=padding,
      use_bias=False,
      name=conv_name)(
          x)
  x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
  x = Activation('relu', name=name)(x)
  return x
Example 24
def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4):
    ''' Apply BatchNorm, ReLU, 1x1 Conv2D (with optional compression) and 2x2 AveragePooling2D
    Args:
        ip: keras tensor
        nb_filter: number of filters
        compression: calculated as 1 - reduction. Reduces the number of feature maps
                    in the transition block.
        weight_decay: weight decay factor
    Returns: keras tensor, after applying batch_norm, relu, 1x1 conv and average pooling
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip)
    x = Activation('relu')(x)
    x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
               kernel_regularizer=l2(weight_decay))(x)
    x = AveragePooling2D((2, 2), strides=(2, 2))(x)

    return x
Example 25
 def __init__(self, directory, image_data_generator,
              target_size=(256, 256),
              color_mode='rgb',
              classes=None,
              class_mode='categorical',
              batch_size=32,
              shuffle=True,
              seed=None,
              data_format=None,
              save_to_dir=None,
              save_prefix='',
              save_format='png',
              follow_links=False,
              subset=None,
              interpolation='nearest',
              dtype=None):
   if data_format is None:
     data_format = backend.image_data_format()
   kwargs = {}
   if 'dtype' in tf_inspect.getfullargspec(
       image.ImageDataGenerator.__init__)[0]:
     if dtype is None:
       dtype = backend.floatx()
     kwargs['dtype'] = dtype
   super(DirectoryIterator, self).__init__(
       directory, image_data_generator,
       target_size=target_size,
       color_mode=color_mode,
       classes=classes,
       class_mode=class_mode,
       batch_size=batch_size,
       shuffle=shuffle,
       seed=seed,
       data_format=data_format,
       save_to_dir=save_to_dir,
       save_prefix=save_prefix,
       save_format=save_format,
       follow_links=follow_links,
       subset=subset,
       interpolation=interpolation,
       **kwargs)
Example 26
def conv2d_bn(x,
              filters,
              kernel_size,
              strides=1,
              padding='same',
              activation='relu',
              use_bias=False,
              name=None):
  """Utility function to apply conv + BN.

  Arguments:
      x: input tensor.
      filters: filters in `Conv2D`.
      kernel_size: kernel size as in `Conv2D`.
      strides: strides in `Conv2D`.
      padding: padding mode in `Conv2D`.
      activation: activation in `Conv2D`.
      use_bias: whether to use a bias in `Conv2D`.
      name: name of the ops; will become `name + '_ac'` for the activation
          and `name + '_bn'` for the batch norm layer.

  Returns:
      Output tensor after applying `Conv2D` and `BatchNormalization`.
  """
  x = Conv2D(
      filters,
      kernel_size,
      strides=strides,
      padding=padding,
      use_bias=use_bias,
      name=name)(
          x)
  if not use_bias:
    bn_axis = 1 if K.image_data_format() == 'channels_first' else 3
    bn_name = None if name is None else name + '_bn'
    x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
  if activation is not None:
    ac_name = None if name is None else name + '_ac'
    x = Activation(activation, name=ac_name)(x)
  return x
Example 27
def transition_block(x, reduction, name):
  """A transition block.

  Arguments:
      x: input tensor.
      reduction: float, compression rate at transition layers.
      name: string, block label.

  Returns:
      output tensor for the block.
  """
  bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
  x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_bn')(x)
  x = Activation('relu', name=name + '_relu')(x)
  x = Conv2D(
      int(K.int_shape(x)[bn_axis] * reduction),
      1,
      use_bias=False,
      name=name + '_conv')(
          x)
  x = AveragePooling2D(2, strides=2, name=name + '_pool')(x)
  return x
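A channel-arithmetic sketch (assumes channels_last and the layer imports the snippet uses): reduction=0.5 halves the feature maps via the 1x1 conv, and the stride-2 average pool halves the spatial size.

from tensorflow.keras.layers import Input

inp = Input(shape=(28, 28, 256))
out = transition_block(inp, reduction=0.5, name='pool2')
print(out.shape)  # (None, 14, 14, 128)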
Example 28
def preprocess_input(x, data_format=None):
    """Preprocesses a tensor encoding a batch of images.

    # Arguments
        x: input Numpy tensor, 4D.
        data_format: data format of the image tensor.

    # Returns
        Preprocessed tensor.
    """
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in {'channels_last', 'channels_first'}

    if data_format == 'channels_first':
        if x.ndim == 3:
            # 'RGB'->'BGR'
            x = x[::-1, ...]
            # Zero-center by mean pixel
            x[0, :, :] -= 103.939
            x[1, :, :] -= 116.779
            x[2, :, :] -= 123.68
        else:
            x = x[:, ::-1, ...]
            x[:, 0, :, :] -= 103.939
            x[:, 1, :, :] -= 116.779
            x[:, 2, :, :] -= 123.68
    else:
        # 'RGB'->'BGR'
        x = x[..., ::-1]
        # Zero-center by mean pixel
        x[..., 0] -= 103.939
        x[..., 1] -= 116.779
        x[..., 2] -= 123.68

    x *= 0.017 # scale values

    return x
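A worked check on a single pixel (plain NumPy, using the mean values above): the ImageNet-mean pixel maps to zero after the RGB->BGR flip and centering.

import numpy as np

x = np.array([[[123.68, 116.779, 103.939]]])  # one RGB pixel, channels_last
x = x[..., ::-1]                              # RGB -> BGR
x -= np.array([103.939, 116.779, 123.68])     # zero-center per channel
print(x * 0.017)                              # [[[0. 0. 0.]]]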
Example 29
def identity_block(input_tensor, kernel_size, filters, stage, block):
  """The identity block is the block that has no conv layer at shortcut.

  Arguments:
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names

  Returns:
      Output tensor for the block.
  """
  filters1, filters2, filters3 = filters
  if K.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
  x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
  x = Activation('relu')(x)

  x = Conv2D(
      filters2, kernel_size, padding='same', name=conv_name_base + '2b')(
          x)
  x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
  x = Activation('relu')(x)

  x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
  x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

  x = layers.add([x, input_tensor])
  x = Activation('relu')(x)
  return x
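A shape constraint worth noting (hedged sketch; assumes the layer imports the snippet uses): the residual add requires the last filter count to match the input channel count.

from tensorflow.keras.layers import Input

inp = Input(shape=(56, 56, 256))
out = identity_block(inp, kernel_size=3, filters=[64, 64, 256], stage=2, block='a')
print(out.shape)  # (None, 56, 56, 256)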
Example 30
def img_to_array(img, data_format=None, dtype=None):
  """Converts a PIL Image instance to a Numpy array.

  Arguments:
      img: PIL Image instance.
      data_format: Image data format,
          either "channels_first" or "channels_last".
      dtype: Dtype to use for the returned array.

  Returns:
      A 3D Numpy array.

  Raises:
      ValueError: if invalid `img` or `data_format` is passed.
  """

  if data_format is None:
    data_format = backend.image_data_format()
  kwargs = {}
  if 'dtype' in tf_inspect.getfullargspec(image.img_to_array)[0]:
    if dtype is None:
      dtype = backend.floatx()
    kwargs['dtype'] = dtype
  return image.img_to_array(img, data_format=data_format, **kwargs)
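A round-trip sketch via the public tf.keras API (assumes Pillow):

import numpy as np
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array

a = np.random.rand(32, 32, 3)
b = img_to_array(array_to_img(a))  # PIL Image and back
print(b.shape, b.dtype)            # (32, 32, 3) float32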
Example 31
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                classifier_activation='softmax'):
    if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    layer_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not type input_tensor')
        if is_input_t_tensor:
            if backend.image_data_format() == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
                input_shape = (3, cols, rows)
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]
                input_shape = (cols, rows, 3)
    # If both input_shape and input_tensor are None, use a standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')
    if weights == 'imagenet':
        if (not minimalistic and alpha not in [0.75, 1.0]
                or minimalistic and alpha != 1.0):
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            logging.warning('`input_shape` is undefined or non-square, '
                            'or `rows` is not 224.'
                            ' Weights for input shape (224, 224) will be'
                            ' loaded as the default.')

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    x = img_input
    # x = layers.Rescaling(1. / 255.)(x)
    x = layers.Conv2D(16,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name='Conv')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv/BatchNorm')(x)
    x = activation(x)

    x = stack_fn(x, kernel, activation, se_ratio)

    last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)
    x = layers.Conv2D(last_conv_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1/BatchNorm')(x)
    x = activation(x)
    x = layers.Conv2D(last_point_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=True,
                      name='Conv_2')(x)
    x = activation(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        if channel_axis == 1:
            x = layers.Reshape((last_point_ch, 1, 1))(x)
        else:
            x = layers.Reshape((1, 1, last_point_ch))(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate)(x)
        x = layers.Conv2D(classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits')(x)
        x = layers.Flatten()(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(activation=classifier_activation,
                              name='Predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = '{}{}_224_{}_float'.format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHT_PATH + file_name,
                                           cache_subdir='models',
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Example 32
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
    """A block that has a conv layer at shortcut.

  # Arguments
      input_tensor: input tensor
      kernel_size: default 3, the kernel size of
          middle conv layer at main path
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names
      strides: Strides for the second conv layer in the block.

  # Returns
      Output tensor for the block.

  Note that from stage 3,
  the second conv layer at main path is with strides=(2, 2)
  And the shortcut should have strides=(2, 2) as well
  """
    filters1, filters2, filters3 = filters
    if backend.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = layers.Conv2D(filters1, (1, 1),
                      use_bias=False,
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name=conv_name_base + '2a')(input_tensor)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name=bn_name_base + '2a')(x)
    x = layers.Activation('relu')(x)

    x = layers.Conv2D(filters2,
                      kernel_size,
                      strides=strides,
                      padding='same',
                      use_bias=False,
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name=conv_name_base + '2b')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name=bn_name_base + '2b')(x)
    x = layers.Activation('relu')(x)

    x = layers.Conv2D(filters3, (1, 1),
                      use_bias=False,
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name=conv_name_base + '2c')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name=bn_name_base + '2c')(x)

    shortcut = layers.Conv2D(
        filters3, (1, 1),
        use_bias=False,
        strides=strides,
        kernel_initializer='he_normal',
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name=conv_name_base + '1')(input_tensor)
    shortcut = layers.BatchNormalization(axis=bn_axis,
                                         momentum=BATCH_NORM_DECAY,
                                         epsilon=BATCH_NORM_EPSILON,
                                         name=bn_name_base + '1')(shortcut)

    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x
Example 33
def DenseNet(
    batch_size,
    blocks,
    include_top=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax',
):
  """Instantiates the DenseNet architecture.

  Optionally loads weights pre-trained on ImageNet.
  Note that the data format convention used by the model is
  the one specified in your Keras config at `~/.keras/keras.json`.

  Caution: Be sure to properly pre-process your inputs to the application.
  Please see `applications.densenet.preprocess_input` for an example.

  Arguments:
    batch_size: optional static batch size for the model's input layers.
    blocks: numbers of building blocks for the four dense layers.
    include_top: whether to include the fully-connected
      layer at the top of the network.
    weights: one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor
      (i.e. output of `layers.Input()`)
      to use as image input for the model.
    input_shape: optional shape tuple, only to be specified
      if `include_top` is False (otherwise the input shape
      has to be `(224, 224, 3)` (with `'channels_last'` data format)
      or `(3, 224, 224)` (with `'channels_first'` data format).
      It should have exactly 3 input channels,
      and width and height should be no smaller than 32.
      E.g. `(200, 200, 3)` would be one valid value.
    pooling: optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model will be
          the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a 2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
  if not (weights in {'imagenet', None} or os.path.exists(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
                     ' as true, `classes` should be 1000')

  # Determine proper input shape
  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=224,
      min_size=32,
      data_format=backend.image_data_format(),
      require_flatten=include_top,
      weights=weights)

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape, batch_size=batch_size)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape, batch_size=batch_size)
    else:
      img_input = input_tensor
      raise NotImplementedError('Unsupported flow')

  bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

  x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input)
  x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x)
  x = layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')(
          x)
  x = layers.Activation('relu', name='conv1/relu')(x)
  x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x)
  x = layers.MaxPooling2D(3, strides=2, name='pool1')(x)

  x = dense_block(x, blocks[0], name='conv2')
  x = transition_block(x, 0.5, name='pool2')
  x = dense_block(x, blocks[1], name='conv3')
  x = transition_block(x, 0.5, name='pool3')
  x = dense_block(x, blocks[2], name='conv4')
  x = transition_block(x, 0.5, name='pool4')
  x = dense_block(x, blocks[3], name='conv5')

  x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='bn')(x)
  x = layers.Activation('relu', name='relu')(x)

  if include_top:
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

    imagenet_utils.validate_activation(classifier_activation, weights)
    x = layers.Dense(classes, activation=classifier_activation,
                     name='predictions')(x)
  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D(name='max_pool')(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  if blocks == [6, 12, 24, 16]:
    model = training.Model(inputs, x, name='densenet121')
  elif blocks == [6, 12, 32, 32]:
    model = training.Model(inputs, x, name='densenet169')
  elif blocks == [6, 12, 48, 32]:
    model = training.Model(inputs, x, name='densenet201')
  else:
    model = training.Model(inputs, x, name='densenet')

  # Load weights.
  if weights == 'imagenet':
    if include_top:
      if blocks == [6, 12, 24, 16]:
        weights_path = data_utils.get_file(
            'densenet121_weights_tf_dim_ordering_tf_kernels.h5',
            DENSENET121_WEIGHT_PATH,
            cache_subdir='models',
            file_hash='9d60b8095a5708f2dcce2bca79d332c7')
      elif blocks == [6, 12, 32, 32]:
        weights_path = data_utils.get_file(
            'densenet169_weights_tf_dim_ordering_tf_kernels.h5',
            DENSENET169_WEIGHT_PATH,
            cache_subdir='models',
            file_hash='d699b8f76981ab1b30698df4c175e90b')
      elif blocks == [6, 12, 48, 32]:
        weights_path = data_utils.get_file(
            'densenet201_weights_tf_dim_ordering_tf_kernels.h5',
            DENSENET201_WEIGHT_PATH,
            cache_subdir='models',
            file_hash='1ceb130c1ea1b78c3bf6114dbdfd8807')
    else:
      if blocks == [6, 12, 24, 16]:
        weights_path = data_utils.get_file(
            'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5',
            DENSENET121_WEIGHT_PATH_NO_TOP,
            cache_subdir='models',
            file_hash='30ee3e1110167f948a6b9946edeeb738')
      elif blocks == [6, 12, 32, 32]:
        weights_path = data_utils.get_file(
            'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5',
            DENSENET169_WEIGHT_PATH_NO_TOP,
            cache_subdir='models',
            file_hash='b8c4d4c20dd625c148057b9ff1c1176b')
      elif blocks == [6, 12, 48, 32]:
        weights_path = data_utils.get_file(
            'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5',
            DENSENET201_WEIGHT_PATH_NO_TOP,
            cache_subdir='models',
            file_hash='c13680b51ded0fb44dff2d8f86ac8bb1')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model
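
A quick arithmetic check of the naming above (an illustration, not part of the
source): the `blocks` list determines the depth that appears in the model name.

# Each conv_block contributes a 1x1 and a 3x3 convolution; the three
# transition blocks contribute one 1x1 convolution each; the stem conv and
# the final Dense layer complete the count.
blocks = [6, 12, 24, 16]
assert 2 * sum(blocks) + 3 + 1 + 1 == 121  # DenseNet-121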
Esempio n. 34
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              classifier_activation='softmax',
              **kwargs):
    """Instantiates the MobileNet architecture.

  Reference paper:
  - [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision
    Applications](https://arxiv.org/abs/1704.04861)

  Optionally loads weights pre-trained on ImageNet.
  Note that the data format convention used by the model is
  the one specified in the `tf.keras.backend.image_data_format()`.

  Caution: Be sure to properly pre-process your inputs to the application.
  Please see `applications.mobilenet.preprocess_input` for an example.

  Arguments:
    input_shape: Optional shape tuple, only to be specified if `include_top`
      is False (otherwise the input shape has to be `(224, 224, 3)` with
      `channels_last` data format, or `(3, 224, 224)` with `channels_first`
      data format). It should have exactly 3 input channels, and width and
      height should be no smaller than 32. E.g. `(200, 200, 3)` would be one
      valid value. Defaults to `None`.
      `input_shape` will be ignored if `input_tensor` is provided.
    alpha: Controls the width of the network. This is known as the width
      multiplier in the MobileNet paper.
      - If `alpha` < 1.0, proportionally decreases the number of filters
          in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
          in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
          used at each layer.
      Defaults to 1.0.
    depth_multiplier: Depth multiplier for depthwise convolution. This is
      called the resolution multiplier in the MobileNet paper. Defaults to 1.
    dropout: Dropout rate. Defaults to 0.001.
    include_top: Boolean, whether to include the fully-connected layer at the
      top of the network. Defaults to `True`.
    weights: One of `None` (random initialization), `'imagenet'` (pre-training
      on ImageNet), or the path to the weights file to be loaded. Defaults to
      `'imagenet'`.
    input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to
      use as image input for the model. `input_tensor` is useful for sharing
      inputs between multiple different networks. Defaults to `None`.
    pooling: Optional pooling mode for feature extraction when `include_top`
      is `False`.
      - `None` (default) means that the output of the model will be
          the 4D tensor output of the last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a 2D tensor.
      - `max` means that global max pooling will be applied.
    classes: Optional number of classes to classify images into, only to be
      specified if `include_top` is True, and if no `weights` argument is
      specified. Defaults to 1000.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
    **kwargs: For backwards compatibility only.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
    if 'layers' in kwargs:
        global layers
        layers = kwargs.pop('layers')
    if kwargs:
        raise ValueError('Unknown argument(s): %s' % (kwargs, ))
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if depth_multiplier != 1:
            raise ValueError('If imagenet weights are being loaded, '
                             'depth multiplier must be 1')

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of'
                             '`0.25`, `0.50`, `0.75` or `1.0` only.')

        if rows != cols or rows not in [128, 160, 192, 224]:
            rows = 224
            logging.warning('`input_shape` is undefined or non-square, '
                            'or `rows` is not in [128, 160, 192, 224]. '
                            'Weights for input shape (224, 224) will be'
                            ' loaded as the default.')

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

    x = _depthwise_conv_block(x,
                              128,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

    x = _depthwise_conv_block(x,
                              256,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

    x = _depthwise_conv_block(x,
                              512,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=6)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

    x = _depthwise_conv_block(x,
                              1024,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=12)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

    if include_top:
        if backend.image_data_format() == 'channels_first':
            shape = (int(1024 * alpha), 1, 1)
        else:
            shape = (1, 1, int(1024 * alpha))

        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Reshape(shape, name='reshape_1')(x)
        x = layers.Dropout(dropout, name='dropout')(x)
        x = layers.Conv2D(classes, (1, 1), padding='same',
                          name='conv_preds')(x)
        x = layers.Reshape((classes, ), name='reshape_2')(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(activation=classifier_activation,
                              name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs,
                           x,
                           name='mobilenet_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if alpha == 1.0:
            alpha_text = '1_0'
        elif alpha == 0.75:
            alpha_text = '7_5'
        elif alpha == 0.50:
            alpha_text = '5_0'
        else:
            alpha_text = '2_5'

        if include_top:
            model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = data_utils.get_file(model_name,
                                               weight_path,
                                               cache_subdir='models')
        else:
            model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = data_utils.get_file(model_name,
                                               weight_path,
                                               cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
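
A minimal usage sketch for the builder above, assuming it is exposed as
`tf.keras.applications.MobileNet` as in stock Keras; alpha=0.75 at 160x160 is
one of the published checkpoint combinations.

import tensorflow as tf

# alpha in {0.25, 0.50, 0.75, 1.0} and rows in {128, 160, 192, 224}
# select a matching pretrained ImageNet checkpoint.
base = tf.keras.applications.MobileNet(input_shape=(160, 160, 3),
                                       alpha=0.75,
                                       include_top=False,
                                       pooling='avg',
                                       weights='imagenet')
features = base(tf.zeros([1, 160, 160, 3]))  # shape (1, int(1024 * 0.75))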
Esempio n. 35
def _reduction_a_cell(ip, p, filters, block_id=None):
    """Adds a Reduction cell for NASNet-A (Fig. 4 in the paper).

  Args:
    ip: Input tensor `x`
    p: Input tensor `p`
    filters: Number of output filters
    block_id: String block_id

  Returns:
    A Keras tensor
  """
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('reduction_A_block_%s' % block_id):
        p = _adjust_block(p, ip, filters, block_id)

        h = layers.Activation('relu')(ip)
        h = layers.Conv2D(filters, (1, 1),
                          strides=(1, 1),
                          padding='same',
                          name='reduction_conv_1_%s' % block_id,
                          use_bias=False,
                          kernel_initializer='he_normal')(h)
        h = layers.BatchNormalization(axis=channel_dim,
                                      momentum=0.9997,
                                      epsilon=1e-3,
                                      name='reduction_bn_1_%s' % block_id)(h)
        h3 = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(h, 3),
                                  name='reduction_pad_1_%s' % block_id)(h)

        with backend.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_left1_%s' %
                                         block_id)
            x1_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right1_%s' %
                                         block_id)
            x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id)

        with backend.name_scope('block_2'):
            x2_1 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left2_%s' % block_id)(h3)
            x2_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right2_%s' %
                                         block_id)
            x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id)

        with backend.name_scope('block_3'):
            x3_1 = layers.AveragePooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left3_%s' % block_id)(h3)
            x3_2 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_right3_%s' %
                                         block_id)
            x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id)

        with backend.name_scope('block_4'):
            x4 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='reduction_left4_%s' % block_id)(x1)
            x4 = layers.add([x2, x4])

        with backend.name_scope('block_5'):
            x5_1 = _separable_conv_block(x1,
                                         filters, (3, 3),
                                         block_id='reduction_left4_%s' %
                                         block_id)
            x5_2 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_right5_%s' % block_id)(h3)
            x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id)

        x = layers.concatenate([x2, x3, x4, x5],
                               axis=channel_dim,
                               name='reduction_concat_%s' % block_id)
        return x, ip
Esempio n. 36
    def _make_histogram_ops(self, model):
        """Defines histogram ops when histogram_freq > 0."""
        # only make histogram summary op if it hasn't already been made
        if self.histogram_freq and self.merged is None:
            for layer in self.model.layers:
                for weight in layer.weights:
                    mapped_weight_name = weight.name.replace(':', '_')
                    tf_summary.histogram(mapped_weight_name, weight)
                    if self.write_images:
                        w_img = array_ops.squeeze(weight)
                        shape = K.int_shape(w_img)
                        if len(shape) == 2:  # dense layer kernel case
                            if shape[0] > shape[1]:
                                w_img = array_ops.transpose(w_img)
                                shape = K.int_shape(w_img)
                            w_img = array_ops.reshape(
                                w_img, [1, shape[0], shape[1], 1])
                        elif len(shape) == 3:  # convnet case
                            if K.image_data_format() == 'channels_last':
                                # switch to channels_first to display
                                # every kernel as a separate image
                                w_img = array_ops.transpose(w_img,
                                                            perm=[2, 0, 1])
                                shape = K.int_shape(w_img)
                            w_img = array_ops.reshape(
                                w_img, [shape[0], shape[1], shape[2], 1])
                        elif len(shape) == 1:  # bias case
                            w_img = array_ops.reshape(w_img,
                                                      [1, shape[0], 1, 1])
                        else:
                            # not possible to handle 3D convnets etc.
                            continue

                        shape = K.int_shape(w_img)
                        assert len(shape) == 4 and shape[-1] in [1, 3, 4]
                        tf_summary.image(mapped_weight_name, w_img)

                if self.write_grads:
                    for weight in layer.trainable_weights:
                        mapped_weight_name = weight.name.replace(':', '_')
                        grads = model.optimizer.get_gradients(
                            model.total_loss, weight)

                        def is_indexed_slices(grad):
                            return type(grad).__name__ == 'IndexedSlices'

                        grads = [
                            grad.values if is_indexed_slices(grad) else grad
                            for grad in grads
                        ]
                        tf_summary.histogram(
                            '{}_grad'.format(mapped_weight_name), grads)

                if hasattr(layer, 'output'):
                    if isinstance(layer.output, list):
                        for i, output in enumerate(layer.output):
                            tf_summary.histogram(
                                '{}_out_{}'.format(layer.name, i), output)
                    else:
                        tf_summary.histogram('{}_out'.format(layer.name),
                                             layer.output)
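
To make the image-summary reshaping above concrete, here is the convnet case
replayed in plain NumPy (an illustration under the same shape assumptions,
not code from the source):

import numpy as np

w = np.zeros((3, 3, 32))            # a squeezed 3x3 kernel with 32 filters
w_img = np.transpose(w, (2, 0, 1))  # to channels_first: (32, 3, 3)
w_img = w_img.reshape(32, 3, 3, 1)  # 32 grayscale images for tf.summary
assert w_img.ndim == 4 and w_img.shape[-1] in (1, 3, 4)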
Esempio n. 37
def ResNet(input_shape=None,
           classes=10,
           block='bottleneck',
           residual_unit='v2',
           repetitions=None,
           initial_filters=64,
           activation='softmax',
           include_top=True,
           input_tensor=None,
           dropout=None,
           transition_dilation_rate=(1, 1),
           initial_strides=(2, 2),
           initial_kernel_size=(7, 7),
           initial_pooling='max',
           final_pooling=None,
           top='classification'):
    """Builds a custom ResNet like architecture. Defaults to ResNet50 v2.

    Args:
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` with `channels_last` dim ordering
            or `(3, 224, 224)` with `channels_first` dim ordering).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 8.
            E.g. `(224, 224, 3)` would be one valid value.
        classes: The number of outputs at the final softmax layer.
        block: The block function to use. This is either `'basic'` or `'bottleneck'`.
            The original paper used `basic` for networks with fewer than 50 layers.
        repetitions: Number of repetitions of the various block units.
            At each block unit, the number of filters is doubled and the input size is halved.
            Default of None implies the ResNet50v2 values of [3, 4, 6, 3].
        residual_unit: The basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu conv.
            See [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on the [Wide Residual Networks](https://arxiv.org/abs/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers, used for
            pixel-wise prediction tasks such as image segmentation. For semantic
            segmentation of images use a dilation rate of (2, 2).
        initial_strides: Stride of the very first residual unit and MaxPooling2D call,
            with default (2, 2), set to (1, 1) for small images like cifar.
        initial_kernel_size: kernel size of the very first convolution, (7, 7) for imagenet
            and (3, 3) for small image datasets like tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        initial_pooling: Determine if there will be an initial pooling layer,
            'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        final_pooling: Optional pooling mode for feature extraction at the final model layer
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        top: Defines final layers to evaluate based on a specific problem type. Options are
            'classification' for ImageNet style problems, 'segmentation' for problems like
            the Pascal VOC dataset, and None to exclude these layers entirely.

    Returns:
        The keras `Model`.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError(
            'activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError(
            'sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]
    _handle_dim_ordering()
    if len(input_shape) != 3:
        raise ValueError(
            "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)
    else:
        pass  # `residual_unit` is already a callable

    # Permute dimension order if necessary
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_bn_relu(filters=initial_filters,
                      kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling2D(pool_size=(3, 3),
                         strides=initial_strides,
                         padding="same")(x)

    block = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1) and i != 0:
            transition_strides[0] = (2, 2)
        block = _residual_block(
            block_fn,
            filters=filters,
            stage=i,
            blocks=r,
            is_first_layer=(i == 0),
            dropout=dropout,
            transition_dilation_rates=transition_dilation_rates,
            transition_strides=transition_strides,
            residual_unit=residual_unit)(block)
        filters *= 2

    # Last activation
    x = _bn_relu(block)

    # Classifier block
    if include_top and top == 'classification':
        x = GlobalAveragePooling2D()(x)
        x = Dense(units=classes,
                  activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)

        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    model = Model(inputs=inputs, outputs=x, name='resnet')
    return model
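
A hypothetical call to this builder, configured roughly like a ResNet-18 for
CIFAR-sized inputs as the `initial_*` argument docs suggest:

model = ResNet(input_shape=(32, 32, 3),
               classes=10,
               block='basic',
               repetitions=[2, 2, 2, 2],
               initial_kernel_size=(3, 3),
               initial_strides=(1, 1),
               initial_pooling=None)
model.summary()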
Esempio n. 38
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
  """Inverted ResNet block."""
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

  in_channels = backend.int_shape(inputs)[channel_axis]
  pointwise_conv_filters = int(filters * alpha)
  pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
  x = inputs
  prefix = 'block_{}_'.format(block_id)

  if block_id:
    # Expand
    x = layers.Conv2D(
        expansion * in_channels,
        kernel_size=1,
        padding='same',
        use_bias=False,
        activation=None,
        name=prefix + 'expand')(
            x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        epsilon=1e-3,
        momentum=0.999,
        name=prefix + 'expand_BN')(
            x)
    x = layers.ReLU(6., name=prefix + 'expand_relu')(x)
  else:
    prefix = 'expanded_conv_'

  # Depthwise
  if stride == 2:
    x = layers.ZeroPadding2D(
        padding=imagenet_utils.correct_pad(x, 3),
        name=prefix + 'pad')(x)
  x = layers.DepthwiseConv2D(
      kernel_size=3,
      strides=stride,
      activation=None,
      use_bias=False,
      padding='same' if stride == 1 else 'valid',
      name=prefix + 'depthwise')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'depthwise_BN')(
          x)

  x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x)

  # Project
  x = layers.Conv2D(
      pointwise_filters,
      kernel_size=1,
      padding='same',
      use_bias=False,
      activation=None,
      name=prefix + 'project')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis,
      epsilon=1e-3,
      momentum=0.999,
      name=prefix + 'project_BN')(
          x)

  if in_channels == pointwise_filters and stride == 1:
    return layers.Add(name=prefix + 'add')([inputs, x])
  return x
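
`_make_divisible` is not shown in this excerpt; the helper used by the stock
Keras MobileNet code matches the sketch below. It rounds a channel count to
the nearest multiple of `divisor` while never dropping more than 10% below
the requested value.

def _make_divisible(v, divisor, min_value=None):
  """Rounds `v` to the nearest multiple of `divisor`, at least `min_value`."""
  if min_value is None:
    min_value = divisor
  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  # Make sure that rounding down does not shrink the value by more than 10%.
  if new_v < 0.9 * v:
    new_v += divisor
  return new_v

assert _make_divisible(32 * 0.75, 8) == 24  # e.g. first block with alpha=0.75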
Esempio n. 39
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                classifier_activation='softmax',
                **kwargs):
  """Instantiates the MobileNetV2 architecture.

  MobileNetV2 is very similar to the original MobileNet,
  except that it uses inverted residual blocks with
  bottlenecking features. It has a drastically lower
  parameter count than the original MobileNet.
  MobileNets support any input size greater
  than 32 x 32, with larger image sizes
  offering better performance.

  Reference:
  - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](
      https://arxiv.org/abs/1801.04381) (CVPR 2018)

  This function returns a Keras image classification model,
  optionally loaded with weights pre-trained on ImageNet.

  For image classification use cases, see
  [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

  For transfer learning use cases, make sure to read the
  [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

  Note: each Keras Application expects a specific kind of input preprocessing.
  For MobileNetV2, call `tf.keras.applications.mobilenet_v2.preprocess_input`
  on your inputs before passing them to the model.
  `mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1.

  Args:
    input_shape: Optional shape tuple, to be specified if you would
      like to use a model with an input image resolution that is not
      (224, 224, 3).
      It should have exactly 3 input channels, e.g. (224, 224, 3).
      You can also omit this option if you would like
      to infer `input_shape` from an `input_tensor`.
      If you choose to include both `input_tensor` and `input_shape`, then
      `input_shape` will be used if they match; if the shapes
      do not match, an error will be raised.
      E.g. `(160, 160, 3)` would be one valid value.
    alpha: Float between 0 and 1. controls the width of the network.
      This is known as the width multiplier in the MobileNetV2 paper,
      but the name is kept for consistency with `applications.MobileNetV1`
      model in Keras.
      - If `alpha` < 1.0, proportionally decreases the number
          of filters in each layer.
      - If `alpha` > 1.0, proportionally increases the number
          of filters in each layer.
      - If `alpha` = 1.0, default number of filters from the paper
          are used at each layer.
    include_top: Boolean, whether to include the fully-connected
      layer at the top of the network. Defaults to `True`.
    weights: String, one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: Optional Keras tensor (i.e. output of
      `layers.Input()`)
      to use as image input for the model.
    pooling: String, optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model
          will be the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a
          2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: Integer, optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
      When loading pretrained weights, `classifier_activation` can only
      be `None` or `"softmax"`.
    **kwargs: For backwards compatibility only.

  Returns:
    A `keras.Model` instance.
  """
  global layers
  if 'layers' in kwargs:
    layers = kwargs.pop('layers')
  else:
    layers = VersionAwareLayers()
  if kwargs:
    raise ValueError('Unknown argument(s): %s' % (kwargs,))
  if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  # Determine proper input shape and default size.
  # If both input_shape and input_tensor are used, they should match
  if input_shape is not None and input_tensor is not None:
    try:
      is_input_t_tensor = backend.is_keras_tensor(input_tensor)
    except ValueError:
      try:
        is_input_t_tensor = backend.is_keras_tensor(
            layer_utils.get_source_inputs(input_tensor))
      except ValueError:
        raise ValueError('input_tensor: ', input_tensor,
                         'is not type input_tensor')
    if is_input_t_tensor:
      if backend.image_data_format() == 'channels_first':
        if backend.int_shape(input_tensor)[1] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
      else:
        if backend.int_shape(input_tensor)[2] != input_shape[1]:
          raise ValueError('input_shape: ', input_shape, 'and input_tensor: ',
                           input_tensor,
                           'do not meet the same shape requirements')
    else:
      raise ValueError('input_tensor specified: ', input_tensor,
                       'is not a keras tensor')

  # If input_shape is None, infer shape from input_tensor
  if input_shape is None and input_tensor is not None:

    try:
      backend.is_keras_tensor(input_tensor)
    except ValueError:
      raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                       type(input_tensor), 'which is not a valid type')

    if input_shape is None and not backend.is_keras_tensor(input_tensor):
      default_size = 224
    elif input_shape is None and backend.is_keras_tensor(input_tensor):
      if backend.image_data_format() == 'channels_first':
        rows = backend.int_shape(input_tensor)[2]
        cols = backend.int_shape(input_tensor)[3]
      else:
        rows = backend.int_shape(input_tensor)[1]
        cols = backend.int_shape(input_tensor)[2]

      if rows == cols and rows in [96, 128, 160, 192, 224]:
        default_size = rows
      else:
        default_size = 224

  # If input_shape is None and no input_tensor
  elif input_shape is None:
    default_size = 224

  # If input_shape is not None, assume default size
  else:
    if backend.image_data_format() == 'channels_first':
      rows = input_shape[1]
      cols = input_shape[2]
    else:
      rows = input_shape[0]
      cols = input_shape[1]

    if rows == cols and rows in [96, 128, 160, 192, 224]:
      default_size = rows
    else:
      default_size = 224

  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=default_size,
      min_size=32,
      data_format=backend.image_data_format(),
      require_flatten=include_top,
      weights=weights)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]

  if weights == 'imagenet':
    if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of `0.35`, `0.50`, `0.75`, '
                       '`1.0`, `1.3` or `1.4` only.')

    if rows != cols or rows not in [96, 128, 160, 192, 224]:
      rows = 224
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not in [96, 128, 160, 192, 224].'
                      ' Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    else:
      img_input = input_tensor

  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

  first_block_filters = _make_divisible(32 * alpha, 8)
  x = layers.Conv2D(
      first_block_filters,
      kernel_size=3,
      strides=(2, 2),
      padding='same',
      use_bias=False,
      name='Conv1')(img_input)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')(
          x)
  x = layers.ReLU(6., name='Conv1_relu')(x)

  x = _inverted_res_block(
      x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0)

  x = _inverted_res_block(
      x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1)
  x = _inverted_res_block(
      x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2)

  x = _inverted_res_block(
      x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3)
  x = _inverted_res_block(
      x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4)
  x = _inverted_res_block(
      x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5)

  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6)
  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7)
  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8)
  x = _inverted_res_block(
      x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9)

  x = _inverted_res_block(
      x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10)
  x = _inverted_res_block(
      x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11)
  x = _inverted_res_block(
      x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12)

  x = _inverted_res_block(
      x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13)
  x = _inverted_res_block(
      x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14)
  x = _inverted_res_block(
      x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15)

  x = _inverted_res_block(
      x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16)

  # no alpha applied to last conv as stated in the paper:
  # if the width multiplier is greater than 1 we
  # increase the number of output channels
  if alpha > 1.0:
    last_block_filters = _make_divisible(1280 * alpha, 8)
  else:
    last_block_filters = 1280

  x = layers.Conv2D(
      last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')(
          x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(
          x)
  x = layers.ReLU(6., name='out_relu')(x)

  if include_top:
    x = layers.GlobalAveragePooling2D()(x)
    imagenet_utils.validate_activation(classifier_activation, weights)
    x = layers.Dense(classes, activation=classifier_activation,
                     name='predictions')(x)

  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D()(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

  # Load weights.
  if weights == 'imagenet':
    if include_top:
      model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                    str(float(alpha)) + '_' + str(rows) + '.h5')
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    else:
      model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                    str(float(alpha)) + '_' + str(rows) + '_no_top' + '.h5')
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model
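
Following the preprocessing note in the docstring, a minimal end-to-end
sketch using the standard `tf.keras.applications` API:

import numpy as np
import tensorflow as tf

model = tf.keras.applications.MobileNetV2(weights='imagenet')
img = np.random.randint(0, 256, size=(1, 224, 224, 3)).astype('float32')
x = tf.keras.applications.mobilenet_v2.preprocess_input(img)  # scales to [-1, 1]
preds = model.predict(x)
print(tf.keras.applications.mobilenet_v2.decode_predictions(preds, top=3))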
Esempio n. 40
    def __init__(self, num_classes=10, dtype="float32", batch_size=None):
        super(CustomModel, self).__init__(name="resnet50")

        if backend.image_data_format() == "channels_first":
            self._lambda = layers.Lambda(
                lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
                name="transpose",
            )
            bn_axis = 1
            data_format = "channels_first"
        else:
            bn_axis = 3
            data_format = "channels_last"

        self._padding = layers.ZeroPadding2D(padding=(3, 3),
                                             data_format=data_format,
                                             name="zero_pad")
        self._conv2d_1 = layers.Conv2D(
            64,
            (7, 7),
            strides=(2, 2),
            padding="valid",
            use_bias=False,
            kernel_initializer="he_normal",
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name="conv1",
        )
        self._bn_1 = layers.BatchNormalization(
            axis=bn_axis,
            momentum=BATCH_NORM_DECAY,
            epsilon=BATCH_NORM_EPSILON,
            name="bn_conv1",
        )
        self._activation_1 = layers.Activation("relu")
        self._maxpooling2d = layers.MaxPooling2D((3, 3),
                                                 strides=(2, 2),
                                                 padding="same")

        self._conv_block_1 = ConvBlock(3, [64, 64, 256],
                                       stage=2,
                                       block="a",
                                       strides=(1, 1))
        self._identity_block_1 = IdentityBlock(3, [64, 64, 256],
                                               stage=2,
                                               block="b")
        self._identity_block_2 = IdentityBlock(3, [64, 64, 256],
                                               stage=2,
                                               block="c")

        self._conv_block_2 = ConvBlock(3, [128, 128, 512], stage=3, block="a")
        self._identity_block_3 = IdentityBlock(3, [128, 128, 512],
                                               stage=3,
                                               block="b")
        self._identity_block_4 = IdentityBlock(3, [128, 128, 512],
                                               stage=3,
                                               block="c")
        self._identity_block_5 = IdentityBlock(3, [128, 128, 512],
                                               stage=3,
                                               block="d")

        self._conv_block_3 = ConvBlock(3, [256, 256, 1024], stage=4, block="a")
        self._identity_block_6 = IdentityBlock(3, [256, 256, 1024],
                                               stage=4,
                                               block="b")
        self._identity_block_7 = IdentityBlock(3, [256, 256, 1024],
                                               stage=4,
                                               block="c")
        self._identity_block_8 = IdentityBlock(3, [256, 256, 1024],
                                               stage=4,
                                               block="d")
        self._identity_block_9 = IdentityBlock(3, [256, 256, 1024],
                                               stage=4,
                                               block="e")
        self._identity_block_10 = IdentityBlock(3, [256, 256, 1024],
                                                stage=4,
                                                block="f")

        self._conv_block_4 = ConvBlock(3, [512, 512, 2048], stage=5, block="a")
        self._identity_block_11 = IdentityBlock(3, [512, 512, 2048],
                                                stage=5,
                                                block="b")
        self._identity_block_12 = IdentityBlock(3, [512, 512, 2048],
                                                stage=5,
                                                block="c")

        rm_axes = ([1, 2] if backend.image_data_format() == "channels_last"
                   else [2, 3])
        self._lamba_2 = layers.Lambda(lambda x: backend.mean(x, rm_axes),
                                      name="reduce_mean")
        self._dense = layers.Dense(
            num_classes,
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name="fc1000",
        )
        self._activation_2 = layers.Activation("softmax")
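
`L2_WEIGHT_DECAY`, `BATCH_NORM_DECAY` and `BATCH_NORM_EPSILON` are
module-level constants defined outside this excerpt; the values below are the
ones used by the TensorFlow official ResNet50 example (an assumption here,
not taken from this source):

L2_WEIGHT_DECAY = 1e-4      # assumed: L2 regularization strength
BATCH_NORM_DECAY = 0.9      # assumed: batch-norm momentum
BATCH_NORM_EPSILON = 1e-5   # assumed: batch-norm epsilon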
Esempio n. 41
def VGG16(include_top=True,
          weights='imagenet',
          input_tensor=None,
          input_shape=None,
          pooling=None,
          classes=1000,
          classifier_activation='softmax'):
    """Instantiates the VGG16 model.

  Reference paper:
  - [Very Deep Convolutional Networks for Large-Scale Image Recognition](
  https://arxiv.org/abs/1409.1556) (ICLR 2015)

  By default, it loads weights pre-trained on ImageNet. Check 'weights' for
  other options.

  This model can be built both with 'channels_first' data format
  (channels, height, width) or 'channels_last' data format
  (height, width, channels).

  The default input size for this model is 224x224.

  Caution: Be sure to properly pre-process your inputs to the application.
  Please see `applications.vgg16.preprocess_input` for an example.

  Arguments:
      include_top: whether to include the 3 fully-connected
          layers at the top of the network.
      weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
      input_tensor: optional Keras tensor
          (i.e. output of `layers.Input()`)
          to use as image input for the model.
      input_shape: optional shape tuple, only to be specified
          if `include_top` is False (otherwise the input shape
          has to be `(224, 224, 3)`
          with `channels_last` data format
          or `(3, 224, 224)` with `channels_first` data format).
          It should have exactly 3 input channels,
          and width and height should be no smaller than 32.
          E.g. `(200, 200, 3)` would be one valid value.
      pooling: Optional pooling mode for feature extraction
          when `include_top` is `False`.
          - `None` means that the output of the model will be
              the 4D tensor output of the
              last convolutional block.
          - `avg` means that global average pooling
              will be applied to the output of the
              last convolutional block, and thus
              the output of the model will be a 2D tensor.
          - `max` means that global max pooling will
              be applied.
      classes: optional number of classes to classify images
          into, only to be specified if `include_top` is True, and
          if no `weights` argument is specified.
      classifier_activation: A `str` or callable. The activation function to use
          on the "top" layer. Ignored unless `include_top=True`. Set
          `classifier_activation=None` to return the logits of the "top" layer.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
    if not (weights in {'imagenet', None} or file_io.file_exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')
    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor
    # Block 1
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv1')(img_input)
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv1')(x)
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv1')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv2')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv2')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv2')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    if include_top:
        # Classification block
        x = layers.Flatten(name='flatten')(x)
        x = layers.Dense(4096, activation='relu', name='fc1')(x)
        x = layers.Dense(4096, activation='relu', name='fc2')(x)

        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = training.Model(inputs, x, name='vgg16')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = data_utils.get_file(
                'vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='64373286793e3c8b2b4e3219cbf3544b')
        else:
            weights_path = data_utils.get_file(
                'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='6d6bbae143d832006294945121d1f1fc')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
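
A short feature-extraction sketch using the options documented above (stock
`tf.keras.applications` API; `vgg16.preprocess_input` is the caffe-style
preprocessing the docstring cautions about):

import numpy as np
import tensorflow as tf

extractor = tf.keras.applications.VGG16(include_top=False,
                                        pooling='avg',
                                        weights='imagenet')
img = np.random.randint(0, 256, size=(1, 224, 224, 3)).astype('float32')
x = tf.keras.applications.vgg16.preprocess_input(img)  # BGR, mean-subtracted
features = extractor.predict(x)                        # shape (1, 512)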
Esempio n. 42
def NASNet(input_shape=None,
           penultimate_filters=4032,
           num_blocks=6,
           stem_block_filters=96,
           skip_reduction=True,
           filter_multiplier=2,
           include_top=True,
           weights='imagenet',
           input_tensor=None,
           pooling=None,
           classes=1000,
           default_size=None,
           classifier_activation='softmax'):
    """Instantiates a NASNet model.

  Reference:
  - [Learning Transferable Architectures for Scalable Image Recognition](
      https://arxiv.org/abs/1707.07012) (CVPR 2018)

  Optionally loads weights pre-trained on ImageNet.
  Note that the data format convention used by the model is
  the one specified in your Keras config at `~/.keras/keras.json`.

  Args:
    input_shape: Optional shape tuple, the input shape
      is by default `(331, 331, 3)` for NASNetLarge and
      `(224, 224, 3)` for NASNetMobile.
      It should have exactly 3 input channels,
      and width and height should be no smaller than 32.
      E.g. `(224, 224, 3)` would be one valid value.
    penultimate_filters: Number of filters in the penultimate layer.
      NASNet models use the notation `NASNet (N @ P)`, where:
          -   N is the number of blocks
          -   P is the number of penultimate filters
    num_blocks: Number of repeated blocks of the NASNet model.
      NASNet models use the notation `NASNet (N @ P)`, where:
          -   N is the number of blocks
          -   P is the number of penultimate filters
    stem_block_filters: Number of filters in the initial stem block
    skip_reduction: Whether to skip the reduction step at the tail
      end of the network.
    filter_multiplier: Controls the width of the network.
      - If `filter_multiplier` < 1.0, proportionally decreases the number
          of filters in each layer.
      - If `filter_multiplier` > 1.0, proportionally increases the number
          of filters in each layer.
      - If `filter_multiplier` = 1, default number of filters from the
           paper are used at each layer.
    include_top: Whether to include the fully-connected
      layer at the top of the network.
    weights: `None` (random initialization) or
        `imagenet` (ImageNet weights)
    input_tensor: Optional Keras tensor (i.e. output of
      `layers.Input()`)
      to use as image input for the model.
    pooling: Optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model
          will be the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a
          2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: Optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    default_size: Specifies the default image size of the model
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: In case of invalid argument for `weights`,
      invalid input shape or invalid `penultimate_filters` value.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
    if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    if (isinstance(input_shape, tuple) and None in input_shape
            and weights == 'imagenet'):
        raise ValueError('When specifying the input shape of a NASNet'
                         ' and loading `ImageNet` weights, '
                         'the input_shape argument must be static '
                         '(no None entries). Got: `input_shape=' +
                         str(input_shape) + '`.')

    if default_size is None:
        default_size = 331

    # Determine proper input shape and default size.
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if backend.image_data_format() != 'channels_last':
        logging.warning(
            'The NASNet family of models is only available '
            'for the input data format "channels_last" '
            '(width, height, channels). '
            'However your settings specify the default '
            'data format "channels_first" (channels, width, height).'
            ' You should set `image_data_format="channels_last"` '
            'in your Keras config located at ~/.keras/keras.json. '
            'The model being returned right now will expect inputs '
            'to follow the "channels_last" data format.')
        backend.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    if penultimate_filters % (24 * (filter_multiplier**2)) != 0:
        raise ValueError(
            'For NASNet-A models, the `penultimate_filters` must be a multiple '
            'of 24 * (`filter_multiplier` ** 2). Current value: %d' %
            penultimate_filters)

    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1
    filters = penultimate_filters // 24

    x = layers.Conv2D(stem_block_filters, (3, 3),
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='stem_conv1',
                      kernel_initializer='he_normal')(img_input)

    x = layers.BatchNormalization(axis=channel_dim,
                                  momentum=0.9997,
                                  epsilon=1e-3,
                                  name='stem_bn1')(x)

    p = None
    x, p = _reduction_a_cell(x,
                             p,
                             filters // (filter_multiplier**2),
                             block_id='stem_1')
    x, p = _reduction_a_cell(x,
                             p,
                             filters // filter_multiplier,
                             block_id='stem_2')

    for i in range(num_blocks):
        x, p = _normal_a_cell(x, p, filters, block_id='%d' % (i))

    x, p0 = _reduction_a_cell(x,
                              p,
                              filters * filter_multiplier,
                              block_id='reduce_%d' % (num_blocks))

    p = p0 if not skip_reduction else p

    for i in range(num_blocks):
        x, p = _normal_a_cell(x,
                              p,
                              filters * filter_multiplier,
                              block_id='%d' % (num_blocks + i + 1))

    x, p0 = _reduction_a_cell(x,
                              p,
                              filters * filter_multiplier**2,
                              block_id='reduce_%d' % (2 * num_blocks))

    p = p0 if not skip_reduction else p

    for i in range(num_blocks):
        x, p = _normal_a_cell(x,
                              p,
                              filters * filter_multiplier**2,
                              block_id='%d' % (2 * num_blocks + i + 1))

    x = layers.Activation('relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = training.Model(inputs, x, name='NASNet')

    # Load weights.
    if weights == 'imagenet':
        if default_size == 224:  # mobile version
            if include_top:
                weights_path = data_utils.get_file(
                    'nasnet_mobile.h5',
                    NASNET_MOBILE_WEIGHT_PATH,
                    cache_subdir='models',
                    file_hash='020fb642bf7360b370c678b08e0adf61')
            else:
                weights_path = data_utils.get_file(
                    'nasnet_mobile_no_top.h5',
                    NASNET_MOBILE_WEIGHT_PATH_NO_TOP,
                    cache_subdir='models',
                    file_hash='1ed92395b5b598bdda52abe5c0dbfd63')
            model.load_weights(weights_path)
        elif default_size == 331:  # large version
            if include_top:
                weights_path = data_utils.get_file(
                    'nasnet_large.h5',
                    NASNET_LARGE_WEIGHT_PATH,
                    cache_subdir='models',
                    file_hash='11577c9a518f0070763c2b964a382f17')
            else:
                weights_path = data_utils.get_file(
                    'nasnet_large_no_top.h5',
                    NASNET_LARGE_WEIGHT_PATH_NO_TOP,
                    cache_subdir='models',
                    file_hash='d81d89dc07e6e56530c4e77faddd61b5')
            model.load_weights(weights_path)
        else:
            raise ValueError(
                'ImageNet weights can only be loaded with NASNetLarge'
                ' or NASNetMobile')
    elif weights is not None:
        model.load_weights(weights)

    if old_data_format:
        backend.set_image_data_format(old_data_format)

    return model
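
# Usage sketch (not part of the original example): the public NASNetMobile /
# NASNetLarge wrappers call NASNet() with default_size 224 / 331. A headless
# mobile variant with global average pooling:
import tensorflow as tf

model = tf.keras.applications.NASNetMobile(include_top=False,
                                           weights='imagenet',
                                           input_shape=(224, 224, 3),
                                           pooling='avg')
print(model.output_shape)  # (None, 1056) for NASNet-A (4 @ 1056)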
Example no. 43
def InceptionResNetV2(include_top=True,
                      weights='imagenet',
                      input_tensor=None,
                      input_shape=None,
                      pooling=None,
                      classes=1000,
                      classifier_activation='softmax',
                      **kwargs):
    """Instantiates the Inception-ResNet v2 architecture.

  Reference:
  - [Inception-v4, Inception-ResNet and the Impact of
     Residual Connections on Learning](https://arxiv.org/abs/1602.07261)
    (AAAI 2017)

  This function returns a Keras image classification model,
  optionally loaded with weights pre-trained on ImageNet.

  For image classification use cases, see
  [this page for detailed examples](
    https://keras.io/api/applications/#usage-examples-for-image-classification-models).

  For transfer learning use cases, make sure to read the
  [guide to transfer learning & fine-tuning](
    https://keras.io/guides/transfer_learning/).

  Note: each Keras Application expects a specific kind of input preprocessing.
  For InceptionResNetV2, call
  `tf.keras.applications.inception_resnet_v2.preprocess_input`
  on your inputs before passing them to the model.
  `inception_resnet_v2.preprocess_input`
  will scale input pixels between -1 and 1.

  Args:
    include_top: whether to include the fully-connected
      layer at the top of the network.
    weights: one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
      to use as image input for the model.
    input_shape: optional shape tuple, only to be specified
      if `include_top` is `False` (otherwise the input shape
      has to be `(299, 299, 3)` (with `'channels_last'` data format)
      or `(3, 299, 299)` (with `'channels_first'` data format)).
      It should have exactly 3 input channels,
      and width and height should be no smaller than 75.
      E.g. `(150, 150, 3)` would be one valid value.
    pooling: Optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model will be
          the 4D tensor output of the last convolutional block.
      - `'avg'` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a 2D tensor.
      - `'max'` means that global max pooling will be applied.
    classes: optional number of classes to classify images
      into, only to be specified if `include_top` is `True`, and
      if no `weights` argument is specified.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
      When loading pretrained weights, `classifier_activation` can only
      be `None` or `"softmax"`.
    **kwargs: For backwards compatibility only.

  Returns:
    A `keras.Model` instance.
  """
    global layers
    if 'layers' in kwargs:
        layers = kwargs.pop('layers')
    else:
        layers = VersionAwareLayers()
    if kwargs:
        raise ValueError('Unknown argument(s): %s' % (kwargs, ))
    if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=299,
        min_size=75,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Stem block: 35 x 35 x 192
    x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid')
    x = conv2d_bn(x, 32, 3, padding='valid')
    x = conv2d_bn(x, 64, 3)
    x = layers.MaxPooling2D(3, strides=2)(x)
    x = conv2d_bn(x, 80, 1, padding='valid')
    x = conv2d_bn(x, 192, 3, padding='valid')
    x = layers.MaxPooling2D(3, strides=2)(x)

    # Mixed 5b (Inception-A block): 35 x 35 x 320
    branch_0 = conv2d_bn(x, 96, 1)
    branch_1 = conv2d_bn(x, 48, 1)
    branch_1 = conv2d_bn(branch_1, 64, 5)
    branch_2 = conv2d_bn(x, 64, 1)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3
    x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches)

    # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320
    for block_idx in range(1, 11):
        x = inception_resnet_block(x,
                                   scale=0.17,
                                   block_type='block35',
                                   block_idx=block_idx)

    # Mixed 6a (Reduction-A block): 17 x 17 x 1088
    branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 256, 3)
    branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid')
    branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x)
    branches = [branch_0, branch_1, branch_pool]
    x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches)

    # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088
    for block_idx in range(1, 21):
        x = inception_resnet_block(x,
                                   scale=0.1,
                                   block_type='block17',
                                   block_idx=block_idx)

    # Mixed 7a (Reduction-B block): 8 x 8 x 2080
    branch_0 = conv2d_bn(x, 256, 1)
    branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid')
    branch_2 = conv2d_bn(x, 256, 1)
    branch_2 = conv2d_bn(branch_2, 288, 3)
    branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid')
    branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches)

    # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080
    for block_idx in range(1, 10):
        x = inception_resnet_block(x,
                                   scale=0.2,
                                   block_type='block8',
                                   block_idx=block_idx)
    x = inception_resnet_block(x,
                               scale=1.,
                               activation=None,
                               block_type='block8',
                               block_idx=10)

    # Final convolution block: 8 x 8 x 1536
    x = conv2d_bn(x, 1536, 1, name='conv_7b')

    if include_top:
        # Classification block
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name='inception_resnet_v2')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5'
            weights_path = data_utils.get_file(
                fname,
                BASE_WEIGHT_URL + fname,
                cache_subdir='models',
                file_hash='e693bd0210a403b3192acc6073ad2e96')
        else:
            fname = ('inception_resnet_v2_weights_'
                     'tf_dim_ordering_tf_kernels_notop.h5')
            weights_path = data_utils.get_file(
                fname,
                BASE_WEIGHT_URL + fname,
                cache_subdir='models',
                file_hash='d19885ff4a710c122648d3b5c3b684e4')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
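
# Usage sketch (not part of the original example): classifying a dummy batch
# with the documented preprocessing; the random input stands in for real
# images, and the top-3 predictions are decoded for display.
import numpy as np
import tensorflow as tf

model = tf.keras.applications.InceptionResNetV2(weights='imagenet')
x = np.random.uniform(0, 255, size=(1, 299, 299, 3))
x = tf.keras.applications.inception_resnet_v2.preprocess_input(x)
preds = model.predict(x)
print(tf.keras.applications.inception_resnet_v2.decode_predictions(preds, top=3))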
Example no. 44
    def resnet152(self, num_classes, dtype='float32', batch_size=None):
        # TODO(tfboyd): add training argument, just like resnet56.
        """Instantiates the ResNet-152 architecture.

    Args:
      num_classes: `int` number of classes for image classification.
      dtype: dtype of the model's input images (e.g. `'float32'`).
      batch_size: optional static batch size for the input layer.

    Returns:
        A Keras model instance.
    """
        input_shape = (384, 384, 3)
        img_input = layers.Input(shape=input_shape,
                                 dtype=dtype,
                                 batch_size=batch_size)

        if backend.image_data_format() == 'channels_first':
            x = layers.Lambda(
                lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
                name='transpose')(img_input)
            bn_axis = 1
        else:  # channels_last
            x = img_input
            bn_axis = 3

        x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
        x = layers.Conv2D(64, (7, 7),
                          strides=(2, 2),
                          padding='valid',
                          use_bias=False,
                          kernel_initializer='he_normal',
                          kernel_regularizer=regularizers.l2(
                              self.L2_WEIGHT_DECAY),
                          name='conv1')(x)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=self.BATCH_NORM_DECAY,
                                      epsilon=self.BATCH_NORM_EPSILON,
                                      name='bn_conv1')(x)
        x = layers.Activation('relu')(x)
        x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

        x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
        x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
        x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

        x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='e')
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='f')
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='g')
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='h')

        x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='g')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='h')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='i')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='j')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='k')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='l')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='m')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='n')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='o')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='p')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='q')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='r')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='s')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='t')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='u')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='v')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='w')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='x')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='y')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='z')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='aa')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ab')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ac')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ad')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ae')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='af')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ag')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ah')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ai')
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='aj')

        x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

        rm_axes = [
            1, 2
        ] if backend.image_data_format() == 'channels_last' else [2, 3]
        x = layers.Lambda(lambda x: backend.mean(x, rm_axes),
                          name='reduce_mean')(x)
        x = layers.Dense(
            num_classes,
            kernel_regularizer=regularizers.l2(self.L2_WEIGHT_DECAY),
            bias_regularizer=regularizers.l2(self.L2_WEIGHT_DECAY),
            name='fc1000')(x)
        # TODO(reedwm): Remove manual casts once mixed precision can be enabled with a
        # single line of code.
        x = backend.cast(x, 'float32')
        x = layers.Activation('softmax')(x)

        # Create model.
        return models.Model(img_input, x, name='resnet152')
Example no. 45
def _normal_a_cell(ip, p, filters, block_id=None):
    """Adds a Normal cell for NASNet-A (Fig. 4 in the paper).

  Args:
      ip: Input tensor `x`
      p: Input tensor `p`
      filters: Number of output filters
      block_id: String block_id

  Returns:
      A Keras tensor
  """
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('normal_A_block_%s' % block_id):
        p = _adjust_block(p, ip, filters, block_id)

        h = layers.Activation('relu')(ip)
        h = layers.Conv2D(filters, (1, 1),
                          strides=(1, 1),
                          padding='same',
                          name='normal_conv_1_%s' % block_id,
                          use_bias=False,
                          kernel_initializer='he_normal')(h)
        h = layers.BatchNormalization(axis=channel_dim,
                                      momentum=0.9997,
                                      epsilon=1e-3,
                                      name='normal_bn_1_%s' % block_id)(h)

        with backend.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters,
                                         kernel_size=(5, 5),
                                         block_id='normal_left1_%s' % block_id)
            x1_2 = _separable_conv_block(p,
                                         filters,
                                         block_id='normal_right1_%s' %
                                         block_id)
            x1 = layers.add([x1_1, x1_2], name='normal_add_1_%s' % block_id)

        with backend.name_scope('block_2'):
            x2_1 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         block_id='normal_left2_%s' % block_id)
            x2_2 = _separable_conv_block(p,
                                         filters, (3, 3),
                                         block_id='normal_right2_%s' %
                                         block_id)
            x2 = layers.add([x2_1, x2_2], name='normal_add_2_%s' % block_id)

        with backend.name_scope('block_3'):
            x3 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='normal_left3_%s' % (block_id))(h)
            x3 = layers.add([x3, p], name='normal_add_3_%s' % block_id)

        with backend.name_scope('block_4'):
            x4_1 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='normal_left4_%s' % (block_id))(p)
            x4_2 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='normal_right4_%s' % (block_id))(p)
            x4 = layers.add([x4_1, x4_2], name='normal_add_4_%s' % block_id)

        with backend.name_scope('block_5'):
            x5 = _separable_conv_block(h,
                                       filters,
                                       block_id='normal_left5_%s' % block_id)
            x5 = layers.add([x5, h], name='normal_add_5_%s' % block_id)

        x = layers.concatenate([p, x1, x2, x3, x4, x5],
                               axis=channel_dim,
                               name='normal_concat_%s' % block_id)
    return x, ip
Example no. 46
def _depthwise_conv_block(inputs,
                          pointwise_conv_filters,
                          alpha,
                          depth_multiplier=1,
                          strides=(1, 1),
                          block_id=1):
    """Adds a depthwise convolution block.

  A depthwise convolution block consists of a depthwise conv,
  batch normalization, relu6, pointwise convolution,
  batch normalization and relu6 activation.

  Arguments:
    inputs: Input tensor of shape `(rows, cols, channels)` (with
      `channels_last` data format) or (channels, rows, cols) (with
      `channels_first` data format).
    pointwise_conv_filters: Integer, the dimensionality of the output space
      (i.e. the number of output filters in the pointwise convolution).
    alpha: controls the width of the network. - If `alpha` < 1.0,
      proportionally decreases the number of filters in each layer. - If
      `alpha` > 1.0, proportionally increases the number of filters in each
      layer. - If `alpha` = 1, default number of filters from the paper are
      used at each layer.
    depth_multiplier: The number of depthwise convolution output channels
      for each input channel. The total number of depthwise convolution
      output channels will be equal to `filters_in * depth_multiplier`.
    strides: An integer or tuple/list of 2 integers, specifying the strides
      of the convolution along the width and height. Can be a single integer
      to specify the same value for all spatial dimensions. Specifying any
      stride value != 1 is incompatible with specifying any `dilation_rate`
      value != 1.
    block_id: Integer, a unique identification designating the block number.

  Input shape:
    4D tensor with shape `(batch, channels, rows, cols)` if
    data_format='channels_first', or `(batch, rows, cols, channels)` if
    data_format='channels_last'.

  Output shape:
    4D tensor with shape `(batch, filters, new_rows, new_cols)` if
    data_format='channels_first', or `(batch, new_rows, new_cols, filters)` if
    data_format='channels_last'. `rows` and `cols` values might have
    changed due to stride.

  Returns:
    Output tensor of block.
  """
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    if strides == (1, 1):
        x = inputs
    else:
        x = layers.ZeroPadding2D(((0, 1), (0, 1)),
                                 name='conv_pad_%d' % block_id)(inputs)
    x = layers.DepthwiseConv2D(
        (3, 3),
        padding='same' if strides == (1, 1) else 'valid',
        depth_multiplier=depth_multiplier,
        strides=strides,
        use_bias=False,
        name='conv_dw_%d' % block_id)(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='conv_dw_%d_bn' % block_id)(x)
    x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

    x = layers.Conv2D(pointwise_conv_filters, (1, 1),
                      padding='same',
                      use_bias=False,
                      strides=(1, 1),
                      name='conv_pw_%d' % block_id)(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='conv_pw_%d_bn' % block_id)(x)
    return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)
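
# Usage sketch (not part of the original example): stacking the block as in
# MobileNetV1. Assumes the module-level `layers` and `backend` used above are
# tf.keras.layers and tf.keras.backend.
import tensorflow as tf
from tensorflow.keras import layers

inputs = layers.Input(shape=(224, 224, 3))
x = layers.Conv2D(32, 3, strides=2, padding='same', use_bias=False)(inputs)
x = _depthwise_conv_block(x, 64, alpha=1.0, block_id=1)
x = _depthwise_conv_block(x, 128, alpha=1.0, strides=(2, 2), block_id=2)
model = tf.keras.Model(inputs, x)
model.summary()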
Example no. 47
def _adjust_block(p, ip, filters, block_id=None):
    """Adjusts the input `previous path` to match the shape of the `input`.

  Used in situations where the output number of filters needs to be changed.

  Args:
      p: Input tensor which needs to be modified
      ip: Input tensor whose shape needs to be matched
      filters: Number of output filters to be matched
      block_id: String block_id

  Returns:
      Adjusted Keras tensor
  """
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1
    img_dim = 2 if backend.image_data_format() == 'channels_first' else -2

    ip_shape = backend.int_shape(ip)

    if p is not None:
        p_shape = backend.int_shape(p)

    with backend.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p_shape[img_dim] != ip_shape[img_dim]:
            with backend.name_scope('adjust_reduction_block_%s' % block_id):
                p = layers.Activation('relu',
                                      name='adjust_relu_1_%s' % block_id)(p)
                p1 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_1_%s' % block_id)(p)
                p1 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   use_bias=False,
                                   name='adjust_conv_1_%s' % block_id,
                                   kernel_initializer='he_normal')(p1)

                p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_2_%s' % block_id)(p2)
                p2 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   use_bias=False,
                                   name='adjust_conv_2_%s' % block_id,
                                   kernel_initializer='he_normal')(p2)

                p = layers.concatenate([p1, p2], axis=channel_dim)
                p = layers.BatchNormalization(axis=channel_dim,
                                              momentum=0.9997,
                                              epsilon=1e-3,
                                              name='adjust_bn_%s' %
                                              block_id)(p)

        elif p_shape[channel_dim] != filters:
            with backend.name_scope('adjust_projection_block_%s' % block_id):
                p = layers.Activation('relu')(p)
                p = layers.Conv2D(filters, (1, 1),
                                  strides=(1, 1),
                                  padding='same',
                                  name='adjust_conv_projection_%s' % block_id,
                                  use_bias=False,
                                  kernel_initializer='he_normal')(p)
                p = layers.BatchNormalization(axis=channel_dim,
                                              momentum=0.9997,
                                              epsilon=1e-3,
                                              name='adjust_bn_%s' %
                                              block_id)(p)
    return p
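
# Shape-matching sketch (not part of the original example): `p` at 32x32 vs
# `ip` at 16x16 triggers the reduction branch, so the output takes on `ip`'s
# spatial shape with `filters` channels. Assumes `layers`/`backend` above are
# tf.keras.layers / tf.keras.backend.
from tensorflow.keras import backend, layers

p = layers.Input(shape=(32, 32, 64))
ip = layers.Input(shape=(16, 16, 44))
out = _adjust_block(p, ip, filters=44, block_id='demo')
print(backend.int_shape(out))  # (None, 16, 16, 44)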
Example no. 48
"""
settings.py

Settings file for saving shared constants
"""

from tensorflow.python.keras import backend as K

IMAGE_DATA_FORMAT = K.image_data_format()
CHANNELS_FIRST = IMAGE_DATA_FORMAT == 'channels_first'
CHANNELS_LAST = IMAGE_DATA_FORMAT == 'channels_last'
Example no. 49
def deprocess_image(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x


base_image = K.variable(preprocess_image(input_image))
style_reference_image = K.variable(preprocess_image(style_image))

if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

input_tensor = K.concatenate(
    [base_image, style_reference_image, combination_image], axis=0)

model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet',
                    include_top=False)
print('Model loaded.')

outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
print(outputs_dict)
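
# The full neural style transfer example indexes into `outputs_dict` to build
# its losses; using 'block5_conv2' as the content layer matches the stock
# Keras VGG19 example (an assumption here, since that part is not shown above).
content_features = outputs_dict['block5_conv2']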
Example no. 50
def Xception(include_top=True,
             weights='imagenet',
             input_tensor=None,
             input_shape=None,
             pooling=None,
             classes=1000,
             classifier_activation='softmax'):
    """Instantiates the Xception architecture.

  Reference:
  - [Xception: Deep Learning with Depthwise Separable Convolutions](
      https://arxiv.org/abs/1610.02357) (CVPR 2017)

  Optionally loads weights pre-trained on ImageNet.
  Note that the data format convention used by the model is
  the one specified in your Keras config at `~/.keras/keras.json`.
  Note that the default input image size for this model is 299x299.

  Caution: Be sure to properly pre-process your inputs to the application.
  Please see `applications.xception.preprocess_input` for an example.

  Arguments:
    include_top: whether to include the fully-connected
      layer at the top of the network.
    weights: one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor
      (i.e. output of `layers.Input()`)
      to use as image input for the model.
    input_shape: optional shape tuple, only to be specified
      if `include_top` is False (otherwise the input shape
      has to be `(299, 299, 3)`).
      It should have exactly 3 input channels,
      and width and height should be no smaller than 71.
      E.g. `(150, 150, 3)` would be one valid value.
    pooling: Optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model will be
          the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a 2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: optional number of classes to classify images
      into, only to be specified if `include_top` is True,
      and if no `weights` argument is specified.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
    if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=299,
        min_size=71,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    x = layers.Conv2D(32, (3, 3),
                      strides=(2, 2),
                      use_bias=False,
                      name='block1_conv1')(img_input)
    x = layers.BatchNormalization(axis=channel_axis, name='block1_conv1_bn')(x)
    x = layers.Activation('relu', name='block1_conv1_act')(x)
    x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
    x = layers.BatchNormalization(axis=channel_axis, name='block1_conv2_bn')(x)
    x = layers.Activation('relu', name='block1_conv2_act')(x)

    residual = layers.Conv2D(128, (1, 1),
                             strides=(2, 2),
                             padding='same',
                             use_bias=False)(x)
    residual = layers.BatchNormalization(axis=channel_axis)(residual)

    x = layers.SeparableConv2D(128, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block2_sepconv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block2_sepconv1_bn')(x)
    x = layers.Activation('relu', name='block2_sepconv2_act')(x)
    x = layers.SeparableConv2D(128, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block2_sepconv2')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block2_sepconv2_bn')(x)

    x = layers.MaxPooling2D((3, 3),
                            strides=(2, 2),
                            padding='same',
                            name='block2_pool')(x)
    x = layers.add([x, residual])

    residual = layers.Conv2D(256, (1, 1),
                             strides=(2, 2),
                             padding='same',
                             use_bias=False)(x)
    residual = layers.BatchNormalization(axis=channel_axis)(residual)

    x = layers.Activation('relu', name='block3_sepconv1_act')(x)
    x = layers.SeparableConv2D(256, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block3_sepconv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block3_sepconv1_bn')(x)
    x = layers.Activation('relu', name='block3_sepconv2_act')(x)
    x = layers.SeparableConv2D(256, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block3_sepconv2')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block3_sepconv2_bn')(x)

    x = layers.MaxPooling2D((3, 3),
                            strides=(2, 2),
                            padding='same',
                            name='block3_pool')(x)
    x = layers.add([x, residual])

    residual = layers.Conv2D(728, (1, 1),
                             strides=(2, 2),
                             padding='same',
                             use_bias=False)(x)
    residual = layers.BatchNormalization(axis=channel_axis)(residual)

    x = layers.Activation('relu', name='block4_sepconv1_act')(x)
    x = layers.SeparableConv2D(728, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block4_sepconv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block4_sepconv1_bn')(x)
    x = layers.Activation('relu', name='block4_sepconv2_act')(x)
    x = layers.SeparableConv2D(728, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block4_sepconv2')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block4_sepconv2_bn')(x)

    x = layers.MaxPooling2D((3, 3),
                            strides=(2, 2),
                            padding='same',
                            name='block4_pool')(x)
    x = layers.add([x, residual])

    for i in range(8):
        residual = x
        prefix = 'block' + str(i + 5)

        x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x)
        x = layers.SeparableConv2D(728, (3, 3),
                                   padding='same',
                                   use_bias=False,
                                   name=prefix + '_sepconv1')(x)
        x = layers.BatchNormalization(axis=channel_axis,
                                      name=prefix + '_sepconv1_bn')(x)
        x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x)
        x = layers.SeparableConv2D(728, (3, 3),
                                   padding='same',
                                   use_bias=False,
                                   name=prefix + '_sepconv2')(x)
        x = layers.BatchNormalization(axis=channel_axis,
                                      name=prefix + '_sepconv2_bn')(x)
        x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x)
        x = layers.SeparableConv2D(728, (3, 3),
                                   padding='same',
                                   use_bias=False,
                                   name=prefix + '_sepconv3')(x)
        x = layers.BatchNormalization(axis=channel_axis,
                                      name=prefix + '_sepconv3_bn')(x)

        x = layers.add([x, residual])

    residual = layers.Conv2D(1024, (1, 1),
                             strides=(2, 2),
                             padding='same',
                             use_bias=False)(x)
    residual = layers.BatchNormalization(axis=channel_axis)(residual)

    x = layers.Activation('relu', name='block13_sepconv1_act')(x)
    x = layers.SeparableConv2D(728, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block13_sepconv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block13_sepconv1_bn')(x)
    x = layers.Activation('relu', name='block13_sepconv2_act')(x)
    x = layers.SeparableConv2D(1024, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block13_sepconv2')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block13_sepconv2_bn')(x)

    x = layers.MaxPooling2D((3, 3),
                            strides=(2, 2),
                            padding='same',
                            name='block13_pool')(x)
    x = layers.add([x, residual])

    x = layers.SeparableConv2D(1536, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block14_sepconv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block14_sepconv1_bn')(x)
    x = layers.Activation('relu', name='block14_sepconv1_act')(x)

    x = layers.SeparableConv2D(2048, (3, 3),
                               padding='same',
                               use_bias=False,
                               name='block14_sepconv2')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  name='block14_sepconv2_bn')(x)
    x = layers.Activation('relu', name='block14_sepconv2_act')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = training.Model(inputs, x, name='xception')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = data_utils.get_file(
                'xception_weights_tf_dim_ordering_tf_kernels.h5',
                TF_WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='0a58e3b7378bc2990ea3b43d5981f1f6')
        else:
            weights_path = data_utils.get_file(
                'xception_weights_tf_dim_ordering_tf_kernels_notop.h5',
                TF_WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='b0042744bf5b25fce3cb969f33bebb97')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
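
# Usage sketch (not part of the original example): headless feature extraction
# at the documented 299x299 default size; the pooling choice is illustrative.
import numpy as np
import tensorflow as tf

model = tf.keras.applications.Xception(include_top=False,
                                       weights='imagenet',
                                       pooling='avg')
x = tf.keras.applications.xception.preprocess_input(
    np.random.uniform(0, 255, size=(1, 299, 299, 3)))
print(model.predict(x).shape)  # (1, 2048)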
Example no. 51
def pixelwise_transform(mask,
                        dilation_radius=None,
                        data_format=None,
                        separate_edge_classes=False):
    """Transforms a label mask for a z stack edge, interior, and background

    Args:
        mask (tensor): tensor of labels
        dilation_radius (int): width by which to enlarge the edge feature of
            each instance
        data_format (str): 'channels_first' or 'channels_last'
        separate_edge_classes (bool): Whether to separate the cell edge class
            into 2 distinct cell-cell edge and cell-background edge classes.

    Returns:
        numpy.array: one-hot encoded tensor of masks:
            if not separate_edge_classes: [cell_edge, cell_interior, background]
            otherwise: [bg_cell_edge, cell_cell_edge, cell_interior, background]
    """
    if data_format is None:
        data_format = K.image_data_format()

    if data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = len(mask.shape) - 1

    mask = np.squeeze(mask, axis=channel_axis)

    # Detect the edges and interiors
    new_masks = np.zeros(mask.shape)
    edges = np.zeros(mask.shape)
    strel = ball(1) if mask.ndim > 3 else disk(1)
    for cell_label in np.unique(mask):
        if cell_label != 0:
            for i in range(mask.shape[0]):
                # get the cell interior
                img = mask[i] == cell_label
                img = binary_erosion(img, strel)
                new_masks[i] += img

    interiors = np.multiply(new_masks, mask)
    edges = (mask - interiors > 0).astype('int')
    interiors = (interiors > 0).astype('int')

    if not separate_edge_classes:
        if dilation_radius:
            dil_strel = ball(dilation_radius) if mask.ndim > 3 else disk(
                dilation_radius)
            # Thicken cell edges to be more pronounced
            for i in range(edges.shape[0]):
                edges[i] = binary_dilation(edges[i], selem=dil_strel)

            # Thin the augmented edges by subtracting the interior features.
            edges = (edges - interiors > 0).astype('int')

        background = (1 - edges - interiors > 0)
        background = background.astype('int')

        all_stacks = [edges, interiors, background]

        return np.stack(all_stacks, axis=channel_axis)

    # dilate the background masks and subtract from all edges for background-edges
    dilated_background = np.zeros(mask.shape)
    for i in range(mask.shape[0]):
        background = (mask[i] == 0).astype('int')
        dilated_background[i] = binary_dilation(background, strel)

    background_edges = (edges - dilated_background > 0).astype('int')

    # edges that are not background-edges are interior-edges
    interior_edges = (edges - background_edges > 0).astype('int')

    if dilation_radius:
        dil_strel = ball(dilation_radius) if mask.ndim > 3 else disk(
            dilation_radius)
        # Thicken cell edges to be more pronounced
        for i in range(edges.shape[0]):
            interior_edges[i] = binary_dilation(interior_edges[i],
                                                selem=dil_strel)
            background_edges[i] = binary_dilation(background_edges[i],
                                                  selem=dil_strel)

        # Thin the augmented edges by subtracting the interior features.
        interior_edges = (interior_edges - interiors > 0).astype('int')
        background_edges = (background_edges - interiors > 0).astype('int')

    background = (1 - background_edges - interior_edges - interiors > 0)
    background = background.astype('int')

    all_stacks = [background_edges, interior_edges, interiors, background]

    return np.stack(all_stacks, axis=channel_axis)
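
# Toy sketch (not part of the original example): one square "cell" in a
# channels_last batch. Assumes numpy plus the snippet's scikit-image imports
# (the pre-0.19 `selem` keyword) are available.
import numpy as np

mask = np.zeros((1, 8, 8, 1), dtype=int)
mask[0, 2:6, 2:6, 0] = 1  # a single labeled instance
transformed = pixelwise_transform(mask, dilation_radius=1)
print(transformed.shape)  # (1, 8, 8, 3): [edge, interior, background]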
Example no. 52
def train_model_siamese_daughter(model,
                                 dataset,
                                 expt='',
                                 test_size=.2,
                                 n_epoch=100,
                                 batch_size=1,
                                 num_gpus=None,
                                 crop_dim=32,
                                 min_track_length=1,
                                 neighborhood_scale_size=10,
                                 features=None,
                                 optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                                 log_dir='/data/tensorboard_logs',
                                 model_dir='/data/models',
                                 model_name=None,
                                 focal=False,
                                 gamma=0.5,
                                 lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                                 rotation_range=0,
                                 flip=True,
                                 shear=0,
                                 zoom_range=0,
                                 seed=0,
                                 **kwargs):
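    """Trains a siamese daughter-cell tracking model on `dataset`; argument
    conventions follow `train_model_sample` below."""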
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_[{}]_neighs={}_epochs={}_seed={}_{}'.format(
            todays_date, data_name, ','.join(f[0] for f in sorted(features)),
            neighborhood_scale_size, n_epoch, seed, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    print('training on dataset:', dataset)
    print('saving model at:', model_path)
    print('saving loss at:', loss_path)

    train_dict, val_dict = get_data(dataset, mode='siamese_daughters',
                                    seed=seed, test_size=test_size)

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', val_dict['X'].shape)
    print('y_test shape:', val_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    def loss_function(y_true, y_pred):
        if focal:
            return losses.weighted_focal_loss(y_true, y_pred,
                                              gamma=gamma,
                                              n_classes=n_classes,
                                              from_logits=False)
        return losses.weighted_categorical_crossentropy(y_true, y_pred,
                                                        n_classes=n_classes,
                                                        from_logits=False)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 2:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    print('Using real-time data augmentation.')

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.SiameseDataGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.SiameseDataGenerator(
        rotation_range=0,
        zoom_range=0,
        shear_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=5.0)
    total_test_pairs = tracking_utils.count_pairs(val_dict['y'], same_probability=5.0)

    # total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=0.5)
    # total_test_pairs = tracking_utils.count_pairs(val_dict['y'], same_probability=0.5)

    train_data = datagen.flow(
        train_dict,
        seed=seed,
        crop_dim=crop_dim,
        batch_size=batch_size,
        min_track_length=min_track_length,
        neighborhood_scale_size=neighborhood_scale_size,
        features=features)

    val_data = datagen_val.flow(
        val_dict,
        seed=seed,
        crop_dim=crop_dim,
        batch_size=batch_size,
        min_track_length=min_track_length,
        neighborhood_scale_size=neighborhood_scale_size,
        features=features)

    print('total_train_pairs:', total_train_pairs)
    print('total_test_pairs:', total_test_pairs)
    print('batch size:', batch_size)
    print('validation_steps: ', total_test_pairs // batch_size)

    train_callbacks = get_callbacks(
        model_path, lr_sched=lr_sched,
        tensorboard_log_dir=log_dir,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss', verbose=1)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=total_train_pairs // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=total_test_pairs // batch_size,
        callbacks=train_callbacks)

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    return model
Example no. 53
def train_model_sample(model,
                       dataset,
                       expt='',
                       test_size=.2,
                       n_epoch=10,
                       batch_size=32,
                       num_gpus=None,
                       transform=None,
                       window_size=None,
                       balance_classes=True,
                       max_class_samples=None,
                       log_dir='/data/tensorboard_logs',
                       model_dir='/data/models',
                       model_name=None,
                       focal=False,
                       gamma=0.5,
                       optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                       lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                       rotation_range=0,
                       flip=False,
                       shear=0,
                       zoom_range=0,
                       seed=0,
                       **kwargs):
    """Train a model using sample mode.

    Args:
        model (tensorflow.keras.Model): The model to train.
        dataset (str): Path to a dataset to train the model with.
        expt (str): Experiment, substring to include in model name.
        test_size (float): Percent of data to leave as test data.
        n_epoch (int): Number of training epochs.
        batch_size (int): Number of samples per training batch.
        num_gpus (int): The number of GPUs to train on.
        transform (str): Defines the transformation of the training data.
            One of 'watershed', 'fgbg', 'pixelwise', 'disc'.
        window_size (tuple(int, int)): Size of the sampling window.
        balance_classes (bool): Whether to perform class-balancing on data.
        max_class_samples (int): Maximum number of examples per class to sample.
        log_dir (str): Filepath to save tensorboard logs. If None, disables
            the tensorboard callback.
        model_dir (str): Directory to save the model file.
        model_name (str): Name of the model (and name of output file).
        focal (bool): If True, uses focal loss.
        gamma (float): Parameter for focal loss.
        optimizer (object): Pre-initialized optimizer object (SGD, Adam, etc.)
        lr_sched (function): Learning rate scheduler function.
        rotation_range (int): Maximum rotation range for image augmentation.
        flip (bool): Enables horizontal and vertical flipping for augmentation.
        shear (int): Maximum shear range for image augmentation.
        zoom_range (tuple): Minimum and maximum zoom values, e.g. (0.8, 1.2).
        seed (int): Random seed.
        kwargs (dict): Other parameters to pass to _transform_masks.

    Returns:
        tensorflow.keras.Model: The trained model
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    # train_dict, test_dict = get_data(dataset, test_size=test_size, seed=seed)

    train_dict, test_dict = get_data_from_path(dataset, patch_crop=False)
    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    def loss_function(y_true, y_pred):
        if isinstance(transform, str) and transform.lower() == 'disc':
            return losses.discriminative_instance_loss(y_true, y_pred)
        if focal:
            return losses.weighted_focal_loss(
                y_true, y_pred, gamma=gamma, n_classes=n_classes)
        return losses.weighted_categorical_crossentropy(
            y_true, y_pred, n_classes=n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 2:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    if train_dict['X'].ndim == 4:
        DataGenerator = image_generators.SampleDataGenerator
        window_size = window_size if window_size else (30, 30)
    elif train_dict['X'].ndim == 5:
        DataGenerator = image_generators.SampleMovieDataGenerator
        window_size = window_size if window_size else (30, 30, 3)
    else:
        raise ValueError('Expected `X` to have ndim 4 or 5. '
                         'Got {}.'.format(train_dict['X'].ndim))

    # this will do preprocessing and realtime data augmentation
    datagen = DataGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    # no validation augmentation
    datagen_val = DataGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=False,
        vertical_flip=False)

    train_data = datagen.flow(
        train_dict,
        seed=seed,
        batch_size=batch_size,
        transform=transform,
        transform_kwargs=kwargs,
        window_size=window_size,
        balance_classes=balance_classes,
        max_class_samples=max_class_samples)

    val_data = datagen_val.flow(
        test_dict,
        seed=seed,
        batch_size=batch_size,
        transform=transform,
        transform_kwargs=kwargs,
        window_size=window_size,
        balance_classes=False,
        max_class_samples=max_class_samples)

    train_callbacks = get_callbacks(
        model_path, lr_sched=lr_sched,
        tensorboard_log_dir=log_dir,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss', verbose=1)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=train_callbacks)

    np.savez(loss_path, loss_history=loss_history.history)

    return model
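The ndim check above is what routes still images and movies to different samplers. A small sketch of the expected shapes (array sizes are made up for illustration):

import numpy as np

still = np.zeros((8, 128, 128, 1))      # ndim == 4: (batch, x, y, channels)
movie = np.zeros((8, 10, 128, 128, 1))  # ndim == 5: (batch, frames, x, y, channels)

# 4D data -> SampleDataGenerator with a (30, 30) window;
# 5D data -> SampleMovieDataGenerator with a (30, 30, 3) window.
for X in (still, movie):
    if X.ndim not in (4, 5):
        raise ValueError('Expected `X` to have ndim 4 or 5. Got {}.'.format(X.ndim))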
Example 54
def train_model_retinanet(model,
                          dataset,
                          expt='',
                          test_size=.2,
                          n_epoch=10,
                          batch_size=1,
                          num_gpus=None,
                          include_masks=False,
                          panoptic=False,
                          panoptic_weight=0.1,
                          transforms=['watershed'],
                          transforms_kwargs={},
                          anchor_params=None,
                          pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                          min_objects=3,
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models',
                          model_name=None,
                          sigma=3.0,
                          alpha=0.25,
                          gamma=2.0,
                          score_threshold=0.01,
                          iou_threshold=0.5,
                          max_detections=100,
                          weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0,
                          flip=True,
                          shear=0,
                          zoom_range=0,
                          compute_map=True,
                          seed=0,
                          semantic_only=False,
                          **kwargs):
    """Train a RetinaNet model from the given backbone.

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn

    Args:
        model (tensorflow.keras.Model): The model to train.
        dataset (str): Path to a dataset to train the model with.
        expt (str): Experiment, substring to include in model name.
        test_size (float): Percent of data to leave as test data.
        n_epoch (int): Number of training epochs.
        batch_size (int): Number of samples per training batch.
        num_gpus (int): The number of GPUs to train on.
        include_masks (bool): Whether to generate masks using MaskRCNN.
        panoptic (bool): Whether to include semantic segmentation heads.
        panoptic_weight (float): Weight applied to the semantic loss.
        transforms (list): List of transform names as strings. Each transform
            will have its own semantic segmentation head.
        transforms_kwargs (list): List of dicts of optional values for each
            transform in transforms.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
            If None, default values are used.
        pyramid_levels (list): Pyramid levels to attach
            the object detection heads to.
        min_objects (int): If a training image has fewer than min_objects
            objects, the image will not be used for training.
        mask_size (tuple): The size of the masks.
        log_dir (str): Filepath to save tensorboard logs. If None, disables
            the tensorboard callback.
        model_dir (str): Directory to save the model file.
        model_name (str): Name of the model (and name of output file).
        sigma (float): The point where the loss changes from L2 to L1.
        alpha (float): Scale the focal weight with alpha.
        gamma (float): Take the power of the focal weight with gamma.
        iou_threshold (float): The threshold used to consider when a detection
            is positive or negative.
        score_threshold (float): The score confidence threshold
            to use for detections.
        max_detections (int): The maximum number of detections to use per image
        weighted_average (bool): Use a weighted average in evaluation.
        optimizer (object): Pre-initialized optimizer object (SGD, Adam, etc.)
        lr_sched (function): Learning rate scheduler function.
        rotation_range (int): Maximum rotation range for image augmentation.
        flip (bool): Enables horizontal and vertical flipping for augmentation.
        shear (int): Maximum shear range for image augmentation.
        zoom_range (tuple): Minimum and maximum zoom values, e.g. (0.8, 1.2).
        seed (int): Random seed
        compute_map (bool): Whether to compute mAP at end of training.
        semantic_only (bool): Whether to generate only the semantic
            segmentation targets.
        kwargs (dict): Other parameters to pass to _transform_masks.

    Returns:
        tensorflow.keras.Model: The trained model
    """

    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)

    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size)

    channel_axis = 1 if is_channels_first else -1
    n_classes = model.layers[-1].output_shape[channel_axis]

    if panoptic:
        n_semantic_classes = [layer.output_shape[channel_axis]
                              for layer in model.layers if 'semantic' in layer.name]
    else:
        n_semantic_classes = []

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 2:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(
            model,
            nms=True,
            anchor_params=anchor_params,
            num_semantic_heads=len(n_semantic_classes),
            panoptic=panoptic,
            class_specific_filter=False)

    retinanet_losses = losses.RetinaNetLosses(sigma=sigma, alpha=alpha, gamma=gamma,
                                              iou_threshold=iou_threshold,
                                              mask_size=mask_size)

    def semantic_loss(n_classes):
        # Keras calls loss functions as fn(y_true, y_pred)
        def _semantic_loss(y_true, y_pred):
            if n_classes > 1:
                return panoptic_weight * losses.weighted_categorical_crossentropy(
                    y_true, y_pred, n_classes=n_classes)
            return panoptic_weight * MSE(y_true, y_pred)
        return _semantic_loss

    loss = {
        'regression': retinanet_losses.regress_loss,
        'classification': retinanet_losses.classification_loss
    }

    if include_masks:
        loss['masks'] = retinanet_losses.mask_loss

    if panoptic:
        # Give losses for all of the semantic heads
        for layer in model.layers:
            if 'semantic' in layer.name:
                n_classes = layer.output_shape[channel_axis]
                loss[layer.name] = semantic_loss(n_classes)

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation samples.'.format(
                                 num_gpus, test_dict['y'].shape[0]))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=False,
        vertical_flip=False)

    # Some backbones (e.g. VGG or DenseNet) may need a model-specific
    # shape callback; by default the pyramid shapes are guessed directly.
    compute_shapes = guess_shapes

    train_data = datagen.flow(
        train_dict,
        seed=seed,
        include_masks=include_masks,
        panoptic=panoptic,
        transforms=transforms,
        transforms_kwargs=transforms_kwargs,
        pyramid_levels=pyramid_levels,
        min_objects=min_objects,
        anchor_params=anchor_params,
        compute_shapes=compute_shapes,
        semantic_only=semantic_only,
        batch_size=batch_size)

    val_data = datagen_val.flow(
        test_dict,
        seed=seed,
        include_masks=include_masks,
        panoptic=panoptic,
        transforms=transforms,
        transforms_kwargs=transforms_kwargs,
        pyramid_levels=pyramid_levels,
        min_objects=min_objects,
        anchor_params=anchor_params,
        compute_shapes=compute_shapes,
        semantic_only=semantic_only,
        batch_size=batch_size)

    train_callbacks = get_callbacks(
        model_path, lr_sched=lr_sched,
        tensorboard_log_dir=log_dir,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss', verbose=1)

    eval_callback = RedirectModel(
        Evaluate(val_data,
                 iou_threshold=iou_threshold,
                 score_threshold=score_threshold,
                 max_detections=max_detections,
                 tensorboard=train_callbacks[-1] if log_dir else None,
                 weighted_average=weighted_average),
        prediction_model)

    train_callbacks.append(eval_callback)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=train_callbacks)

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    if compute_map:
        average_precisions = evaluate(
            val_data,
            prediction_model,
            iou_threshold=iou_threshold,
            score_threshold=score_threshold,
            max_detections=max_detections,
        )

        # print evaluation
        total_instances = []
        precisions = []
        for label, (average_precision, num_annotations) in average_precisions.items():
            print('{:.0f} instances of class'.format(num_annotations),
                  label, 'with average precision: {:.4f}'.format(average_precision))
            total_instances.append(num_annotations)
            precisions.append(average_precision)

        if sum(total_instances) == 0:
            print('No test instances found.')
        else:
            print('mAP using the weighted average of precisions among classes: {:.4f}'.format(
                sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
            print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
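To make the two summary numbers concrete, here is a worked example of the weighted and unweighted mAP printed above, using made-up per-class results:

# {label: (average_precision, num_annotations)}, values invented for illustration
average_precisions = {0: (0.90, 50), 1: (0.60, 10), 2: (0.00, 0)}

total_instances = [n for _, n in average_precisions.values()]
precisions = [ap for ap, _ in average_precisions.values()]

weighted_map = sum(a * b for a, b in zip(total_instances, precisions)) / sum(total_instances)
plain_map = sum(precisions) / sum(x > 0 for x in total_instances)
print(weighted_map)  # (50 * 0.90 + 10 * 0.60) / 60 = 0.85
print(plain_map)     # (0.90 + 0.60 + 0.00) / 2   = 0.75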
Example 55
def block3(x,
           filters,
           kernel_size=3,
           stride=1,
           groups=32,
           conv_shortcut=True,
           name=None):
    """A residual block.

  Args:
    x: input tensor.
    filters: integer, filters of the bottleneck layer.
    kernel_size: default 3, kernel size of the bottleneck layer.
    stride: default 1, stride of the first layer.
    groups: default 32, group size for grouped convolution.
    conv_shortcut: default True, use convolution shortcut if True,
        otherwise identity shortcut.
    name: string, block label.

  Returns:
    Output tensor for the residual block.
  """
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    if conv_shortcut:
        shortcut = layers.Conv2D((64 // groups) * filters,
                                 1,
                                 strides=stride,
                                 use_bias=False,
                                 name=name + '_0_conv')(x)
        shortcut = layers.BatchNormalization(axis=bn_axis,
                                             epsilon=1.001e-5,
                                             name=name + '_0_bn')(shortcut)
    else:
        shortcut = x

    x = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  epsilon=1.001e-5,
                                  name=name + '_1_bn')(x)
    x = layers.Activation('relu', name=name + '_1_relu')(x)

    c = filters // groups
    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
    x = layers.DepthwiseConv2D(kernel_size,
                               strides=stride,
                               depth_multiplier=c,
                               use_bias=False,
                               name=name + '_2_conv')(x)
    x_shape = backend.shape(x)[:-1]
    x = backend.reshape(x, backend.concatenate([x_shape, (groups, c, c)]))
    x = layers.Lambda(lambda x: sum(x[:, :, :, :, i] for i in range(c)),
                      name=name + '_2_reduce')(x)
    x = backend.reshape(x, backend.concatenate([x_shape, (filters, )]))
    x = layers.BatchNormalization(axis=bn_axis,
                                  epsilon=1.001e-5,
                                  name=name + '_2_bn')(x)
    x = layers.Activation('relu', name=name + '_2_relu')(x)

    x = layers.Conv2D((64 // groups) * filters,
                      1,
                      use_bias=False,
                      name=name + '_3_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  epsilon=1.001e-5,
                                  name=name + '_3_bn')(x)

    x = layers.Add(name=name + '_add')([shortcut, x])
    x = layers.Activation('relu', name=name + '_out')(x)
    return x
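block3 emulates ResNeXt's grouped convolution with a depthwise convolution followed by a reshape-and-sum. A quick shape walk-through under the default groups=32 (pure arithmetic, no Keras required):

filters, groups = 128, 32
c = filters // groups                    # channels per group: 4

# DepthwiseConv2D with depth_multiplier=c expands filters -> filters * c
depthwise_channels = filters * c         # 512
# which reshapes exactly into (..., groups, c, c):
assert depthwise_channels == groups * c * c

# summing over the last axis of (groups, c, c) leaves groups * c channels,
# i.e. `filters` again -- the output a grouped Conv2D would have produced
assert groups * c == filters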
Example 56
def ResNet50(num_classes):
    """Instantiates the ResNet50 architecture.

  Args:
    num_classes: `int` number of classes for image classification.

  Returns:
      A Keras model instance.
  """
    # Determine proper input shape
    if backend.image_data_format() == 'channels_first':
        input_shape = (3, 224, 224)
        bn_axis = 1
    else:
        input_shape = (224, 224, 3)
        bn_axis = 3

    img_input = layers.Input(shape=input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, (7, 7),
                      use_bias=False,
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(num_classes,
                     activation='softmax',
                     kernel_initializer=initializers.RandomNormal(stddev=0.01),
                     kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                     bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                     name='fc1000')(x)

    # Create model.
    return models.Model(img_input, x, name='resnet50')
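A hedged usage sketch for the constructor above; it assumes the conv_block/identity_block helpers and the L2_WEIGHT_DECAY and BATCH_NORM_* constants it references are defined elsewhere in this module.

model = ResNet50(num_classes=10)
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()  # expects (224, 224, 3) inputs under channels_last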
Example 57
def ResNet(stack_fn,
           preact,
           use_bias,
           model_name='resnet',
           include_top=True,
           weights='imagenet',
           input_tensor=None,
           input_shape=None,
           pooling=None,
           classes=1000,
           classifier_activation='softmax',
           **kwargs):
    """Instantiates the ResNet, ResNetV2, and ResNeXt architecture.

  Reference:
  - [Deep Residual Learning for Image Recognition](
      https://arxiv.org/abs/1512.03385) (CVPR 2015)

  Optionally loads weights pre-trained on ImageNet.
  Note that the data format convention used by the model is
  the one specified in your Keras config at `~/.keras/keras.json`.

  Args:
    stack_fn: a function that returns output tensor for the
      stacked residual blocks.
    preact: whether to use pre-activation or not
      (True for ResNetV2, False for ResNet and ResNeXt).
    use_bias: whether to use biases for convolutional layers or not
      (True for ResNet and ResNetV2, False for ResNeXt).
    model_name: string, model name.
    include_top: whether to include the fully-connected
      layer at the top of the network.
    weights: one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor
      (i.e. output of `layers.Input()`)
      to use as image input for the model.
    input_shape: optional shape tuple, only to be specified
      if `include_top` is False (otherwise the input shape
      has to be `(224, 224, 3)` (with `channels_last` data format)
      or `(3, 224, 224)` (with `channels_first` data format).
      It should have exactly 3 input channels.
    pooling: optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model will be
          the 4D tensor output of the
          last convolutional layer.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional layer, and thus
          the output of the model will be a 2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
    **kwargs: For backwards compatibility only.
  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
    global layers
    if 'layers' in kwargs:
        layers = kwargs.pop('layers')
    else:
        layers = VersionAwareLayers()
    if kwargs:
        raise ValueError('Unknown argument(s): %s' % (kwargs, ))
    if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)),
                             name='conv1_pad')(img_input)
    x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias,
                      name='conv1_conv')(x)

    if not preact:
        x = layers.BatchNormalization(axis=bn_axis,
                                      epsilon=1.001e-5,
                                      name='conv1_bn')(x)
        x = layers.Activation('relu', name='conv1_relu')(x)

    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
    x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)

    x = stack_fn(x)

    if preact:
        x = layers.BatchNormalization(axis=bn_axis,
                                      epsilon=1.001e-5,
                                      name='post_bn')(x)
        x = layers.Activation('relu', name='post_relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if (weights == 'imagenet') and (model_name in WEIGHTS_HASHES):
        if include_top:
            file_name = model_name + '_weights_tf_dim_ordering_tf_kernels.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_notop.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHTS_PATH + file_name,
                                           cache_subdir='models',
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
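The generic constructor above takes a stack_fn so concrete depths can be defined in a few lines, mirroring keras.applications. A sketch for the 50-layer variant; `stack1`, which stacks the residual blocks, is assumed to exist and is not shown in this file:

def ResNet50V1(include_top=True, weights='imagenet', **kwargs):
    def stack_fn(x):
        x = stack1(x, 64, 3, stride1=1, name='conv2')
        x = stack1(x, 128, 4, name='conv3')
        x = stack1(x, 256, 6, name='conv4')
        return stack1(x, 512, 3, name='conv5')
    return ResNet(stack_fn, preact=False, use_bias=True,
                  model_name='resnet50', include_top=include_top,
                  weights=weights, **kwargs)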
Example 58
def VGG16_Places365(include_top=True,
                    weights='places',
                    input_tensor=None,
                    input_shape=None,
                    pooling=None,
                    classes=365):
    """Instantiates the VGG16-places365 architecture.

    Optionally loads weights pre-trained
    on Places. Note that when using TensorFlow,
    for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model and the weights are compatible with both
    TensorFlow and Theano. The data format
    convention used by the model is the one
    specified in your Keras config file.

    # Arguments
        include_top: whether to include the 3 fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
                 'places' (pre-training on Places),
                 or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 48.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`, or invalid input shape
        """
    if not (weights in {'places', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `places` '
                         '(pre-training on Places), '
                         'or the path to the weights file to be loaded.')

    if weights == 'places' and include_top and classes != 365:
        raise ValueError('If using `weights` as places with `include_top`'
                         ' as true, `classes` should be 365')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Block 1
    x = Conv2D(filters=64,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block1_conv1')(img_input)

    x = Conv2D(filters=64,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block1_conv2')(x)

    x = MaxPooling2D(pool_size=(2, 2),
                     strides=(2, 2),
                     name="block1_pool",
                     padding='valid')(x)

    # Block 2
    x = Conv2D(filters=128,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block2_conv1')(x)

    x = Conv2D(filters=128,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block2_conv2')(x)

    x = MaxPooling2D(pool_size=(2, 2),
                     strides=(2, 2),
                     name="block2_pool",
                     padding='valid')(x)

    # Block 3
    x = Conv2D(filters=256,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block3_conv1')(x)

    x = Conv2D(filters=256,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block3_conv2')(x)

    x = Conv2D(filters=256,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block3_conv3')(x)

    x = MaxPooling2D(pool_size=(2, 2),
                     strides=(2, 2),
                     name="block3_pool",
                     padding='valid')(x)

    # Block 4
    x = Conv2D(filters=512,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block4_conv1')(x)

    x = Conv2D(filters=512,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block4_conv2')(x)

    x = Conv2D(filters=512,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block4_conv3')(x)

    x = MaxPooling2D(pool_size=(2, 2),
                     strides=(2, 2),
                     name="block4_pool",
                     padding='valid')(x)

    # Block 5
    x = Conv2D(filters=512,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block5_conv1')(x)

    x = Conv2D(filters=512,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block5_conv2')(x)

    x = Conv2D(filters=512,
               kernel_size=3,
               strides=(1, 1),
               padding='same',
               kernel_regularizer=l2(0.0002),
               activation='relu',
               name='block5_conv3')(x)

    x = MaxPooling2D(pool_size=(2, 2),
                     strides=(2, 2),
                     name="block5_pool",
                     padding='valid')(x)

    if include_top:
        # Classification block
        x = Flatten(name='flatten')(x)
        x = Dense(4096, activation='relu', name='fc1')(x)
        x = Dropout(0.5, name='drop_fc1')(x)

        x = Dense(4096, activation='relu', name='fc2')(x)
        x = Dropout(0.5, name='drop_fc2')(x)

        x = Dense(classes, activation='softmax', name="predictions")(x)

    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='vgg16-places365')

    # load weights
    if weights == 'places':
        if include_top:
            weights_path = get_file(
                'vgg16-places365_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'vgg16-places365_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models')

        model.load_weights(weights_path)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first':
            if include_top:
                maxpool = model.get_layer(name='block5_pool')
                shape = maxpool.output_shape[1:]
                dense = model.get_layer(name='fc1')
                layer_utils.convert_dense_weights_data_format(
                    dense, shape, 'channels_first')

            if K.backend() == 'tensorflow':
                warnings.warn('You are using the TensorFlow backend, yet you '
                              'are using the Theano '
                              'image data format convention '
                              '(`image_data_format="channels_first"`). '
                              'For best performance, set '
                              '`image_data_format="channels_last"` in '
                              'your Keras config '
                              'at ~/.keras/keras.json.')

    elif weights is not None:
        model.load_weights(weights)

    return model
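A hedged usage sketch; the random array stands in for a preprocessed 224x224 RGB image, and the exact preprocessing expected by the Places365 weights should be checked against the upstream repository:

import numpy as np

model = VGG16_Places365(weights='places')
image = np.random.rand(1, 224, 224, 3).astype('float32')  # stand-in input
preds = model.predict(image)
top5 = np.argsort(preds[0])[::-1][:5]  # indices of the 5 most likely scenes
print(top5)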
Example 59
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
    """Adds an Inception-ResNet block.

  This function builds 3 types of Inception-ResNet blocks mentioned
  in the paper, controlled by the `block_type` argument (which is the
  block name used in the official TF-slim implementation):
  - Inception-ResNet-A: `block_type='block35'`
  - Inception-ResNet-B: `block_type='block17'`
  - Inception-ResNet-C: `block_type='block8'`

  Args:
    x: input tensor.
    scale: scaling factor to scale the residuals (i.e., the output of passing
      `x` through an inception module) before adding them to the shortcut
      branch. Let `r` be the output from the residual branch, the output of this
      block will be `x + scale * r`.
    block_type: `'block35'`, `'block17'` or `'block8'`, determines the network
      structure in the residual branch.
    block_idx: an `int` used for generating layer names. The Inception-ResNet
      blocks are repeated many times in this network. We use `block_idx` to
      identify each of the repetitions. For example, the first
      Inception-ResNet-A block will have `block_type='block35', block_idx=0`,
      and the layer names will have a common prefix `'block35_0'`.
    activation: activation function to use at the end of the block (see
      [activations](../activations.md)). When `activation=None`, no activation
      is applied
      (i.e., "linear" activation: `a(x) = x`).

  Returns:
      Output tensor for the block.

  Raises:
    ValueError: if `block_type` is not one of `'block35'`,
      `'block17'` or `'block8'`.
  """
    if block_type == 'block35':
        branch_0 = conv2d_bn(x, 32, 1)
        branch_1 = conv2d_bn(x, 32, 1)
        branch_1 = conv2d_bn(branch_1, 32, 3)
        branch_2 = conv2d_bn(x, 32, 1)
        branch_2 = conv2d_bn(branch_2, 48, 3)
        branch_2 = conv2d_bn(branch_2, 64, 3)
        branches = [branch_0, branch_1, branch_2]
    elif block_type == 'block17':
        branch_0 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(x, 128, 1)
        branch_1 = conv2d_bn(branch_1, 160, [1, 7])
        branch_1 = conv2d_bn(branch_1, 192, [7, 1])
        branches = [branch_0, branch_1]
    elif block_type == 'block8':
        branch_0 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(branch_1, 224, [1, 3])
        branch_1 = conv2d_bn(branch_1, 256, [3, 1])
        branches = [branch_0, branch_1]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "block35", "block17" or "block8", '
                         'but got: ' + str(block_type))

    block_name = block_type + '_' + str(block_idx)
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3
    mixed = layers.Concatenate(axis=channel_axis,
                               name=block_name + '_mixed')(branches)
    up = conv2d_bn(mixed,
                   backend.int_shape(x)[channel_axis],
                   1,
                   activation=None,
                   use_bias=True,
                   name=block_name + '_conv')

    x = layers.Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
                      output_shape=backend.int_shape(x)[1:],
                      arguments={'scale': scale},
                      name=block_name)([x, up])
    if activation is not None:
        x = layers.Activation(activation, name=block_name + '_ac')(x)
    return x
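The Lambda layer above implements the residual scaling rule `x + scale * r`. A tiny numeric check with plain tensors instead of a full block (0.17 is the scale typically used for block35):

import tensorflow as tf

x = tf.ones((1, 4, 4, 8))          # shortcut branch
r = tf.fill((1, 4, 4, 8), 0.5)     # pretend residual-branch output
out = x + 0.17 * r                 # what the Lambda computes with scale=0.17
print(float(out[0, 0, 0, 0]))      # 1.085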
Example 60
def semantic_prediction(semantic_names,
                        semantic_features,
                        target_level=0,
                        input_target=None,
                        n_filters=64,
                        n_dense=64,
                        ndim=2,
                        n_classes=3,
                        semantic_id=0):
    """Creates the prediction head from a list of semantic features

    Args:
        semantic_names (list): A list of the names of the semantic feature layers
        semantic_features (list): A list of semantic features
            NOTE: semantic_names and semantic_features should be in decreasing order
            e.g. [Q6, Q5, Q4, ...]
        target_level (int): (Optional) The level we need to reach.
            Performs 2x upsampling until we're at the target level.
        input_target (tensor): Optional tensor with the input image.
        n_filters (int): The number of filters for the 3x3 convolution.
        n_dense (int): The number of filters for dense layers.
        ndim (int): The spatial dimensions of the input data.
            Default is 2, but it also works with 3.
        n_classes (int): The number of classes to be predicted.
        semantic_id (int): Defaults to 0. A number to name the final layer.
            Allows for multiple semantic heads.
    Returns:
        tensor: The softmax prediction for the semantic segmentation head

    Raises:
        ValueError: ndim is not 2 or 3
    """

    acceptable_ndims = [2, 3]
    if ndim not in acceptable_ndims:
        raise ValueError('Only 2 and 3 dimensional networks are supported')

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    # Add all the semantic layers
    semantic_sum = semantic_features[0]
    for semantic_feature in semantic_features[1:]:
        semantic_sum = Add()([semantic_sum, semantic_feature])

    # Final upsampling
    min_level = int(re.findall(r'\d+', semantic_names[-1])[0])
    n_upsample = min_level - target_level
    x = semantic_upsample(semantic_sum,
                          n_upsample,
                          target=input_target,
                          ndim=ndim)

    # First tensor product
    x = TensorProduct(n_dense)(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    # Apply tensor product and softmax layer
    if n_classes > 1:
        x = TensorProduct(n_classes)(x)
        x = Softmax(axis=channel_axis,
                    name='semantic_{}'.format(semantic_id))(x)
    else:  # n_classes == 1
        x = TensorProduct(n_classes)(x)
        x = Activation('relu', name='semantic_{}'.format(semantic_id))(x)

    return x
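A worked example of the upsampling count used above: the minimum pyramid level is parsed out of the last semantic feature name, and the head upsamples 2x once per level until it reaches target_level:

import re

semantic_names = ['Q6', 'Q5', 'Q4', 'Q3']  # decreasing order, as required
min_level = int(re.findall(r'\d+', semantic_names[-1])[0])  # 3
target_level = 0
n_upsample = min_level - target_level       # three rounds of 2x upsampling
print(min_level, n_upsample)                # 3 3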