Example #1
def _conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4):
    ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout
    Args:
        ip: Input keras tensor
        nb_filter: number of filters
        bottleneck: add bottleneck block
        dropout_rate: dropout rate
        weight_decay: weight decay factor
    Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck)
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('conv_block'):
        x = BatchNormalization(axis=concat_axis, momentum=0.1, epsilon=1e-5)(ip)
        x = Activation('relu')(x)

        if bottleneck:
            inter_channel = nb_filter * 4  # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua

            x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
                       kernel_regularizer=l2(weight_decay))(x)
            x = BatchNormalization(axis=concat_axis, epsilon=1e-5, momentum=0.1)(x)
            x = Activation('relu')(x)

        x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)

    return x
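
A minimal usage sketch (an illustration, not part of the original snippet; it presumes the same Keras imports the block above relies on):

from keras.layers import Input
from keras.models import Model

# Dummy channels_last input with 16 feature maps.
ip = Input(shape=(32, 32, 16))
x = _conv_block(ip, nb_filter=12, bottleneck=True, dropout_rate=0.2)
model = Model(ip, x)  # output carries 12 feature maps (the growth rate)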
Example #2
    def __init__(self, lr=0.01, momentum=0., decay=0.,
                 nesterov=False, lr_mult=None, **kwargs):
        super(MultiSGD, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.momentum = K.variable(momentum, name='momentum')
            self.decay = K.variable(decay, name='decay')
        self.initial_decay = decay
        self.nesterov = nesterov
        self.lr_mult = lr_mult
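
A hypothetical instantiation (the rest of MultiSGD is not shown here; the lr_mult key format below is an assumption, typically weight-name prefixes mapped to per-layer multipliers):

# Hypothetical: apply a 10x smaller learning rate to early layers.
opt = MultiSGD(lr=1e-4, momentum=0.9, nesterov=True,
               lr_mult={'conv1_1': 0.1, 'conv1_2': 0.1})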
Example #3
def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4, block_prefix=None):
    '''
    Adds a convolution layer (with batch normalization and relu),
    and optionally a bottleneck layer.

    # Arguments
        ip: Input tensor
        nb_filter: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        bottleneck: if True, adds a bottleneck convolution block
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        block_prefix: str, for unique layer naming

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        output tensor of block
    '''
    with K.name_scope('ConvBlock'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name=name_or_none(block_prefix, '_bn'))(ip)
        x = Activation('relu')(x)

        if bottleneck:
            inter_channel = nb_filter * 4

            x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
                       kernel_regularizer=l2(weight_decay), name=name_or_none(block_prefix, '_bottleneck_conv2D'))(x)
            x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                                   name=name_or_none(block_prefix, '_bottleneck_bn'))(x)
            x = Activation('relu')(x)

        x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False,
                   name=name_or_none(block_prefix, '_conv2D'))(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)

    return x
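
These DenseNet snippets call a name_or_none helper that is not shown; a likely definition, inferred from how it is invoked with a prefix and a suffix (an assumption, not code from the example), is:

def name_or_none(prefix, name):
    # Build a unique layer name only when a block prefix was supplied.
    return prefix + name if (prefix is not None and name is not None) else None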
Example #4
def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None,
                  weight_decay=1e-4, grow_nb_filters=True, return_concat_list=False, block_prefix=None):
    '''
    Build a dense_block where the output of each conv_block is fed
    to subsequent ones

    # Arguments
        x: input keras tensor
        nb_layers: the number of conv_blocks to append to the model
        nb_filter: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        growth_rate: growth rate of the dense block
        bottleneck: if True, adds a bottleneck convolution block to
            each conv_block
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        grow_nb_filters: if True, allows number of filters to grow
        return_concat_list: set to True to return the list of
            feature maps along with the actual output
        block_prefix: str, for block unique naming

    # Returns
        If return_concat_list is True, returns the output
        keras tensor, the number of filters, and a list of all the
        feature maps appended within the dense block

        If return_concat_list is False, returns the output
        keras tensor and the number of filters
    '''
    with K.name_scope('DenseBlock'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x_list = [x]

        for i in range(nb_layers):
            cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay,
                              block_prefix=name_or_none(block_prefix, '_%i' % i))
            x_list.append(cb)

            x = concatenate([x, cb], axis=concat_axis)

            if grow_nb_filters:
                nb_filter += growth_rate

        if return_concat_list:
            return x, nb_filter, x_list
        else:
            return x, nb_filter
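
A quick shape check (a sketch under the same imports as above, not part of the example): with channels_last data, an input holding k0 feature maps leaves the block with k0 + nb_layers * growth_rate of them.

ip = Input(shape=(32, 32, 16))  # k0 = 16
x, nb_filter = __dense_block(ip, nb_layers=4, nb_filter=16, growth_rate=12)
# x now carries 16 + 4 * 12 = 64 channels, and nb_filter == 64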
Example #5
def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4, block_prefix=None, transition_pooling='max'):
    '''
    Adds a pointwise convolution layer (with batch normalization and relu),
    and a 2x2 pooling layer (max or average, per `transition_pooling`).
    The number of output convolution filters can be reduced by
    appropriately reducing the compression parameter.

    # Arguments
        ip: input keras tensor
        nb_filter: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        compression: calculated as 1 - reduction. Reduces the number
            of feature maps in the transition block.
        weight_decay: weight decay factor
        block_prefix: str, for block unique naming
        transition_pooling: 'max' or 'avg', the type of pooling applied
            after the pointwise convolution

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, nb_filter * compression, rows / 2, cols / 2)`
        if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows / 2, cols / 2, nb_filter * compression)`
        if data_format='channels_last'.

    # Returns
        a keras tensor
    '''
    with K.name_scope('Transition'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name=name_or_none(block_prefix, '_bn'))(ip)
        x = Activation('relu')(x)
        x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same',
                   use_bias=False, kernel_regularizer=l2(weight_decay), name=name_or_none(block_prefix, '_conv2D'))(x)
        if transition_pooling == 'avg':
            x = AveragePooling2D((2, 2), strides=(2, 2))(x)
        elif transition_pooling == 'max':
            x = MaxPooling2D((2, 2), strides=(2, 2))(x)

        return x
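
As a usage note (an illustrative sketch, not from the example): DenseNet-C uses reduction = 0.5, i.e. compression = 0.5, halving the feature maps entering the next dense block.

ip = Input(shape=(32, 32, 64))
x = __transition_block(ip, nb_filter=64, compression=0.5)  # -> 32 filters, 16x16 spatial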
Example #6
def __transition_up_block(ip, nb_filters, type='deconv', weight_decay=1E-4, block_prefix=None):
    '''Adds an upsampling block. Upsampling operation relies on the `type` parameter.

    # Arguments
        ip: input keras tensor
        nb_filters: integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution)
        type: can be 'upsampling', 'subpixel', 'deconv'. Determines
            type of upsampling performed
        weight_decay: weight decay factor
        block_prefix: str, for block unique naming

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, nb_filters, rows * 2, cols * 2)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows * 2, cols * 2, nb_filters)` if data_format='channels_last'.

    # Returns
        a keras tensor
    '''
    with K.name_scope('TransitionUp'):

        if type == 'upsampling':
            x = UpSampling2D(name=name_or_none(block_prefix, '_upsampling'))(ip)
        elif type == 'subpixel':
            x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay),
                       use_bias=False, kernel_initializer='he_normal', name=name_or_none(block_prefix, '_conv2D'))(ip)
            x = SubPixelUpscaling(scale_factor=2, name=name_or_none(block_prefix, '_subpixel'))(x)
            x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay),
                       use_bias=False, kernel_initializer='he_normal', name=name_or_none(block_prefix, '_conv2D'))(x)
        else:
            x = Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same', strides=(2, 2),
                                kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay),
                                name=name_or_none(block_prefix, '_conv2DT'))(ip)
        return x
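
A shape sanity check (sketch; assumes the same imports as the snippets above):

ip = Input(shape=(16, 16, 64))
x = __transition_up_block(ip, nb_filters=64, type='deconv')  # -> (32, 32, 64)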
Example #7
def _transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4):
    ''' Apply BatchNorm, Relu, 1x1 Conv2D with optional compression, and AveragePooling2D
    Args:
        ip: keras tensor
        nb_filter: number of filters
        compression: calculated as 1 - reduction. Reduces the number of feature maps
                    in the transition block.
        weight_decay: weight decay factor
    Returns: keras tensor, after applying batch_norm, relu-conv, avg_pool
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('transition_block'):
        x = BatchNormalization(axis=concat_axis, epsilon=1e-5, momentum=0.1)(ip)
        x = Activation('relu')(x)
        x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
                   kernel_regularizer=l2(weight_decay))(x)
        x = AveragePooling2D((2, 2), strides=(2, 2))(x)

    return x
Example #8
def _add_auxiliary_head(x, classes, weight_decay, pooling, include_top):
    '''Adds an auxiliary head for training the model

    From section A.7 "Training of ImageNet models" of the paper, all NASNet models are
    trained using an auxiliary classifier around 2/3 of the depth of the network, with
    a loss weight of 0.4

    # Arguments
        x: input tensor
        classes: number of output classes
        weight_decay: l2 regularization weight
        pooling: global pooling mode ('avg' or 'max') applied when
            include_top is False
        include_top: whether to attach the fully-connected
            classification layer

    # Returns
        a keras Tensor
    '''
    img_height = 1 if K.image_data_format() == 'channels_last' else 2
    img_width = 2 if K.image_data_format() == 'channels_last' else 3
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('auxiliary_branch'):
        auxiliary_x = Activation('relu')(x)
        auxiliary_x = AveragePooling2D((5, 5),
                                       strides=(3, 3),
                                       padding='valid',
                                       name='aux_pool')(auxiliary_x)
        auxiliary_x = Conv2D(128, (1, 1),
                             padding='same',
                             use_bias=False,
                             name='aux_conv_projection',
                             kernel_initializer='he_normal',
                             kernel_regularizer=l2(weight_decay))(auxiliary_x)
        auxiliary_x = BatchNormalization(axis=channel_axis,
                                         momentum=_BN_DECAY,
                                         epsilon=_BN_EPSILON,
                                         name='aux_bn_projection')(auxiliary_x)
        auxiliary_x = Activation('relu')(auxiliary_x)

        auxiliary_x = Conv2D(768, (auxiliary_x._keras_shape[img_height],
                                   auxiliary_x._keras_shape[img_width]),
                             padding='valid',
                             use_bias=False,
                             kernel_initializer='he_normal',
                             kernel_regularizer=l2(weight_decay),
                             name='aux_conv_reduction')(auxiliary_x)
        auxiliary_x = BatchNormalization(axis=channel_axis,
                                         momentum=_BN_DECAY,
                                         epsilon=_BN_EPSILON,
                                         name='aux_bn_reduction')(auxiliary_x)
        auxiliary_x = Activation('relu')(auxiliary_x)

        if include_top:
            auxiliary_x = Flatten()(auxiliary_x)
            auxiliary_x = Dense(classes,
                                activation='softmax',
                                kernel_regularizer=l2(weight_decay),
                                name='aux_predictions')(auxiliary_x)
        else:
            if pooling == 'avg':
                auxiliary_x = GlobalAveragePooling2D()(auxiliary_x)
            elif pooling == 'max':
                auxiliary_x = GlobalMaxPooling2D()(auxiliary_x)

    return auxiliary_x
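
Per the docstring's note about a loss weight of 0.4, a hedged sketch of wiring the auxiliary branch into training (the model/output names and the weight-decay value below are illustrative, not from the example):

aux = _add_auxiliary_head(x, classes=1000, weight_decay=4e-5, pooling='avg', include_top=True)
model = Model(inputs, [predictions, aux])
model.compile(optimizer='sgd',
              loss=['categorical_crossentropy', 'categorical_crossentropy'],
              loss_weights=[1.0, 0.4])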
Example #9
    def __init__(self, weight_decay, **kwargs):
        with K.name_scope(self.__class__.__name__):
            self.weight_decay = K.variable(weight_decay, name='weight_decay')
        super(DecoupleWeightDecay, self).__init__(**kwargs)
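
A hypothetical way such a mixin gets combined with a base optimizer (the rest of the class is not shown; this composition is an assumption):

from keras.optimizers import SGD

# Hypothetical AdamW-style composition via cooperative __init__ chaining.
class SGDW(DecoupleWeightDecay, SGD):
    pass

opt = SGDW(weight_decay=1e-4, lr=0.01, momentum=0.9)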
Example #10
def get_gradient_dynamic_norm(model):
    # Global L2 norm of the loss gradients w.r.t. a (project-specific)
    # tail slice of the trainable weights.
    with K.name_scope('gradient_dyn_norm'):
        grads_dyn = K.gradients(model.total_loss, model.trainable_weights[22:])
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads_dyn]))
    return norm
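
A hedged usage sketch (assumes a compiled graph-mode Keras model, where model.targets and model.sample_weights are populated):

norm_fn = K.function(model.inputs + model.targets + model.sample_weights,
                     [get_gradient_dynamic_norm(model)])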
Example #11
def _adjust_block(p, ip, filters, weight_decay=5e-5, id=None):
    '''
    Adjusts the input `p` to match the shape of the input `ip`,
    or handles situations where the number of output filters needs
    to be changed

    # Arguments:
        p: input tensor which needs to be modified
        ip: input tensor whose shape needs to be matched
        filters: number of output filters to be matched
        weight_decay: l2 regularization weight
        id: string id

    # Returns:
        an adjusted Keras tensor
    '''
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
    img_dim = 2 if K.image_data_format() == 'channels_first' else -2

    with K.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p._keras_shape[img_dim] != ip._keras_shape[img_dim]:
            with K.name_scope('adjust_reduction_block_%s' % id):
                p = Activation('relu', name='adjust_relu_1_%s' % id)(p)

                p1 = AveragePooling2D((1, 1),
                                      strides=(2, 2),
                                      padding='valid',
                                      name='adjust_avg_pool_1_%s' % id)(p)
                p1 = Conv2D(filters // 2, (1, 1),
                            padding='same',
                            use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_1_%s' % id,
                            kernel_initializer='he_normal')(p1)

                p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = AveragePooling2D((1, 1),
                                      strides=(2, 2),
                                      padding='valid',
                                      name='adjust_avg_pool_2_%s' % id)(p2)
                p2 = Conv2D(filters // 2, (1, 1),
                            padding='same',
                            use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_2_%s' % id,
                            kernel_initializer='he_normal')(p2)

                p = concatenate([p1, p2], axis=channel_dim)
                p = BatchNormalization(axis=channel_dim,
                                       momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)

        elif p._keras_shape[channel_dim] != filters:
            with K.name_scope('adjust_projection_block_%s' % id):
                p = Activation('relu')(p)
                p = Conv2D(filters, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='adjust_conv_projection_%s' % id,
                           use_bias=False,
                           kernel_regularizer=l2(weight_decay),
                           kernel_initializer='he_normal')(p)
                p = BatchNormalization(axis=channel_dim,
                                       momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)
    return p
Example #12
def remove_squeezable_dimensions(
    labels, predictions, expected_rank_diff=0, name=None):
  """Squeeze last dim if ranks differ from expected by exactly 1.

  In the common case where we expect shapes to match, `expected_rank_diff`
  defaults to 0, and we squeeze the last dimension of the larger rank if they
  differ by 1.

  But, for example, if `labels` contains class IDs and `predictions` contains 1
  probability per class, we expect `predictions` to have 1 more dimension than
  `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
  `labels` if `rank(predictions) - rank(labels) == 0`, and
  `predictions` if `rank(predictions) - rank(labels) == 2`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  """
  with K.name_scope(name or 'remove_squeezable_dimensions'):
    if not isinstance(predictions, tf.RaggedTensor):
      predictions = tf.convert_to_tensor(predictions)
    if not isinstance(labels, tf.RaggedTensor):
      labels = tf.convert_to_tensor(labels)
    predictions_shape = predictions.shape
    predictions_rank = predictions_shape.ndims
    labels_shape = labels.shape
    labels_rank = labels_shape.ndims
    if (labels_rank is not None) and (predictions_rank is not None):
      # Use static rank.
      rank_diff = predictions_rank - labels_rank
      if (rank_diff == expected_rank_diff + 1 and
          predictions_shape.dims[-1].is_compatible_with(1)):
        predictions = tf.compat.v1.squeeze(predictions, [-1])
      elif (rank_diff == expected_rank_diff - 1 and
            labels_shape.dims[-1].is_compatible_with(1)):
        labels = tf.compat.v1.squeeze(labels, [-1])
      return labels, predictions

    # Use dynamic rank.
    rank_diff = tf.rank(predictions) - tf.rank(labels)
    if (predictions_rank is None) or (
        predictions_shape.dims[-1].is_compatible_with(1)):
      predictions = tf.compat.v1.cond(
          tf.equal(expected_rank_diff + 1, rank_diff),
          lambda: tf.compat.v1.squeeze(predictions, [-1]),
          lambda: predictions)
    if (labels_rank is None) or (
        labels_shape.dims[-1].is_compatible_with(1)):
      labels = tf.compat.v1.cond(
          tf.equal(expected_rank_diff - 1, rank_diff),
          lambda: tf.compat.v1.squeeze(labels, [-1]),
          lambda: labels)
    return labels, predictions
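
A worked example (sketch; plain TensorFlow): class-ID labels of shape (4,) against predictions of shape (4, 1) differ in rank by expected_rank_diff + 1, so the trailing singleton is squeezed off predictions.

import tensorflow as tf

labels = tf.constant([1., 0., 1., 1.])               # shape (4,)
predictions = tf.constant([[.9], [.2], [.8], [.6]])  # shape (4, 1)
labels, predictions = remove_squeezable_dimensions(labels, predictions)
# predictions now has shape (4,), matching labels.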
Example #13
def vgg16(config, fake, real, layers):
    features = []
    parameters = []

    with K.name_scope("VGG16"):
        # Preprocess
        mean = tf.constant([123.68, 116.779, 103.939],
                           dtype=tf.float32,
                           shape=[1, 1, 1, 3],
                           name="img_mean")
        fake = tf.image.resize_images(fake, size=[224, 224]) * 255.0 - mean
        real = tf.image.resize_images(real, size=[224, 224]) * 255.0 - mean

        # First Convolution
        w_conv11 = tf.Variable(tf.truncated_normal([3, 3, 3, 64],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv11 = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv11, b_conv11]

        # Output
        conv11 = tf.nn.conv2d(
            fake, w_conv11, strides=[1, 1, 1, 1], padding='SAME') + b_conv11
        conv11 = tf.nn.relu(conv11)

        # Ground-Truth
        conv11_gt = tf.nn.conv2d(
            real, w_conv11, strides=[1, 1, 1, 1], padding='SAME') + b_conv11
        conv11_gt = tf.nn.relu(conv11_gt)

        # Loss
        if "relu11" in layers:
            features += [conv11, conv11_gt]

        # Second Convolution
        w_conv12 = tf.Variable(tf.truncated_normal([3, 3, 64, 64],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv12 = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv12, b_conv12]

        # Output
        conv12 = tf.nn.conv2d(
            conv11, w_conv12, strides=[1, 1, 1, 1], padding='SAME') + b_conv12
        conv12 = tf.nn.relu(conv12)

        # Ground-Truth
        conv12_gt = tf.nn.conv2d(
            conv11_gt, w_conv12, strides=[1, 1, 1, 1], padding='SAME') + b_conv12
        conv12_gt = tf.nn.relu(conv12_gt)

        # Loss
        if "relu12" in layers:
            features += [conv12, conv12_gt]

        # First Maxpool
        pool1 = tf.nn.max_pool(conv12,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name="pool1")
        pool1_gt = tf.nn.max_pool(conv12_gt,
                                  ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1],
                                  padding='SAME',
                                  name="pool1_gt")

        # Third Convolution
        w_conv21 = tf.Variable(tf.truncated_normal([3, 3, 64, 128],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv21 = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv21, b_conv21]

        # Output
        conv21 = tf.nn.conv2d(
            pool1, w_conv21, strides=[1, 1, 1, 1], padding='SAME') + b_conv21
        conv21 = tf.nn.relu(conv21)

        # Ground-Truth
        conv21_gt = tf.nn.conv2d(
            pool1_gt, w_conv21, strides=[1, 1, 1, 1], padding='SAME') + b_conv21
        conv21_gt = tf.nn.relu(conv21_gt)

        # Loss
        if "relu21" in layers:
            features += [conv21, conv21_gt]

        # Fourth Convolution
        w_conv22 = tf.Variable(tf.truncated_normal([3, 3, 128, 128],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv22 = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv22, b_conv22]

        # Output
        conv22 = tf.nn.conv2d(
            conv21, w_conv22, strides=[1, 1, 1, 1], padding='SAME') + b_conv22
        conv22 = tf.nn.relu(conv22)

        # Ground-Truth
        conv22_gt = tf.nn.conv2d(
            conv21_gt, w_conv22, strides=[1, 1, 1, 1], padding='SAME') + b_conv22
        conv22_gt = tf.nn.relu(conv22_gt)

        # Loss
        if "relu22" in layers:
            features += [conv22, conv22_gt]

        # Second Maxpool
        pool2 = tf.nn.max_pool(conv22,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name="pool2")
        pool2_gt = tf.nn.max_pool(conv22_gt,
                                  ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1],
                                  padding='SAME',
                                  name="pool2_gt")

        # Fifth Convolution
        w_conv31 = tf.Variable(tf.truncated_normal([3, 3, 128, 256],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv31 = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv31, b_conv31]

        # Output
        conv31 = tf.nn.conv2d(
            pool2, w_conv31, strides=[1, 1, 1, 1], padding='SAME') + b_conv31
        conv31 = tf.nn.relu(conv31)

        # Ground-Truth
        conv31_gt = tf.nn.conv2d(
            pool2_gt, w_conv31, strides=[1, 1, 1, 1], padding='SAME') + b_conv31
        conv31_gt = tf.nn.relu(conv31_gt)

        # Loss
        if "relu31" in layers:
            features += [conv31, conv31_gt]

        # Sixth Convolution
        w_conv32 = tf.Variable(tf.truncated_normal([3, 3, 256, 256],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv32 = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv32, b_conv32]

        # Output
        conv32 = tf.nn.conv2d(
            conv31, w_conv32, strides=[1, 1, 1, 1], padding='SAME') + b_conv32
        conv32 = tf.nn.relu(conv32)

        # Ground-Truth
        conv32_gt = tf.nn.conv2d(
            conv31_gt, w_conv32, strides=[1, 1, 1, 1], padding='SAME') + b_conv32
        conv32_gt = tf.nn.relu(conv32_gt)

        # Loss
        if "relu32" in layers:
            features += [conv32, conv32_gt]

        # Seventh Convolution
        w_conv33 = tf.Variable(tf.truncated_normal([3, 3, 256, 256],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv33 = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv33, b_conv33]

        # Output
        conv33 = tf.nn.conv2d(
            conv32, w_conv33, strides=[1, 1, 1, 1], padding='SAME') + b_conv33
        conv33 = tf.nn.relu(conv33)

        # Ground-Truth
        conv33_gt = tf.nn.conv2d(
            conv32_gt, w_conv33, strides=[1, 1, 1, 1], padding='SAME') + b_conv33
        conv33_gt = tf.nn.relu(conv33_gt)

        # Loss
        if "relu33" in layers:
            features += [conv33, conv33_gt]

        # Third Maxpool
        pool3 = tf.nn.max_pool(conv33,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name="pool3")
        pool3_gt = tf.nn.max_pool(conv33_gt,
                                  ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1],
                                  padding='SAME',
                                  name="pool3_gt")

        # Eighth Convolution
        w_conv41 = tf.Variable(tf.truncated_normal([3, 3, 256, 512],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv41 = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv41, b_conv41]

        # Output
        conv41 = tf.nn.conv2d(
            pool3, w_conv41, strides=[1, 1, 1, 1], padding='SAME') + b_conv41
        conv41 = tf.nn.relu(conv41)

        # Ground-Truth
        conv41_gt = tf.nn.conv2d(
            pool3_gt, w_conv41, strides=[1, 1, 1, 1], padding='SAME') + b_conv41
        conv41_gt = tf.nn.relu(conv41_gt)

        # Loss
        if "relu41" in layers:
            features += [conv41, conv41_gt]

        # Ninth Convolution
        w_conv42 = tf.Variable(tf.truncated_normal([3, 3, 512, 512],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv42 = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv42, b_conv42]

        # Output
        conv42 = tf.nn.conv2d(
            conv41, w_conv42, strides=[1, 1, 1, 1], padding='SAME') + b_conv42
        conv42 = tf.nn.relu(conv42)

        # Ground-Truth
        conv42_gt = tf.nn.conv2d(
            conv41_gt, w_conv42, strides=[1, 1, 1, 1], padding='SAME') + b_conv42
        conv42_gt = tf.nn.relu(conv42_gt)

        # Loss
        if "relu42" in layers:
            features += [conv42, conv42_gt]

        # Tenth Convolution
        w_conv43 = tf.Variable(tf.truncated_normal([3, 3, 512, 512],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv43 = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv43, b_conv43]

        # Output
        conv43 = tf.nn.conv2d(
            conv42, w_conv43, strides=[1, 1, 1, 1], padding='SAME') + b_conv43
        conv43 = tf.nn.relu(conv43)

        # Ground-Truth
        conv43_gt = tf.nn.conv2d(
            conv42_gt, w_conv43, strides=[1, 1, 1, 1], padding='SAME') + b_conv43
        conv43_gt = tf.nn.relu(conv43_gt)

        # Loss
        if "relu43" in layers:
            features += [conv43, conv43_gt]

        # Fourth Maxpool
        pool4 = tf.nn.max_pool(conv43,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name="pool4")
        pool4_gt = tf.nn.max_pool(conv43_gt,
                                  ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1],
                                  padding='SAME',
                                  name="pool4_gt")

        # Eleventh Convolution
        w_conv51 = tf.Variable(tf.truncated_normal([3, 3, 512, 512],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv51 = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv51, b_conv51]

        # Output
        conv51 = tf.nn.conv2d(
            pool4, w_conv51, strides=[1, 1, 1, 1], padding='SAME') + b_conv51
        conv51 = tf.nn.relu(conv51)

        # Ground-Truth
        conv51_gt = tf.nn.conv2d(
            pool4_gt, w_conv51, strides=[1, 1, 1, 1], padding='SAME') + b_conv51
        conv51_gt = tf.nn.relu(conv51_gt)

        # Loss
        if "relu51" in layers:
            features += [conv51, conv51_gt]

        # Twelfth Convolution
        w_conv52 = tf.Variable(tf.truncated_normal([3, 3, 512, 512],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv52 = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv52, b_conv52]

        # Output
        conv52 = tf.nn.conv2d(
            conv51, w_conv52, strides=[1, 1, 1, 1], padding='SAME') + b_conv52
        conv52 = tf.nn.relu(conv52)

        # Ground-Truth
        conv52_gt = tf.nn.conv2d(
            conv51_gt, w_conv52, strides=[1, 1, 1, 1], padding='SAME') + b_conv52
        conv52_gt = tf.nn.relu(conv52_gt)

        # Loss
        if "relu52" in layers:
            features += [conv52, conv52_gt]

        # Thirteenth Convolution
        w_conv53 = tf.Variable(tf.truncated_normal([3, 3, 512, 512],
                                                   dtype=tf.float32,
                                                   stddev=1e-1),
                               trainable=False,
                               name="weights")
        b_conv53 = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
                               trainable=False,
                               name="biases")
        parameters += [w_conv53, b_conv53]

        # Output
        conv53 = tf.nn.conv2d(
            conv52, w_conv53, strides=[1, 1, 1, 1], padding='SAME') + b_conv53
        conv53 = tf.nn.relu(conv53)

        # Ground-Truth
        conv53_gt = tf.nn.conv2d(
            conv52_gt, w_conv53, strides=[1, 1, 1, 1], padding='SAME') + b_conv53
        conv53_gt = tf.nn.relu(conv53_gt)

        # Loss
        if "relu53" in layers:
            features += [conv53, conv53_gt]

        # Fifth Maxpool
        pool5 = tf.nn.max_pool(conv53,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name="pool5")
        pool5_gt = tf.nn.max_pool(conv53_gt,
                                  ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1],
                                  padding='SAME',
                                  name="pool5_gt")

        # FC Parameters
        shape = int(np.prod(pool5.get_shape()[1:]))
        pool5_flat = tf.reshape(pool5, [-1, shape])
        pool5_gt_flat = tf.reshape(pool5_gt, [-1, shape])

        # First FC
        w_fc1 = tf.Variable(tf.truncated_normal([shape, 4096],
                                                dtype=tf.float32,
                                                stddev=1e-1),
                            trainable=False,
                            name="weights")
        b_fc1 = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                            trainable=False,
                            name="biases")
        parameters += [w_fc1, b_fc1]

        # Output
        fc1 = tf.matmul(pool5_flat, w_fc1) + b_fc1
        fc1 = tf.nn.relu(fc1)

        # Ground-Truth
        fc1_gt = tf.matmul(pool5_gt_flat, w_fc1) + b_fc1
        fc1_gt = tf.nn.relu(fc1_gt)

        # Loss
        if "fc1" in layers:
            features += [fc1, fc1_gt]

        # Second FC
        w_fc2 = tf.Variable(tf.truncated_normal([4096, 4096],
                                                dtype=tf.float32,
                                                stddev=1e-1),
                            trainable=False,
                            name="weights")
        b_fc2 = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
                            trainable=False,
                            name="biases")
        parameters += [w_fc2, b_fc2]

        # Output
        fc2 = tf.matmul(fc1, w_fc2) + b_fc2
        fc2 = tf.nn.relu(fc2)

        # Ground-Truth
        fc2_gt = tf.matmul(fc1_gt, w_fc2) + b_fc2
        fc2_gt = tf.nn.relu(fc2_gt)

        # Loss
        if "fc2" in layers:
            features += [fc2, fc2_gt]

        # Third FC
        w_fc3 = tf.Variable(tf.truncated_normal([4096, 1000],
                                                dtype=tf.float32,
                                                stddev=1e-1),
                            trainable=False,
                            name="weights")
        b_fc3 = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
                            trainable=False,
                            name="biases")
        parameters += [w_fc3, b_fc3]

        # Output
        fc3 = tf.matmul(fc2, w_fc3) + b_fc3

        # Ground-Truth
        fc3_gt = tf.matmul(fc2_gt, w_fc3) + b_fc3

        # Loss
        if "fc3" in layers:
            features += [fc3, fc3_gt]

    # Load Weights
    load_weights(config, parameters)
    return features
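
The returned features list alternates generated/ground-truth activations for each requested layer; a hedged sketch of reducing it to a perceptual loss (the reduction below is an assumption, not part of the example):

feats = vgg16(config, fake, real, layers=["relu22", "relu33"])
perceptual_loss = tf.add_n([tf.reduce_mean(tf.square(feats[i] - feats[i + 1]))
                            for i in range(0, len(feats), 2)])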
Example #14
    def _call(self, inputs, **kwargs):
        if self.proto_number == self.capsule_number:
            return inputs
        else:
            signals = inputs[0]
            diss = inputs[1]
            signal_shape = mixed_shape(signals)

            if self.use_for_loop:
                diss_stack = []
                signals_stack = []
                sub_idx = None
                with K.name_scope('for_loop'):
                    for p in self._proto_distrib:
                        with K.name_scope('compute_slices'):
                            diss_ = diss[:, p[0]:(p[-1]+1)]
                            signals_ = K.reshape(signals[:, p[0]:(p[-1]+1), :],
                                                 [signal_shape[0] * len(p)] + list(signal_shape[2:]))
                        with K.name_scope('competition'):
                            if len(p) > 1:
                                with K.name_scope('competition_indices'):
                                    argmin_idx = K.argmin(diss_, axis=-1)
                                    if sub_idx is None:
                                        sub_idx = K.arange(0, signal_shape[0], dtype=argmin_idx.dtype)
                                    argmin_idx = argmin_idx + len(p) * sub_idx

                                with K.name_scope('dissimilarity_competition'):
                                    diss_stack.append(K.expand_dims(K.gather(K.flatten(diss_), argmin_idx), -1))

                                with K.name_scope('signal_competition'):
                                    signals_stack.append(K.gather(signals_, argmin_idx))
                            else:
                                diss_stack.append(diss_)
                                signals_stack.append(signals_)

                diss = K.concatenate(diss_stack, 1)

                with K.name_scope('signal_concatenation'):
                    signals = K.concatenate(signals_stack, 1)
                    signals = K.reshape(signals, [signal_shape[0], self.capsule_number] + list(signal_shape[2:]))

            else:
                with K.name_scope('dissimilarity_preprocessing'):
                    # extend if it is not equally distributed
                    if not self._equally_distributed:
                        # permute to first dimension is prototype (protos x batch)
                        diss = K.permute_dimensions(diss, [1, 0])
                        # gather regarding extension (preparing for reshape to block)
                        diss = K.gather(diss, self._proto_extension)
                        # permute back (max_proto_number x (max_proto_number * batch))
                        diss = K.permute_dimensions(diss, [1, 0])

                    # reshape to block form
                    diss = K.reshape(diss, [signal_shape[0] * self.capsule_number, self._max_proto_number_in_capsule])

                with K.name_scope('competition_indices'):
                    # get minimal idx in each class and batch for element selection in diss and signals
                    argmin_idx = K.argmin(diss, axis=-1)
                    argmin_idx = argmin_idx + self._max_proto_number_in_capsule * \
                                 K.arange(0, signal_shape[0] * self.capsule_number, dtype=argmin_idx.dtype)

                with K.name_scope('dissimilarity_competition'):
                    # get minimal values in the form (batch x capsule)
                    diss = K.gather(K.flatten(diss), argmin_idx)
                    diss = K.reshape(diss, [signal_shape[0], self.capsule_number])

                with K.name_scope('signal_preprocessing'):
                    # apply the same steps as above for signals
                    # get signals in: (batch x protos x dim1 x ... x dimN) --> out: (batch x capsule x dim1 x ... x dimN)
                    # extend if it is not equally distributed
                    if not self._equally_distributed:
                        signals = K.permute_dimensions(signals, [1, 0] + list(range(2, len(signal_shape))))
                        signals = K.gather(signals, self._proto_extension)
                        signals = K.permute_dimensions(signals, [1, 0] + list(range(2, len(signal_shape))))

                    signals = K.reshape(signals,
                                        [signal_shape[0] * self.capsule_number * self._max_proto_number_in_capsule]
                                        + list(signal_shape[2:]))

                with K.name_scope('signal_competition'):
                    signals = K.gather(signals, argmin_idx)
                    signals = K.reshape(signals, [signal_shape[0], self.capsule_number] + list(signal_shape[2:]))

            return {0: signals, 1: diss}
Example #15
def _normal_A(ip, p, filters, weight_decay=5e-5, id=None):
    '''Adds a Normal cell for NASNet-A (Fig. 4 in the paper)

    # Arguments:
        ip: input tensor `x`
        p: input tensor `p`
        filters: number of output filters
        weight_decay: l2 regularization weight
        id: string id

    # Returns:
        a Keras tensor
    '''
    global NORMAL_IDX
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

    weights = load_normal_call(NORMAL_IDX)
    NORMAL_IDX += 1

    with K.name_scope('normal_A_block_%s' % id):
        p = _adjust_block(p, ip, filters, weight_decay, id, weights)

        h = Activation('relu')(ip)
        h = Conv2D(filters, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='normal_conv_1_%s' % id,
                   use_bias=False,
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay),
                   weights=[weights['begin_W']])(h)
        h = BatchNormalization(axis=channel_dim,
                               momentum=_BN_DECAY,
                               epsilon=_BN_EPSILON,
                               name='normal_bn_1_%s' % id,
                               weights=weights['begin_bn'])(h)

        with K.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters,
                                         kernel_size=(5, 5),
                                         weight_decay=weight_decay,
                                         id='normal_left1_%s' % id,
                                         weights=weights['left_0'])
            x1_2 = _separable_conv_block(p,
                                         filters,
                                         weight_decay=weight_decay,
                                         id='normal_right1_%s' % id,
                                         weights=weights['right_0'])
            x1 = add([x1_1, x1_2], name='normal_add_1_%s' % id)

        with K.name_scope('block_2'):
            x2_1 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         weight_decay=weight_decay,
                                         id='normal_left2_%s' % id,
                                         weights=weights['left_1'])
            x2_2 = _separable_conv_block(p,
                                         filters, (3, 3),
                                         weight_decay=weight_decay,
                                         id='normal_right2_%s' % id,
                                         weights=weights['right_1'])
            x2 = add([x2_1, x2_2], name='normal_add_2_%s' % id)

        with K.name_scope('block_3'):
            x3 = AveragePooling2D((3, 3),
                                  strides=(1, 1),
                                  padding='same',
                                  name='normal_left3_%s' % (id))(h)
            x3 = add([x3, p], name='normal_add_3_%s' % id)

        with K.name_scope('block_4'):
            x4_1 = AveragePooling2D((3, 3),
                                    strides=(1, 1),
                                    padding='same',
                                    name='normal_left4_%s' % (id))(p)
            x4_2 = AveragePooling2D((3, 3),
                                    strides=(1, 1),
                                    padding='same',
                                    name='normal_right4_%s' % (id))(p)
            x4 = add([x4_1, x4_2], name='normal_add_4_%s' % id)

        with K.name_scope('block_5'):
            x5 = _separable_conv_block(h,
                                       filters,
                                       weight_decay=weight_decay,
                                       id='normal_left5_%s' % id,
                                       weights=weights['left_4'])
            x5 = add([x5, h], name='normal_add_5_%s' % id)

        x = concatenate([p, x1, x2, x3, x4, x5],
                        axis=channel_dim,
                        name='normal_concat_%s' % id)
    return x, ip
Example #16
    def _define_generator_loss(self, logits):
        with K.name_scope('G_loss'):
            return losses.l1_distance(self.fake_images, logits)
Example #17
    def _call(self, inputs, **kwargs):
        if self.proto_number == self.capsule_number:
            return inputs

        else:
            signals = inputs[0]
            diss = inputs[1]

            signal_shape = None

            # signal.shape: (batch, proto_num, caps_dim1, ..., caps_dimN)
            if self.input_spec[0].ndim > 3:
                signal_shape = mixed_shape(signals)
                signals = K.reshape(signals, signal_shape[0:2] + (-1,))

            if not self._equally_distributed:
                if self.use_for_loop:
                    signals_stack = []
                    diss_stack = []
                    with K.name_scope('for_loop'):
                        for i, p in enumerate(self._proto_distrib):
                            with K.name_scope('compute_slices'):
                                diss_ = diss[:, p[0]:(p[-1]+1)]
                                signals_ = signals[:, p[0]:(p[-1] + 1), :]

                            if len(p) > 1:
                                with K.name_scope('competition_probabilities'):
                                    coefficients = prob_trans.NegSoftmax(axis=-1, max_stabilization=True)(
                                        diss_ * self.beta[i])

                                with K.name_scope('signal_competition'):
                                    signals_stack.append(K.expand_dims(K.batch_dot(coefficients, signals_, [1, 1]), 1))

                                with K.name_scope('dissimilarity_competition'):
                                    diss_stack.append(K.batch_dot(coefficients, diss_, [1, 1]))
                            else:
                                signals_stack.append(signals_)
                                diss_stack.append(diss_)

                    signals = K.concatenate(signals_stack, axis=1)
                    diss = K.concatenate(diss_stack, axis=-1)
                else:
                    extension_idx = []
                    for i in self._proto_extension:
                        if i not in extension_idx:
                            extension_idx.append(i)
                        else:
                            extension_idx.append(max(self._proto_extension)+1)

                    batch_size = K.shape(signals)[0] if signal_shape is None else signal_shape[0]
                    # reshape to block
                    with K.name_scope('competition_probabilities'):
                        with K.name_scope('neg_softmax'):
                            with K.name_scope('coefficients'):
                                beta = K.gather(self.beta, self._capsule_extension)
                                coefficients = -diss * beta
                                # max stabilization
                                coefficients = coefficients - K.max(coefficients, axis=-1, keepdims=True)
                                coefficients = K.exp(coefficients)
                                coefficients = K.concatenate([coefficients,
                                                              K.zeros_like(coefficients[:, 0:1])], axis=-1)
                                coefficients = K.transpose(coefficients)
                                coefficients = K.gather(coefficients, extension_idx)
                                coefficients = K.transpose(coefficients)
                                coefficients = K.reshape(coefficients,
                                                         [batch_size, self.capsule_number,
                                                          self._max_proto_number_in_capsule])
                            # this can never be a division by zero
                            with K.name_scope('normalization_constant'):
                                constant = K.sum(coefficients, axis=-1, keepdims=True)

                            probs = coefficients / constant

                    with K.name_scope('dissimilarity_preprocessing'):
                        diss = K.transpose(diss)
                        diss = K.gather(diss, self._proto_extension)
                        diss = K.transpose(diss)
                        diss = K.reshape(diss,
                                         [batch_size, self.capsule_number, self._max_proto_number_in_capsule])

                    with K.name_scope('dissimilarity_competition'):
                        diss = K.squeeze(K.batch_dot(probs, K.expand_dims(diss), [2, 2]), -1)

                    with K.name_scope('signal_preprocessing'):
                        signals = K.permute_dimensions(signals, [1, 0, 2])
                        signals = K.gather(signals, self._proto_extension)
                        signals = K.permute_dimensions(signals, [1, 0, 2])
                        signals = K.reshape(signals,
                                            [batch_size, self.capsule_number, self._max_proto_number_in_capsule, -1])

                    with K.name_scope('signal_competition'):
                        signals = K.batch_dot(probs, signals, [2, 2])

            else:
                batch_size = K.shape(signals)[0] if signal_shape is None else signal_shape[0]
                diss = K.reshape(diss, [batch_size, self.capsule_number, self._max_proto_number_in_capsule])

                with K.name_scope('competition_probabilities'):
                    coefficients = prob_trans.NegSoftmax(axis=-1, max_stabilization=True)(
                        diss * K.expand_dims(self.beta, -1))

                with K.name_scope('signal_competition'):
                    signals = K.reshape(signals,
                                        [batch_size, self.capsule_number, self._max_proto_number_in_capsule, -1])
                    signals = K.batch_dot(coefficients, signals, [2, 2])

                with K.name_scope('dissimilarity_competition'):
                    diss = K.squeeze(K.batch_dot(coefficients, K.expand_dims(diss), [2, 2]), -1)

            if self.input_spec[0].ndim > 3:
                signals = K.reshape(signals, [signal_shape[0], self.capsule_number] + list(signal_shape[2:]))

            return {0: signals, 1: diss}
Example #18
def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
    """Replicates a model on different GPUs.

  Specifically, this function implements single-machine
  multi-GPU data parallelism. It works in the following way:

  - Divide the model's input(s) into multiple sub-batches.
  - Apply a model copy on each sub-batch. Every model copy
      is executed on a dedicated GPU.
  - Concatenate the results (on CPU) into one big batch.

  E.g. if your `batch_size` is 64 and you use `gpus=2`,
  then we will divide the input into 2 sub-batches of 32 samples,
  process each sub-batch on one GPU, then return the full
  batch of 64 processed samples.

  This induces quasi-linear speedup on up to 8 GPUs.

  This function is only available with the TensorFlow backend
  for the time being.

  Args:
      model: A Keras model instance. To avoid OOM errors,
          this model could have been built on CPU, for instance
          (see usage example below).
      gpus: Integer >= 2, number of GPUs on which to create
          model replicas.
      cpu_merge: A boolean value to identify whether to force
          merging model weights under the scope of the CPU or not.
      cpu_relocation: A boolean value to identify whether to
          create the model's weights under the scope of the CPU.
          If the model is not defined under any preceding device
          scope, you can still rescue it by activating this option.

  Returns:
      A Keras `Model` instance which can be used just like the initial
      `model` argument, but which distributes its workload on multiple GPUs.

  Example 1: Training models with weights merge on CPU

  ```python
      import tensorflow as tf
      from keras.applications import Xception
      from keras.utils import multi_gpu_model
      import numpy as np

      num_samples = 1000
      height = 224
      width = 224
      num_classes = 1000

      # Instantiate the base model (or "template" model).
      # We recommend doing this with under a CPU device scope,
      # so that the model's weights are hosted on CPU memory.
      # Otherwise they may end up hosted on a GPU, which would
      # complicate weight sharing.
      with tf.device('/cpu:0'):
          model = Xception(weights=None,
                           input_shape=(height, width, 3),
                           classes=num_classes)

      # Replicates the model on 8 GPUs.
      # This assumes that your machine has 8 available GPUs.
      parallel_model = multi_gpu_model(model, gpus=8)
      parallel_model.compile(loss='categorical_crossentropy',
                             optimizer='rmsprop')

      # Generate dummy data.
      x = np.random.random((num_samples, height, width, 3))
      y = np.random.random((num_samples, num_classes))

      # This `fit` call will be distributed on 8 GPUs.
      # Since the batch size is 256, each GPU will process 32 samples.
      parallel_model.fit(x, y, epochs=20, batch_size=256)

      # Save model via the template model (which shares the same weights):
      model.save('my_model.h5')
  ```

  Example 2: Training models with weights merge on CPU using cpu_relocation

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_relocation=True)
           print("Training using multiple GPUs..")
       except ValueError:
           print("Training using single GPU or CPU..")

       model.compile(..)
       ..
  ```

  Example 3: Training models with weights merge on GPU (recommended for NV-link)

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_merge=False)
           print("Training using multiple GPUs..")
       except ValueError:
           print("Training using single GPU or CPU..")
       model.compile(..)
       ..
  ```

  Raises:
    ValueError: if the `gpus` argument does not match available devices.
  """
    if isinstance(gpus, (list, tuple)):
        if len(gpus) <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `len(gpus) >= 2`. '
                             'Received: `gpus=%s`' % gpus)
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        if gpus <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `gpus >= 2`. '
                             'Received: `gpus=%s`' % gpus)
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
    available_devices = _get_available_devices()
    available_devices = [
        _normalize_device_name(name) for name in available_devices
    ]
    for device in target_devices:
        if device not in available_devices:
            raise ValueError(
                'To call `multi_gpu_model` with `gpus=%s`, '
                'we expect the following devices to be available: %s. '
                'However this machine only has: %s. '
                'Try reducing `gpus`.' %
                (gpus, target_devices, available_devices))

    def get_slice(data, i, parts):
        """Slice an array into `parts` slices and return slice `i`.

    Args:
      data: array to slice.
      i: index of slice to return.
      parts: number of slices to make.

    Returns:
      Slice `i` of `data`.
    """
        shape = tf.compat.v1.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == parts - 1:
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    # Relocate the model definition under CPU device scope if needed
    if cpu_relocation:
        from keras.models import clone_model  # pylint: disable=g-import-not-at-top
        with tf.compat.v1.device('/cpu:0'):
            model = clone_model(model)

    all_outputs = [[] for _ in range(len(model.outputs))]

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.compat.v1.device('/gpu:%d' % gpu_id):
            with backend.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.shape.as_list())[1:]
                    slice_i = Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={
                                         'i': i,
                                         'parts': num_gpus
                                     })(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o, output in enumerate(outputs):
                    all_outputs[o].append(output)

    # Deduplicate output names to handle Siamese networks.
    occurrences = {}
    for n in model.output_names:
        if n not in occurrences:
            occurrences[n] = 1
        else:
            occurrences[n] += 1
    conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
    output_names = []
    for n in model.output_names:
        if n in conflict_counter:
            conflict_counter[n] += 1
            n += '_%d' % conflict_counter[n]
        output_names.append(n)

    # Merge outputs under expected scope.
    with tf.compat.v1.device('/cpu:0' if cpu_merge else '/gpu:%d' %
                             target_gpu_ids[0]):
        merged = []
        for name, outputs in zip(output_names, all_outputs):
            merged.append(concatenate(outputs, axis=0, name=name))
        return Model(model.inputs, merged)
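A hedged sketch (TF 2.x eager mode assumed; `slice_batch` is an illustrative stand-in, not part of the function above) replaying the batch-slicing arithmetic of `get_slice`, where the last slice absorbs any remainder:

import tensorflow as tf

def slice_batch(data, i, parts):
    # same arithmetic as get_slice: equal steps, remainder goes to the last slice
    step = tf.shape(data)[0] // parts
    size = tf.shape(data)[0] - step * i if i == parts - 1 else step
    return data[i * step:i * step + size]

x = tf.reshape(tf.range(20), [10, 2])
print([int(tf.shape(slice_batch(x, i, 3))[0]) for i in range(3)])  # [3, 3, 4]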
Example #19
0
def gradient_norm(model):
    with K.name_scope('gradient_norm'):
        grads = K.gradients(model.total_loss, model.trainable_weights)
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
        return norm
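A hedged usage sketch: it assumes an older Keras (~2.2.x) where a compiled model exposes `metrics_names` and `metrics_tensors`; appending to them reports the norm during `fit()`. Neither attribute comes from the snippet above.

from keras.models import Sequential
from keras.layers import Dense

model = Sequential([Dense(1, input_shape=(4,))])
model.compile(optimizer='sgd', loss='mse')
# assumed Keras 2.2.x-era attributes, not part of gradient_norm itself
model.metrics_names.append('gradient_norm')
model.metrics_tensors.append(gradient_norm(model))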
Example #20
0
 def build(self, input_shape):
     with backend.name_scope(self.forward_layer.name):
         self.forward_layer.build(input_shape)
     with backend.name_scope(self.backward_layer.name):
         self.backward_layer.build(input_shape)
     self.built = True
Example #21
0
def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1,
                       nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4,
                       subsample_initial_block=False, pooling=None, activation='sigmoid', transition_pooling='avg'):
    ''' Build the DenseNet model

    # Arguments
        nb_classes: number of classes
        img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels)
        include_top: flag to include the final Dense layer
        depth: number of layers
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate
        nb_layers_per_block: number of layers in each dense block.
                Can be a -1, positive integer or a list.
                If -1, calculates nb_layer_per_block from the depth of the network.
                If positive integer, a set number of layers per dense block.
                If list, nb_layer is used as provided. Note that list size
                must equal nb_dense_block
        bottleneck: add bottleneck blocks
        reduction: reduction factor of transition blocks. Note: reduction value is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay rate
        subsample_initial_block: Changes model type to suit different datasets.
            Should be set to True for ImageNet, and False for CIFAR datasets.
            When set to True, the initial convolution will be strided and
            adds a MaxPooling2D before the initial dense block.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
                Note that if sigmoid is used, classes must be 1.
        transition_pooling: `avg` for avg pooling (default), `max` for max pooling,
            None for no pooling during scale transition blocks. Please note that this
            default differs from the DenseNetFCN paper in accordance with the DenseNet
            paper.

    # Returns
        a keras tensor

    # Raises
        ValueError: in case of invalid argument for `reduction`
            or `nb_dense_block`
    '''
    with K.name_scope('DenseNet'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # layers in each dense block
        if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != nb_dense_block:
                raise ValueError('If `nb_layers_per_block` is a list, its length '
                                 'must match `nb_dense_block`.')

            final_nb_layer = nb_layers[-1]
            nb_layers = nb_layers[:-1]
        else:
            if nb_layers_per_block == -1:
                assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1'
                count = int((depth - 4) / 3)

                if bottleneck:
                    count = count // 2

                nb_layers = [count for _ in range(nb_dense_block)]
                final_nb_layer = count
            else:
                final_nb_layer = nb_layers_per_block
                nb_layers = [nb_layers_per_block] * nb_dense_block

        # compute initial nb_filter if -1, else accept users initial nb_filter
        if nb_filter <= 0:
            nb_filter = 2 * growth_rate

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        if subsample_initial_block:
            initial_kernel = (7, 7)
            initial_strides = (2, 2)
        else:
            initial_kernel = (3, 3)
            initial_strides = (1, 1)

        x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same', name='initial_conv2D',
                   strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)

        if subsample_initial_block:
            x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='initial_bn')(x)
            x = Activation('relu')(x)
            x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

        # Add dense blocks
        for block_idx in range(nb_dense_block - 1):
            x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck,
                                         dropout_rate=dropout_rate, weight_decay=weight_decay,
                                         block_prefix='dense_%i' % block_idx)
            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx, transition_pooling=transition_pooling)
            nb_filter = int(nb_filter * compression)

        # The last dense_block does not have a transition_block
        x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck,
                                     dropout_rate=dropout_rate, weight_decay=weight_decay,
                                     block_prefix='dense_%i' % (nb_dense_block - 1))

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='final_bn')(x)
        x = Activation('relu')(x)

        if include_top:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)
            x = Dense(nb_classes, activation=activation)(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        return x
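A hedged usage sketch for the builder above, assuming it is called from within the module that defines the `__dense_block`/`__transition_block` helpers (the double-underscore names are module-private):

from keras.layers import Input
from keras.models import Model

img_input = Input(shape=(32, 32, 3))
out = __create_dense_net(nb_classes=10, img_input=img_input, include_top=True,
                         depth=40, nb_dense_block=3, growth_rate=12,
                         activation='softmax')
model = Model(img_input, out, name='densenet-40-12')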
    def _call(self, inputs, **kwargs):
        # inverse permutation
        def inv_perm(perm):
            inverse = [0] * len(perm)
            for i, p in enumerate(perm):
                inverse[p] = i
            return inverse

        # inputs is a list: the signal tensor is at index 0
        signals = inputs[0]

        signal_shape = mixed_shape(signals)
        ndim = self.input_spec[0].ndim
        atom_axes = list(range(3, ndim))
        atom_axes.remove(self.axis)

        if self.scope == 'local':
            with K.name_scope('signal_preprocessing'):
                perm = [1, 2, self.axis, 0] + atom_axes
                signals = K.permute_dimensions(signals, perm)

                if ndim > 4:
                    signals = K.reshape(signals, [
                        signal_shape[1], signal_shape[2],
                        signal_shape[self.axis], -1
                    ])

            with K.name_scope('linear_mapping'):
                # multiply over all batches (batch_dot with Theano-style behavior)
                signals = K.batch_dot(self.linear_maps, signals, axes=[2, 2])

            with K.name_scope('signal_postprocessing'):
                if ndim > 4:
                    signals = K.reshape(signals, [
                        signal_shape[1], signal_shape[2], self.output_dim,
                        signal_shape[0]
                    ] + [signal_shape[i] for i in atom_axes])

                signals = K.permute_dimensions(signals, inv_perm(perm))

        elif self.scope == 'global':
            with K.name_scope('signal_preprocessing'):
                dims = list(range(ndim))
                dims.remove(self.axis)
                perm = dims + [self.axis]

                signals = K.permute_dimensions(signals, perm)

            with K.name_scope('linear_mapping'):
                signals = K.dot(signals, self.linear_maps)

            with K.name_scope('signal_postprocessing'):
                signals = K.permute_dimensions(signals, inv_perm(perm))

        elif self.scope == 'channel_wise':
            with K.name_scope('signal_preprocessing'):
                perm = [2, self.axis, 0, 1] + atom_axes
                signals = K.permute_dimensions(signals, perm)

                signals = K.reshape(
                    signals, [signal_shape[2], signal_shape[self.axis], -1])

            with K.name_scope('linear_mapping'):
                # multiply over all batches (batch_dot with Theano-style behavior)
                signals = K.batch_dot(self.linear_maps, signals, axes=[1, 1])

            with K.name_scope('signal_postprocessing'):
                signals = K.reshape(signals, [
                    signal_shape[2], self.output_dim, signal_shape[0],
                    signal_shape[1]
                ] + [signal_shape[i] for i in atom_axes])

                signals = K.permute_dimensions(signals, inv_perm(perm))

        else:  # capsule_wise
            with K.name_scope('signal_preprocessing'):
                perm = [1, self.axis, 0, 2] + atom_axes
                signals = K.permute_dimensions(signals, perm)

                signals = K.reshape(
                    signals, [signal_shape[1], signal_shape[self.axis], -1])

            with K.name_scope('linear_mapping'):
                # multiply over all batches (batch_dot with Theano-style behavior)
                signals = K.batch_dot(self.linear_maps, signals, axes=[1, 1])

            with K.name_scope('signal_postprocessing'):
                signals = K.reshape(signals, [
                    signal_shape[1], self.output_dim, signal_shape[0],
                    signal_shape[2]
                ] + [signal_shape[i] for i in atom_axes])

                signals = K.permute_dimensions(signals, inv_perm(perm))

        inputs[0] = signals
        return inputs
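A quick NumPy check of the `inv_perm` helper used above: applying a permutation and then its inverse restores the original axis order.

import numpy as np

def inv_perm(perm):
    inverse = [0] * len(perm)
    for i, p in enumerate(perm):
        inverse[p] = i
    return inverse

x = np.random.rand(2, 3, 4)
perm = [2, 0, 1]
y = np.transpose(np.transpose(x, perm), inv_perm(perm))
assert np.array_equal(x, y)  # round trip is the identity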
def _reduction_a_cell(ip, p, filters, block_id=None):
    '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper).

    # Arguments
        ip: Input tensor `x`
        p: Input tensor `p`
        filters: Number of output filters
        block_id: String block_id

    # Returns
        A Keras tensor
    '''
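    # NOTE: `weight_decay`, `use_bn`, `bn_momentum`, and `drop_p` are not
    # arguments of this function; they are assumed to be module-level
    # settings in the project this example was taken from.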
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('reduction_A_block_%s' % block_id):
        p = _adjust_block(p, ip, filters, block_id)

        h = layers.Activation('relu')(ip)
        h = layers.Conv2D(filters, (1, 1),
                          strides=(1, 1),
                          padding='same',
                          kernel_regularizer=l2(weight_decay),
                          name='reduction_conv_1_%s' % block_id,
                          use_bias=False,
                          kernel_initializer='he_normal')(h)
        if use_bn:
            h = layers.BatchNormalization(axis=channel_dim,
                                          momentum=bn_momentum,
                                          epsilon=1e-3,
                                          name='reduction_bn_1_%s' %
                                          block_id)(h)
            h = layers.SpatialDropout2D(drop_p)(h)
        h3 = layers.ZeroPadding2D(padding=correct_pad(backend, h, 3),
                                  name='reduction_pad_1_%s' % block_id)(h)

        with backend.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_left1_%s' %
                                         block_id)
            x1_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right1_%s' %
                                         block_id)
            x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id)

        with backend.name_scope('block_2'):
            x2_1 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left2_%s' % block_id)(h3)
            x2_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right2_%s' %
                                         block_id)
            x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id)

        with backend.name_scope('block_3'):
            x3_1 = layers.AveragePooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left3_%s' % block_id)(h3)
            x3_2 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_right3_%s' %
                                         block_id)
            x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id)

        with backend.name_scope('block_4'):
            x4 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='reduction_left4_%s' % block_id)(x1)
            x4 = layers.add([x2, x4])

        with backend.name_scope('block_5'):
            x5_1 = _separable_conv_block(x1,
                                         filters, (3, 3),
                                         block_id='reduction_left4_%s' %
                                         block_id)
            x5_2 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_right5_%s' % block_id)(h3)
            x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id)

        x = layers.concatenate([x2, x3, x4, x5],
                               axis=channel_dim,
                               name='reduction_concat_%s' % block_id)

        return x, ip
 def __init__(self, standard_deviation=0.3, **kwargs):
     super(NoisyOptimizer, self).__init__(**kwargs)
     with K.name_scope(self.__class__.__name__):
         self.standard_deviation = K.variable(standard_deviation,
                                              name='standard_deviation')
Example #25
0
def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with K.name_scope('num_elements') as scope:
    return tf.cast(tf.compat.v1.size(losses, name=scope), dtype=losses.dtype)
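A hedged usage sketch: `_num_elements` returns the element count cast to the loss tensor's dtype, so a summed loss can be converted into a mean.

import tensorflow as tf

losses = tf.constant([[0.5, 1.5], [2.0, 4.0]])
mean_loss = tf.reduce_sum(losses) / _num_elements(losses)  # 8.0 / 4 = 2.0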
Example #26
0
def NASNet(input_shape=None,
           penultimate_filters=4032,
           nb_blocks=6,
           stem_filters=96,
           skip_reduction=True,
           use_auxilary_branch=False,
           filters_multiplier=2,
           dropout=0.5,
           include_top=True,
           weights=None,
           input_tensor=None,
           pooling=None,
           classes=1000,
           default_size=None):
    """Instantiates a NASNet architecture.
    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`.

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(331, 331, 3)` for NASNetLarge or
            `(224, 224, 3)` for NASNetMobile).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(224, 224, 3)` would be one valid value.
        penultimate_filters: number of filters in the penultimate layer.
            NASNet models use the notation `NASNet (N @ P)`, where:
                -   N is the number of blocks
                -   P is the number of penultimate filters
        nb_blocks: number of repeated blocks of the NASNet model.
            NASNet models use the notation `NASNet (N @ P)`, where:
                -   N is the number of blocks
                -   P is the number of penultimate filters
        stem_filters: number of filters in the initial stem block
        skip_reduction: Whether to skip the reduction step at the tail
            end of the network. Set to `False` for CIFAR models.
        use_auxilary_branch: Whether to use the auxiliary branch during
            training or evaluation.
        filters_multiplier: controls the width of the network.
            - If `filters_multiplier` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `filters_multiplier` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `filters_multiplier` = 1, default number of filters from the paper
                 are used at each layer.
        dropout: dropout rate
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights)
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        default_size: specifies the default image size of the model
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only the TensorFlow backend is currently supported, '
                           'as other backends do not support '
                           'separable convolution.')

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as ImageNet with `include_top` '
                         'as true, `classes` should be 1000')

    if default_size is None:
        default_size = 331

    # Determine proper input shape and default size.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top or weights)

    if K.image_data_format() != 'channels_last':
        warnings.warn('The NASNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        K.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    assert penultimate_filters % 24 == 0, "`penultimate_filters` must be " \
                                          "divisible by 24."

    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
    filters = penultimate_filters // 24

    x = Conv2D(stem_filters, (3, 3),
               strides=(2, 2),
               padding='valid',
               use_bias=False,
               name='stem_conv1',
               kernel_initializer='he_normal')(img_input)
    x = BatchNormalization(axis=channel_dim,
                           momentum=_BN_DECAY,
                           epsilon=_BN_EPSILON,
                           name='stem_bn1')(x)

    x, p = _reduction_A(x,
                        None,
                        filters // (filters_multiplier**2),
                        id='stem_1')
    x, p = _reduction_A(x, p, filters // filters_multiplier, id='stem_2')

    for i in range(nb_blocks):
        x, p = _normal_A(x, p, filters, id='%d' % (i))

    x, p0 = _reduction_A(x,
                         p,
                         filters * filters_multiplier,
                         id='reduce_%d' % (nb_blocks))

    p = p0 if not skip_reduction else p

    for i in range(nb_blocks):
        x, p = _normal_A(x,
                         p,
                         filters * filters_multiplier,
                         id='%d' % (nb_blocks + i + 1))

    auxilary_x = None
    if use_auxilary_branch:
        img_height = 2 if K.image_data_format() == 'channels_first' else 1
        img_width = 3 if K.image_data_format() == 'channels_first' else 2

        with K.name_scope('auxilary_branch'):
            auxilary_x = Activation('relu')(x)
            auxilary_x = AveragePooling2D((5, 5),
                                          strides=(3, 3),
                                          padding='valid',
                                          name='aux_pool')(auxilary_x)
            auxilary_x = Conv2D(128, (1, 1),
                                padding='same',
                                use_bias=False,
                                name='aux_conv_projection',
                                kernel_initializer='he_normal')(auxilary_x)
            auxilary_x = BatchNormalization(
                axis=channel_dim,
                momentum=_BN_DECAY,
                epsilon=_BN_EPSILON,
                name='aux_bn_projection')(auxilary_x)
            auxilary_x = Activation('relu')(auxilary_x)

            auxilary_x = Conv2D(768, (auxilary_x._keras_shape[img_height],
                                      auxilary_x._keras_shape[img_width]),
                                padding='valid',
                                use_bias=False,
                                kernel_initializer='he_normal',
                                name='aux_conv_reduction')(auxilary_x)
            auxilary_x = BatchNormalization(
                axis=channel_dim,
                momentum=_BN_DECAY,
                epsilon=_BN_EPSILON,
                name='aux_bn_reduction')(auxilary_x)
            auxilary_x = Activation('relu')(auxilary_x)

            auxilary_x = GlobalAveragePooling2D()(auxilary_x)
            auxilary_x = Dense(classes,
                               activation='softmax',
                               name='aux_predictions')(auxilary_x)

    x, p0 = _reduction_A(x,
                         p,
                         filters * filters_multiplier**2,
                         id='reduce_%d' % (2 * nb_blocks))

    p = p0 if not skip_reduction else p

    for i in range(nb_blocks):
        x, p = _normal_A(x,
                         p,
                         filters * filters_multiplier**2,
                         id='%d' % (2 * nb_blocks + i + 1))

    x = Activation('relu')(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dropout(dropout)(x)
        x = Dense(classes, activation='softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    if use_auxilary_branch:
        model = Model(inputs, [x, auxilary_x], name='NASNet_with_auxilary')
    else:
        model = Model(inputs, x, name='NASNet')

    # load weights (when available)
    warnings.warn(
        'Weights of NASNet models have not been ported yet for Keras.')

    if old_data_format:
        K.set_image_data_format(old_data_format)

    return model
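A hedged instantiation sketch for the builder above; the argument values are illustrative (they roughly mimic a mobile-sized configuration, and 1056 satisfies the divisible-by-24 check):

model = NASNet(input_shape=(224, 224, 3),
               penultimate_filters=1056,
               nb_blocks=4,
               stem_filters=32,
               skip_reduction=False,
               use_auxilary_branch=False,
               include_top=True,
               classes=1000,
               default_size=224)
model.summary()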
Example #27
0
    def __call__(self, inputs, **kwargs):
        if isinstance(inputs, list):
            inputs = inputs[:]

        with K.name_scope(self.name):
            # Raise exceptions in case the input is not compatible
            # with the input_spec specified in the layer constructor.
            self.assert_input_compatibility(inputs)

            # Handle layer building (weight creation, input spec locking).
            if not self.built:
                self.build(inputs)
                self.built = True

            # Handle mask propagation.
            previous_mask = _collect_previous_mask(inputs)
            user_kwargs = copy.copy(kwargs)
            if not _is_all_none(previous_mask):
                # The previous layer generated a mask.
                if has_arg(self.call, 'mask'):
                    if 'mask' not in kwargs:
                        # If mask is explicitly passed to __call__,
                        # we should override the default mask.
                        kwargs['mask'] = previous_mask
            # Handle automatic shape inference (only useful for Theano).
            input_shape = _collect_input_shape(inputs)

            # Actually call the layer, collecting output(s), mask(s), and shape(s).
            output = self.call(inputs, **kwargs)
            output_mask = self.compute_mask(inputs, previous_mask)

            # If the layer returns tensors from its inputs, unmodified,
            # we copy them to avoid loss of tensor metadata.
            output_ls = _to_list(output)
            inputs_ls = _to_list(inputs)
            output_ls_copy = []
            for x in output_ls:
                if x in inputs_ls:
                    x = K.identity(x)
                output_ls_copy.append(x)
            if len(output_ls_copy) == 1:
                output = output_ls_copy[0]
            else:
                output = output_ls_copy

            # Inferring the output shape is only relevant for Theano.
            if all([s is not None for s in _to_list(input_shape)]):
                output_shape = self.compute_output_shape(input_shape)
            else:
                if isinstance(input_shape, list):
                    output_shape = [None for _ in input_shape]
                else:
                    output_shape = None

            if not isinstance(output_mask,
                              (list, tuple)) and len(output_ls) > 1:
                # Augment the mask to match the length of the output.
                output_mask = [output_mask] * len(output_ls)

            # Add an inbound node to the layer, so that it keeps track
            # of the call and of all new variables created during the call.
            # This also updates the layer history of the output tensor(s).
            # If the input tensor(s) had not previous Keras history,
            # this does nothing.
            self._add_inbound_node(input_tensors=inputs,
                                   output_tensors=output,
                                   input_masks=previous_mask,
                                   output_masks=output_mask,
                                   input_shapes=input_shape,
                                   output_shapes=output_shape,
                                   arguments=user_kwargs)

            # Apply activity regularizer if any:
            if hasattr(self, 'activity_regularizer'
                       ) and self.activity_regularizer is not None:
                regularization_losses = [
                    self.activity_regularizer(x) for x in _to_list(output)
                ]
                self.add_loss(regularization_losses, _to_list(inputs))
        return output
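A small sketch of the contract `__call__` enforces above: the first call builds the layer (creating its weights), and later calls reuse them.

from keras.layers import Dense, Input

layer = Dense(4)
x = Input(shape=(8,))
y = layer(x)  # __call__ -> assert_input_compatibility -> build -> call
assert layer.built
assert len(layer.weights) == 2  # kernel and bias created during build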
Example #28
0
def transform(images,
              transforms,
              fill_mode='reflect',
              fill_value=0.0,
              interpolation='bilinear',
              output_shape=None,
              name=None):
  """Applies the given transform(s) to the image(s).

  Args:
    images: A tensor of shape (num_images, num_rows, num_columns, num_channels)
      (NHWC), (num_rows, num_columns, num_channels) (HWC), or (num_rows,
      num_columns) (HW). The rank must be statically known (the shape is not
      `TensorShape(None)`).
    transforms: Projective transform matrix/matrices. A vector of length 8 or
      tensor of size N x 8. If one row of transforms is [a0, a1, a2, b0, b1, b2,
      c0, c1], then it maps the *output* point `(x, y)` to a transformed *input*
      point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
      `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the
      transform mapping input points to output points. Note that gradients are
      not backpropagated into transformation parameters.
    fill_mode: Points outside the boundaries of the input are filled according
      to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
    fill_value: a float represents the value to be filled outside the boundaries
      when `fill_mode` is "constant".
    interpolation: Interpolation mode. Supported values: "nearest", "bilinear".
    output_shape: Output dimension after the transform, [height, width]. If None,
      output is the same size as input image.
    name: The name of the op.

  Fill mode behavior for each valid value is as follows:
    - reflect (d c b a | a b c d | d c b a): the input is extended by
      reflecting about the edge of the last pixel.
    - constant (k k k k | a b c d | k k k k): the input is extended by
      filling all values beyond the edge with the same constant value k = 0.
    - wrap (a b c d | a b c d | a b c d): the input is extended by wrapping
      around to the opposite edge.
    - nearest (a a a a | a b c d | d d d d): the input is extended by the
      nearest pixel.
  Input shape:
    4D tensor with shape: `(samples, height, width, channels)`,
      data_format='channels_last'.
  Output shape:
    4D tensor with shape: `(samples, height, width, channels)`,
      data_format='channels_last'.

  Returns:
    Image(s) with the same type and shape as `images`, with the given
    transform(s) applied. Transformed coordinates outside of the input image
    will be filled with zeros.

  Raises:
    TypeError: If `image` is an invalid type.
    ValueError: If output shape is not 1-D int32 Tensor.
  """
  with backend.name_scope(name or 'transform'):
    if output_shape is None:
      output_shape = tf.compat.v1.shape(images)[1:3]
      if not tf.executing_eagerly():
        output_shape_value = tf.get_static_value(output_shape)
        if output_shape_value is not None:
          output_shape = output_shape_value

    output_shape = tf.convert_to_tensor(
        output_shape, tf.int32, name='output_shape')

    if not output_shape.get_shape().is_compatible_with([2]):
      raise ValueError('output_shape must be a 1-D Tensor of 2 elements: '
                       'new_height, new_width, instead got '
                       '{}'.format(output_shape))

    fill_value = tf.convert_to_tensor(
        fill_value, tf.float32, name='fill_value')

    return tf.raw_ops.ImageProjectiveTransformV3(
        images=images,
        output_shape=output_shape,
        fill_value=fill_value,
        transforms=transforms,
        fill_mode=fill_mode.upper(),
        interpolation=interpolation.upper())
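A hedged usage sketch: `[1, 0, 0, 0, 1, 0, 0, 0]` is the 8-parameter identity transform (`a0 = b1 = 1`, everything else 0), so the output matches the input.

import tensorflow as tf

images = tf.random.uniform([1, 4, 4, 1])
out = transform(images, transforms=[1., 0., 0., 0., 1., 0., 0., 0.])
print(out.shape)  # (1, 4, 4, 1)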
Example #29
0
def get_weight_static_norm(model):
    with K.name_scope('w_static_norm'):
        weights = model.trainable_weights[0:22]
        w_norm = K.sqrt(sum([K.sum(K.square(w)) for w in weights]))
    return w_norm
Example #30
0
    def _ReductionCell(self, filters, prefix, prev, cur):
        with K.name_scope('reduce'):
            prev = self._Fit(filters=filters,
                             target_layer=cur,
                             prefix=prefix,
                             net=prev)
            cur = self._SqueezeChannels(filters=filters, prefix=prefix, x=cur)

            # Full in
            with K.name_scope('comb_iter_0'):
                # use a per-block prefix so scopes do not accumulate
                it_prefix = '{}/comb_iter_0'.format(prefix)
                add_0 = Add()([
                    self._Separable(filters=filters,
                                    kernel_size=5,
                                    strides=2,
                                    prefix='{}/left'.format(it_prefix),
                                    net=cur),
                    self._Separable(filters=filters,
                                    kernel_size=7,
                                    strides=2,
                                    prefix='{}/right'.format(it_prefix),
                                    net=prev)
                ])

            with K.name_scope('comb_iter_1'):
                it_prefix = '{}/comb_iter_1'.format(prefix)
                add_1 = Add()([
                    MaxPooling2D(3, strides=2, padding='same')(cur),
                    self._Separable(filters=filters,
                                    kernel_size=7,
                                    strides=2,
                                    prefix='{}/right'.format(it_prefix),
                                    net=prev)
                ])

            with K.name_scope('comb_iter_2'):
                it_prefix = '{}/comb_iter_2'.format(prefix)
                add_2 = Add()([
                    AveragePooling2D(3, strides=2, padding='same')(cur),
                    self._Separable(filters=filters,
                                    kernel_size=5,
                                    strides=2,
                                    prefix='{}/right'.format(it_prefix),
                                    net=prev)
                ])

            # Reduced after stride
            with K.name_scope('comb_iter_3'):
                add_3 = Add()([
                    AveragePooling2D(3, strides=1, padding='same')(add_0),
                    add_1
                ])

            with K.name_scope('comb_iter_4'):
                it_prefix = '{}/comb_iter_4'.format(prefix)
                add_4 = Add()([
                    self._Separable(filters=filters,
                                    kernel_size=3,
                                    strides=1,
                                    prefix='{}/left'.format(it_prefix),
                                    net=add_0),
                    MaxPooling2D(3, strides=2, padding='same')(cur)
                ])

            return Concatenate(axis=-1)([add_1, add_2, add_3, add_4])
Example #31
0
def get_gradient_static_norm(model):
    with K.name_scope('gradient_static_norm'):
        grads_static = K.gradients(model.total_loss,
                                   model.trainable_weights[0:22])
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads_static]))
    return norm
Example #32
0
 def __init__(self, optimizer, gdev_list=None):
     self.optimizer = optimizer
     self._gdev_list = gdev_list
     with K.name_scope(self.__class__.__name__):
         self.iterations = K.variable(0, dtype='int64', name='iterations')
def _reduction_A(ip, p, filters, weight_decay=5e-5, id=None):
    '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper)

    # Arguments:
        ip: input tensor `x`
        p: input tensor `p`
        filters: number of output filters
        weight_decay: l2 regularization weight
        id: string id

    # Returns:
        a Keras tensor
    '''
    """"""
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('reduction_A_block_%s' % id):
        p = _adjust_block(p, ip, filters, weight_decay, id)

        h = Activation('relu')(ip)
        h = Conv2D(filters, (1, 1),
                   strides=(1, 1),
                   padding='same',
                   name='reduction_conv_1_%s' % id,
                   use_bias=False,
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay))(h)
        h = BatchNormalization(axis=channel_dim,
                               momentum=_BN_DECAY,
                               epsilon=_BN_EPSILON,
                               name='reduction_bn_1_%s' % id)(h)

        with K.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_left1_%s' % id)
            x1_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_right1_%s' % id)
            x1 = add([x1_1, x1_2], name='reduction_add_1_%s' % id)

        with K.name_scope('block_2'):
            x2_1 = MaxPooling2D((3, 3),
                                strides=(2, 2),
                                padding='same',
                                name='reduction_left2_%s' % id)(h)
            x2_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_right2_%s' % id)
            x2 = add([x2_1, x2_2], name='reduction_add_2_%s' % id)

        with K.name_scope('block_3'):
            x3_1 = AveragePooling2D((3, 3),
                                    strides=(2, 2),
                                    padding='same',
                                    name='reduction_left3_%s' % id)(h)
            x3_2 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         weight_decay=weight_decay,
                                         id='reduction_right3_%s' % id)
            x3 = add([x3_1, x3_2], name='reduction_add3_%s' % id)

        with K.name_scope('block_4'):
            x4 = AveragePooling2D((3, 3),
                                  strides=(1, 1),
                                  padding='same',
                                  name='reduction_left4_%s' % id)(x1)
            x4 = add([x2, x4])

        with K.name_scope('block_5'):
            x5_1 = _separable_conv_block(x1,
                                         filters, (3, 3),
                                         weight_decay=weight_decay,
                                         id='reduction_left4_%s' % id)
            x5_2 = MaxPooling2D((3, 3),
                                strides=(2, 2),
                                padding='same',
                                name='reduction_right5_%s' % id)(h)
            x5 = add([x5_1, x5_2], name='reduction_add4_%s' % id)

        x = concatenate([x2, x3, x4, x5],
                        axis=channel_dim,
                        name='reduction_concat_%s' % id)
        return x, ip
Example #34
0
def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1,
                       nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4,
                       subsample_initial_block=False, pooling=None, activation='softmax', transition_pooling='avg'):
    ''' Build the DenseNet model

    # Arguments
        nb_classes: number of classes
        img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels)
        include_top: flag to include the final Dense layer
        depth: number of layers
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate
        nb_layers_per_block: number of layers in each dense block.
                Can be a -1, positive integer or a list.
                If -1, calculates nb_layer_per_block from the depth of the network.
                If positive integer, a set number of layers per dense block.
                If list, nb_layer is used as provided. Note that list size
                must equal nb_dense_block
        bottleneck: add bottleneck blocks
        reduction: reduction factor of transition blocks. Note: reduction value is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay rate
        subsample_initial_block: Changes model type to suit different datasets.
            Should be set to True for ImageNet, and False for CIFAR datasets.
            When set to True, the initial convolution will be strided and
            adds a MaxPooling2D before the initial dense block.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
                Note that if sigmoid is used, classes must be 1.
        transition_pooling: `avg` for avg pooling (default), `max` for max pooling,
            None for no pooling during scale transition blocks. Please note that this
            default differs from the DenseNetFCN paper in accordance with the DenseNet
            paper.

    # Returns
        a keras tensor

    # Raises
        ValueError: in case of invalid argument for `reduction`
            or `nb_dense_block`
    '''
    with K.name_scope('DenseNet'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # layers in each dense block
        if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != nb_dense_block:
                raise ValueError('If `nb_layers_per_block` is a list, its length '
                                 'must match `nb_dense_block`.')

            final_nb_layer = nb_layers[-1]
            nb_layers = nb_layers[:-1]
        else:
            if nb_layers_per_block == -1:
                assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1'
                count = int((depth - 4) / 3)

                if bottleneck:
                    count = count // 2

                nb_layers = [count for _ in range(nb_dense_block)]
                final_nb_layer = count
            else:
                final_nb_layer = nb_layers_per_block
                nb_layers = [nb_layers_per_block] * nb_dense_block

        # compute initial nb_filter if -1, else accept users initial nb_filter
        if nb_filter <= 0:
            nb_filter = 2 * growth_rate

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        if subsample_initial_block:
            initial_kernel = (7, 7)
            initial_strides = (2, 2)
        else:
            initial_kernel = (3, 3)
            initial_strides = (1, 1)

        x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same', name='initial_conv2D',
                   strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)

        if subsample_initial_block:
            x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='initial_bn')(x)
            x = Activation('relu')(x)
            x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

        # Add dense blocks
        for block_idx in range(nb_dense_block - 1):
            x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck,
                                         dropout_rate=dropout_rate, weight_decay=weight_decay,
                                         block_prefix='dense_%i' % block_idx)
            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx, transition_pooling=transition_pooling)
            nb_filter = int(nb_filter * compression)

        # The last dense_block does not have a transition_block
        x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck,
                                     dropout_rate=dropout_rate, weight_decay=weight_decay,
                                     block_prefix='dense_%i' % (nb_dense_block - 1))

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='final_bn')(x)
        x = Activation('relu')(x)

        if include_top:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)
            x = Dense(nb_classes, activation=activation)(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        return x
Example #35
0
def dense_bn(x, units, use_bias=True, scope=None, activation=None):
    with K.name_scope(scope):
        x = Dense(units=units, use_bias=use_bias)(x)
        x = BatchNormalization(momentum=0.9)(x)
        x = Activation(activation)(x)
    return x
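A hedged usage sketch for `dense_bn` above; the shape, scope name, and activation are illustrative.

from keras.layers import Input

inp = Input(shape=(128,))
h = dense_bn(inp, units=64, scope='fc1', activation='relu')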
Example #36
0
def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12,
                           reduction=0.0, dropout_rate=None, weight_decay=1e-4,
                           nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='deconv',
                           init_conv_filters=48, input_shape=None, activation='softmax',
                           early_transition=False, transition_pooling='max', initial_kernel_size=(3, 3)):
    ''' Build the DenseNet-FCN model

    # Arguments
        nb_classes: number of classes
        img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels)
        include_top: flag to include the final Dense layer
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        reduction: reduction factor of transition blocks. Note: reduction value is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay
        nb_layers_per_block: number of layers in each dense block.
            Can be a positive integer or a list.
            If positive integer, a set number of layers per dense block.
            If list, nb_layer is used as provided. Note that list size must
            be (nb_dense_block + 1)
        nb_upsampling_conv: number of convolutional layers in upsampling via subpixel convolution
        upsampling_type: Can be one of 'upsampling', 'deconv' and 'subpixel'. Defines
            type of upsampling algorithm used.
        input_shape: Only used for shape inference in fully convolutional networks.
        activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
                    Note that if sigmoid is used, classes must be 1.
        early_transition: Start with an extra initial transition down and end with an extra
            transition up to reduce the network size.
        transition_pooling: 'max' for max pooling (default), 'avg' for average pooling,
            None for no pooling. Please note that this default differs from the DenseNet
            paper in accordance with the DenseNetFCN paper.
        initial_kernel_size: The first Conv2D kernel might vary in size based on the
            application, this parameter makes it configurable.

    # Returns
        a keras tensor

    # Raises
        ValueError: in case of invalid argument for `reduction`,
            `nb_dense_block` or `nb_upsampling_conv`.
    '''
    with K.name_scope('DenseNetFCN'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        if concat_axis == 1:  # channels_first dim ordering
            _, rows, cols = input_shape
        else:
            rows, cols, _ = input_shape

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # check if upsampling_conv has minimum number of filters
        # the filter count must be greater than 12 and divisible by 4, as at least 3 color channels are needed for correct upsampling
        if not (nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0):
            raise ValueError('Parameter `nb_upsampling_conv` number of channels must '
                             'be a positive number divisible by 4 and greater than 12')

        # layers in each dense block
        if isinstance(nb_layers_per_block, (list, tuple)):
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != (nb_dense_block + 1):
                raise ValueError('If `nb_layers_per_block` is a list, its length must be '
                                 '(`nb_dense_block` + 1)')

            bottleneck_nb_layers = nb_layers[-1]
            rev_layers = nb_layers[::-1]
            nb_layers.extend(rev_layers[1:])
        else:
            bottleneck_nb_layers = nb_layers_per_block
            nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1)

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        x = Conv2D(init_conv_filters, initial_kernel_size, kernel_initializer='he_normal', padding='same', name='initial_conv2D',
                   use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)
        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='initial_bn')(x)
        x = Activation('relu')(x)

        nb_filter = init_conv_filters

        skip_list = []

        if early_transition:
            x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay,
                                   block_prefix='tr_early', transition_pooling=transition_pooling)

        # Add dense blocks and transition down block
        for block_idx in range(nb_dense_block):
            x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate,
                                         weight_decay=weight_decay, block_prefix='dense_%i' % block_idx)

            # Skip connection
            skip_list.append(x)

            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx, transition_pooling=transition_pooling)

            nb_filter = int(nb_filter * compression)  # this is calculated inside transition_down_block

        # The last dense_block does not have a transition_down_block
        # return the concatenated feature maps without the concatenation of the input
        _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate,
                                                  dropout_rate=dropout_rate, weight_decay=weight_decay,
                                                  return_concat_list=True,
                                                  block_prefix='dense_%i' % nb_dense_block)

        skip_list = skip_list[::-1]  # reverse the skip list

        # Add dense blocks and transition up block
        for block_idx in range(nb_dense_block):
            n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx]

            # upsampling block must upsample only the feature maps (concat_list[1:]),
            # not the concatenation of the input with the feature maps (concat_list[0]).
            l = concatenate(concat_list[1:], axis=concat_axis)

            t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type, weight_decay=weight_decay,
                                      block_prefix='tr_up_%i' % block_idx)

            # concatenate the skip connection with the transition block
            x = concatenate([t, skip_list[block_idx]], axis=concat_axis)

            # Don't allow the feature map size to grow in upsampling dense blocks
            x_up, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1],
                                                         nb_filter=growth_rate, growth_rate=growth_rate,
                                                         dropout_rate=dropout_rate, weight_decay=weight_decay,
                                                         return_concat_list=True, grow_nb_filters=False,
                                                         block_prefix='dense_%i' % (nb_dense_block + 1 + block_idx))

        if early_transition:
            x_up = __transition_up_block(x_up, nb_filters=nb_filter, type=upsampling_type, weight_decay=weight_decay,
                                         block_prefix='tr_up_early')
        if include_top:
            x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', use_bias=False)(x_up)

            if K.image_data_format() == 'channels_first':
                channel, row, col = input_shape
            else:
                row, col, channel = input_shape

            x = Reshape((row * col, nb_classes))(x)
            x = Activation(activation)(x)
            x = Reshape((row, col, nb_classes))(x)
        else:
            x = x_up

        return x
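A hedged sketch of how a builder like this is typically wrapped into a Model, assuming the private helpers (__dense_block, __transition_block, __transition_up_block) are defined in the same module; the input shape, class count, and hyperparameters below are illustrative:

from keras.layers import Input
from keras.models import Model

input_shape = (224, 224, 3)              # rows, cols, channels (channels_last)
img_input = Input(shape=input_shape)
x = __create_fcn_dense_net(nb_classes=21, img_input=img_input, include_top=True,
                           nb_dense_block=5, growth_rate=16,
                           nb_layers_per_block=4, input_shape=input_shape)
model = Model(img_input, x, name='fcn-densenet')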
Example #37
0
def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12,
                           reduction=0.0, dropout_rate=None, weight_decay=1e-4,
                           nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='deconv',
                           init_conv_filters=48, input_shape=None, activation='sigmoid',
                           early_transition=False, transition_pooling='max', initial_kernel_size=(3, 3)):
    ''' Build the DenseNet-FCN model

    # Arguments
        nb_classes: number of classes
        img_input: input tensor of shape (channels, rows, columns) or (rows, columns, channels)
        include_top: flag to include the final Dense layer
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        reduction: reduction factor of transition blocks. Note: the reduction value is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay
        nb_layers_per_block: number of layers in each dense block.
            Can be a positive integer or a list.
            If positive integer, a set number of layers per dense block.
            If list, it is used as provided. Note that the list length must
            be (nb_dense_block + 1)
        nb_upsampling_conv: number of convolutional layers in upsampling via subpixel convolution
        upsampling_type: Can be one of 'upsampling', 'deconv' or 'subpixel'. Defines
            the type of upsampling algorithm used.
        input_shape: shape tuple; only used for shape inference in fully convolutional networks.
        activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
                    Note that if sigmoid is used, classes must be 1.
        early_transition: Start with an extra initial transition down and end with an extra
            transition up to reduce the network size.
        transition_pooling: 'max' for max pooling (default), 'avg' for average pooling,
            None for no pooling. Please note that this default differs from the DenseNet
            paper in accordance with the DenseNetFCN paper.
        initial_kernel_size: The first Conv2D kernel might vary in size based on the
            application, this parameter makes it configurable.

    # Returns
        a keras tensor

    # Raises
        ValueError: in case of invalid argument for `reduction`,
            `nb_dense_block` or `nb_upsampling_conv`.
    '''
    with K.name_scope('DenseNetFCN'):
        concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

        if concat_axis == 1:  # channels_first dim ordering
            _, rows, cols = input_shape
        else:
            rows, cols, _ = input_shape

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # check if upsampling_conv has minimum number of filters
        # the filter count must be greater than 12 and divisible by 4, as at least 3 color channels are needed for correct upsampling
        if not (nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0):
            raise ValueError('Parameter `nb_upsampling_conv` number of channels must '
                             'be a positive number divisible by 4 and greater than 12')

        # layers in each dense block
        if isinstance(nb_layers_per_block, (list, tuple)):
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != (nb_dense_block + 1):
                raise ValueError('If `nb_layers_per_block` is a list, its length must be '
                                 '(`nb_dense_block` + 1)')

            bottleneck_nb_layers = nb_layers[-1]
            rev_layers = nb_layers[::-1]
            nb_layers.extend(rev_layers[1:])
        else:
            bottleneck_nb_layers = nb_layers_per_block
            nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1)

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        x = Conv2D(init_conv_filters, initial_kernel_size, kernel_initializer='he_normal', padding='same', name='initial_conv2D',
                   use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)
        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, name='initial_bn')(x)
        x = Activation('relu')(x)

        nb_filter = init_conv_filters

        skip_list = []

        if early_transition:
            x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay,
                                   block_prefix='tr_early', transition_pooling=transition_pooling)

        # Add dense blocks and transition down block
        for block_idx in range(nb_dense_block):
            x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate,
                                         weight_decay=weight_decay, block_prefix='dense_%i' % block_idx)

            # Skip connection
            skip_list.append(x)

            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx, transition_pooling=transition_pooling)

            nb_filter = int(nb_filter * compression)  # this is calculated inside transition_down_block

        # The last dense_block does not have a transition_down_block
        # return the concatenated feature maps without the concatenation of the input
        _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate,
                                                  dropout_rate=dropout_rate, weight_decay=weight_decay,
                                                  return_concat_list=True,
                                                  block_prefix='dense_%i' % nb_dense_block)

        skip_list = skip_list[::-1]  # reverse the skip list

        # Add dense blocks and transition up block
        for block_idx in range(nb_dense_block):
            n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx]

            # upsampling block must upsample only the feature maps (concat_list[1:]),
            # not the concatenation of the input with the feature maps (concat_list[0]).
            l = concatenate(concat_list[1:], axis=concat_axis)

            t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type, weight_decay=weight_decay,
                                      block_prefix='tr_up_%i' % block_idx)

            # concatenate the skip connection with the transition block
            x = concatenate([t, skip_list[block_idx]], axis=concat_axis)

            # Don't allow the feature map size to grow in upsampling dense blocks
            x_up, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1],
                                                         nb_filter=growth_rate, growth_rate=growth_rate,
                                                         dropout_rate=dropout_rate, weight_decay=weight_decay,
                                                         return_concat_list=True, grow_nb_filters=False,
                                                         block_prefix='dense_%i' % (nb_dense_block + 1 + block_idx))

        if early_transition:
            x_up = __transition_up_block(x_up, nb_filters=nb_filter, type=upsampling_type, weight_decay=weight_decay,
                                         block_prefix='tr_up_early')
        if include_top:
            x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', use_bias=False)(x_up)

            if K.image_data_format() == 'channels_first':
                channel, row, col = input_shape
            else:
                row, col, channel = input_shape

            x = Reshape((row * col, nb_classes))(x)
            x = Activation(activation)(x)
            x = Reshape((row, col, nb_classes))(x)
        else:
            x = x_up

        return x
Example #38
0
    def build(self, input_shapes):
        vdim = input_shapes[0][2]
        edim = input_shapes[1][2]
        udim = input_shapes[2][2]

        with kb.name_scope(self.name):
            with kb.name_scope('phi_v'):
                v_shapes = [self.units_e[-1] + vdim + udim] + self.units_v
                v_shapes = list(zip(v_shapes[:-1], v_shapes[1:]))
                self.phi_v_weights = [
                    self.add_weight(shape=i,
                                    initializer=self.kernel_initializer,
                                    name='weight_v_%d' % j,
                                    regularizer=self.kernel_regularizer,
                                    constraint=self.kernel_constraint)
                    for j, i in enumerate(v_shapes)
                ]
                if self.use_bias:
                    self.phi_v_biases = [
                        self.add_weight(shape=(i[-1], ),
                                        initializer=self.bias_initializer,
                                        name='bias_v_%d' % j,
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
                        for j, i in enumerate(v_shapes)
                    ]
                else:
                    self.phi_v_biases = None

            with kb.name_scope('phi_e'):
                e_shapes = [2 * vdim + edim + udim] + self.units_e
                e_shapes = list(zip(e_shapes[:-1], e_shapes[1:]))
                self.phi_e_weights = [
                    self.add_weight(shape=i,
                                    initializer=self.kernel_initializer,
                                    name='weight_e_%d' % j,
                                    regularizer=self.kernel_regularizer,
                                    constraint=self.kernel_constraint)
                    for j, i in enumerate(e_shapes)
                ]
                if self.use_bias:
                    self.phi_e_biases = [
                        self.add_weight(shape=(i[-1], ),
                                        initializer=self.bias_initializer,
                                        name='bias_e_%d' % j,
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
                        for j, i in enumerate(e_shapes)
                    ]
                else:
                    self.phi_e_biases = None

            with kb.name_scope('phi_u'):
                u_shapes = [self.units_e[-1] + self.units_v[-1] + udim] + self.units_u
                u_shapes = list(zip(u_shapes[:-1], u_shapes[1:]))
                self.phi_u_weights = [
                    self.add_weight(shape=i,
                                    initializer=self.kernel_initializer,
                                    name='weight_u_%d' % j,
                                    regularizer=self.kernel_regularizer,
                                    constraint=self.kernel_constraint)
                    for j, i in enumerate(u_shapes)
                ]
                if self.use_bias:
                    self.phi_u_biases = [
                        self.add_weight(shape=(i[-1], ),
                                        initializer=self.bias_initializer,
                                        name='bias_u_%d' % j,
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
                        for j, i in enumerate(u_shapes)
                    ]
                else:
                    self.phi_u_biases = None
        self.built = True
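All three phi networks above derive their weight shapes the same way: a list of layer widths is zipped with itself shifted by one to yield (fan_in, fan_out) pairs, one per Dense-style weight matrix. A standalone illustration with made-up widths:

# Chain consecutive layer widths into (fan_in, fan_out) weight shapes.
widths = [32, 64, 64, 16]                # input dim followed by three layer widths
shapes = list(zip(widths[:-1], widths[1:]))
print(shapes)                            # [(32, 64), (64, 64), (64, 16)]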
Example #39
0
    def build(self):
        inputs = Input(batch_shape=self.input_tensor)
        prev, cur = self.stem(filters=self.filters,
                              stem_filters=self.stem_filters,
                              net=inputs)

        for repeat in range(self.num_reduction_cells + 1):
            if repeat == self.num_reduction_cells and self.add_aux_output:
                prefix = 'aux_{}'.format(repeat * self.num_cell_repeats - 1)
                aux_outputs = self._AuxiliaryTop(classes=self.num_classes,
                                                 prefix=prefix,
                                                 net=cur)

            if repeat > 0:
                self.filters *= 2
                prev, cur = cur, prev
                cur = self._ReductionCell(
                    filters=self.filters,
                    prefix='reduction_cell_{}'.format(repeat - 1),
                    cur=prev,
                    prev=cur)

            for cell_index in range(self.num_cell_repeats):
                prev, cur = cur, prev
                cur = self._NormalCell(
                    filters=self.filters,
                    prefix='cell_{}'.format(cell_index +
                                            repeat * self.num_cell_repeats),
                    cur=prev,
                    prev=cur)

        with K.name_scope('final_layer'):
            x = Activation('relu', name='last_relu')(cur)

            if self.include_top:
                x = GlobalAveragePooling2D(name='avg_pool')(x)
                x = Dropout(rate=self.dropout_rate)(x)
                outputs = Dense(self.num_classes,
                                activation='softmax',
                                name='final_layer/FC')(x)

                model_suffix = 'with_top'
            else:
                if self.pooling == 'avg':
                    outputs = GlobalAveragePooling2D(name='avg_pool')(x)
                elif self.pooling == 'max':
                    outputs = GlobalMaxPooling2D(name='max_pool')(x)
                else:
                    raise ValueError(
                        'Supported options for pooling: `avg` or `max`; '
                        'got: {}'.format(self.pooling))

                model_suffix = 'no_top'
        model_name = 'NASNet-A_{}@{}_{}_{}'.format(self.num_cell_repeats,
                                                   self.penultimate_filters,
                                                   self.num_classes,
                                                   model_suffix)
        if self.add_aux_output:
            model = Model(inputs, [outputs, aux_outputs],
                          name='{}_with_auxiliary_output'.format(model_name))
            model.summary()
            return model
        else:
            model = Model(inputs, outputs, name=model_name)
            model.summary()
            return model
def _adjust_block(p, ip, filters, block_id=None):
    '''Adjusts the input `previous path` to match the shape of the `input`.

    Used in situations where the output number of filters needs to be changed.

    # Arguments
        p: Input tensor which needs to be modified
        ip: Input tensor whose shape needs to be matched
        filters: Number of output filters to be matched
        block_id: String block_id

    # Returns
        Adjusted Keras tensor
    '''
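    # NOTE: `weight_decay`, `use_bn`, `bn_momentum` and `drop_p` are free variables
    # here, presumably module-level hyperparameters defined elsewhere in the source
    # file, as are the `backend` and `layers` imports.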
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1
    img_dim = 2 if backend.image_data_format() == 'channels_first' else -2

    ip_shape = backend.int_shape(ip)

    if p is not None:
        p_shape = backend.int_shape(p)

    with backend.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p_shape[img_dim] != ip_shape[img_dim]:
            with backend.name_scope('adjust_reduction_block_%s' % block_id):
                p = layers.Activation('relu',
                                      name='adjust_relu_1_%s' % block_id)(p)
                p1 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_1_%s' % block_id)(p)
                p1 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   kernel_regularizer=l2(weight_decay),
                                   use_bias=False,
                                   name='adjust_conv_1_%s' % block_id,
                                   kernel_initializer='he_normal')(p1)

                p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_2_%s' % block_id)(p2)
                p2 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   kernel_regularizer=l2(weight_decay),
                                   use_bias=False,
                                   name='adjust_conv_2_%s' % block_id,
                                   kernel_initializer='he_normal')(p2)

                p = layers.concatenate([p1, p2], axis=channel_dim)
                if use_bn:
                    p = layers.BatchNormalization(axis=channel_dim,
                                                  momentum=bn_momentum,
                                                  epsilon=1e-3,
                                                  name='adjust_bn_%s' %
                                                  block_id)(p)
                    p = layers.SpatialDropout2D(drop_p)(p)

        elif p_shape[channel_dim] != filters:
            with backend.name_scope('adjust_projection_block_%s' % block_id):
                p = layers.Activation('relu')(p)
                p = layers.Conv2D(filters, (1, 1),
                                  strides=(1, 1),
                                  kernel_regularizer=l2(weight_decay),
                                  padding='same',
                                  name='adjust_conv_projection_%s' % block_id,
                                  use_bias=False,
                                  kernel_initializer='he_normal')(p)
                if use_bn:
                    p = layers.BatchNormalization(axis=channel_dim,
                                                  momentum=bn_momentum,
                                                  epsilon=1e-3,
                                                  name='adjust_bn_%s' %
                                                  block_id)(p)
                    p = layers.SpatialDropout2D(drop_p)(p)
    return p
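The reduction branch above halves the spatial resolution with two parallel stride-2 average pools, where the second path is shifted one pixel by the ZeroPadding2D/Cropping2D pair so the two paths sample complementary pixel grids before being concatenated. A minimal sketch of just that spatial trick, with an illustrative input shape:

from keras import layers

inp = layers.Input(shape=(32, 32, 8))
# Path 1: plain stride-2 average pooling (samples the even-indexed pixels).
p1 = layers.AveragePooling2D((1, 1), strides=(2, 2), padding='valid')(inp)
# Path 2: shift the feature map by one pixel, then pool (samples the odd-indexed pixels).
p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(inp)
p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2)
p2 = layers.AveragePooling2D((1, 1), strides=(2, 2), padding='valid')(p2)
# Concatenating the two 16x16 maps retains information from both pixel grids.
out = layers.concatenate([p1, p2], axis=-1)   # shape: (None, 16, 16, 16)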