Ejemplo n.º 1
0
def __create_res_next(nb_classes, img_input, cardinality=8, weight_decay=5e-4):
    """Assemble the ResNeXt-style classifier graph on top of ``img_input``.

    The graph is an initial convolution block, four bottleneck stages, a
    ``GlobalAveragePooling3D`` layer and a softmax ``Dense`` classifier.

    Args:
        nb_classes: Number of units of the final softmax layer.
        img_input: Tensor passed to ``__initial_conv_block``.
        cardinality: Forwarded to every ``__bottleneck_block``. It also fixes
            the stage widths: stage ``i`` (0-based) uses
            ``8 * cardinality * 2**i`` filters, which yields
            ``[48, 96, 192, 384]`` for 6, ``[64, 128, 256, 512]`` for 8 and
            ``[80, 160, 320, 640]`` for 10.
        weight_decay: L2 regularisation factor used by the classifier and
            forwarded to the convolution/bottleneck helpers.

    Returns:
        The output tensor of the softmax classifier.

    Raises:
        ValueError: If ``cardinality`` is smaller than 1.
    """
    if cardinality < 1:
        raise ValueError(
            'cardinality must be a positive integer, got %r' % (cardinality,))

    # Number of filters for each of the four stages. Previously this was a
    # lookup restricted to cardinality 6, 8 or 10, and any other value failed
    # with an UnboundLocalError; all three tables follow this formula.
    base_width = 8 * cardinality
    filters_list = [int(base_width * 2 ** stage) for stage in range(4)]

    # Initial convolution layer
    x = __initial_conv_block(img_input, weight_decay)

    # First stage
    x_1 = __bottleneck_block(x,
                             filters_list[0],
                             cardinality,
                             strides=1,
                             spa_kernel_size=(3, 3, 1),
                             spa_attention=True,
                             weight_decay=weight_decay)

    # Second stage
    x_2 = __bottleneck_block(x_1,
                             filters_list[1],
                             cardinality,
                             strides=2,
                             spa_kernel_size=(3, 3, 1),
                             spa_attention=True,
                             weight_decay=weight_decay)

    # Third stage
    x_3 = __bottleneck_block(x_2,
                             filters_list[2],
                             cardinality,
                             strides=2,
                             spa_kernel_size=(1, 1, 1),
                             spa_attention=True,
                             weight_decay=weight_decay)

    # Fourth stage: attention is switched off, so no spa_kernel_size is
    # passed and the helper's own default applies.
    x_4 = __bottleneck_block(x_3,
                             filters_list[3],
                             cardinality,
                             strides=2,
                             spa_attention=False,
                             weight_decay=weight_decay)

    x_gap = GlobalAveragePooling3D()(x_4)

    x_dense = Dense(nb_classes,
                    use_bias=False,
                    kernel_regularizer=l2(weight_decay),
                    kernel_initializer='he_normal',
                    activation='softmax')(x_gap)

    return x_dense
Ejemplo n.º 2
0
def _bn_relu2(x, bn_name=None, relu_name=None):
    """Build a BN -> global average pool -> dropout -> 2-way softmax head.

    Despite the function name, no ReLU layer is created here.

    Args:
        x: Input tensor.
        bn_name: Optional name for the ``BatchNormalization`` layer.
        relu_name: Accepted for signature compatibility; not used.

    Returns:
        The output tensor of the 2-unit softmax ``Dense`` layer.
    """
    normalized = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)(x)
    pooled = GlobalAveragePooling3D()(normalized)
    regularized = Dropout(0.25)(pooled)
    classifier = Dense(units=2,
                       activation='softmax',
                       kernel_initializer="he_normal")
    return classifier(regularized)
Ejemplo n.º 3
0
    def squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
        """Rescale the channels of ``input_x`` with a squeeze-and-excitation gate.

        The input is globally average pooled, passed through a
        Dense(relu) -> Dense(sigmoid) bottleneck and the resulting
        per-channel weights are multiplied back onto ``input_x``.

        Args:
            input_x: Input tensor. The gate is reshaped to
                ``[1, 1, 1, out_dim]``, so the channels are presumably on the
                last axis of a 5D tensor - confirm against the caller.
            out_dim: Number of channels of ``input_x`` (size of the gate).
            ratio: Reduction ratio of the bottleneck Dense layer.
            layer_name: Name of the TensorFlow name scope wrapping the layers.

        Returns:
            ``input_x`` multiplied by the sigmoid gate.
        """
        # Dense needs an integer unit count; ``out_dim / ratio`` is a float
        # under Python 3 true division. Keep at least one unit so a ratio
        # larger than out_dim does not produce an empty layer.
        bottleneck_units = max(1, int(out_dim // ratio))

        with tf.name_scope(layer_name):

            squeeze = GlobalAveragePooling3D()(input_x)

            excitation = Dense(units=bottleneck_units)(squeeze)
            excitation = Activation("relu")(excitation)
            excitation = Dense(units=out_dim)(excitation)
            excitation = Activation("sigmoid")(excitation)
            # Give the gate singleton axes so it broadcasts in the product.
            excitation = Reshape([1, 1, 1, out_dim])(excitation)
            scale = Multiply()([input_x, excitation])

            return scale
Ejemplo n.º 4
0
    def build(self, input_shape, num_output, repetitions=3):
        """Create the classification model on top of the feature extractor.

        Args:
            input_shape: Shape passed to the Keras ``Input`` layer.
            num_output: Number of units of the final softmax layer.
            repetitions: Forwarded to ``self.extract_feature``.

        Returns:
            A Keras ``Model`` mapping the input tensor to the softmax output.
        """
        inputs = Input(shape=input_shape)

        # extract_feature returns a callable whose result is indexable;
        # only its last element feeds the classifier.
        feature_maps = self.extract_feature(repetitions=repetitions)(inputs)
        pooled = GlobalAveragePooling3D()(feature_maps[-1])
        flattened = Flatten()(pooled)

        classifier = Dense(units=num_output,
                           name='final_fully_connected',
                           kernel_initializer="he_normal",
                           kernel_regularizer=l2(1e-4),
                           activation='softmax')
        predictions = classifier(flattened)

        return Model(inputs=inputs, outputs=predictions)
Ejemplo n.º 5
0
def channel_attention_block(input, filters, kernel_size, padding, reduction_ratio, weight_decay=5e-4):
    """Conv-BN-ReLU followed by a channel gate and a residual connection.

    Args:
        input: Input tensor. It is added to the gated features at the end,
            so its shape must be compatible with the convolution output.
        filters: Number of filters of the main convolution and of the gate.
        kernel_size: Kernel size of the main convolution.
        padding: Padding mode of the main convolution.
        reduction_ratio: The gate bottleneck uses
            ``filters // reduction_ratio`` filters.
        weight_decay: L2 regularisation factor of every convolution kernel.

    Returns:
        The sum of the gated features and ``input``.
    """
    # Main branch: convolution, batch normalisation over the last axis, ReLU.
    features = Conv3D(filters=filters,
                      kernel_size=kernel_size,
                      padding=padding,
                      use_bias=False,
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(weight_decay))(input)
    features = BatchNormalization(axis=-1)(features)
    features = Activation('relu')(features)

    # Squeeze: one value per channel, reshaped so 1x1x1 convolutions apply.
    pooled = GlobalAveragePooling3D()(features)
    gate = Reshape((1, 1, 1, filters))(pooled)

    # Excitation: reduce, ReLU, expand back to `filters`, sigmoid.
    gate = Conv3D(filters=filters // reduction_ratio,
                  kernel_size=1,
                  use_bias=False,
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(weight_decay))(gate)
    gate = Activation('relu')(gate)
    gate = Conv3D(filters=filters,
                  kernel_size=1,
                  use_bias=False,
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(weight_decay))(gate)
    gate = Activation('sigmoid')(gate)

    gated = multiply([features, gate])

    return add([gated, input])
Ejemplo n.º 6
0
# Residual stages shared by both pathways:
# (stage number, base filter width, block letters).
_SLOWFAST_STAGES = (
    (2, 64, 'abc'),
    (3, 128, 'abcd'),
    (4, 256, 'abcdef'),
    (5, 512, 'abc'),
)

# Values accepted for the `method` argument of SlowFast_Network.
_SLOWFAST_METHODS = ('T_conv', 'T_sample', 'TtoC_sum', 'TtoC_concat')


def _slowfast_stage(x, filters, stage, blocks, path, block_kwargs):
    """Apply one residual stage: a conv_block, then one identity_block per remaining letter."""
    conv_kwargs = dict(block_kwargs)
    if stage == 2:
        # Only the stage-2 conv_block is given explicit unit strides; every
        # other stage relies on conv_block's own default.
        conv_kwargs['strides'] = (1, 1, 1)

    # Fresh list arguments per call, as in the hand-unrolled version.
    x = conv_block(x, [1, 3, 3], list(filters),
                   stage=stage,
                   block=blocks[0],
                   path=path,
                   **conv_kwargs)
    for block in blocks[1:]:
        x = identity_block(x, [1, 3, 3], list(filters),
                           stage=stage,
                           block=block,
                           path=path,
                           **block_kwargs)
    return x


def SlowFast_Network(clip_shape=(64, 224, 224, 3),
                     num_class=400,
                     alpha=8,
                     beta=1 / 8,
                     tau=16,
                     method='T_conv'):
    """Instantiate the two-pathway SlowFast network (ResNet-50 layout).

    Both pathways run the same four residual stages (3, 4, 6 and 3 blocks).
    The fast pathway is built first; its pooled stem and its stage 2-4
    outputs are merged into the slow pathway through ``lateral_connection``.
    The two pathways are globally average pooled, concatenated and
    classified by a softmax ``Dense`` layer.

    Arguments:
        clip_shape: Four-entry video clip shape. The first entry is the
            frame count; the remaining three are copied unchanged into both
            input shapes.
        num_class: Number of output classes.
        alpha: The fast input has ``alpha`` times as many frames as the
            slow input.
        beta: Scale applied to the filter counts of the fast pathway's
            residual blocks.
        tau: The slow input has ``clip_shape[0] / tau`` frames (truncated).
        method: One of ``['T_conv', 'T_sample', 'TtoC_sum', 'TtoC_concat']``.
            The value is validated but is not forwarded to
            ``lateral_connection`` by this function.

    Returns:
        A Keras model whose inputs are ``[slow_input, fast_input]``.

    Raises:
        ValueError: in case of invalid argument for `method`
    """
    # Validate first so a bad argument fails before any layer is created.
    if method not in _SLOWFAST_METHODS:
        raise ValueError(
            'method must be one of %s, got %r'
            % (list(_SLOWFAST_METHODS), method))

    slow_input_shape = [
        int(clip_shape[0] / tau), clip_shape[1], clip_shape[2], clip_shape[3]
    ]
    fast_input_shape = [
        int(slow_input_shape[0] * alpha), slow_input_shape[1],
        slow_input_shape[2], slow_input_shape[3]
    ]
    print('slow_path_input_shape', slow_input_shape)
    print('fast_path_input_shape', fast_input_shape)
    slow_input = Input(shape=slow_input_shape)
    fast_input = Input(shape=fast_input_shape)
    if K.image_data_format() == 'channels_last':
        bn_axis = 4
    else:
        bn_axis = 1

    # ---fast pathway---
    x_fast = Conv3D(64, (5, 7, 7),
                    strides=(1, 2, 2),
                    padding='same',
                    name='fast_conv1')(fast_input)
    x_fast = BatchNormalization(axis=bn_axis, name='fast_bn_conv1')(x_fast)
    x_fast = Activation('relu')(x_fast)
    pool1_fast = MaxPooling3D((1, 3, 3), strides=(1, 2, 2),
                              name='poo1_fast')(x_fast)

    # Output of every fast stage, keyed by stage number, for the laterals.
    fast_stage_outputs = {}
    x_fast = pool1_fast
    for stage, width, blocks in _SLOWFAST_STAGES:
        fast_filters = [int(width * beta), int(width * beta),
                        int(4 * width * beta)]
        x_fast = _slowfast_stage(x_fast, fast_filters, stage, blocks, 'fast',
                                 {'non_degenerate_temporal_conv': True})
        fast_stage_outputs[stage] = x_fast
    res5_fast = x_fast

    # ---slow pathway---
    x = Conv3D(64, (1, 7, 7),
               strides=(1, 2, 2),
               padding='same',
               name='slow_conv1')(slow_input)
    x = BatchNormalization(axis=bn_axis, name='slow_bn_conv1')(x)
    x = Activation('relu')(x)
    pool1 = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), name='poo1_slow')(x)
    x = lateral_connection(pool1_fast, pool1, alpha=alpha, beta=beta)

    last_stage = _SLOWFAST_STAGES[-1][0]
    for stage, width, blocks in _SLOWFAST_STAGES:
        # Only stages 4 and 5 of the slow pathway request the
        # non-degenerate temporal convolution; stages 2 and 3 leave the
        # helpers' default in place.
        if stage >= 4:
            slow_kwargs = {'non_degenerate_temporal_conv': True}
        else:
            slow_kwargs = {}
        x = _slowfast_stage(x, [width, width, 4 * width], stage, blocks,
                            'slow', slow_kwargs)
        # Fuse the matching fast-stage output after every stage but the last.
        if stage != last_stage:
            x = lateral_connection(fast_stage_outputs[stage],
                                   x,
                                   alpha=alpha,
                                   beta=beta)
    res5 = x

    fast_output = GlobalAveragePooling3D(name='avg_pool_fast')(res5_fast)
    slow_output = GlobalAveragePooling3D(name='avg_pool_slow')(res5)
    concat_output = Concatenate(axis=-1)([slow_output, fast_output])
    output = Dense(num_class, activation='softmax', name='fc')(concat_output)

    # Create model.
    inputs = [slow_input, fast_input]
    model = Model(inputs, output, name='slowfast_resnet50')

    return model
Ejemplo n.º 7
0
def CLRNet(input_shape=None,
           classes=10,
           block='bottleneck',
           residual_unit='v2',
           repetitions=None,
           initial_filters=64,
           activation='softmax',
           include_top=True,
           input_tensor=None,
           dropout=None,
           transition_dilation_rate=(1, 1),
           initial_strides=(2, 2),
           initial_kernel_size=(7, 7),
           initial_pooling='max',
           final_pooling=None,
           top='classification'):
    """Builds a custom ResNet like architecture. Defaults to CLRNet50 v2.

    Args:
        input_shape: Shape of the input, required. It must have exactly four
            entries; the error message describes them as
            `(frames, nb_channels, nb_rows, nb_cols)`.
        classes: The number of outputs of the final layer.
        block: The block function to use. Either `'basic'`, `'bottleneck'`,
            another string resolved through `_string_to_function`, or a
            callable used as is.
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for
            bn relu conv. See [Identity Mappings in
            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details. Other strings are resolved through
            `_string_to_function`; a callable is used as is.
        repetitions: Number of repetitions of the block units, one entry per
            stage. The filter count is doubled after every stage. Default of
            None implies the CLRNet50v2 values of [3, 4, 6, 3].
        initial_filters: Number of filters of the initial convolution and of
            the first stage.
        activation: Activation of the top layers; one of `'softmax'`,
            `'sigmoid'` (only with `classes == 1`) or None.
        include_top: Whether to add the layers selected by `top`.
        input_tensor: Optional tensor passed to the Keras `Input` layer.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on the [Wide Residual Networks](https://arxiv.org/pdf/1605.07146)
            paper.
        transition_dilation_rate: Dilation rate for transition layers. When it
            is `(1, 1)` the first unit of every stage uses strides `(2, 2)`.
        initial_strides: Strides of the initial convolution. The initial
            pooling layer does not use this value.
        initial_kernel_size: Kernel size of the very first convolution.
        initial_pooling: `'max'` adds a max pooling layer with pool size
            `(1, 3, 3)` after the initial convolution; any other value skips it.
        final_pooling: Pooling applied when no top is added:
            - `None` leaves the output as is.
            - `avg` applies global average pooling.
            - `max` applies global max pooling.
        top: Final layers to add when `include_top` is true: 'classification',
            'segmentation', or None to exclude these layers entirely.

    Returns:
        The keras `Model`.

    Raises:
        ValueError: If `activation` is not supported, if `'sigmoid'` is
            requested with `classes != 1`, or if `input_shape` is missing or
            does not have four entries.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError(
            'activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError(
            'sigmoid activation can only be used when classes = 1')
    # Checked before any global state is touched. A missing shape used to
    # surface as a TypeError from len(None).
    if input_shape is None or len(input_shape) != 4:
        raise ValueError(
            "Input shape should be a tuple (frames,nb_channels, nb_rows, nb_cols)"
        )
    if repetitions is None:
        repetitions = [3, 4, 6, 3]

    _handle_dim_ordering()

    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)
    # Anything else is assumed to already be a callable residual unit.

    # Permute dimension order if necessary
    # NOTE(review): this keeps only three of the four validated entries -
    # confirm the intended channels_first layout.
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])

    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters,
                      kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        # strides=None makes the pooling layer fall back to its pool size.
        x = MaxPooling3D(pool_size=(1, 3, 3), strides=None, padding="same")(x)

    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1):
            transition_strides[0] = (2, 2)
        x = _residual_block(
            block_fn,
            filters=filters,
            stage=i,
            blocks=r,
            is_first_layer=(i == 0),
            dropout=dropout,
            transition_dilation_rates=transition_dilation_rates,
            transition_strides=transition_strides,
            residual_unit=residual_unit)(x)
        filters *= 2

    # Last activation
    x = _bn_relu2(x)

    # Classifier block
    # Compare strings by value: `is` only tests object identity and is not
    # guaranteed to hold for equal strings.
    if include_top and top == 'classification':
        # NOTE(review): if _bn_relu2 already pools its input, this second 3D
        # pooling receives a tensor without spatial axes - confirm.
        x = GlobalAveragePooling3D()(x)
        x = Dense(units=classes,
                  activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = ConvLSTM2D(classes, (1, 1),
                       activation='linear',
                       padding='same',
                       return_sequences=True)(x)
        # NOTE(review): with channels_last, input_shape still has four
        # entries here, so the three-way unpack below looks like it cannot
        # succeed - confirm the intended shape handling.
        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling3D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling3D()(x)
    model = Model(inputs=img_input, outputs=x)
    return model