def test_build_gan_model(self):

        if self.server_dir is None:
            self.skipTest("DLPY_DATA_DIR_SERVER is not set in the environment variables")

        # test default
        resnet18_model = ResNet18_Caffe(self.s,
                                        width=224,
                                        height=224,
                                        random_flip='HV',
                                        random_mutation='random'
                                        )
        branch = resnet18_model.to_functional_model(stop_layers=resnet18_model.layers[-1])

        # raise error
        self.assertRaises(DLPyError, lambda: GANModel(branch, branch))

        # change the output size for generator
        inp = Input(**branch.layers[0].config)
        generator = Conv2D(width=1, height=1, n_filters=224 * 224 * 3)(branch(inp))
        output = OutputLayer(n=1)(generator)
        generator = Model(self.s, inp, output)
        gan_model = GANModel(generator, branch)
        res = gan_model.models['generator'].print_summary()
        print(res)
        res = gan_model.models['discriminator'].print_summary()
        print(res)
Example #2
def MobileNetV2(conn,
                model_table='MobileNetV2',
                n_classes=1000,
                n_channels=3,
                width=224,
                height=224,
                norm_stds=(255 * 0.229, 255 * 0.224, 255 * 0.225),
                offsets=(255 * 0.485, 255 * 0.456, 255 * 0.406),
                random_flip=None,
                random_crop=None,
                random_mutation=None,
                alpha=1):
    '''
    Generates a deep learning model with the MobileNetV2 architecture.
    The implementation is adapted from
    https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet_v2.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 1000
    n_channels : int, optional
        Specifies the number of channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255 * 0.229, 255 * 0.224, 255 * 0.225)
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255*0.485, 255*0.456, 255*0.406)
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    alpha : int, optional
        Specifies the width multiplier in the MobileNet paper.
        Default: 1

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1801.04381

    '''
    def _make_divisible(v, divisor, min_value=None):
        # make number of channel divisible
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v
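
    # Worked examples of the rounding above (illustrative values):
    #   _make_divisible(32 * 0.75, 8) -> 24  (24 is already a multiple of 8)
    #   _make_divisible(10, 8)        -> 16  (rounding down to 8 would drop >10%)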

    def _inverted_res_block(inputs, in_channels, expansion, stride, alpha,
                            filters, block_id):
        """
        Inverted Residual Block

        Parameters
        ----------
        inputs:
            Input tensor
        in_channels:
            the number of channels in the input tensor
        expansion:
            the expansion factor applied to the number of input channels
        stride:
            the stride of the depthwise convolution
        alpha:
            the width multiplier
        filters:
            the dimensionality of the output space
        block_id:
            the block id used for naming layers

        """
        pointwise_conv_filters = int(filters * alpha)
        pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
        x = inputs
        prefix = 'block_{}_'.format(block_id)
        n_groups = in_channels

        if block_id:
            # Expand
            n_groups = expansion * in_channels
            x = Conv2d(expansion * in_channels,
                       1,
                       include_bias=False,
                       act='identity',
                       name=prefix + 'expand')(x)
            x = BN(name=prefix + 'expand_BN', act='identity')(x)
        else:
            prefix = 'expanded_conv_'

        # Depthwise
        x = GroupConv2d(n_groups,
                        n_groups,
                        3,
                        stride=stride,
                        act='identity',
                        include_bias=False,
                        name=prefix + 'depthwise')(x)
        x = BN(name=prefix + 'depthwise_BN', act='relu')(x)

        # Project
        x = Conv2d(pointwise_filters,
                   1,
                   include_bias=False,
                   act='identity',
                   name=prefix + 'project')(x)
        x = BN(name=prefix + 'project_BN',
               act='identity')(x)  # identity activation on narrow tensor

        if in_channels == pointwise_filters and stride == 1:
            return Res(name=prefix + 'add')([inputs, x]), pointwise_filters
        return x, pointwise_filters

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')
    # Compared with MobileNetV1, V2 introduces the inverted residual structure
    # and removes non-linearities from narrow layers.
    # An inverted residual block performs three convolutions: a 1x1 expansion
    # convolution, a depthwise convolution, and a 1x1 projection convolution
    # without any non-linearity.
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = Conv2d(first_block_filters,
               3,
               stride=2,
               include_bias=False,
               name='Conv1',
               act='identity')(inp)
    x = BN(name='bn_Conv1', act='relu')(x)

    x, n_channels = _inverted_res_block(x,
                                        first_block_filters,
                                        filters=16,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=1,
                                        block_id=0)

    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=24,
                                        alpha=alpha,
                                        stride=2,
                                        expansion=6,
                                        block_id=1)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=24,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=2)

    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=32,
                                        alpha=alpha,
                                        stride=2,
                                        expansion=6,
                                        block_id=3)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=32,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=4)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=32,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=5)

    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=64,
                                        alpha=alpha,
                                        stride=2,
                                        expansion=6,
                                        block_id=6)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=64,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=7)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=64,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=8)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=64,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=9)

    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=96,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=10)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=96,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=11)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=96,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=12)

    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=160,
                                        alpha=alpha,
                                        stride=2,
                                        expansion=6,
                                        block_id=13)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=160,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=14)
    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=160,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=15)

    x, n_channels = _inverted_res_block(x,
                                        n_channels,
                                        filters=320,
                                        alpha=alpha,
                                        stride=1,
                                        expansion=6,
                                        block_id=16)

    # per the paper, alpha is not applied to the last conv unless the width
    # multiplier is greater than 1, in which case the number of output channels
    # is increased
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = Conv2d(last_block_filters,
               1,
               include_bias=False,
               name='Conv_1',
               act='identity')(x)
    x = BN(name='Conv_1_bn', act='relu')(x)

    x = GlobalAveragePooling2D(name="Global_avg_pool")(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inp, x, model_table)
    model.compile()

    return model
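
# A minimal usage sketch for MobileNetV2 above (assumptions, not from the
# original source: a reachable CAS server and the `swat` package installed;
# host, port, and class count are placeholders):
#
#   import swat
#   conn = swat.CAS('cas-host', 5570)                    # hypothetical host/port
#   model = MobileNetV2(conn, n_classes=10, alpha=0.5)   # half-width variant
#   model.print_summary()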
Example #3
def MobileNetV1(conn,
                model_table='MobileNetV1',
                n_classes=1000,
                n_channels=3,
                width=224,
                height=224,
                random_flip=None,
                random_crop=None,
                random_mutation=None,
                norm_stds=(255 * 0.229, 255 * 0.224, 255 * 0.225),
                offsets=(255 * 0.485, 255 * 0.456, 255 * 0.406),
                alpha=1,
                depth_multiplier=1):
    '''
    Generates a deep learning model with the MobileNetV1 architecture.
    The implementation is adapted from
    https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 1000
    n_channels : int, optional
        Specifies the number of channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255*0.229, 255*0.224, 255*0.225)
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255*0.485, 255*0.456, 255*0.406)
    alpha : int, optional
        Specifies the width multiplier in the MobileNet paper.
        Default: 1
    depth_multiplier : int, optional
        Specifies the number of depthwise convolution output channels for each input channel.
        Default: 1

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1704.04861

    '''
    def _conv_block(inputs, filters, alpha, kernel=3, stride=1):
        """
        Adds an initial convolution layer (with batch normalization).

        inputs:
            Input tensor
        filters:
            the dimensionality of the output space
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        kernel:
            the width and height of the 2D convolution window
        stride:
            the stride of the convolution

        """
        filters = int(filters * alpha)
        x = Conv2d(filters,
                   kernel,
                   act='identity',
                   include_bias=False,
                   stride=stride,
                   name='conv1')(inputs)
        x = BN(name='conv1_bn', act='relu')(x)
        return x, filters

    def _depthwise_conv_block(inputs,
                              n_groups,
                              pointwise_conv_filters,
                              alpha,
                              depth_multiplier=1,
                              stride=1,
                              block_id=1):
        """Adds a depthwise convolution block.

        inputs:
            Input tensor
        n_groups : int
            number of groups
        pointwise_conv_filters:
            the dimensionality of the output space
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier:
            the number of depthwise convolution output channels for each
            input channel
        stride: int
            the stride of the depthwise convolution
        block_id: int
            a unique identifier designating the block number

        """
        pointwise_conv_filters = int(pointwise_conv_filters * alpha)

        x = GroupConv2d(n_groups * depth_multiplier,
                        n_groups,
                        3,
                        stride=stride,
                        act='identity',
                        include_bias=False,
                        name='conv_dw_%d' % block_id)(inputs)
        x = BN(name='conv_dw_%d_bn' % block_id, act='relu')(x)

        x = Conv2d(pointwise_conv_filters,
                   1,
                   act='identity',
                   include_bias=False,
                   stride=1,
                   name='conv_pw_%d' % block_id)(x)
        x = BN(name='conv_pw_%d_bn' % block_id, act='relu')(x)
        return x, pointwise_conv_filters
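
    # Cost sketch (from the MobileNet paper): a standard KxK convolution costs
    # K*K*C_in*C_out multiply-adds per output position, while the depthwise
    # (K*K*C_in) plus pointwise (C_in*C_out) pair above costs K*K*C_in + C_in*C_out,
    # roughly a (1/C_out + 1/K^2) fraction of the standard convolution.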

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')
    # the model is down-sampled 5 times by stride=2 convolutions in
    # conv1, conv_dw_2, conv_dw_4, conv_dw_6, and conv_dw_12
    # each block uses a depthwise convolution with kernel=3 followed by a
    # point-wise convolution to save computation
    x, depth = _conv_block(inp, 32, alpha, stride=2)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     64,
                                     alpha,
                                     depth_multiplier,
                                     block_id=1)

    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     128,
                                     alpha,
                                     depth_multiplier,
                                     stride=2,
                                     block_id=2)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     128,
                                     alpha,
                                     depth_multiplier,
                                     block_id=3)

    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     256,
                                     alpha,
                                     depth_multiplier,
                                     stride=2,
                                     block_id=4)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     256,
                                     alpha,
                                     depth_multiplier,
                                     block_id=5)

    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     512,
                                     alpha,
                                     depth_multiplier,
                                     stride=2,
                                     block_id=6)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     512,
                                     alpha,
                                     depth_multiplier,
                                     block_id=7)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     512,
                                     alpha,
                                     depth_multiplier,
                                     block_id=8)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     512,
                                     alpha,
                                     depth_multiplier,
                                     block_id=9)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     512,
                                     alpha,
                                     depth_multiplier,
                                     block_id=10)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     512,
                                     alpha,
                                     depth_multiplier,
                                     block_id=11)

    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     1024,
                                     alpha,
                                     depth_multiplier,
                                     stride=2,
                                     block_id=12)
    x, depth = _depthwise_conv_block(x,
                                     depth,
                                     1024,
                                     alpha,
                                     depth_multiplier,
                                     block_id=13)

    x = GlobalAveragePooling2D(name="Global_avg_pool")(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inp, x, model_table)
    model.compile()

    return model
Example #4
def ENet(conn,
         model_table='ENet',
         n_classes=2,
         n_channels=3,
         width=512,
         height=512,
         scale=1.0 / 255,
         norm_stds=None,
         offsets=None,
         random_mutation=None,
         init=None,
         random_flip=None,
         random_crop=None,
         output_image_type=None,
         output_image_prob=False):
    '''
    Generates a deep learning model with the E-Net architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
        Default: ENet
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 2
    n_channels : int, optional
        Specifies the number of channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 512
    height : int, optional
        Specifies the height of the input layer.
        Default: 512
    scale : double, optional
        Specifies a scaling factor to be applied to each pixel intensity value.
        Default: 1.0/255
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the
        input layer.
        Valid Values: 'none', 'random'
    init : string, optional
        Specifies the initialization scheme for convolution layers.
        Valid Values: XAVIER, UNIFORM, NORMAL, CAUCHY, XAVIER1, XAVIER2, MSRA, MSRA1, MSRA2
        Default: None
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    output_image_type : string, optional
        Specifies the output image type of this layer.
        Valid Values: WIDE, PNG, BASE64
        Default: WIDE
    output_image_prob : bool, optional
        Specifies whether to include class probabilities in the output image
        when doing classification.
        Default: False


    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1606.02147

    '''
    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='InputLayer_1')

    # initial
    x = initial_block(inp)

    # stage one
    x = downsampling_bottleneck(x, 16, 64)
    for i in range(4):
        x = regular_bottleneck(x, 64, 64)

    # stage two
    x = downsampling_bottleneck(x, 64, 128)
    for i in range(2):
        x = regular_bottleneck(x, 128, 128)
        x = regular_bottleneck(x, 128, 128)

    # stage three
    for i in range(2):
        x = regular_bottleneck(x, 128, 128)
        x = regular_bottleneck(x, 128, 128)

    # stage four
    x = upsampling_bottleneck(x, 128, 64)
    for i in range(2):
        x = regular_bottleneck(x, 64, 64)

    # stage five
    x = upsampling_bottleneck(x, 64, 16)
    x = regular_bottleneck(x, 16, 16)

    x = upsampling_bottleneck(x, 16, 16)
    conv = Conv2d(n_classes, 3, act='relu')(x)

    seg = Segmentation(name='Segmentation_1',
                       output_image_type=output_image_type,
                       output_image_prob=output_image_prob)(conv)

    model = Model(conn, inputs=inp, outputs=seg)
    model.compile()
    return model
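
# A usage sketch for ENet above (the connection object `conn` and the class
# count are hypothetical; ENet targets semantic segmentation, so the output
# is a Segmentation layer):
#
#   model = ENet(conn, n_classes=11, width=512, height=512,
#                output_image_type='PNG')
#   model.print_summary()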
Example #5
def EfficientNet(conn, model_table='EfficientNet', n_classes=100, n_channels=3, width=224, height=224,
                 width_coefficient=1, depth_coefficient=1, dropout_rate=0.2, drop_connect_rate=0, depth_divisor=8,
                 activation_fn='relu', blocks_args=_MBConv_BLOCKS_ARGS,
                 offsets=(255*0.406, 255*0.456, 255*0.485), norm_stds=(255*0.225, 255*0.224, 255*0.229),
                 random_flip=None, random_crop=None, random_mutation=None):
    '''
    Generates a deep learning model with the EfficientNet architecture.
    The implementation is adapted from
    https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 100
    n_channels : int, optional
        Specifies the number of channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    width_coefficient: double, optional
        Specifies the scale coefficient for network width.
        Default: 1.0
    depth_coefficient: double, optional
        Specifies the scale coefficient for network depth.
        Default: 1.0
    dropout_rate: double, optional
        Specifies the dropout rate before final classifier layer.
        Default: 0.2
    drop_connect_rate: double, optional
        Specifies the dropout rate at skip connections.
        Default: 0.0
    depth_divisor: integer, optional
        Specifies the unit of network width.
        Default: 8
    activation_fn: string, optional
        Specifies the activation function.
        Default: 'relu'
    blocks_args: list of dicts
         Specifies parameters to construct blocks for the efficientnet model.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255*0.406, 255*0.456, 255*0.485)
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255*0.225, 255*0.224, 255*0.229)
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'


    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/pdf/1905.11946.pdf

    '''

    def round_filters(filters, width_coefficient, depth_divisor):
        '''
        Rounds the scaled number of filters, used for width scaling in EfficientNet.

        Parameters
        ----------
        filters: integer
            Specifies the number of filters.
        width_coefficient: double
            Specifies the scale coefficient for network width.
        depth_divisor: integer
            Specifies the unit of network width.
        '''

        filters *= width_coefficient
        new_filters = int(filters + depth_divisor / 2) // depth_divisor * depth_divisor
        new_filters = max(depth_divisor, new_filters)
        # Make sure that round down does not go down by more than 10%.
        if new_filters < 0.9 * filters:
            new_filters += depth_divisor
        return int(new_filters)

    def round_repeats(repeats, depth_coefficient):
        '''
        Rounds the scaled number of repeats, used for depth scaling in EfficientNet.

        Parameters
        ----------
        repeats: integer
            Specifies the number of repeats for a block.
        depth_coefficient: double
            Specifies the scale coefficient for a block.
        '''

        return int(math.ceil(depth_coefficient * repeats))
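
    # Worked examples (illustrative values):
    #   round_repeats(3, 1.1)     -> ceil(3.3) = 4
    #   round_filters(32, 1.2, 8) -> 40  (32 * 1.2 = 38.4, rounded to the
    #   nearest multiple of 8 while staying within 10% of the scaled value)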

    def _MBConvBlock(inputs, in_channels, out_channels, ksize, stride, expansion, se_ratio, stage_id, block_id,
                     noskip=False, activation_fn='relu'):
        '''
        Inverted Residual Block

        Parameters
        ----------
        inputs: input tensor
            Specifies the input tensor for the block.
        in_channels: integer
            Specifies the number of channels in the input tensor.
        out_channels: integer
            Specifies the number of channels in the output tensor.
        ksize:
            Specifies the kernel size of the convolution
        stride: integer
            Specifies the stride of the convolution
        expansion: double
            Specifies the expansion factor for the input layer.
        se_ratio: double
            Specifies the ratio to squeeze the input filters for squeeze-and-excitation block.
        stage_id: integer
            Specifies stage id for naming layers
        block_id: integer
            Specifies the block id for naming layers
        noskip: bool
            Specifies whether to disable the skip connection. By default,
            the skip connection is used.
        activation_fn: string
            Specifies the activation function
        '''

        # mobilenetv2 block is also known as inverted residual block, which consists of three convolutions:
        # the first is 1*1 convolution for expansion
        # the second is depthwise convolution
        # the third is 1*1 convolution without any non-linearity for projection

        x = inputs
        prefix = 'stage_{}_block_{}'.format(stage_id, block_id)
        n_groups = in_channels  # for expansion=1, n_groups might be different from pointwise_filters

        if expansion > 1:
            # For MobileNet V2, expansion>1 when stage>0
            n_groups = int(expansion * in_channels)  ## update n_groups
            x = Conv2d(n_groups, 1, include_bias=False, act='identity',
                       name=prefix + 'expand')(x)
            x = BN(name=prefix + 'expand_BN', act='identity')(x)

        # Depthwise convolution
        x = GroupConv2d(n_groups, n_groups, ksize, stride=stride, act='identity',
                        include_bias=False, name=prefix + 'depthwise')(x)
        x = BN(name=prefix + 'depthwise_BN', act=activation_fn)(x)

        # Squeeze-Excitation
        if 0 < se_ratio <= 1:
            se_input = x  # features to be squeezed
            x = GlobalAveragePooling2D(name=prefix + "global_avg_pool")(x)
            # Squeeze
            channels_se = max(1, int(in_channels * se_ratio))
            x = Conv2d(channels_se, 1, include_bias=True, act=activation_fn, name=prefix + 'squeeze')(x)
            x = Conv2d(n_groups, 1, include_bias=True, act='sigmoid', name=prefix + 'excitation')(x)
            x = Reshape(name=prefix + 'reshape', width=n_groups, height=1, depth=1)(x)
            x = Scale(name=prefix + 'scale')([se_input, x])  # x = out*w

        # Project
        x = Conv2d(out_channels, 1, include_bias=False, act='identity', name=prefix + 'project')(x)
        x = BN(name=prefix + 'project_BN', act='identity')(x)  # identity activation on narrow tensor
        # Prepare output for MBConv block
        if in_channels == out_channels and stride == 1 and (not noskip):
            # dropout can be added.
            return Res(name=prefix + 'add_se_residual')([x, inputs])
        else:
            return x

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')
    # refer to Table 1  "EfficientNet-B0 baseline network" in paper:
    # "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks"
    stage_id = 0
    out_channels = round_filters(32, width_coefficient,
                                 depth_divisor)  # multiply with width multiplier: width_coefficient
    x = Conv2d(out_channels, 3, stride=2, include_bias=False, name='Conv1', act='identity')(inp)
    x = BN(name='bn_Conv1', act=activation_fn)(x)

    # Create stages with MBConv blocks from stage 1
    in_channels = out_channels  # number of input channels for first MBblock
    stage_id += 1
    total_blocks = float(sum(args[2] for args in blocks_args))  # total block count (currently unused)
    for expansion, out_channels, num_blocks, ksize, stride, se_ratio in blocks_args:
        out_channels = round_filters(out_channels, width_coefficient, depth_divisor)
        num_blocks = round_repeats(num_blocks, depth_coefficient)
        strides = [stride] + [1] * (num_blocks - 1)
        for block_id, stride in enumerate(strides):
            # pass activation_fn by keyword; positionally it would land in the
            # `noskip` parameter and silently disable every skip connection
            x = _MBConvBlock(x, in_channels, out_channels, ksize, stride, expansion, se_ratio, stage_id, block_id,
                             activation_fn=activation_fn)
            in_channels = out_channels  # out_channel
        stage_id += 1

    last_block_filters = round_filters(1280, width_coefficient, depth_divisor)
    x = Conv2d(last_block_filters, 1, include_bias=False, name='Conv_top', act='identity')(x)
    x = BN(name='Conv_top_bn', act=activation_fn)(x)

    x = GlobalAveragePooling2D(name="Global_avg_pool", dropout=dropout_rate)(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inp, x, model_table)
    model.compile()
    return model
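
# A compound-scaling sketch for EfficientNet above. The coefficients follow
# the B1 configuration from the paper (width 1.0, depth 1.1, 240px input,
# dropout 0.2); the connection object `conn` is assumed to exist:
#
#   model = EfficientNet(conn, width=240, height=240,
#                        width_coefficient=1.0, depth_coefficient=1.1,
#                        dropout_rate=0.2)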
Example #6
def ShuffleNetV1(conn,
                 model_table='ShuffleNetV1',
                 n_classes=1000,
                 n_channels=3,
                 width=224,
                 height=224,
                 norm_stds=(255 * 0.229, 255 * 0.224, 255 * 0.225),
                 offsets=(255 * 0.485, 255 * 0.456, 255 * 0.406),
                 random_flip=None,
                 random_crop=None,
                 random_mutation=None,
                 scale_factor=1.0,
                 num_shuffle_units=[3, 7, 3],
                 bottleneck_ratio=0.25,
                 groups=3,
                 block_act='identity'):
    '''
    Generates a deep learning model with the ShuffleNetV1 architecture.
    The implementation is adapted from https://github.com/scheckmedia/keras-shufflenet/blob/master/shufflenet.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 1000
    n_channels : int, optional
        Specifies the number of channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255 * 0.229, 255 * 0.224, 255 * 0.225)
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255*0.485, 255*0.456, 255*0.406)
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    scale_factor : double, optional
        Specifies the multiplier applied to the number of output channels in
        each stage.
        Default: 1.0
    num_shuffle_units : iter-of-int, optional
        Specifies the number of stages (the list length) and the number of
        ShuffleNet units per stage, beginning with stage 2 because stage 1 is
        fixed. Each stage also has one extra downsampling unit, so e.g. index 0
        yields 3 + 1 units for stage 2, index 1 yields 7 + 1 units for stage 3,
        and index 2 yields 3 + 1 units for stage 4.
        Default: [3, 7, 3]
    bottleneck_ratio : double
        Specifies the ratio of bottleneck channels to output channels.
        For example, a bottleneck ratio of 1/4 means the output feature map is
        4 times as wide as the bottleneck feature map.
        Default: 0.25
    groups : int
        Specifies the number of groups used in the group convolutions.
        Default: 3
    block_act : str
        Specifies the activation function applied after the depthwise
        convolution and batch normalization layers.
        Default: 'identity'

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/pdf/1707.01083

    '''
    def _block(x, channel_map, bottleneck_ratio, repeat=1, groups=1, stage=1):
        """
        creates a bottleneck block

        Parameters
        ----------
        x:
            Input tensor
        channel_map:
            list containing the number of output channels for each stage
        repeat:
            number of repetitions of the stride-1 shuffle unit
        groups:
            number of groups for the group convolutions
        bottleneck_ratio:
            ratio of bottleneck channels to output channels
        stage:
            stage number

        Returns
        -------
        The output tensor of the block
        """
        x = _shuffle_unit(x,
                          in_channels=channel_map[stage - 2],
                          out_channels=channel_map[stage - 1],
                          strides=2,
                          groups=groups,
                          bottleneck_ratio=bottleneck_ratio,
                          stage=stage,
                          block=1)

        for i in range(1, repeat + 1):
            x = _shuffle_unit(x,
                              in_channels=channel_map[stage - 1],
                              out_channels=channel_map[stage - 1],
                              strides=1,
                              groups=groups,
                              bottleneck_ratio=bottleneck_ratio,
                              stage=stage,
                              block=(i + 1))

        return x

    def _shuffle_unit(inputs,
                      in_channels,
                      out_channels,
                      groups,
                      bottleneck_ratio,
                      strides=2,
                      stage=1,
                      block=1):
        """
        create a shuffle unit

        Parameters
        ----------
        inputs:
            Input tensor with `channels_last` data format
        in_channels:
            number of input channels
        out_channels:
            number of output channels
        strides: int
            the stride of the depthwise convolution
        groups:
            number of groups for the group convolutions
        bottleneck_ratio: float
            ratio of bottleneck channels to output channels
        stage:
            stage number
        block:
            block number

        """
        prefix = 'stage%d/block%d' % (stage, block)

        # if strides >= 2:
        # out_channels -= in_channels

        # default: 1/4 of the output channel of a ShuffleNet Unit
        bottleneck_channels = int(out_channels * bottleneck_ratio)
        groups = (1 if stage == 2 and block == 1 else groups)

        # x = _group_conv(inputs, in_channels, out_channels = bottleneck_channels,
        #                 groups = (1 if stage == 2 and block == 1 else groups),
        #                 name = '%s/1x1_gconv_1' % prefix)

        x = GroupConv2d(bottleneck_channels,
                        n_groups=(1 if stage == 2 and block == 1 else groups),
                        act='identity',
                        width=1,
                        height=1,
                        stride=1,
                        include_bias=False,
                        name='%s/1x1_gconv_1' % prefix)(inputs)

        x = BN(act='relu', name='%s/bn_gconv_1' % prefix)(x)

        x = ChannelShuffle(n_groups=groups,
                           name='%s/channel_shuffle' % prefix)(x)
        # depthwise convolution
        x = GroupConv2d(x.shape[-1],
                        n_groups=x.shape[-1],
                        width=3,
                        height=3,
                        include_bias=False,
                        stride=strides,
                        act='identity',
                        name='%s/1x1_dwconv_1' % prefix)(x)
        x = BN(act=block_act, name='%s/bn_dwconv_1' % prefix)(x)

        out_channels = out_channels if strides == 1 else out_channels - in_channels
        x = GroupConv2d(out_channels,
                        n_groups=groups,
                        width=1,
                        height=1,
                        stride=1,
                        act='identity',
                        include_bias=False,
                        name='%s/1x1_gconv_2' % prefix)(x)

        x = BN(act=block_act, name='%s/bn_gconv_2' % prefix)(x)

        if strides < 2:
            ret = Res(act='relu', name='%s/add' % prefix)([x, inputs])
        else:
            avg = Pooling(width=3,
                          height=3,
                          stride=2,
                          pool='mean',
                          name='%s/avg_pool' % prefix)(inputs)
            ret = Concat(act='relu', name='%s/concat' % prefix)([x, avg])

        return ret

    out_dim_stage_two = {1: 144, 2: 200, 3: 240, 4: 272, 8: 384}
    try:
        import numpy as np
    except ImportError:
        raise DLPyError('Please install numpy to use this architecture.')

    exp = np.insert(np.arange(0, len(num_shuffle_units), dtype=np.float32), 0, 0)
    out_channels_in_stage = 2 ** exp
    # calculate the number of output channels for each stage
    out_channels_in_stage *= out_dim_stage_two[groups]
    out_channels_in_stage[0] = 24  # the first stage always has 24 output channels
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)
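    # Worked example with the defaults (groups=3, scale_factor=1.0,
    # num_shuffle_units=[3, 7, 3]): exp = [0, 0, 1, 2], so the channel map is
    # [240, 240, 480, 960] before the first-stage override and
    # [24, 240, 480, 960] after it, matching the g=3 setting of the paper.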

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')

    # create shufflenet architecture
    x = Conv2d(out_channels_in_stage[0],
               3,
               include_bias=False,
               stride=2,
               act="identity",
               name="conv1")(inp)
    x = BN(act='relu', name='bn1')(x)
    x = Pooling(width=3, height=3, stride=2, name="maxpool1")(x)

    # create stages containing shufflenet units beginning at stage 2
    for stage in range(0, len(num_shuffle_units)):
        repeat = num_shuffle_units[stage]
        x = _block(x,
                   out_channels_in_stage,
                   repeat=repeat,
                   bottleneck_ratio=bottleneck_ratio,
                   groups=groups,
                   stage=stage + 2)

    x = GlobalAveragePooling2D(name="Global_avg_pool")(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inputs=inp, outputs=x, model_table=model_table)
    model.compile()

    return model
Example #7
def Faster_RCNN(conn,
                model_table='Faster_RCNN',
                n_channels=3,
                width=1000,
                height=496,
                scale=1,
                norm_stds=None,
                offsets=(102.9801, 115.9465, 122.7717),
                random_mutation=None,
                n_classes=20,
                anchor_num_to_sample=256,
                anchor_ratio=[0.5, 1, 2],
                anchor_scale=[8, 16, 32],
                base_anchor_size=16,
                coord_type='coco',
                max_label_per_image=200,
                proposed_roi_num_train=2000,
                proposed_roi_num_score=300,
                roi_train_sample_num=128,
                roi_pooling_height=7,
                roi_pooling_width=7,
                nms_iou_threshold=0.3,
                detection_threshold=0.5,
                max_object_num=50,
                number_of_neurons_in_fc=4096,
                backbone='vgg16',
                random_flip=None,
                random_crop=None):
    '''
    Generates a deep learning model with the faster RCNN architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
    n_channels : int, optional
        Specifies the number of channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 1000
    height : int, optional
        Specifies the height of the input layer.
        Default: 496
    scale : double, optional
        Specifies a scaling factor to be applied to each pixel intensity value.
        Default: 1
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the
        input layer.
        Valid Values: 'none', 'random'
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 20
    anchor_num_to_sample : int, optional
        Specifies the number of anchors to sample for training the region proposal network
        Default: 256
    anchor_ratio : iter-of-float, optional
        Specifies the anchor height and width ratios (h/w) used.
        Default: [0.5, 1, 2]
    anchor_scale : iter-of-float, optional
        Specifies the anchor scales used based on base_anchor_size.
        Default: [8, 16, 32]
    base_anchor_size : int, optional
        Specifies the basic anchor size in width and height (in pixels) in the original input image dimension
        Default: 16
    coord_type : string, optional
        Specifies the coordinates format type in the input label and detection result.
        Valid Values: RECT, COCO, YOLO
        Default: COCO
    proposed_roi_num_score: int, optional
        Specifies the number of ROI (Region of Interest) to propose in the scoring phase
        Default: 300
    proposed_roi_num_train: int, optional
        Specifies the number of ROI (Region of Interest) to propose used for RPN training, and also the pool to
        sample from for FastRCNN Training in the training phase
        Default: 2000
    roi_train_sample_num: int, optional
        Specifies the number of ROIs(Regions of Interests) to sample after NMS(Non-maximum Suppression)
        is performed in the training phase.
        Default: 128
    roi_pooling_height : int, optional
        Specifies the output height of the region pooling layer.
        Default: 7
    roi_pooling_width : int, optional
        Specifies the output width of the region pooling layer.
        Default: 7
    max_label_per_image : int, optional
        Specifies the maximum number of labels per image in the training.
        Default: 200
    nms_iou_threshold: float, optional
        Specifies the IOU threshold for non-maximum suppression in object detection.
        Default: 0.3
    detection_threshold : float, optional
        Specifies the threshold for object detection.
        Default: 0.5
    max_object_num: int, optional
        Specifies the maximum number of objects to detect.
        Default: 50
    number_of_neurons_in_fc: int, or list of int, optional
        Specifies the number of neurons in the last two fully connected layers. If one int is set, then
        both of the layers will have the same values. If a list is set, then the layers get different
        number of neurons.
        Default: 4096
    backbone: string, optional
        Specifies the architecture to be used as the feature extractor.
        Valid Values: vgg16, resnet50, resnet18, resnet34, mobilenetv1, mobilenetv2
        Default: vgg16
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1506.01497

    '''
    # calculate the number of anchors, equal to the product of the lengths of anchor_ratio and anchor_scale
    num_anchors = len(anchor_ratio) * len(anchor_scale)
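    # e.g. with the default anchor_ratio and anchor_scale (3 values each),
    # num_anchors = 3 * 3 = 9, so the rpn_score layer below has
    # (1 + 1 + 4) * 9 = 54 filters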
    parameters = locals()
    # get parameters of input, rpn, fast_rcnn layer
    input_parameters = get_layer_options(input_layer_options, parameters)
    rpn_parameters = get_layer_options(rpn_layer_options, parameters)
    fast_rcnn_parameters = get_layer_options(fast_rcnn_options, parameters)
    inp = Input(**input_parameters, name='data')

    if backbone.lower() == 'vgg16':
        # backbone is VGG16 model
        conv1_1 = Conv2d(n_filters=64,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv1_1')(inp)
        conv1_2 = Conv2d(n_filters=64,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv1_2')(conv1_1)
        pool1 = Pooling(width=2, height=2, stride=2, pool='max',
                        name='pool1')(conv1_2)

        conv2_1 = Conv2d(n_filters=128,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv2_1')(pool1)
        conv2_2 = Conv2d(n_filters=128,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv2_2')(conv2_1)
        pool2 = Pooling(width=2, height=2, stride=2, pool='max')(conv2_2)

        conv3_1 = Conv2d(n_filters=256,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv3_1')(pool2)
        conv3_2 = Conv2d(n_filters=256,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv3_2')(conv3_1)
        conv3_3 = Conv2d(n_filters=256,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv3_3')(conv3_2)
        pool3 = Pooling(width=2, height=2, stride=2, pool='max')(conv3_3)

        conv4_1 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv4_1')(pool3)
        conv4_2 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv4_2')(conv4_1)
        conv4_3 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv4_3')(conv4_2)
        pool4 = Pooling(width=2, height=2, stride=2, pool='max')(conv4_3)

        conv5_1 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv5_1')(pool4)
        conv5_2 = Conv2d(n_filters=512,
                         width=3,
                         height=3,
                         stride=1,
                         name='conv5_2')(conv5_1)
        # the feature map of conv5_3 is used to generate region proposals
        last_layer_in_backbone = Conv2d(n_filters=512,
                                        width=3,
                                        height=3,
                                        stride=1,
                                        name='conv5_3')(conv5_2)
        # two convolutions built on top of conv5_3 reduce the feature map depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)

        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)

        # given ROIs, crop on conv5_3 and resize the feature to the same size
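        # spatial_scale converts ROI coordinates from input-image pixels to
        # feature-map cells (feature-map width divided by the input width)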
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone.shape[0] / width,
            name='roi_pooling')([last_layer_in_backbone, rp1])

    elif backbone.lower() in ('resnet50', 'resnet34', 'resnet18',
                              'mobilenetv1', 'mobilenetv2'):
        # the remaining backbones share the same feature-extraction wiring
        from .resnet import ResNet18_SAS, ResNet34_SAS, ResNet50_SAS
        from .mobilenet import MobileNetV1, MobileNetV2
        builders = {'resnet50': ResNet50_SAS,
                    'resnet34': ResNet34_SAS,
                    'resnet18': ResNet18_SAS,
                    'mobilenetv1': MobileNetV1,
                    'mobilenetv2': MobileNetV2}
        backbone_model = builders[backbone.lower()](conn, width=width,
                                                    height=height)
        # drop the task layer; everything up to it becomes the feature extractor
        backbone_with_last = backbone_model.to_functional_model(
            stop_layers=backbone_model.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolutions built on top of the feature extractor reduce the
        # feature map depth to 6 * num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512,
                          name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity',
                           width=1,
                           n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, NMS, select anchors to train RPN, produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        # given the ROIs, crop the feature map and pool each ROI to a fixed size
        roipool1 = ROIPooling(
            output_height=roi_pooling_height,
            output_width=roi_pooling_width,
            spatial_scale=last_layer_in_backbone[0].shape.output_size[0] /
            height,
            name='roi_pooling')([last_layer_in_backbone[0], rp1])
    else:
        raise DLPyError('"{}" is not a supported backbone.'.format(backbone))

    # fully connected layers extract features from each ROI
    if number_of_neurons_in_fc is None:
        fc6 = Dense(n=4096, act='relu', name='fc6')(roipool1)
        fc7 = Dense(n=4096, act='relu', name='fc7')(fc6)
    else:
        if isinstance(number_of_neurons_in_fc, list):
            if len(number_of_neurons_in_fc) > 1:
                fc6 = Dense(n=number_of_neurons_in_fc[0],
                            act='relu',
                            name='fc6')(roipool1)
                fc7 = Dense(n=number_of_neurons_in_fc[1],
                            act='relu',
                            name='fc7')(fc6)
            else:
                fc6 = Dense(n=number_of_neurons_in_fc[0],
                            act='relu',
                            name='fc6')(roipool1)
                fc7 = Dense(n=number_of_neurons_in_fc[0],
                            act='relu',
                            name='fc7')(fc6)
        else:
            fc6 = Dense(n=number_of_neurons_in_fc, act='relu',
                        name='fc6')(roipool1)
            fc7 = Dense(n=number_of_neurons_in_fc, act='relu', name='fc7')(fc6)
    # classification tensor
    cls1 = Dense(n=n_classes + 1, act='identity', name='cls_score')(fc7)
    # regression tensor (second-stage bounding box regression)
    reg1 = Dense(n=(n_classes + 1) * 4, act='identity', name='bbox_pred')(fc7)
    # the task layer receives cls1, reg1, and rp1 (ground truth) and trains the second stage
    fr1 = FastRCNN(**fast_rcnn_parameters,
                   class_number=n_classes,
                   name='fastrcnn')([cls1, reg1, rp1])
    faster_rcnn = Model(conn, inp, fr1, model_table=model_table)
    faster_rcnn.compile()
    return faster_rcnn
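
# Usage sketch (illustrative, not from the original source): build a detector
# on the VGG16 backbone and inspect it. It assumes this builder is exposed as
# Faster_RCNN and that `s` is a connected swat.CAS session; both names are
# assumptions here, not guaranteed by the source above.
def _example_faster_rcnn(s):
    detector = Faster_RCNN(s, n_classes=20, width=496, height=496,
                           backbone='vgg16')
    detector.print_summary()
    return detector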
Example #8
def UNet(conn,
         model_table='UNet',
         n_classes=2,
         n_channels=1,
         width=256,
         height=256,
         scale=1.0 / 255,
         norm_stds=None,
         offsets=None,
         random_mutation=None,
         init=None,
         bn_after_convolutions=False,
         random_flip=None,
         random_crop=None):
    '''
    Generates a deep learning model with the U-Net architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string, optional
        Specifies the name of the CAS table to store the model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 2
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 1
    width : int, optional
        Specifies the width of the input layer.
        Default: 256
    height : int, optional
        Specifies the height of the input layer.
        Default: 256
    scale : double, optional
        Specifies a scaling factor to be applied to the pixel intensity values.
        Default: 1.0/255
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the
        input layer.
        Valid Values: 'none', 'random'
    init : string, optional
        Specifies the initialization scheme for convolution layers.
        Valid Values: XAVIER, UNIFORM, NORMAL, CAUCHY, XAVIER1, XAVIER2, MSRA, MSRA1, MSRA2
        Default: None
    bn_after_convolutions : Boolean, optional
        If set to True, a batch normalization layer is added after each convolution layer.
        Default: False
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is
        used. Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is
        used. Images are cropped to the values that are specified in the width
        and height parameters. Only the images with one or both dimensions
        that are larger than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/pdf/1505.04597

    '''
    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')
    act_conv = 'relu'
    bias_conv = True
    if bn_after_convolutions:
        act_conv = 'identity'
        bias_conv = False
    # The model follows the U-Net paper architecture: the network down-samples by max pooling with stride=2
    conv1 = Conv2d(64, 3, act=act_conv, init=init, include_bias=bias_conv)(inp)
    conv1 = BN(act='relu')(conv1) if bn_after_convolutions else conv1
    conv1 = Conv2d(64, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv1)
    conv1 = BN(act='relu')(conv1) if bn_after_convolutions else conv1
    pool1 = Pooling(2)(conv1)

    conv2 = Conv2d(128, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(pool1)
    conv2 = BN(act='relu')(conv2) if bn_after_convolutions else conv2
    conv2 = Conv2d(128, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv2)
    conv2 = BN(act='relu')(conv2) if bn_after_convolutions else conv2
    pool2 = Pooling(2)(conv2)

    conv3 = Conv2d(256, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(pool2)
    conv3 = BN(act='relu')(conv3) if bn_after_convolutions else conv3
    conv3 = Conv2d(256, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv3)
    conv3 = BN(act='relu')(conv3) if bn_after_convolutions else conv3
    pool3 = Pooling(2)(conv3)

    conv4 = Conv2d(512, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(pool3)
    conv4 = BN(act='relu')(conv4) if bn_after_convolutions else conv4
    conv4 = Conv2d(512, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv4)
    conv4 = BN(act='relu')(conv4) if bn_after_convolutions else conv4
    pool4 = Pooling(2)(conv4)

    conv5 = Conv2d(1024, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(pool4)
    conv5 = BN(act='relu')(conv5) if bn_after_convolutions else conv5
    conv5 = Conv2d(1024, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv5)
    conv5 = BN(act='relu')(conv5) if bn_after_convolutions else conv5
    # at the bottleneck, the feature map is 1/2^4 of the original image size
    # this implementation applies transposed convolution to upsample the feature maps
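    # output_size pins each up-sampled tensor to the shape of the matching
    # encoder feature map, so the Concat layers see identical spatial sizes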
    tconv6 = Conv2DTranspose(512,
                             3,
                             stride=2,
                             act='relu',
                             padding=1,
                             output_size=conv4.shape,
                             init=init)(conv5)  # 64
    # concatenation layers to combine encoder and decoder features
    merge6 = Concat()([conv4, tconv6])
    conv6 = Conv2d(512, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(merge6)
    conv6 = BN(act='relu')(conv6) if bn_after_convolutions else conv6
    conv6 = Conv2d(512, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv6)
    conv6 = BN(act='relu')(conv6) if bn_after_convolutions else conv6

    tconv7 = Conv2DTranspose(256,
                             3,
                             stride=2,
                             act='relu',
                             padding=1,
                             output_size=conv3.shape,
                             init=init)(conv6)  # 128
    merge7 = Concat()([conv3, tconv7])
    conv7 = Conv2d(256, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(merge7)
    conv7 = BN(act='relu')(conv7) if bn_after_convolutions else conv7
    conv7 = Conv2d(256, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv7)
    conv7 = BN(act='relu')(conv7) if bn_after_convolutions else conv7

    tconv8 = Conv2DTranspose(128,
                             3,
                             stride=2,
                             act='relu',
                             padding=1,
                             output_size=conv2.shape,
                             init=init)(conv7)  # 256
    merge8 = Concat()([conv2, tconv8])
    conv8 = Conv2d(128, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(merge8)
    conv8 = BN(act='relu')(conv8) if bn_after_convolutions else conv8
    conv8 = Conv2d(128, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv8)
    conv8 = BN(act='relu')(conv8) if bn_after_convolutions else conv8

    tconv9 = Conv2DTranspose(64,
                             3,
                             stride=2,
                             act='relu',
                             padding=1,
                             output_size=conv1.shape,
                             init=init)(conv8)  # 512
    merge9 = Concat()([conv1, tconv9])
    conv9 = Conv2d(64, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(merge9)
    conv9 = BN(act='relu')(conv9) if bn_after_convolutions else conv9
    conv9 = Conv2d(64, 3, act=act_conv, init=init,
                   include_bias=bias_conv)(conv9)
    conv9 = BN(act='relu')(conv9) if bn_after_convolutions else conv9

    conv9 = Conv2d(n_classes, 3, act='relu', init=init)(conv9)

    seg1 = Segmentation(name='Segmentation_1')(conv9)
    model = Model(conn, inputs=inp, outputs=seg1, model_table=model_table)
    model.compile()
    return model
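
# Usage sketch (illustrative, not from the original source): build a two-class
# segmentation U-Net with batch normalization after every convolution. `s` is
# assumed to be a connected swat.CAS session.
def _example_unet(s):
    seg_model = UNet(s, n_classes=2, n_channels=3, width=256, height=256,
                     bn_after_convolutions=True)
    seg_model.print_summary()
    return seg_model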
Example #9

    def build_embedding_model(cls,
                              branch,
                              model_table=None,
                              embedding_model_type='Siamese',
                              embedding_layer=None,
                              margin=None):
        '''
        Build an embedding model based on a given model branch and model type.

        Parameters
        ----------
        branch : Model
            Specifies the base model that is used as the branches of the embedding model.
        model_table : string or dict or CAS table, optional
            Specifies the CAS table to store the deep learning model.
            Default: None
        embedding_model_type : string, optional
            Specifies the type of embedding model to build and train.
            Valid values: Siamese, Triplet, and Quartet.
            Default: Siamese
        embedding_layer : Layer, optional
            Specifies a dense layer as the embedding layer. For instance, Dense(n=10, act='identity')
            defines an embedding dimension of 10. When it is not given, the last layer
            (excluding the task layers) in the branch model is used as the embedding layer.
        margin : double, optional
            Specifies the margin value used by the embedding model. When it is not given,
            the margin defaults to 2.0 for Siamese models and to 0.0 otherwise.

        Returns
        -------
        :class:`Model`

        '''

        # check the branch type
        if not isinstance(branch, Model):
            raise DLPyError('The branch option must contain a valid model')

        # the branch must be built using functional APIs
        # only functional model has the attr output_layers
        if not hasattr(branch, 'output_layers'):
            print("NOTE: Convert the branch model into a functional model.")
            branch_tensor = branch.to_functional_model()
        else:
            branch_tensor = deepcopy(branch)

        # always reset the instance count of this local tensor copy to 0
        branch_tensor.number_of_instances = 0

        # the branch cannot contain other task layers
        if len(branch_tensor.output_layers) != 1:
            raise DLPyError(
                'The branch model cannot contain more than one output layer')
        elif branch_tensor.output_layers[0].type == OutputLayer.type or \
                branch_tensor.output_layers[0].type == Keypoints.type:
            print("NOTE: Remove the task layers from the model.")
            branch_tensor.layers.remove(branch_tensor.output_layers[0])
            branch_tensor.output_layers[0] = branch_tensor.layers[-1]
        elif branch_tensor.output_layers[0].can_be_last_layer:
            raise DLPyError(
                'The branch model cannot contain a task layer other than an '
                'output or keypoints layer.')

        # check embedding_model_type
        if embedding_model_type.lower() not in [
                'siamese', 'triplet', 'quartet'
        ]:
            raise DLPyError('Only Siamese, Triplet, and Quartet are valid.')

        if embedding_model_type.lower() == 'siamese':
            if margin is None:
                margin = 2.0
            cls.number_of_branches = 2
        elif embedding_model_type.lower() == 'triplet':
            if margin is None:
                margin = 0.0
            cls.number_of_branches = 3
        elif embedding_model_type.lower() == 'quartet':
            if margin is None:
                margin = 0.0
            cls.number_of_branches = 4

        cls.embedding_model_type = embedding_model_type

        # build the branches
        input_layers = []
        branch_layers = []
        for i_branch in range(cls.number_of_branches):
            temp_input_layer = Input(**branch_tensor.layers[0].config,
                                     name=cls.input_layer_name_prefix +
                                     str(i_branch))
            temp_branch = branch_tensor(
                temp_input_layer)  # return a list of tensors
            if embedding_layer:
                temp_embed_layer = deepcopy(embedding_layer)
                temp_embed_layer.name = cls.embedding_layer_name_prefix + str(
                    i_branch)
                temp_branch = temp_embed_layer(temp_branch)
                # change tensor to a list
                temp_branch = [temp_branch]
            else:
                # change the last layer name to the embedding layer name
                temp_branch[
                    -1]._op.name = cls.embedding_layer_name_prefix + str(
                        i_branch)

            if i_branch == 0:
                cls.branch_input_tensor = temp_input_layer
                if len(temp_branch) == 1:
                    cls.branch_output_tensor = temp_branch[0]
                else:
                    cls.branch_output_tensor = temp_branch

            # append these layers to the current branch
            input_layers.append(temp_input_layer)
            branch_layers = branch_layers + temp_branch

        # add the embedding loss layer
        loss_layer = EmbeddingLoss(
            margin=margin, name=cls.embedding_loss_layer_name)(branch_layers)

        # create the model DAG using all the above model information
        model = EmbeddingModel(branch.conn,
                               model_table=model_table,
                               inputs=input_layers,
                               outputs=loss_layer)

        # sharing weights
        # get all layer names from one branch
        num_l = int((len(model.layers) - 1) / cls.number_of_branches)
        br1_name = [i.name for i in model.layers[:num_l - 1]]

        # build the list that contain the shared layers
        share_list = []
        n_id = 0
        n_to = n_id + cls.number_of_branches
        for l in br1_name[1:]:
            share_list.append(
                {l: [l + '_' + str(i + 1) for i in range(n_id + 1, n_to)]})

        # add embedding layers
        share_list.append({
            cls.embedding_layer_name_prefix + str(0): [
                cls.embedding_layer_name_prefix + str(i)
                for i in range(1, cls.number_of_branches)
            ]
        })

        model.share_weights(share_list)

        model.compile()

        # generate data_specs
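        # each DataSpec binds an input-table column to a model layer; the
        # Siamese variant additionally consumes the _dissimilar_ label column
        # for its loss layer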
        if embedding_model_type.lower() == 'siamese':
            cls.data_specs = [
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '0',
                         data=['_image_']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '1',
                         data=['_image_1']),
                DataSpec(type_='numnom',
                         layer=cls.embedding_loss_layer_name,
                         data=['_dissimilar_'])
            ]
        elif embedding_model_type.lower() == 'triplet':
            cls.data_specs = [
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '0',
                         data=['_image_']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '1',
                         data=['_image_1']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '2',
                         data=['_image_2'])
            ]

        elif embedding_model_type.lower() == 'quartet':
            cls.data_specs = [
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '0',
                         data=['_image_']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '1',
                         data=['_image_1']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '2',
                         data=['_image_2']),
                DataSpec(type_='image',
                         layer=cls.input_layer_name_prefix + '3',
                         data=['_image_3'])
            ]

        return model
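
# Usage sketch (illustrative, not from the original source): wrap a backbone
# into a Siamese embedding model with a 128-dimensional embedding layer. The
# import paths and the names EmbeddingModel, ResNet18_SAS, and Dense are
# assumptions about the hosting package, not guaranteed by the source above.
def _example_siamese(s):
    from dlpy.applications import ResNet18_SAS
    from dlpy.embedding_model import EmbeddingModel
    from dlpy.layers import Dense
    branch = ResNet18_SAS(s, width=224, height=224)
    siamese = EmbeddingModel.build_embedding_model(
        branch,
        embedding_model_type='Siamese',
        embedding_layer=Dense(n=128, act='identity'),
        margin=2.0)
    siamese.print_summary()
    return siamese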