def test_build_gan_model(self):
    if self.server_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR_SERVER is not set in the environment variables")

    # test default
    resnet18_model = ResNet18_Caffe(self.s,
                                    width=224,
                                    height=224,
                                    random_flip='HV',
                                    random_mutation='random')
    branch = resnet18_model.to_functional_model(stop_layers=resnet18_model.layers[-1])

    # using the same branch for generator and discriminator must raise an error
    self.assertRaises(DLPyError, lambda: GANModel(branch, branch))

    # change the output size for the generator
    inp = Input(**branch.layers[0].config)
    generator = Conv2D(width=1, height=1, n_filters=224 * 224 * 3)(branch(inp))
    output = OutputLayer(n=1)(generator)
    generator = Model(self.s, inp, output)

    gan_model = GANModel(generator, branch)
    res = gan_model.models['generator'].print_summary()
    print(res)
    res = gan_model.models['discriminator'].print_summary()
    print(res)

def MobileNetV2(conn, model_table='MobileNetV2', n_classes=1000, n_channels=3, width=224, height=224,
                norm_stds=(255 * 0.229, 255 * 0.224, 255 * 0.225),
                offsets=(255 * 0.485, 255 * 0.456, 255 * 0.406),
                random_flip=None, random_crop=None, random_mutation=None, alpha=1):
    '''
    Generates a deep learning model with the MobileNetV2 architecture.

    The implementation is revised based on
    https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet_v2.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 1000
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255 * 0.229, 255 * 0.224, 255 * 0.225)
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255 * 0.485, 255 * 0.456, 255 * 0.406)
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is used.
        Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is used.
        Images are cropped to the values that are specified in the width and height
        parameters. Only the images with one or both dimensions that are larger
        than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    alpha : int, optional
        Specifies the width multiplier in the MobileNet paper.
        Default: 1

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1801.04381

    '''

    def _make_divisible(v, divisor, min_value=None):
        # round the number of channels to the nearest multiple of divisor
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        # make sure that rounding down does not reduce the value by more than 10%
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def _inverted_res_block(inputs, in_channels, expansion, stride, alpha, filters, block_id):
        """
        Inverted Residual Block

        Parameters
        ----------
        inputs:
            Input tensor
        in_channels:
            Specifies the number of channels of the input tensor
        expansion:
            Expansion factor, always applied to the input size
        stride:
            The stride of the convolution
        alpha:
            Width multiplier
        filters:
            The dimensionality of the output space
        block_id:
            Block id used for naming layers

        """
        pointwise_conv_filters = int(filters * alpha)
        pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
        x = inputs
        prefix = 'block_{}_'.format(block_id)
        n_groups = in_channels

        if block_id:
            # Expand
            n_groups = expansion * in_channels
            x = Conv2d(expansion * in_channels, 1, include_bias=False, act='identity',
                       name=prefix + 'expand')(x)
            x = BN(name=prefix + 'expand_BN', act='identity')(x)
        else:
            prefix = 'expanded_conv_'

        # Depthwise
        x = GroupConv2d(n_groups, n_groups, 3, stride=stride, act='identity',
                        include_bias=False, name=prefix + 'depthwise')(x)
        x = BN(name=prefix + 'depthwise_BN', act='relu')(x)

        # Project
        x = Conv2d(pointwise_filters, 1, include_bias=False, act='identity',
                   name=prefix + 'project')(x)
        x = BN(name=prefix + 'project_BN', act='identity')(x)  # identity activation on narrow tensor

        if in_channels == pointwise_filters and stride == 1:
            return Res(name=prefix + 'add')([inputs, x]), pointwise_filters
        return x, pointwise_filters

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')
    # Compared with MobileNetV1, V2 introduces the inverted residual structure
    # and removes non-linearities in narrow layers.
    # An inverted residual block does three convolutions: the first is a 1*1 convolution,
    # the second is a depthwise convolution, and the third is a 1*1 convolution
    # without any non-linearity.
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = Conv2d(first_block_filters, 3, stride=2, include_bias=False, name='Conv1', act='identity')(inp)
    x = BN(name='bn_Conv1', act='relu')(x)

    x, n_channels = _inverted_res_block(x, first_block_filters, filters=16, alpha=alpha, stride=1,
                                        expansion=1, block_id=0)

    x, n_channels = _inverted_res_block(x, n_channels, filters=24, alpha=alpha, stride=2,
                                        expansion=6, block_id=1)
    x, n_channels = _inverted_res_block(x, n_channels, filters=24, alpha=alpha, stride=1,
                                        expansion=6, block_id=2)

    x, n_channels = _inverted_res_block(x, n_channels, filters=32, alpha=alpha, stride=2,
                                        expansion=6, block_id=3)
    x, n_channels = _inverted_res_block(x, n_channels, filters=32, alpha=alpha, stride=1,
                                        expansion=6, block_id=4)
    x, n_channels = _inverted_res_block(x, n_channels, filters=32, alpha=alpha, stride=1,
                                        expansion=6, block_id=5)

    x, n_channels = _inverted_res_block(x, n_channels, filters=64, alpha=alpha, stride=2,
                                        expansion=6, block_id=6)
    x, n_channels = _inverted_res_block(x, n_channels, filters=64, alpha=alpha, stride=1,
                                        expansion=6, block_id=7)
    x, n_channels = _inverted_res_block(x, n_channels, filters=64, alpha=alpha, stride=1,
                                        expansion=6, block_id=8)
    x, n_channels = _inverted_res_block(x, n_channels, filters=64, alpha=alpha, stride=1,
                                        expansion=6, block_id=9)

    x, n_channels = _inverted_res_block(x, n_channels, filters=96, alpha=alpha, stride=1,
                                        expansion=6, block_id=10)
    x, n_channels = _inverted_res_block(x, n_channels, filters=96, alpha=alpha, stride=1,
                                        expansion=6, block_id=11)
    x, n_channels = _inverted_res_block(x, n_channels, filters=96, alpha=alpha, stride=1,
                                        expansion=6, block_id=12)

    x, n_channels = _inverted_res_block(x, n_channels, filters=160, alpha=alpha, stride=2,
                                        expansion=6, block_id=13)
    x, n_channels = _inverted_res_block(x, n_channels, filters=160, alpha=alpha, stride=1,
                                        expansion=6, block_id=14)
    x, n_channels = _inverted_res_block(x, n_channels, filters=160, alpha=alpha, stride=1,
                                        expansion=6, block_id=15)

    x, n_channels = _inverted_res_block(x, n_channels, filters=320, alpha=alpha, stride=1,
                                        expansion=6, block_id=16)

    # no alpha applied to the last conv as stated in the paper:
    # if the width multiplier is greater than 1, we increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = Conv2d(last_block_filters, 1, include_bias=False, name='Conv_1', act='identity')(x)
    x = BN(name='Conv_1_bn', act='relu')(x)

    x = GlobalAveragePooling2D(name='Global_avg_pool')(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inp, x, model_table)
    model.compile()

    return model

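# A minimal usage sketch, not part of this module: the CAS host and port below
# are placeholders for an existing server. It shows constructing the network
# and inspecting its layers.
#
#     import swat
#     conn = swat.CAS('cas-host', 5570)
#     model = MobileNetV2(conn, n_classes=10, alpha=1)
#     model.print_summary()
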
def MobileNetV1(conn, model_table='MobileNetV1', n_classes=1000, n_channels=3, width=224, height=224,
                random_flip=None, random_crop=None, random_mutation=None,
                norm_stds=(255 * 0.229, 255 * 0.224, 255 * 0.225),
                offsets=(255 * 0.485, 255 * 0.456, 255 * 0.406),
                alpha=1, depth_multiplier=1):
    '''
    Generates a deep learning model with the MobileNetV1 architecture.

    The implementation is revised based on
    https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 1000
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is used.
        Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is used.
        Images are cropped to the values that are specified in the width and height
        parameters. Only the images with one or both dimensions that are larger
        than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255 * 0.229, 255 * 0.224, 255 * 0.225)
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255 * 0.485, 255 * 0.456, 255 * 0.406)
    alpha : int, optional
        Specifies the width multiplier in the MobileNet paper.
        Default: 1
    depth_multiplier : int, optional
        Specifies the number of depthwise convolution output channels for each input channel.
        Default: 1

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1704.04861

    '''

    def _conv_block(inputs, filters, alpha, kernel=3, stride=1):
        """
        Adds an initial convolution layer (with batch normalization).

        inputs:
            Input tensor
        filters:
            The dimensionality of the output space
        alpha:
            Controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number of filters in each layer.
            - If `alpha` = 1, the default number of filters from the paper is used at each layer.
        kernel:
            Specifies the width and height of the 2D convolution window
        stride:
            The stride of the convolution

        """
        filters = int(filters * alpha)
        x = Conv2d(filters, kernel, act='identity', include_bias=False, stride=stride, name='conv1')(inputs)
        x = BN(name='conv1_bn', act='relu')(x)
        return x, filters

    def _depthwise_conv_block(inputs, n_groups, pointwise_conv_filters, alpha,
                              depth_multiplier=1, stride=1, block_id=1):
        """
        Adds a depthwise convolution block.

        inputs:
            Input tensor
        n_groups : int
            Number of groups
        pointwise_conv_filters:
            The dimensionality of the output space
        alpha:
            Controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number of filters in each layer.
            - If `alpha` = 1, the default number of filters from the paper is used at each layer.
        depth_multiplier:
            The number of depthwise convolution output channels per input channel
        stride:
            An integer or tuple/list of 2 integers, specifying the stride of the convolution
        block_id : integer
            A unique identification designating the block number

        """
        pointwise_conv_filters = int(pointwise_conv_filters * alpha)

        x = GroupConv2d(n_groups * depth_multiplier, n_groups, 3, stride=stride, act='identity',
                        include_bias=False, name='conv_dw_%d' % block_id)(inputs)
        x = BN(name='conv_dw_%d_bn' % block_id, act='relu')(x)

        x = Conv2d(pointwise_conv_filters, 1, act='identity', include_bias=False, stride=1,
                   name='conv_pw_%d' % block_id)(x)
        x = BN(name='conv_pw_%d_bn' % block_id, act='relu')(x)
        return x, pointwise_conv_filters

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')
    # the model down-samples 5 times by performing stride=2 convolutions in
    # conv1, conv_dw_2, conv_dw_4, conv_dw_6, and conv_dw_12.
    # each block uses a depthwise convolution with kernel=3 followed by a
    # point-wise convolution to save computation
    x, depth = _conv_block(inp, 32, alpha, stride=2)
    x, depth = _depthwise_conv_block(x, depth, 64, alpha, depth_multiplier, block_id=1)

    x, depth = _depthwise_conv_block(x, depth, 128, alpha, depth_multiplier, stride=2, block_id=2)
    x, depth = _depthwise_conv_block(x, depth, 128, alpha, depth_multiplier, block_id=3)

    x, depth = _depthwise_conv_block(x, depth, 256, alpha, depth_multiplier, stride=2, block_id=4)
    x, depth = _depthwise_conv_block(x, depth, 256, alpha, depth_multiplier, block_id=5)

    x, depth = _depthwise_conv_block(x, depth, 512, alpha, depth_multiplier, stride=2, block_id=6)
    x, depth = _depthwise_conv_block(x, depth, 512, alpha, depth_multiplier, block_id=7)
    x, depth = _depthwise_conv_block(x, depth, 512, alpha, depth_multiplier, block_id=8)
    x, depth = _depthwise_conv_block(x, depth, 512, alpha, depth_multiplier, block_id=9)
    x, depth = _depthwise_conv_block(x, depth, 512, alpha, depth_multiplier, block_id=10)
    x, depth = _depthwise_conv_block(x, depth, 512, alpha, depth_multiplier, block_id=11)

    x, depth = _depthwise_conv_block(x, depth, 1024, alpha, depth_multiplier, stride=2, block_id=12)
    x, depth = _depthwise_conv_block(x, depth, 1024, alpha, depth_multiplier, block_id=13)

    x = GlobalAveragePooling2D(name='Global_avg_pool')(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inp, x, model_table)
    model.compile()

    return model

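# A hedged sketch of shrinking MobileNetV1 with the width multiplier; `conn`
# is assumed to be an existing CAS session as in the sketch above. With
# alpha=0.5 every convolution width is halved before use, e.g. the first
# convolution becomes int(32 * 0.5) = 16 filters.
#
#     slim_model = MobileNetV1(conn, n_classes=10, alpha=0.5, depth_multiplier=1)
#     slim_model.print_summary()
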
def ENet(conn, model_table='ENet', n_classes=2, n_channels=3, width=512, height=512, scale=1.0 / 255,
         norm_stds=None, offsets=None, random_mutation=None, init=None, random_flip=None,
         random_crop=None, output_image_type=None, output_image_prob=False):
    '''
    Generates a deep learning model with the E-Net architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
        Default: ENet
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 2
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 512
    height : int, optional
        Specifies the height of the input layer.
        Default: 512
    scale : double, optional
        Specifies a scaling factor to be applied to each pixel intensity value.
        Default: 1.0 / 255
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    init : str
        Specifies the initialization scheme for convolution layers.
        Valid Values: XAVIER, UNIFORM, NORMAL, CAUCHY, XAVIER1, XAVIER2, MSRA, MSRA1, MSRA2
        Default: None
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is used.
        Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is used.
        Images are cropped to the values that are specified in the width and height
        parameters. Only the images with one or both dimensions that are larger
        than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    output_image_type : string, optional
        Specifies the output image type of this layer.
        Valid Values: WIDE, PNG, BASE64
        Default: WIDE
    output_image_prob : bool, optional
        Specifies whether to include class probabilities when doing classification.
        Default: False

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1606.02147

    '''
    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='InputLayer_1')

    # initial
    x = initial_block(inp)

    # stage one
    x = downsampling_bottleneck(x, 16, 64)
    for i in range(4):
        x = regular_bottleneck(x, 64, 64)

    # stage two
    x = downsampling_bottleneck(x, 64, 128)
    for i in range(2):
        x = regular_bottleneck(x, 128, 128)
        x = regular_bottleneck(x, 128, 128)

    # stage three
    for i in range(2):
        x = regular_bottleneck(x, 128, 128)
        x = regular_bottleneck(x, 128, 128)

    # stage four
    x = upsampling_bottleneck(x, 128, 64)
    for i in range(2):
        x = regular_bottleneck(x, 64, 64)

    # stage five
    x = upsampling_bottleneck(x, 64, 16)
    x = regular_bottleneck(x, 16, 16)
    x = upsampling_bottleneck(x, 16, 16)

    conv = Conv2d(n_classes, 3, act='relu')(x)
    seg = Segmentation(name='Segmentation_1', output_image_type=output_image_type,
                       output_image_prob=output_image_prob)(conv)

    model = Model(conn, inputs=inp, outputs=seg)
    model.compile()

    return model

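# Usage sketch (hedged; `conn` is an existing CAS session as above): ENet ends
# in a Segmentation layer, so labels are per-pixel class masks rather than a
# single class per image.
#
#     seg_model = ENet(conn, n_classes=2, width=512, height=512,
#                      output_image_type='PNG')
#     seg_model.print_summary()
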
def EfficientNet(conn, model_table='EfficientNet', n_classes=100, n_channels=3, width=224, height=224,
                 width_coefficient=1, depth_coefficient=1, dropout_rate=0.2, drop_connect_rate=0,
                 depth_divisor=8, activation_fn='relu', blocks_args=_MBConv_BLOCKS_ARGS,
                 offsets=(255 * 0.406, 255 * 0.456, 255 * 0.485),
                 norm_stds=(255 * 0.225, 255 * 0.224, 255 * 0.229),
                 random_flip=None, random_crop=None, random_mutation=None):
    '''
    Generates a deep learning model with the EfficientNet architecture.

    The implementation is revised based on
    https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 100
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    width_coefficient : double, optional
        Specifies the scale coefficient for the network width.
        Default: 1.0
    depth_coefficient : double, optional
        Specifies the scale coefficient for the network depth.
        Default: 1.0
    dropout_rate : double, optional
        Specifies the dropout rate before the final classifier layer.
        Default: 0.2
    drop_connect_rate : double, optional
        Specifies the dropout rate at skip connections.
        Default: 0.0
    depth_divisor : integer, optional
        Specifies the unit of network width.
        Default: 8
    activation_fn : string, optional
        Specifies the activation function.
        Default: 'relu'
    blocks_args : list of tuples
        Specifies the parameters used to construct the blocks of the EfficientNet
        model. Each entry provides (expansion, out_channels, num_blocks, ksize,
        stride, se_ratio) for one stage.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255 * 0.406, 255 * 0.456, 255 * 0.485)
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255 * 0.225, 255 * 0.224, 255 * 0.229)
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is used.
        Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is used.
        Images are cropped to the values that are specified in the width and height
        parameters. Only the images with one or both dimensions that are larger
        than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/pdf/1905.11946.pdf

    '''

    def round_filters(filters, width_coefficient, depth_divisor):
        '''
        Rounds the scaled width, which is used for width scaling in EfficientNet.

        Parameters
        ----------
        filters : integer
            Specifies the number of filters.
        width_coefficient : double
            Specifies the scale coefficient for the network width.
        depth_divisor : integer
            Specifies the unit of network width.

        '''
        filters *= width_coefficient
        new_filters = int(filters + depth_divisor / 2) // depth_divisor * depth_divisor
        new_filters = max(depth_divisor, new_filters)
        # make sure that rounding down does not reduce the value by more than 10%
        if new_filters < 0.9 * filters:
            new_filters += depth_divisor
        return int(new_filters)

    def round_repeats(repeats, depth_coefficient):
        '''
        Rounds the scaled depth, which is used for depth scaling in EfficientNet.

        Parameters
        ----------
        repeats : integer
            Specifies the number of repeats for a block.
        depth_coefficient : double
            Specifies the scale coefficient for a block.

        '''
        return int(math.ceil(depth_coefficient * repeats))

    def _MBConvBlock(inputs, in_channels, out_channels, ksize, stride, expansion, se_ratio,
                     stage_id, block_id, noskip=False, activation_fn='relu'):
        '''
        Inverted Residual Block

        Parameters
        ----------
        inputs : input tensor
            Specifies the input tensor for the block.
        in_channels : integer
            Specifies the number of channels of the input tensor.
        out_channels : integer
            Specifies the number of channels of the output tensor.
        ksize:
            Specifies the kernel size of the convolution.
        stride : integer
            Specifies the stride of the convolution.
        expansion : double
            Specifies the expansion factor for the input layer.
        se_ratio : double
            Specifies the ratio to squeeze the input filters for the
            squeeze-and-excitation block.
        stage_id : integer
            Specifies the stage id for naming layers.
        block_id:
            Specifies the block id for naming layers.
        noskip : bool
            Specifies whether the skip connection is disabled. By default,
            the skip connection is used.
        activation_fn:
            Specifies the activation function.

        '''
        # the MobileNetV2 block, also known as the inverted residual block, consists of
        # three convolutions: the first is a 1*1 convolution for expansion,
        # the second is a depthwise convolution, and the third is a 1*1 convolution
        # without any non-linearity for projection
        x = inputs
        prefix = 'stage_{}_block_{}'.format(stage_id, block_id)
        n_groups = in_channels  # for expansion=1, n_groups might be different from pointwise_filters

        if expansion > 1:
            # for MobileNetV2, expansion > 1 when stage > 0
            n_groups = int(expansion * in_channels)  # update n_groups
            x = Conv2d(n_groups, 1, include_bias=False, act='identity',
                       name=prefix + 'expand')(x)
            x = BN(name=prefix + 'expand_BN', act='identity')(x)

        # Depthwise convolution
        x = GroupConv2d(n_groups, n_groups, ksize, stride=stride, act='identity',
                        include_bias=False, name=prefix + 'depthwise')(x)
        x = BN(name=prefix + 'depthwise_BN', act=activation_fn)(x)

        # Squeeze-Excitation
        if 0 < se_ratio <= 1:
            se_input = x  # features to be squeezed
            x = GlobalAveragePooling2D(name=prefix + 'global_avg_pool')(x)
            # Squeeze
            channels_se = max(1, int(in_channels * se_ratio))
            x = Conv2d(channels_se, 1, include_bias=True, act=activation_fn, name=prefix + 'squeeze')(x)
            x = Conv2d(n_groups, 1, include_bias=True, act='sigmoid', name=prefix + 'excitation')(x)
            x = Reshape(name=prefix + 'reshape', width=n_groups, height=1, depth=1)(x)
            x = Scale(name=prefix + 'scale')([se_input, x])  # x = out * w

        # Project
        x = Conv2d(out_channels, 1, include_bias=False, act='identity', name=prefix + 'project')(x)
        x = BN(name=prefix + 'project_BN', act='identity')(x)  # identity activation on narrow tensor

        # prepare output for the MBConv block
        if in_channels == out_channels and stride == 1 and (not noskip):
            # dropout can be added
            return Res(name=prefix + 'add_se_residual')([x, inputs])
        else:
            return x

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')

    # refer to Table 1 "EfficientNet-B0 baseline network" in the paper:
    # "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks"
    stage_id = 0
    out_channels = round_filters(32, width_coefficient, depth_divisor)  # scaled by the width multiplier
    x = Conv2d(out_channels, 3, stride=2, include_bias=False, name='Conv1', act='identity')(inp)
    x = BN(name='bn_Conv1', act=activation_fn)(x)

    # create stages with MBConv blocks from stage 1
    in_channels = out_channels  # number of input channels for the first MBConv block
    stage_id += 1
    total_blocks = float(sum(args[2] for args in blocks_args))
    for expansion, out_channels, num_blocks, ksize, stride, se_ratio in blocks_args:
        out_channels = round_filters(out_channels, width_coefficient, depth_divisor)
        num_blocks = round_repeats(num_blocks, depth_coefficient)
        # only the first block in a stage downsamples; the remaining blocks use stride 1
        strides = [stride] + [1] * (num_blocks - 1)
        for block_id, stride in enumerate(strides):
            x = _MBConvBlock(x, in_channels, out_channels, ksize, stride, expansion, se_ratio,
                             stage_id, block_id, activation_fn=activation_fn)
            in_channels = out_channels  # out_channel
        stage_id += 1

    last_block_filters = round_filters(1280, width_coefficient, depth_divisor)
    x = Conv2d(last_block_filters, 1, include_bias=False, name='Conv_top', act='identity')(x)
    x = BN(name='Conv_top_bn', act=activation_fn)(x)

    x = GlobalAveragePooling2D(name='Global_avg_pool', dropout=dropout_rate)(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inp, x, model_table)
    model.compile()

    return model

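# Scaling sketch (hedged; `conn` as in the sketches above): the paper's
# B-variants are obtained by passing the compound scaling coefficients. For
# example, EfficientNet-B1 uses width coefficient 1.0, depth coefficient 1.1,
# and 240x240 inputs per the paper.
#
#     b1 = EfficientNet(conn, n_classes=100, width=240, height=240,
#                       width_coefficient=1.0, depth_coefficient=1.1,
#                       dropout_rate=0.2)
#     b1.print_summary()
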
def ShuffleNetV1(conn, model_table='ShuffleNetV1', n_classes=1000, n_channels=3, width=224, height=224,
                 norm_stds=(255 * 0.229, 255 * 0.224, 255 * 0.225),
                 offsets=(255 * 0.485, 255 * 0.456, 255 * 0.406),
                 random_flip=None, random_crop=None, random_mutation=None,
                 scale_factor=1.0, num_shuffle_units=[3, 7, 3], bottleneck_ratio=0.25,
                 groups=3, block_act='identity'):
    '''
    Generates a deep learning model with the ShuffleNetV1 architecture.

    The implementation is revised based on
    https://github.com/scheckmedia/keras-shufflenet/blob/master/shufflenet.py

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 1000
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 224
    height : int, optional
        Specifies the height of the input layer.
        Default: 224
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
        Default: (255 * 0.229, 255 * 0.224, 255 * 0.225)
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
        Default: (255 * 0.485, 255 * 0.456, 255 * 0.406)
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is used.
        Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is used.
        Images are cropped to the values that are specified in the width and height
        parameters. Only the images with one or both dimensions that are larger
        than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    scale_factor : double
        Specifies a scaling factor applied to the number of output channels in each stage.
        Default: 1.0
    num_shuffle_units : iter-of-int, optional
        Specifies the number of stages (the list length) and the number of ShuffleNet
        units per stage, beginning with stage 2 because stage 1 is fixed. The first
        shuffle unit in each stage differs, so each entry contributes one extra unit:
        index 0 yields 3 + 1 units for stage 2, index 1 yields 7 + 1 units for stage 3,
        and index 2 yields 3 + 1 units for stage 4.
        Default: [3, 7, 3]
    bottleneck_ratio : double
        The bottleneck ratio is the ratio of bottleneck channels to output channels.
        For example, a bottleneck ratio of 1:4 means the output feature map is
        4 times the width of the bottleneck feature map.
        Default: 0.25
    groups : int
        Specifies the number of groups per channel.
        Default: 3
    block_act : str
        Specifies the activation function after the depthwise convolution and
        batch normalization layer.
        Default: 'identity'

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/pdf/1707.01083

    '''

    def _block(x, channel_map, bottleneck_ratio, repeat=1, groups=1, stage=1):
        """
        Creates a bottleneck block containing `repeat + 1` shuffle units.

        Parameters
        ----------
        x:
            Input tensor
        channel_map:
            List containing the number of output channels for each stage
        repeat:
            Number of repetitions for a shuffle unit with stride 1
        groups:
            Number of groups per channel
        bottleneck_ratio:
            The ratio of bottleneck channels to output channels
        stage:
            Stage number

        Returns
        -------
        Output tensor of the block

        """
        x = _shuffle_unit(x, in_channels=channel_map[stage - 2], out_channels=channel_map[stage - 1],
                          strides=2, groups=groups, bottleneck_ratio=bottleneck_ratio,
                          stage=stage, block=1)

        for i in range(1, repeat + 1):
            x = _shuffle_unit(x, in_channels=channel_map[stage - 1], out_channels=channel_map[stage - 1],
                              strides=1, groups=groups, bottleneck_ratio=bottleneck_ratio,
                              stage=stage, block=(i + 1))

        return x

    def _shuffle_unit(inputs, in_channels, out_channels, groups, bottleneck_ratio,
                      strides=2, stage=1, block=1):
        """
        Creates a shuffle unit.

        Parameters
        ----------
        inputs:
            Input tensor with `channels_last` data format
        in_channels:
            Number of input channels
        out_channels:
            Number of output channels
        strides:
            An integer or tuple/list of 2 integers
        groups:
            Number of groups per channel
        bottleneck_ratio : float
            The ratio of bottleneck channels to output channels
        stage:
            Stage number
        block:
            Block number

        """
        prefix = 'stage%d/block%d' % (stage, block)

        # default: 1/4 of the output channels of a ShuffleNet unit
        bottleneck_channels = int(out_channels * bottleneck_ratio)
        # the first unit of stage 2 does not use a group convolution on its first pointwise layer
        groups = (1 if stage == 2 and block == 1 else groups)

        x = GroupConv2d(bottleneck_channels, n_groups=(1 if stage == 2 and block == 1 else groups),
                        act='identity', width=1, height=1, stride=1, include_bias=False,
                        name='%s/1x1_gconv_1' % prefix)(inputs)
        x = BN(act='relu', name='%s/bn_gconv_1' % prefix)(x)

        x = ChannelShuffle(n_groups=groups, name='%s/channel_shuffle' % prefix)(x)

        # depthwise convolution
        x = GroupConv2d(x.shape[-1], n_groups=x.shape[-1], width=3, height=3, include_bias=False,
                        stride=strides, act='identity', name='%s/1x1_dwconv_1' % prefix)(x)
        x = BN(act=block_act, name='%s/bn_dwconv_1' % prefix)(x)

        out_channels = out_channels if strides == 1 else out_channels - in_channels
        x = GroupConv2d(out_channels, n_groups=groups, width=1, height=1, stride=1, act='identity',
                        include_bias=False, name='%s/1x1_gconv_2' % prefix)(x)
        x = BN(act=block_act, name='%s/bn_gconv_2' % prefix)(x)

        if strides < 2:
            ret = Res(act='relu', name='%s/add' % prefix)([x, inputs])
        else:
            avg = Pooling(width=3, height=3, stride=2, pool='mean', name='%s/avg_pool' % prefix)(inputs)
            ret = Concat(act='relu', name='%s/concat' % prefix)([x, avg])

        return ret

    out_dim_stage_two = {1: 144, 2: 200, 3: 240, 4: 272, 8: 384}

    try:
        import numpy as np
    except ImportError:
        raise DLPyError('Please install numpy to use this architecture.')

    exp = np.insert(np.arange(0, len(num_shuffle_units), dtype=np.float32), 0, 0)
    out_channels_in_stage = 2 ** exp
    out_channels_in_stage *= out_dim_stage_two[groups]  # calculate output channels for each stage
    out_channels_in_stage[0] = 24  # the first stage always has 24 output channels
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)

    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')

    # create the ShuffleNet architecture
    x = Conv2d(out_channels_in_stage[0], 3, include_bias=False, stride=2, act='identity', name='conv1')(inp)
    x = BN(act='relu', name='bn1')(x)
    x = Pooling(width=3, height=3, stride=2, name='maxpool1')(x)

    # create stages containing ShuffleNet units, beginning at stage 2
    for stage in range(0, len(num_shuffle_units)):
        repeat = num_shuffle_units[stage]
        x = _block(x, out_channels_in_stage, repeat=repeat, bottleneck_ratio=bottleneck_ratio,
                   groups=groups, stage=stage + 2)

    x = GlobalAveragePooling2D(name='Global_avg_pool')(x)
    x = OutputLayer(n=n_classes)(x)

    model = Model(conn, inputs=inp, outputs=x, model_table=model_table)
    model.compile()

    return model

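# Usage sketch (`conn` as above): `groups` must be one of the keys of
# out_dim_stage_two (1, 2, 3, 4, or 8), since it selects the stage-2 channel
# count; any other value raises a KeyError.
#
#     shuffle = ShuffleNetV1(conn, n_classes=10, groups=3, scale_factor=1.0)
#     shuffle.print_summary()
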
def Faster_RCNN(conn, model_table='Faster_RCNN', n_channels=3, width=1000, height=496, scale=1,
                norm_stds=None, offsets=(102.9801, 115.9465, 122.7717), random_mutation=None,
                n_classes=20, anchor_num_to_sample=256, anchor_ratio=[0.5, 1, 2],
                anchor_scale=[8, 16, 32], base_anchor_size=16, coord_type='coco',
                max_label_per_image=200, proposed_roi_num_train=2000, proposed_roi_num_score=300,
                roi_train_sample_num=128, roi_pooling_height=7, roi_pooling_width=7,
                nms_iou_threshold=0.3, detection_threshold=0.5, max_object_num=50,
                number_of_neurons_in_fc=4096, backbone='vgg16', random_flip=None, random_crop=None):
    '''
    Generates a deep learning model with the Faster RCNN architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 3
    width : int, optional
        Specifies the width of the input layer.
        Default: 1000
    height : int, optional
        Specifies the height of the input layer.
        Default: 496
    scale : double, optional
        Specifies a scaling factor to be applied to each pixel intensity value.
        Default: 1
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 20
    anchor_num_to_sample : int, optional
        Specifies the number of anchors to sample for training the region proposal network.
        Default: 256
    anchor_ratio : iter-of-float
        Specifies the anchor height and width ratios (h/w) used.
    anchor_scale : iter-of-float
        Specifies the anchor scales used, based on base_anchor_size.
    base_anchor_size : int, optional
        Specifies the basic anchor size in width and height (in pixels) in the
        original input image dimension.
        Default: 16
    coord_type : string, optional
        Specifies the coordinate format type in the input label and detection result.
        Valid Values: RECT, COCO, YOLO
        Default: COCO
    proposed_roi_num_score : int, optional
        Specifies the number of ROIs (Regions of Interest) to propose in the scoring phase.
        Default: 300
    proposed_roi_num_train : int, optional
        Specifies the number of ROIs (Regions of Interest) to propose for RPN training;
        this is also the pool that FastRCNN training samples from in the training phase.
        Default: 2000
    roi_train_sample_num : int, optional
        Specifies the number of ROIs (Regions of Interest) to sample after NMS
        (Non-maximum Suppression) is performed in the training phase.
        Default: 128
    roi_pooling_height : int, optional
        Specifies the output height of the region pooling layer.
        Default: 7
    roi_pooling_width : int, optional
        Specifies the output width of the region pooling layer.
        Default: 7
    max_label_per_image : int, optional
        Specifies the maximum number of labels per image in the training.
        Default: 200
    nms_iou_threshold : float, optional
        Specifies the IOU threshold of maximum suppression in object detection.
        Default: 0.3
    detection_threshold : float, optional
        Specifies the threshold for object detection.
        Default: 0.5
    max_object_num : int, optional
        Specifies the maximum number of objects to detect.
        Default: 50
    number_of_neurons_in_fc : int or list of int, optional
        Specifies the number of neurons in the last two fully connected layers.
        If a single int is given, both layers use that number of neurons.
        If a list is given, the two layers use the listed values.
        Default: 4096
    backbone : string, optional
        Specifies the architecture to be used as the feature extractor.
        Valid Values: vgg16, resnet50, resnet18, resnet34, mobilenetv1, mobilenetv2
        Default: vgg16
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is used.
        Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is used.
        Images are cropped to the values that are specified in the width and height
        parameters. Only the images with one or both dimensions that are larger
        than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/abs/1506.01497

    '''
    # the number of anchors equals the product of the lengths of anchor_ratio and anchor_scale
    num_anchors = len(anchor_ratio) * len(anchor_scale)
    parameters = locals()
    # get parameters of the input, rpn, and fast_rcnn layers
    input_parameters = get_layer_options(input_layer_options, parameters)
    rpn_parameters = get_layer_options(rpn_layer_options, parameters)
    fast_rcnn_parameters = get_layer_options(fast_rcnn_options, parameters)
    inp = Input(**input_parameters, name='data')

    if backbone.lower() == 'vgg16':
        # backbone is the VGG16 model
        conv1_1 = Conv2d(n_filters=64, width=3, height=3, stride=1, name='conv1_1')(inp)
        conv1_2 = Conv2d(n_filters=64, width=3, height=3, stride=1, name='conv1_2')(conv1_1)
        pool1 = Pooling(width=2, height=2, stride=2, pool='max', name='pool1')(conv1_2)

        conv2_1 = Conv2d(n_filters=128, width=3, height=3, stride=1, name='conv2_1')(pool1)
        conv2_2 = Conv2d(n_filters=128, width=3, height=3, stride=1, name='conv2_2')(conv2_1)
        pool2 = Pooling(width=2, height=2, stride=2, pool='max')(conv2_2)

        conv3_1 = Conv2d(n_filters=256, width=3, height=3, stride=1, name='conv3_1')(pool2)
        conv3_2 = Conv2d(n_filters=256, width=3, height=3, stride=1, name='conv3_2')(conv3_1)
        conv3_3 = Conv2d(n_filters=256, width=3, height=3, stride=1, name='conv3_3')(conv3_2)
        pool3 = Pooling(width=2, height=2, stride=2, pool='max')(conv3_3)

        conv4_1 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv4_1')(pool3)
        conv4_2 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv4_2')(conv4_1)
        conv4_3 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv4_3')(conv4_2)
        pool4 = Pooling(width=2, height=2, stride=2, pool='max')(conv4_3)

        conv5_1 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv5_1')(pool4)
        conv5_2 = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv5_2')(conv5_1)
        # the feature map of conv5_3 is used to generate region proposals
        last_layer_in_backbone = Conv2d(n_filters=512, width=3, height=3, stride=1, name='conv5_3')(conv5_2)

        # two convolutions built on top of conv5_3 reduce the feature map depth to 6*num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, run NMS, select anchors to train the RPN, and produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        # given the ROIs, crop conv5_3 and resize the features to the same size
        roipool1 = ROIPooling(output_height=roi_pooling_height,
                              output_width=roi_pooling_width,
                              spatial_scale=last_layer_in_backbone.shape[0] / width,
                              name='roi_pooling')([last_layer_in_backbone, rp1])
    elif backbone.lower() == 'resnet50':
        from .resnet import ResNet50_SAS
        backbone = ResNet50_SAS(conn, width=width, height=height)
        backbone_with_last = backbone.to_functional_model(stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolutions built on top of the feature extractor reduce the feature map depth
        # to 6*num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, run NMS, select anchors to train the RPN, and produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(output_height=roi_pooling_height,
                              output_width=roi_pooling_width,
                              spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height,
                              name='roi_pooling')([last_layer_in_backbone[0], rp1])
    elif backbone.lower() == 'resnet34':
        from .resnet import ResNet34_SAS
        backbone = ResNet34_SAS(conn, width=width, height=height)
        backbone_with_last = backbone.to_functional_model(stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolutions built on top of the feature extractor reduce the feature map depth
        # to 6*num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, run NMS, select anchors to train the RPN, and produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(output_height=roi_pooling_height,
                              output_width=roi_pooling_width,
                              spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height,
                              name='roi_pooling')([last_layer_in_backbone[0], rp1])
    elif backbone.lower() == 'resnet18':
        from .resnet import ResNet18_SAS
        backbone = ResNet18_SAS(conn, width=width, height=height)
        backbone_with_last = backbone.to_functional_model(stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolutions built on top of the feature extractor reduce the feature map depth
        # to 6*num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, run NMS, select anchors to train the RPN, and produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(output_height=roi_pooling_height,
                              output_width=roi_pooling_width,
                              spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height,
                              name='roi_pooling')([last_layer_in_backbone[0], rp1])
    elif backbone.lower() == 'mobilenetv1':
        from .mobilenet import MobileNetV1
        backbone = MobileNetV1(conn, width=width, height=height)
        backbone_with_last = backbone.to_functional_model(stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolutions built on top of the feature extractor reduce the feature map depth
        # to 6*num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, run NMS, select anchors to train the RPN, and produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(output_height=roi_pooling_height,
                              output_width=roi_pooling_width,
                              spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height,
                              name='roi_pooling')([last_layer_in_backbone[0], rp1])
    elif backbone.lower() == 'mobilenetv2':
        from .mobilenet import MobileNetV2
        backbone = MobileNetV2(conn, width=width, height=height)
        backbone_with_last = backbone.to_functional_model(stop_layers=backbone.layers[-2])
        last_layer_in_backbone = backbone_with_last(inp)
        # two convolutions built on top of the feature extractor reduce the feature map depth
        # to 6*num_anchors
        rpn_conv = Conv2d(width=3, n_filters=512, name='rpn_conv_3x3')(last_layer_in_backbone)
        rpn_score = Conv2d(act='identity', width=1, n_filters=((1 + 1 + 4) * num_anchors),
                           name='rpn_score')(rpn_conv)
        # propose anchors, run NMS, select anchors to train the RPN, and produce ROIs
        rp1 = RegionProposal(**rpn_parameters, name='rois')(rpn_score)
        roipool1 = ROIPooling(output_height=roi_pooling_height,
                              output_width=roi_pooling_width,
                              spatial_scale=last_layer_in_backbone[0].shape.output_size[0] / height,
                              name='roi_pooling')([last_layer_in_backbone[0], rp1])
    else:
        raise DLPyError('We are not supporting this backbone yet.')

    # fully connected layers to extract the features of the ROIs
    if number_of_neurons_in_fc is None:
        fc6 = Dense(n=4096, act='relu', name='fc6')(roipool1)
        fc7 = Dense(n=4096, act='relu', name='fc7')(fc6)
    else:
        if isinstance(number_of_neurons_in_fc, list):
            if len(number_of_neurons_in_fc) > 1:
                fc6 = Dense(n=number_of_neurons_in_fc[0], act='relu', name='fc6')(roipool1)
                fc7 = Dense(n=number_of_neurons_in_fc[1], act='relu', name='fc7')(fc6)
            else:
                fc6 = Dense(n=number_of_neurons_in_fc[0], act='relu', name='fc6')(roipool1)
                fc7 = Dense(n=number_of_neurons_in_fc[0], act='relu', name='fc7')(fc6)
        else:
            fc6 = Dense(n=number_of_neurons_in_fc, act='relu', name='fc6')(roipool1)
            fc7 = Dense(n=number_of_neurons_in_fc, act='relu', name='fc7')(fc6)

    # classification tensor
    cls1 = Dense(n=n_classes + 1, act='identity', name='cls_score')(fc7)
    # regression tensor (second-stage bounding box regression)
    reg1 = Dense(n=(n_classes + 1) * 4, act='identity', name='bbox_pred')(fc7)
    # the task layer receives cls1, reg1, and rp1 (ground truth) and trains the second stage
    fr1 = FastRCNN(**fast_rcnn_parameters, class_number=n_classes, name='fastrcnn')([cls1, reg1, rp1])

    faster_rcnn = Model(conn, inp, fr1, model_table=model_table)
    faster_rcnn.compile()

    return faster_rcnn

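# Usage sketch (hedged; `conn` as above): build the detector with a ResNet50
# feature extractor. With the default anchor settings this yields
# len(anchor_ratio) * len(anchor_scale) = 3 * 3 = 9 anchors per location.
#
#     detector = Faster_RCNN(conn, n_classes=20, backbone='resnet50',
#                            width=1000, height=496)
#     detector.print_summary()
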
def UNet(conn, model_table='UNet', n_classes=2, n_channels=1, width=256, height=256, scale=1.0 / 255,
         norm_stds=None, offsets=None, random_mutation=None, init=None,
         bn_after_convolutions=False, random_flip=None, random_crop=None):
    '''
    Generates a deep learning model with the U-Net architecture.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
    n_classes : int, optional
        Specifies the number of classes. If None is assigned, the model will
        automatically detect the number of classes based on the training set.
        Default: 2
    n_channels : int, optional
        Specifies the number of the channels (i.e., depth) of the input layer.
        Default: 1
    width : int, optional
        Specifies the width of the input layer.
        Default: 256
    height : int, optional
        Specifies the height of the input layer.
        Default: 256
    scale : double, optional
        Specifies a scaling factor to be applied to each pixel intensity value.
        Default: 1.0 / 255
    norm_stds : double or iter-of-doubles, optional
        Specifies a standard deviation for each channel in the input data.
        The final input data is normalized with specified means and standard deviations.
    offsets : double or iter-of-doubles, optional
        Specifies an offset for each channel in the input data. The final input
        data is set after applying scaling and subtracting the specified offsets.
    random_mutation : string, optional
        Specifies how to apply data augmentations/mutations to the data in the input layer.
        Valid Values: 'none', 'random'
    init : str
        Specifies the initialization scheme for convolution layers.
        Valid Values: XAVIER, UNIFORM, NORMAL, CAUCHY, XAVIER1, XAVIER2, MSRA, MSRA1, MSRA2
        Default: None
    bn_after_convolutions : Boolean
        If set to True, a batch normalization layer is added after each convolution layer.
    random_flip : string, optional
        Specifies how to flip the data in the input layer when image data is used.
        Approximately half of the input data is subject to flipping.
        Valid Values: 'h', 'hv', 'v', 'none'
    random_crop : string, optional
        Specifies how to crop the data in the input layer when image data is used.
        Images are cropped to the values that are specified in the width and height
        parameters. Only the images with one or both dimensions that are larger
        than those sizes are cropped.
        Valid Values: 'none', 'unique', 'randomresized', 'resizethencrop'

    Returns
    -------
    :class:`Model`

    References
    ----------
    https://arxiv.org/pdf/1505.04597

    '''
    parameters = locals()
    input_parameters = get_layer_options(input_layer_options, parameters)
    inp = Input(**input_parameters, name='data')

    act_conv = 'relu'
    bias_conv = True
    if bn_after_convolutions:
        act_conv = 'identity'
        bias_conv = False

    # The model follows the UNet paper architecture.
    # The network down-samples by performing max pooling with stride=2.
    conv1 = Conv2d(64, 3, act=act_conv, init=init, include_bias=bias_conv)(inp)
    conv1 = BN(act='relu')(conv1) if bn_after_convolutions else conv1
    conv1 = Conv2d(64, 3, act=act_conv, init=init, include_bias=bias_conv)(conv1)
    conv1 = BN(act='relu')(conv1) if bn_after_convolutions else conv1
    pool1 = Pooling(2)(conv1)

    conv2 = Conv2d(128, 3, act=act_conv, init=init, include_bias=bias_conv)(pool1)
    conv2 = BN(act='relu')(conv2) if bn_after_convolutions else conv2
    conv2 = Conv2d(128, 3, act=act_conv, init=init, include_bias=bias_conv)(conv2)
    conv2 = BN(act='relu')(conv2) if bn_after_convolutions else conv2
    pool2 = Pooling(2)(conv2)

    conv3 = Conv2d(256, 3, act=act_conv, init=init, include_bias=bias_conv)(pool2)
    conv3 = BN(act='relu')(conv3) if bn_after_convolutions else conv3
    conv3 = Conv2d(256, 3, act=act_conv, init=init, include_bias=bias_conv)(conv3)
    conv3 = BN(act='relu')(conv3) if bn_after_convolutions else conv3
    pool3 = Pooling(2)(conv3)

    conv4 = Conv2d(512, 3, act=act_conv, init=init, include_bias=bias_conv)(pool3)
    conv4 = BN(act='relu')(conv4) if bn_after_convolutions else conv4
    conv4 = Conv2d(512, 3, act=act_conv, init=init, include_bias=bias_conv)(conv4)
    conv4 = BN(act='relu')(conv4) if bn_after_convolutions else conv4
    pool4 = Pooling(2)(conv4)

    conv5 = Conv2d(1024, 3, act=act_conv, init=init, include_bias=bias_conv)(pool4)
    conv5 = BN(act='relu')(conv5) if bn_after_convolutions else conv5
    conv5 = Conv2d(1024, 3, act=act_conv, init=init, include_bias=bias_conv)(conv5)
    conv5 = BN(act='relu')(conv5) if bn_after_convolutions else conv5
    # the minimum feature map size is 1/2^4 of the original image size

    # our implementation applies transposed convolutions to upsample the feature maps
    tconv6 = Conv2DTranspose(512, 3, stride=2, act='relu', padding=1, output_size=conv4.shape,
                             init=init)(conv5)  # 64
    # concatenation layers combine encoder and decoder features
    merge6 = Concat()([conv4, tconv6])
    conv6 = Conv2d(512, 3, act=act_conv, init=init, include_bias=bias_conv)(merge6)
    conv6 = BN(act='relu')(conv6) if bn_after_convolutions else conv6
    conv6 = Conv2d(512, 3, act=act_conv, init=init, include_bias=bias_conv)(conv6)
    conv6 = BN(act='relu')(conv6) if bn_after_convolutions else conv6

    tconv7 = Conv2DTranspose(256, 3, stride=2, act='relu', padding=1, output_size=conv3.shape,
                             init=init)(conv6)  # 128
    merge7 = Concat()([conv3, tconv7])
    conv7 = Conv2d(256, 3, act=act_conv, init=init, include_bias=bias_conv)(merge7)
    conv7 = BN(act='relu')(conv7) if bn_after_convolutions else conv7
    conv7 = Conv2d(256, 3, act=act_conv, init=init, include_bias=bias_conv)(conv7)
    conv7 = BN(act='relu')(conv7) if bn_after_convolutions else conv7

    tconv8 = Conv2DTranspose(128, 3, stride=2, act='relu', padding=1, output_size=conv2.shape,
                             init=init)(conv7)  # 256
    merge8 = Concat()([conv2, tconv8])
    conv8 = Conv2d(128, 3, act=act_conv, init=init, include_bias=bias_conv)(merge8)
    conv8 = BN(act='relu')(conv8) if bn_after_convolutions else conv8
    conv8 = Conv2d(128, 3, act=act_conv, init=init, include_bias=bias_conv)(conv8)
    conv8 = BN(act='relu')(conv8) if bn_after_convolutions else conv8

    tconv9 = Conv2DTranspose(64, 3, stride=2, act='relu', padding=1, output_size=conv1.shape,
                             init=init)(conv8)  # 512
    merge9 = Concat()([conv1, tconv9])
    conv9 = Conv2d(64, 3, act=act_conv, init=init, include_bias=bias_conv)(merge9)
    conv9 = BN(act='relu')(conv9) if bn_after_convolutions else conv9
    conv9 = Conv2d(64, 3, act=act_conv, init=init, include_bias=bias_conv)(conv9)
    conv9 = BN(act='relu')(conv9) if bn_after_convolutions else conv9

    conv9 = Conv2d(n_classes, 3, act='relu', init=init)(conv9)
    seg1 = Segmentation(name='Segmentation_1')(conv9)

    model = Model(conn, inputs=inp, outputs=seg1, model_table=model_table)
    model.compile()

    return model

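# Usage sketch (`conn` as above): a single-channel (grayscale) segmentation
# model with batch normalization inserted after every convolution.
#
#     unet = UNet(conn, n_classes=2, n_channels=1, width=256, height=256,
#                 bn_after_convolutions=True)
#     unet.print_summary()
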
def build_embedding_model(cls, branch, model_table=None, embedding_model_type='Siamese',
                          embedding_layer=None, margin=None):
    '''
    Builds an embedding model based on a given model branch and model type.

    Parameters
    ----------
    branch : Model
        Specifies the base model that is used as the branches of the embedding model.
    model_table : string or dict or CAS table, optional
        Specifies the CAS table to store the deep learning model.
        Default: None
    embedding_model_type : string, optional
        Specifies the embedding model type that the created model will be
        trained as.
        Valid Values: Siamese, Triplet, Quartet
        Default: Siamese
    embedding_layer : Layer, optional
        Specifies a dense layer as the embedding layer. For instance,
        Dense(n=10, act='identity') defines an embedding dimension of 10.
        When it is not given, the last layer (excluding task layers) in the
        branch model is used as the embedding layer.
    margin : double, optional
        Specifies the margin value used by the embedding model. When it is
        not given, the margin is 2.0 for Siamese and 0.0 otherwise.

    Returns
    -------
    :class:`Model`

    '''
    # check the branch type
    if not isinstance(branch, Model):
        raise DLPyError('The branch option must contain a valid model')

    # the branch must be built using the functional API;
    # only a functional model has the output_layers attribute
    if not hasattr(branch, 'output_layers'):
        print('NOTE: Convert the branch model into a functional model.')
        branch_tensor = branch.to_functional_model()
    else:
        branch_tensor = deepcopy(branch)

    # always reset this local tensor to 0
    branch_tensor.number_of_instances = 0

    # the branch cannot contain other task layers
    if len(branch_tensor.output_layers) != 1:
        raise DLPyError('The branch model cannot contain more than one output layer')
    elif branch_tensor.output_layers[0].type == OutputLayer.type or \
            branch_tensor.output_layers[0].type == Keypoints.type:
        print('NOTE: Remove the task layers from the model.')
        branch_tensor.layers.remove(branch_tensor.output_layers[0])
        branch_tensor.output_layers[0] = branch_tensor.layers[-1]
    elif branch_tensor.output_layers[0].can_be_last_layer:
        raise DLPyError('The branch model cannot contain a task layer other than '
                        'an output or keypoints layer.')

    # check embedding_model_type
    if embedding_model_type.lower() not in ['siamese', 'triplet', 'quartet']:
        raise DLPyError('Only Siamese, Triplet, and Quartet are valid.')

    if embedding_model_type.lower() == 'siamese':
        if margin is None:
            margin = 2.0
        cls.number_of_branches = 2
    elif embedding_model_type.lower() == 'triplet':
        if margin is None:
            margin = 0.0
        cls.number_of_branches = 3
    elif embedding_model_type.lower() == 'quartet':
        if margin is None:
            margin = 0.0
        cls.number_of_branches = 4

    cls.embedding_model_type = embedding_model_type

    # build the branches
    input_layers = []
    branch_layers = []
    for i_branch in range(cls.number_of_branches):
        temp_input_layer = Input(**branch_tensor.layers[0].config,
                                 name=cls.input_layer_name_prefix + str(i_branch))
        temp_branch = branch_tensor(temp_input_layer)  # returns a list of tensors
        if embedding_layer:
            temp_embed_layer = deepcopy(embedding_layer)
            temp_embed_layer.name = cls.embedding_layer_name_prefix + str(i_branch)
            temp_branch = temp_embed_layer(temp_branch)
            # change the tensor to a list
            temp_branch = [temp_branch]
        else:
            # change the last layer name to the embedding layer name
            temp_branch[-1]._op.name = cls.embedding_layer_name_prefix + str(i_branch)

        if i_branch == 0:
            cls.branch_input_tensor = temp_input_layer
            if len(temp_branch) == 1:
                cls.branch_output_tensor = temp_branch[0]
            else:
                cls.branch_output_tensor = temp_branch

        # append these layers to the current branch
        input_layers.append(temp_input_layer)
        branch_layers = branch_layers + temp_branch

    # add the embedding loss layer
    loss_layer = EmbeddingLoss(margin=margin, name=cls.embedding_loss_layer_name)(branch_layers)

    # create the model DAG using all the above model information
    model = EmbeddingModel(branch.conn, model_table=model_table,
                           inputs=input_layers, outputs=loss_layer)

    # share weights across branches
    # get all layer names from one branch
    num_l = int((len(model.layers) - 1) / cls.number_of_branches)
    br1_name = [i.name for i in model.layers[:num_l - 1]]

    # build the list that contains the shared layers
    share_list = []
    n_id = 0
    n_to = n_id + cls.number_of_branches
    for l in br1_name[1:]:
        share_list.append({l: [l + '_' + str(i + 1) for i in range(n_id + 1, n_to)]})

    # add the embedding layers
    share_list.append({cls.embedding_layer_name_prefix + str(0):
                       [cls.embedding_layer_name_prefix + str(i)
                        for i in range(1, cls.number_of_branches)]})

    model.share_weights(share_list)
    model.compile()

    # generate data_specs
    if embedding_model_type.lower() == 'siamese':
        cls.data_specs = [DataSpec(type_='image', layer=cls.input_layer_name_prefix + '0',
                                   data=['_image_']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '1',
                                   data=['_image_1']),
                          DataSpec(type_='numnom', layer=cls.embedding_loss_layer_name,
                                   data=['_dissimilar_'])]
    elif embedding_model_type.lower() == 'triplet':
        cls.data_specs = [DataSpec(type_='image', layer=cls.input_layer_name_prefix + '0',
                                   data=['_image_']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '1',
                                   data=['_image_1']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '2',
                                   data=['_image_2'])]
    elif embedding_model_type.lower() == 'quartet':
        cls.data_specs = [DataSpec(type_='image', layer=cls.input_layer_name_prefix + '0',
                                   data=['_image_']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '1',
                                   data=['_image_1']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '2',
                                   data=['_image_2']),
                          DataSpec(type_='image', layer=cls.input_layer_name_prefix + '3',
                                   data=['_image_3'])]

    return model

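# Usage sketch (hedged; assumes ResNet18_SAS, Dense, and EmbeddingModel are
# importable and `conn` is a live session): build a Siamese model whose
# branches share weights and embed into 128 dimensions. A pretrained or
# stripped classification model can serve as the branch.
#
#     branch = ResNet18_SAS(conn, width=224, height=224)
#     siamese = EmbeddingModel.build_embedding_model(
#         branch, model_table='siamese_model', embedding_model_type='Siamese',
#         embedding_layer=Dense(n=128), margin=2.0)
#     siamese.print_summary()
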