import tensorflow as tf
from keras import backend as K
from keras import layers as nn
from keras.layers import (Conv2D, Dense, Dropout, Flatten, Lambda,
                          TimeDistributed, multiply)

# conv1x1, depthwise_conv5x5, batchnorm, se_block, channel_shuffle_lambda,
# is_channels_first and get_channel_axis are the repo's shared convolution
# helpers; PSRoiAlignPooling is the repo's PS ROI Align layer.


def context_enhancement_module(x1, x2, x3, size, name='cem_block'):
    """Create a Context Enhancement Module (CEM) block

    Args:
        x1: C4 feature map
        x2: C5 feature map, bilinearly upsampled to x1's spatial size
        x3: global context feature pooled from C5, spatial size 1x1
        size: spatial size of x1 and of the CEM output

    Returns:
        element-wise sum of the three 245-channel lateral maps
    """
    x1 = conv1x1(x1, in_channels=x1.shape[3], out_channels=245, strides=1,
                 groups=1, use_bias=True, name='{}/c4_lat'.format(name))
    # Upsample C5 to C4's spatial size before its lateral 1x1 convolution.
    x2 = nn.Lambda(lambda img: tf.image.resize_bilinear(
        img, [size, size], align_corners=True,
        name='{}/c5_resize'.format(name)))(x2)
    x2 = conv1x1(x2, in_channels=x2.shape[3], out_channels=245, strides=1,
                 groups=1, use_bias=True, name='{}/c5_lat'.format(name))
    # Broadcast the 1x1 global context feature to (size, size) by adding a
    # zero tensor of the target shape.
    zero = K.zeros((1, size, size, 528))
    x3 = nn.Lambda(lambda img: nn.add([img, zero]))(x3)
    x3 = conv1x1(x3, in_channels=x3.shape[3], out_channels=245, strides=1,
                 groups=1, use_bias=True, name='{}/c_glb_lat'.format(name))
    return nn.add([x1, x2, x3])
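
# Usage sketch (hedged): the tensors below are illustrative placeholders,
# not defined in this snippet; the shapes follow the 528-channel global
# feature implied by the K.zeros call above.
#
#   c4 = ...     # stage-4 feature map, e.g. (1, 20, 20, 120)
#   c5 = ...     # stage-5 feature map, e.g. (1, 10, 10, 528)
#   c_glb = ...  # global average pool of c5, reshaped to (1, 1, 1, 528)
#   cem_out = context_enhancement_module(c4, c5, c_glb, size=20)
#   # cem_out: (1, 20, 20, 245), shared by the RPN and the detection head
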
def classifier_layer(base_layers, input_rois, num_rois, nb_classes=3):
    """Create a classifier layer

    Args:
        base_layers: snet feature map
        input_rois: `(1, num_rois, 4)` list of ROIs, ordered (x, y, w, h)
        num_rois: number of ROIs processed at a time (4 here)
        nb_classes: number of classes (default 3)

    Returns:
        list(out_class, out_regr)
        out_class: classifier layer output
        out_regr: regression layer output
    """
    # Spatial Attention Module (SAM): 1x1 conv + BN + sigmoid gate over
    # the backbone feature map.
    x = conv1x1(base_layers, in_channels=base_layers.shape[3],
                out_channels=245, strides=1, groups=1, use_bias=True,
                name='sam/conv1x1')
    x = batchnorm(x, name='sam/bn')
    x = Lambda(K.sigmoid)(x)
    x = multiply([x, base_layers])

    pooling_regions = 7
    alpha = 5

    # out_roi_pool.shape = (1, num_rois, channels, pool_size, pool_size)
    # num_rois (4) 7x7 ROI pooling
    # out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([x, input_rois])
    out_roi_pool = PSRoiAlignPooling(pooling_regions, num_rois,
                                     alpha)([x, input_rois])

    # Flatten the pooled features and connect them to a FC layer with dropout
    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
    out = TimeDistributed(Dense(1024, activation='relu', name='fc'))(out)
    out = TimeDistributed(Dropout(0.5))(out)

    # There are two output layers
    out_class = TimeDistributed(
        Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
        name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(
        Dense(4 * (nb_classes - 1), activation='linear',
              kernel_initializer='zero'),
        name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
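
# Usage sketch (hedged): `feature_map` is the SAM input (e.g. the CEM
# output) and `roi_input` a Keras Input; both are illustrative
# placeholders, not defined in this snippet.
#
#   roi_input = Input(shape=(None, 4))  # (x, y, w, h) per ROI
#   out_class, out_regr = classifier_layer(feature_map, roi_input,
#                                          num_rois=4, nb_classes=3)
#   # out_class: (1, num_rois, 3) softmax class scores
#   # out_regr:  (1, num_rois, 4 * 2) bbox offsets (no bg regression)
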
def rpn_layer(base_layers, num_anchors):
    """Create a rpn layer

    Step1: Pass the feature map from the base layer through a 5x5
        depthwise convolution followed by a 1x1 convolution with 256
        output channels, keeping 'same' padding to preserve the feature
        map's size
    Step2: Pass step1 to two (1, 1) convolutional layers that replace
        fully connected layers
        classification layer: num_anchors (9 here) channels for the 0/1
            sigmoid activation output
        regression layer: num_anchors * 4 (36 here) channels for bbox
            regression with linear activation

    Args:
        base_layers: snet feature map
        num_anchors: 9 here

    Returns:
        [x_class, x_regr, base_layers]
        x_class: classification for whether it's an object
        x_regr: bboxes regression
        base_layers: snet feature map
    """
    x = depthwise_conv5x5(base_layers, channels=245, strides=1,
                          name='rpn/conv5x5')
    x = conv1x1(x, in_channels=x.shape[3], out_channels=256, strides=1,
                groups=1, use_bias=True, name='rpn/conv1x1')

    x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid',
                     kernel_initializer='uniform',
                     name='rpn_out_class')(x)
    x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear',
                    kernel_initializer='zero',
                    name='rpn_out_regress')(x)

    return [x_class, x_regr, base_layers]
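
# Usage sketch (hedged): wiring the RPN onto the CEM output; `cem_out` is
# an illustrative placeholder carried over from the sketch above.
#
#   num_anchors = 9  # e.g. 3 scales x 3 aspect ratios
#   rpn_class, rpn_regr, shared = rpn_layer(cem_out, num_anchors)
#   # rpn_class: (1, H, W, 9) objectness scores
#   # rpn_regr:  (1, H, W, 36) per-anchor bbox offsets
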
def shuffle_unit(x, in_channels, out_channels, downsample, use_se,
                 use_residual, name="shuffle_unit"):
    """ShuffleNetV2-style unit with a 5x5 depthwise convolution

    Args:
        x: input tensor
        in_channels: number of input channels
        out_channels: number of output channels
        downsample: whether to halve the spatial resolution (strides=2)
        use_se: whether to apply a squeeze-and-excitation block
        use_residual: whether to add a residual connection (only applied
            when not downsampling)
        name: block name prefix

    Returns:
        output tensor with out_channels channels
    """
    mid_channels = out_channels // 2

    if downsample:
        # Shortcut branch: strided depthwise conv + 1x1 expansion.
        y1 = depthwise_conv5x5(x=x, channels=in_channels, strides=2,
                               name=name + "/dw_conv4")
        y1 = batchnorm(x=y1, name=name + "/dw_bn4")
        y1 = conv1x1(x=y1, in_channels=in_channels,
                     out_channels=mid_channels,
                     name=name + "/expand_conv5")
        y1 = batchnorm(x=y1, name=name + "/expand_bn5")
        y1 = nn.Activation("relu", name=name + "/expand_activ5")(y1)
        x2 = x
    else:
        # Split the input channels in half; the first half passes
        # through unchanged.
        in_split2_channels = in_channels // 2
        if is_channels_first():
            y1 = nn.Lambda(lambda z: z[:, 0:in_split2_channels, :, :])(x)
            x2 = nn.Lambda(lambda z: z[:, in_split2_channels:, :, :])(x)
        else:
            y1 = nn.Lambda(lambda z: z[:, :, :, 0:in_split2_channels])(x)
            x2 = nn.Lambda(lambda z: z[:, :, :, in_split2_channels:])(x)

    # Main branch: 1x1 compress -> 5x5 depthwise -> 1x1 expand.
    y2 = conv1x1(x=x2,
                 in_channels=(in_channels if downsample else mid_channels),
                 out_channels=mid_channels,
                 name=name + "/compress_conv1")
    y2 = batchnorm(x=y2, name=name + "/compress_bn1")
    y2 = nn.Activation("relu", name=name + "/compress_activ1")(y2)
    y2 = depthwise_conv5x5(x=y2, channels=mid_channels,
                           strides=(2 if downsample else 1),
                           name=name + "/dw_conv2")
    y2 = batchnorm(x=y2, name=name + "/dw_bn2")
    y2 = conv1x1(x=y2, in_channels=mid_channels, out_channels=mid_channels,
                 name=name + "/expand_conv3")
    y2 = batchnorm(x=y2, name=name + "/expand_bn3")
    y2 = nn.Activation("relu", name=name + "/expand_activ3")(y2)

    if use_se:
        y2 = se_block(x=y2, channels=mid_channels, name=name + "/se")
    if use_residual and not downsample:
        y2 = nn.add([y2, x2], name=name + "/add")

    # Concatenate the two branches and shuffle channels between groups.
    x = nn.concatenate([y1, y2], axis=get_channel_axis(),
                       name=name + "/concat")
    x = channel_shuffle_lambda(channels=out_channels, groups=2,
                               name=name + "/c_shuffle")(x)
    return x
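
# Hedged helper sketch (not from this repo): stacking shuffle units into a
# single SNet stage, downsampling in the first unit. The function name and
# its parameters are assumptions for illustration.
def snet_stage(x, in_channels, out_channels, units, name="stage"):
    for i in range(units):
        # Only the first unit changes the channel count and resolution.
        x = shuffle_unit(x,
                         in_channels=(in_channels if i == 0 else out_channels),
                         out_channels=out_channels,
                         downsample=(i == 0),
                         use_se=False,
                         use_residual=False,
                         name="{}/unit{}".format(name, i + 1))
    return x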