def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), use_bias=True, train_bn=True): """conv_block is the block that has a conv layer at shortcut # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the nb_filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names use_bias: Boolean. To use or not use a bias in conv layers. train_bn: Boolean. Train or freeze Batch Norm layers Note that from stage 3, the first conv layer at main path is with subsample=(2,2) And the shortcut should have subsample=(2,2) as well """ nb_filter1, nb_filter2, nb_filter3 = filters conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) x = layers.BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', use_bias=use_bias)(x) x = layers.BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', use_bias=use_bias)(x) x = layers.BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', use_bias=use_bias)(input_tensor) shortcut = layers.BatchNorm(name=bn_name_base + '1')(shortcut, training=train_bn) x = KL.Add()([x, shortcut]) x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) return x
def fpn_classifier_graph(rois, feature_maps, image_meta, pool_size, nb_classes, train_bn=True, fc_layers_size=1024): """Builds the computation graph of the feature pyramid network classifier and regressor heads. rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized coordinates. feature_maps: List of feature maps from different layers of the pyramid, [P2, P3, P4, P5]. Each has a different resolution. image_meta: [batch, (meta data)] Image details. See compose_image_meta() pool_size: The width of the square feature map generated from ROI Pooling. nb_classes: number of classes, which determines the depth of the results train_bn: Boolean. Train or freeze Batch Norm layers fc_layers_size: Size of the 2 FC layers Returns: logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax) probs: [batch, num_rois, NUM_CLASSES] classifier probabilities bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to proposal boxes """ # ROI Pooling # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] x = roi_align.PyramidROIAlign([pool_size, pool_size], name="roi_align_classifier")([rois, image_meta] + feature_maps) # Two 1024 FC layers (implemented with Conv2D for consistency) x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"), name="mrcnn_class_conv1")(x) x = KL.TimeDistributed(layers.BatchNorm(), name='mrcnn_class_bn1')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)), name="mrcnn_class_conv2")(x) x = KL.TimeDistributed(layers.BatchNorm(), name='mrcnn_class_bn2')(x, training=train_bn) x = KL.Activation('relu')(x) shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2), name="pool_squeeze")(x) # Classifier head mrcnn_class_logits = KL.TimeDistributed(KL.Dense(nb_classes), name='mrcnn_class_logits')(shared) mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"), name="mrcnn_class")(mrcnn_class_logits) # BBox head # [batch, num_rois, NB_CLASSES * (dy, dx, log(dh), log(dw))] x = KL.TimeDistributed(KL.Dense(nb_classes * 4, activation='linear'), name='mrcnn_bbox_fc')(shared) # Reshape to [batch, num_rois, NB_CLASSES, (dy, dx, log(dh), log(dw))] s = K.int_shape(x) mrcnn_bbox = KL.Reshape((s[1], nb_classes, 4), name="mrcnn_bbox")(x) return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
def build_fpn_mask_graph(rois, feature_maps, image_meta, pool_size, nb_classes, train_bn=True): """Builds the computation graph of the mask head of Feature Pyramid Network. rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized coordinates. feature_maps: List of feature maps from different layers of the pyramid, [P2, P3, P4, P5]. Each has a different resolution. image_meta: [batch, (meta data)] Image details. See compose_image_meta() pool_size: The width of the square feature map generated from ROI Pooling. num_classes: number of classes, which determines the depth of the results train_bn: Boolean. Train or freeze Batch Norm layers Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NB_CLASSES] """ # ROI Pooling # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] x = roi_align.PyramidROIAlign([pool_size, pool_size], name="roi_align_mask")([rois, image_meta] + feature_maps) # Conv layers x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv1")(x) x = KL.TimeDistributed(layers.BatchNorm(), name='mrcnn_mask_bn1')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv2")(x) x = KL.TimeDistributed(layers.BatchNorm(), name='mrcnn_mask_bn2')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv3")(x) x = KL.TimeDistributed(layers.BatchNorm(), name='mrcnn_mask_bn3')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), name="mrcnn_mask_conv4")(x) x = KL.TimeDistributed(layers.BatchNorm(), name='mrcnn_mask_bn4')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"), name="mrcnn_mask_deconv")(x) x = KL.TimeDistributed(KL.Conv2D(nb_classes, (1, 1), strides=1, activation="sigmoid"), name="mrcnn_mask")(x) return x
def identity_block(input_tensor, kernel_size, filters, stage, block, use_bias=True, train_bn=True): """The identity_block is the block that has no conv layer at shortcut # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the nb_filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names use_bias: Boolean. To use or not use a bias in conv layers. train_bn: Boolean. Train or freeze Batch Norm layers """ nb_filter1, nb_filter2, nb_filter3 = filters conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) x = layers.BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', use_bias=use_bias)(x) x = layers.BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) x = KL.Activation('relu')(x) x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', use_bias=use_bias)(x) x = layers.BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) x = KL.Add()([x, input_tensor]) x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) return x
def resnet_graph(input_image, architecture, stage5=False, train_bn=True): """Build a ResNet graph. architecture: Can be resnet50 or resnet101 stage5: Boolean. If False, stage5 of the network is not created train_bn: Boolean. Train or freeze Batch Norm layers """ assert architecture in ["resnet50", "resnet101"] # Stage 1 x = KL.ZeroPadding2D((3, 3))(input_image) x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x) x = layers.BatchNorm(name='bn_conv1')(x, training=train_bn) x = KL.Activation('relu')(x) C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) # Stage 2 x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), train_bn=train_bn) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', train_bn=train_bn) C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', train_bn=train_bn) # Stage 3 x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', train_bn=train_bn) x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', train_bn=train_bn) x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', train_bn=train_bn) C3 = x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', train_bn=train_bn) # Stage 4 x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', train_bn=train_bn) block_count = {"resnet50": 5, "resnet101": 22}[architecture] for i in range(block_count): x = identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i), train_bn=train_bn) C4 = x # Stage 5 if stage5: x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', train_bn=train_bn) x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', train_bn=train_bn) C5 = x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', train_bn=train_bn) else: C5 = None return [C1, C2, C3, C4, C5]