Beispiel #1
0
def s_identity_block(input_tensor, kernel_size, filters, stage, block, prefix='s_',
                     use_bias=True, train_bn=None):
    """Residual bottleneck block whose shortcut is the untouched input.

    The main path is: 1x1 conv -> BN -> ReLU -> (k x k) conv with 'same'
    padding -> BN -> ReLU -> 1x1 conv -> BN. Its output is added to
    `input_tensor` and the sum goes through a final, named ReLU.

    # Arguments
        input_tensor: tensor fed to both the main path and the shortcut
        kernel_size: side length of the square kernel of the middle conv
        filters: sequence of exactly three integers, the filter counts of
            the three conv layers on the main path (in order)
        stage: stage label; converted with str() when building layer names
        block: string block label ('a', 'b', ...) used in layer names
        prefix: layer name prefix to distinguish teacher and student network
        use_bias: Boolean, forwarded to every conv layer of the block
        train_bn: forwarded as `training` to every BatchNorm call

    # Returns
        The output tensor of the final ReLU, whose layer is named
        `<prefix>res<stage><block>_out`.
    """
    reduce_filters, mid_filters, expand_filters = filters
    block_tag = str(stage) + block
    conv_name_base = prefix + 'res' + block_tag + '_branch'
    bn_name_base = prefix + 'bn' + block_tag + '_branch'

    def _conv_bn(tensor, n_filters, kernel, suffix, **conv_kwargs):
        # One convolution followed by its batch-norm; both layers share the
        # same branch suffix ('2a', '2b' or '2c') in their names.
        tensor = KL.Conv2D(n_filters, kernel, name=conv_name_base + suffix,
                           use_bias=use_bias, **conv_kwargs)(tensor)
        return modellib.BatchNorm(name=bn_name_base + suffix)(
            tensor, training=train_bn)

    # Main path: 1x1 -> kxk -> 1x1
    main = _conv_bn(input_tensor, reduce_filters, (1, 1), '2a')
    main = KL.Activation('relu')(main)

    main = _conv_bn(main, mid_filters, (kernel_size, kernel_size), '2b',
                    padding='same')
    main = KL.Activation('relu')(main)

    # No activation after the last BN: ReLU is applied after the addition.
    main = _conv_bn(main, expand_filters, (1, 1), '2c')

    # Identity shortcut: the input is added as-is, without a projection conv.
    merged = KL.Add()([main, input_tensor])
    return KL.Activation('relu', name=prefix + 'res' + block_tag + '_out')(merged)
Beispiel #2
0
def fpn_mask_graph(rois,
                   feature_maps,
                   image_meta,
                   pool_size,
                   num_classes,
                   train_bn=True):
    """Builds the computation graph of the mask head of Feature Pyramid Network.

    The head predicts a single mask channel per ROI and then repeats that
    channel `num_classes` times along the last axis.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns: Masks [batch, roi_count, height, width, num_classes]
    """
    # ROI Pooling
    # Shape: [batch, boxes, pool_height, pool_width, channels]
    roi_align = modellib.PyramidROIAlign([pool_size, pool_size],
                                         name="roi_align_mask")
    feats = roi_align([rois, image_meta] + feature_maps)

    # Four identical conv -> BN -> ReLU stages, applied per ROI.
    stage_names = (
        ("mrcnn_mask_conv1", 'mrcnn_mask_bn1'),
        ("mrcnn_mask_conv2", 'mrcnn_mask_bn2'),
        ("mrcnn_mask_conv3", 'mrcnn_mask_bn3'),
        ("mrcnn_mask_conv4", 'mrcnn_mask_bn4'),
    )
    for conv_name, bn_name in stage_names:
        feats = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                                   name=conv_name)(feats)
        feats = KL.TimeDistributed(modellib.BatchNorm(),
                                   name=bn_name)(feats, training=train_bn)
        feats = KL.Activation('relu')(feats)

    # 2x2 transposed convolution with stride 2.
    deconv = KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu")
    feats = KL.TimeDistributed(deconv, name="mrcnn_mask_deconv")(feats)

    # One sigmoid channel per ROI: the mask itself is class-agnostic.
    mask_conv = KL.Conv2D(1, (1, 1), strides=1, activation="sigmoid")
    single_mask = KL.TimeDistributed(mask_conv, name="mrcnn_mask")(feats)

    # Duplicate output for fg/bg detections: the same tensor is repeated
    # once per class along the channel axis.
    return KL.Concatenate(axis=-1)([single_mask] * num_classes)
Beispiel #3
0
def fpn_classifier_graph(rois, feature_maps, image_meta,
                         pool_size, num_classes, train_bn=True, fc_layers_size=1024):
    """Builds the computation graph of the feature pyramid network classifier
    and regressor heads.

    The box regressor here is class-agnostic: a single set of four deltas is
    predicted per ROI and then repeated `num_classes` times.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers
    fc_layers_size: Number of filters of the two Conv2D layers that act as
                    fully connected layers.

    Returns:
        logits: [batch, num_rois, num_classes] classifier logits (before softmax)
        probs: [batch, num_rois, num_classes] classifier probabilities
        bbox_deltas: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]
                     Deltas to apply to proposal boxes; identical for every
                     class.
    """
    def _td_bn_relu(tensor, bn_name):
        # Per-ROI batch norm followed by ReLU.
        tensor = KL.TimeDistributed(modellib.BatchNorm(), name=bn_name)(
            tensor, training=train_bn)
        return KL.Activation('relu')(tensor)

    # ROI Pooling
    # Shape: [batch, num_boxes, pool_height, pool_width, channels]
    roi_align = modellib.PyramidROIAlign([pool_size, pool_size],
                                         name="roi_align_classifier")
    feats = roi_align([rois, image_meta] + feature_maps)

    # Two FC layers of width fc_layers_size (implemented with Conv2D for
    # consistency). The first kernel covers the whole pooled map, so with
    # 'valid' padding the spatial dimensions collapse to 1x1.
    fc1 = KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid")
    feats = KL.TimeDistributed(fc1, name="mrcnn_class_conv1")(feats)
    feats = _td_bn_relu(feats, 'mrcnn_class_bn1')

    fc2 = KL.Conv2D(fc_layers_size, (1, 1))
    feats = KL.TimeDistributed(fc2, name="mrcnn_class_conv2")(feats)
    feats = _td_bn_relu(feats, 'mrcnn_class_bn2')

    # Drop the two singleton spatial axes (axis 3 first, then axis 2).
    shared = KL.Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2),
                       name="pool_squeeze")(feats)

    # Classifier head
    mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
                                            name='mrcnn_class_logits')(shared)
    mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
                                     name="mrcnn_class")(mrcnn_class_logits)

    # BBox head
    # [batch, boxes, (dy, dx, log(dh), log(dw))] -- one delta set per ROI
    deltas = KL.TimeDistributed(KL.Dense(4, activation='linear'),
                                name='mrcnn_bbox_fc')(shared)
    # Reshape to [batch, boxes, 1, (dy, dx, log(dh), log(dw))]. The ROI count
    # is read from the static shape of the dense output.
    static_shape = K.int_shape(deltas)
    deltas = KL.Reshape((static_shape[1], 1, 4), name="mrcnn_bbox")(deltas)
    # Duplicate output for fg/bg detections: repeat along the class axis.
    mrcnn_bbox = KL.Concatenate(axis=-2)([deltas] * num_classes)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
Beispiel #4
0
def s_resnet_graph(input_image, architecture, prefix='s_', train_bn=None):
    """Build a ResNet-50/101 graph whose layer names carry `prefix`.

        input_image: input to feed the ResNet graph
        architecture: Can be resnet50 or resnet101; selects how many
            identity blocks stage 4 contains (5 or 22)
        prefix: layer name prefix to distinguish teacher and student network
        train_bn: Boolean. Train or freeze Batch Norm layers

    Returns the list [C1, C2, C3, C4, C5] holding the output tensor of each
    of the five stages.
    """
    assert architecture in ("resnet50", "resnet101")

    def _identity_run(tensor, stage_filters, stage, labels):
        # Chain one identity block per label; a fresh filter list is handed
        # to every call.
        for label in labels:
            tensor = s_identity_block(tensor, 3, list(stage_filters),
                                      stage=stage, block=label,
                                      prefix=prefix, train_bn=train_bn)
        return tensor

    # Stage 1: padded 7x7/2 convolution, BN, ReLU and 3x3/2 max pooling
    stem = KL.ZeroPadding2D((3, 3))(input_image)
    stem = KL.Conv2D(64, (7, 7), strides=(2, 2), name=prefix + 'conv1',
                     use_bias=True)(stem)
    stem = modellib.BatchNorm(name=prefix + 'bn_conv1')(stem, training=train_bn)
    stem = KL.Activation('relu')(stem)
    C1 = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(stem)
    # output: N x 64 x 1/4 x 1/4

    # Stage 2 (the only stage whose conv block is given explicit strides)
    filters2 = (64, 64, 256)
    net = s_conv_block(C1, 3, list(filters2), stage=2, block='a',
                       prefix=prefix, strides=(1, 1), train_bn=train_bn)
    C2 = _identity_run(net, filters2, 2, 'bc')
    # output: N x 256 x 1/4 x 1/4

    # Stage 3
    filters3 = (128, 128, 512)
    net = s_conv_block(C2, 3, list(filters3), stage=3, block='a',
                       prefix=prefix, train_bn=train_bn)
    C3 = _identity_run(net, filters3, 3, 'bcd')
    # output: N x 512 x 1/8 x 1/8

    # Stage 4: the identity-block count depends on the architecture; the
    # blocks are labelled 'b', 'c', ... in order.
    filters4 = (256, 256, 1024)
    net = s_conv_block(C3, 3, list(filters4), stage=4, block='a',
                       prefix=prefix, train_bn=train_bn)
    identity_count = {"resnet50": 5, "resnet101": 22}[architecture]
    stage4_labels = [chr(ord('b') + offset) for offset in range(identity_count)]
    C4 = _identity_run(net, filters4, 4, stage4_labels)
    # output: N x 1024 x 1/16 x 1/16

    # Stage 5
    filters5 = (512, 512, 2048)
    net = s_conv_block(C4, 3, list(filters5), stage=5, block='a',
                       prefix=prefix, train_bn=train_bn)
    C5 = _identity_run(net, filters5, 5, 'bc')
    # output: N x 2048 x 1/32 x 1/32

    return [C1, C2, C3, C4, C5]
Beispiel #5
0
def fpn_orientation_graph(rois,
                          feature_maps,
                          mrcnn_probs,
                          mrcnn_bbox,
                          image_meta,
                          pool_size,
                          train_bn=True):
    """Builds the computation graph of the feature pyramid network orientation
    head that regresses a 6-value rotation representation per ROI.

    Pooled ROI features are reduced to one vector per ROI, concatenated with
    the class probabilities and the flattened box deltas, and passed through
    dense layers ending in 6 outputs. Those outputs are converted to rotation
    matrices and then to Euler angles by helpers from `or_tools`.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    mrcnn_probs: classifier probabilities.
    mrcnn_bbox: Deltas to apply to proposal boxes. Must be a 4-D tensor whose
                last three dimensions are statically known, because they are
                read with K.int_shape to build the flattening Reshape.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns:
        r_matrices: rotation matrices computed per ROI (presumably
                    [batch, num_rois, 3, 3] -- confirm against or_tools)
        angles: rotation angles in Euler ZYX (radians) derived from
                r_matrices (presumably [batch, num_rois, 3] -- confirm)
    """
    roi_align = model.PyramidROIAlign([pool_size, pool_size],
                                      name="roi_align_orientation")
    feats = roi_align([rois, image_meta] + feature_maps)

    # Two 1024 FC layers (implemented with Conv2D for consistency). The first
    # kernel spans the whole pooled map, so 'valid' padding leaves 1x1 maps.
    fc1 = KL.Conv2D(1024, (pool_size, pool_size), padding="valid")
    feats = KL.TimeDistributed(fc1, name="mrcnn_or_conv1")(feats)
    feats = KL.TimeDistributed(model.BatchNorm(),
                               name='mrcnn_or_bn1')(feats, training=train_bn)
    feats = KL.Activation('relu')(feats)

    fc2 = KL.Conv2D(1024, (1, 1))
    feats = KL.TimeDistributed(fc2, name="mrcnn_or_conv2")(feats)
    feats = KL.TimeDistributed(model.BatchNorm(),
                               name='mrcnn_or_bn2')(feats, training=train_bn)
    feats = KL.Activation('relu')(feats)

    # Remove the two singleton spatial axes (axis 3 first, then axis 2).
    feats = KL.Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2),
                      name="pool_squeeze_or")(feats)

    # Add class probabilities
    feats = KL.Concatenate(axis=2)([feats, mrcnn_probs])

    # Add detected bounding box, with its last two axes merged into one so
    # it can be concatenated with the per-ROI feature vector.
    bbox_shape = K.int_shape(mrcnn_bbox)
    flat_bbox = KL.Reshape(
        (bbox_shape[1], bbox_shape[2] * bbox_shape[3]))(mrcnn_bbox)
    feats = KL.Concatenate(axis=2)([feats, flat_bbox])

    # Two Dense(1024) -> LeakyReLU -> BatchNormalization stages. Note that
    # these use the stock Keras BatchNormalization, not model.BatchNorm.
    dense_stages = (
        ('mrcnn_or_d1', 'mrcnn_or_bn3'),
        ('mrcnn_or_d2', 'mrcnn_or_bn4'),
    )
    for dense_name, bn_name in dense_stages:
        feats = KL.TimeDistributed(KL.Dense(1024), name=dense_name)(feats)
        feats = KL.LeakyReLU(alpha=0.2)(feats)
        feats = KL.TimeDistributed(KL.BatchNormalization(),
                                   name=bn_name)(feats, training=train_bn)

    # Six regression outputs per ROI, consumed by the ortho6d conversion.
    ortho6d = KL.TimeDistributed(KL.Dense(6), name='mrcnn_or_d5')(feats)

    to_matrix = KL.Lambda(
        lambda t: or_tools.compute_rotation_matrix_from_ortho6d(t))
    r_matrices = KL.TimeDistributed(to_matrix)(ortho6d)

    to_euler = KL.Lambda(
        lambda m: or_tools.compute_euler_angles_from_rotation_matrices(m))
    angles = KL.TimeDistributed(to_euler)(r_matrices)

    return r_matrices, angles
def fpn_orientation_graph(rois,
                          feature_maps,
                          mrcnn_probs,
                          mrcnn_bbox,
                          image_meta,
                          pool_size,
                          train_bn=True):
    """Builds the computation graph of the feature pyramid network orientation
    head that produces per-angle outputs through `angle_block`.

    Pooled ROI features go through three 'valid' convolutions and two
    convolutional FC layers, are squeezed to one vector per ROI, and are
    concatenated with the class probabilities and the flattened box deltas.
    `angle_block` is then invoked once for each of the three angle indices
    (0, 1, 2) and the three kinds of output are concatenated along axis 2.

    NOTE: the kernel sizes are fixed (5, 3, 3, then 6, all 'valid'), which
    shrinks each spatial side by 13 in total. The later squeeze of axes 3
    and 2 therefore only works when `pool_size` is 14.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    mrcnn_probs: classifier probabilities.
    mrcnn_bbox: Deltas to apply to proposal boxes. Must be a 4-D tensor whose
                last three dimensions are statically known, because they are
                read with K.int_shape to build the flattening Reshape.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns:
        logits: first outputs of angle_block for the three angles,
                concatenated on axis 2 (classifier logits before softmax)
        probs: second outputs of angle_block, concatenated on axis 2
               (classifier probabilities)
        res: third outputs of angle_block, concatenated on axis 2
             (presumably per-bin residuals -- confirm against angle_block)
    """
    # ROI Pooling
    # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
    roi_align = model.PyramidROIAlign([pool_size, pool_size],
                                      name="roi_align_orientation")
    feats = roi_align([rois, image_meta] + feature_maps)

    # (filters, kernel, conv layer name, batch-norm layer name)
    # Rows 1-3 are plain convolutions; rows 4-5 are the two 1024 FC layers
    # (implemented with Conv2D for consistency).
    conv_stack = (
        (256, (5, 5), "mrcnn_orientation_conv1", 'mrcnn_orientation_bn1'),
        (256, (3, 3), "mrcnn_orientation_conv2", 'mrcnn_orientation_bn2'),
        (256, (3, 3), "mrcnn_orientation_conv3", 'mrcnn_orientation_bn3'),
        (1024, (6, 6), "mrcnn_orientation_conv4", 'mrcnn_orientation_bn4'),
        (1024, (1, 1), "mrcnn_orientation_conv5", 'mrcnn_orientation_bn5'),
    )
    for n_filters, kernel, conv_name, bn_name in conv_stack:
        # "valid" is also Conv2D's default padding, so spelling it out for
        # the last (1x1) layer does not change that layer.
        feats = KL.TimeDistributed(
            KL.Conv2D(n_filters, kernel, padding="valid"),
            name=conv_name)(feats)
        feats = KL.TimeDistributed(model.BatchNorm(),
                                   name=bn_name)(feats, training=train_bn)
        feats = KL.Activation('relu')(feats)

    # Squeezed feature maps
    # [batch, num_rois, fc_layers_size]
    roi_vector = KL.Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2),
                           name="pool_squeeze_orientation")(feats)

    # Add class probabilities
    roi_vector = KL.Concatenate(axis=2)([roi_vector, mrcnn_probs])

    # Add detected bounding box, with its last two axes merged into one.
    bbox_shape = K.int_shape(mrcnn_bbox)
    flat_bbox = KL.Reshape(
        (bbox_shape[1], bbox_shape[2] * bbox_shape[3]))(mrcnn_bbox)
    roi_vector = KL.Concatenate(axis=2)([roi_vector, flat_bbox])

    # One angle_block per angle index; gather the three outputs of every
    # block into three parallel lists.
    collected = ([], [], [])
    for angle in range(3):
        angle_logits, angle_probs, angle_res = angle_block(
            roi_vector, angle, train_bn)
        for bucket, tensor in zip(collected,
                                  (angle_logits, angle_probs, angle_res)):
            bucket.append(tensor)
    logit_parts, prob_parts, res_parts = collected

    logits = KL.Concatenate(axis=2)(logit_parts)
    probs = KL.Concatenate(axis=2)(prob_parts)
    res = KL.Concatenate(axis=2)(res_parts)

    return logits, probs, res