Esempio n. 1
0
def fpn_mask_graph(rois,
                   feature_maps,
                   image_meta,
                   pool_size,
                   num_classes,
                   train_bn=True):
    """Assemble the mask head of the Feature Pyramid Network.

    ROI-aligned features pass through four 3x3 conv / batch-norm / ReLU
    stages, a stride-2 transposed convolution and a final 1x1 sigmoid
    convolution with a single output channel. That one channel is then
    repeated num_classes times along the last axis, so every class slot
    holds the same class-agnostic mask.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns: Masks [batch, roi_count, height, width, num_classes]
    """
    # ROI pooling -> [batch, boxes, pool_height, pool_width, channels]
    roi_inputs = [rois, image_meta] + feature_maps
    x = modellib.PyramidROIAlign([pool_size, pool_size],
                                 name="roi_align_mask")(roi_inputs)

    # Four identical conv stages; only the layer names differ. The names are
    # spelled out literally so they stay greppable and match saved weights.
    stage_names = (
        ("mrcnn_mask_conv1", 'mrcnn_mask_bn1'),
        ("mrcnn_mask_conv2", 'mrcnn_mask_bn2'),
        ("mrcnn_mask_conv3", 'mrcnn_mask_bn3'),
        ("mrcnn_mask_conv4", 'mrcnn_mask_bn4'),
    )
    for conv_name, bn_name in stage_names:
        conv = KL.Conv2D(256, (3, 3), padding="same")
        x = KL.TimeDistributed(conv, name=conv_name)(x)
        norm = modellib.BatchNorm()
        x = KL.TimeDistributed(norm, name=bn_name)(x, training=train_bn)
        x = KL.Activation('relu')(x)

    # Upsample by 2, then reduce to a single sigmoid mask channel.
    upsample = KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu")
    x = KL.TimeDistributed(upsample, name="mrcnn_mask_deconv")(x)
    to_mask = KL.Conv2D(1, (1, 1), strides=1, activation="sigmoid")
    x = KL.TimeDistributed(to_mask, name="mrcnn_mask")(x)

    # Duplicate the single mask channel once per class (fg/bg detections).
    return KL.Concatenate(axis=-1)([x] * num_classes)
Esempio n. 2
0
def fpn_classifier_graph(rois, feature_maps, image_meta,
                         pool_size, num_classes, train_bn=True, fc_layers_size=1024):
    """Builds the computation graph of the feature pyramid network classifier
    and regressor heads.

    The box regressor here is class-agnostic: a single set of four deltas is
    predicted per ROI and then repeated num_classes times so the output has
    the per-class layout.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers
    fc_layers_size: Number of filters in each of the two FC layers.

    Returns:
        logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax)
        probs: [batch, num_rois, NUM_CLASSES] classifier probabilities
        bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
                     Deltas to apply to proposal boxes (the same deltas are
                     repeated for every class)
    """
    # ROI Pooling
    # Shape: [batch, num_boxes, pool_height, pool_width, channels]
    x = modellib.PyramidROIAlign([pool_size, pool_size],
                        name="roi_align_classifier")([rois, image_meta] + feature_maps)
    # Two FC layers of fc_layers_size units (implemented with Conv2D for
    # consistency). The first uses a pool_size x pool_size "valid" kernel so
    # the spatial dimensions collapse to 1x1.
    x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
                           name="mrcnn_class_conv1")(x)
    x = KL.TimeDistributed(modellib.BatchNorm(), name='mrcnn_class_bn1')(x, training=train_bn)
    x = KL.Activation('relu')(x)
    x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
                           name="mrcnn_class_conv2")(x)
    x = KL.TimeDistributed(modellib.BatchNorm(), name='mrcnn_class_bn2')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    # Drop the two 1x1 spatial axes: [batch, num_boxes, fc_layers_size]
    shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
                       name="pool_squeeze")(x)

    # Classifier head
    mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
                                            name='mrcnn_class_logits')(shared)
    mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
                                     name="mrcnn_class")(mrcnn_class_logits)

    # BBox head
    # [batch, boxes, (dy, dx, log(dh), log(dw))] -- one class-agnostic box
    x = KL.TimeDistributed(KL.Dense(4, activation='linear'),
                           name='mrcnn_bbox_fc')(shared)
    # Reshape to [batch, boxes, 1, (dy, dx, log(dh), log(dw))].
    # The static ROI count can be None when it is only known at run time;
    # Reshape rejects None, so let it infer that axis with -1 instead.
    s = K.int_shape(x)
    num_rois = s[1] if s[1] is not None else -1
    x = KL.Reshape((num_rois, 1, 4), name="mrcnn_bbox")(x)
    # Duplicate output for fg/bg detections:
    # [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
    mrcnn_bbox = KL.Concatenate(axis=-2)([x] * num_classes)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
Esempio n. 3
0
def fpn_orientation_graph(rois,
                          feature_maps,
                          mrcnn_probs,
                          mrcnn_bbox,
                          image_meta,
                          pool_size,
                          train_bn=True):
    """Builds the computation graph of the feature pyramid network orientation
    head.

    ROI-aligned features are reduced to a 1024-vector per ROI, concatenated
    with the class probabilities and the flattened box deltas, and passed
    through two dense layers. A final Dense(6) produces six values per ROI
    that or_tools converts to a rotation matrix and then to Euler angles.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    mrcnn_probs: classifier probabilities.
    mrcnn_bbox: Deltas to apply to proposal boxes. Must be 4-D; its last two
                axes are flattened before concatenation.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns:
        r_matrices: rotation matrices, presumably [batch, num_rois, 3, 3]
                    (exact shape depends on or_tools -- TODO confirm)
        angles: rotation angles in Euler ZYX (radians), presumably
                [batch, num_rois, 3] (TODO confirm against or_tools)
    """
    # ROI Pooling
    x = model.PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_orientation")([rois, image_meta] + feature_maps)
    # Two 1024 FC layers (implemented with Conv2D for consistency). The
    # pool_size x pool_size "valid" kernel collapses the spatial axes to 1x1.
    x = KL.TimeDistributed(KL.Conv2D(1024, (pool_size, pool_size),
                                     padding="valid"),
                           name="mrcnn_or_conv1")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_or_bn1')(x, training=train_bn)
    x = KL.Activation('relu')(x)
    x = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)), name="mrcnn_or_conv2")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_or_bn2')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    # Drop the two 1x1 spatial axes.
    x = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
                  name="pool_squeeze_or")(x)

    # Add class probabilities
    x = KL.Concatenate(axis=2)([x, mrcnn_probs])

    # Add detected bounding box, flattening its last two axes.
    # The static ROI count can be None when it is only known at run time;
    # Reshape rejects None, so let it infer that axis with -1 instead.
    s = K.int_shape(mrcnn_bbox)
    num_rois = s[1] if s[1] is not None else -1
    mrcnn_bbox = KL.Reshape((num_rois, s[2] * s[3]))(mrcnn_bbox)
    x = KL.Concatenate(axis=2)([x, mrcnn_bbox])

    x = KL.TimeDistributed(KL.Dense(1024), name='mrcnn_or_d1')(x)
    x = KL.LeakyReLU(alpha=0.2)(x)
    x = KL.TimeDistributed(KL.BatchNormalization(),
                           name='mrcnn_or_bn3')(x, training=train_bn)
    x = KL.TimeDistributed(KL.Dense(1024), name='mrcnn_or_d2')(x)
    x = KL.LeakyReLU(alpha=0.2)(x)
    x = KL.TimeDistributed(KL.BatchNormalization(),
                           name='mrcnn_or_bn4')(x, training=train_bn)
    # Six values per ROI, consumed by the ortho6d conversion below.
    x = KL.TimeDistributed(KL.Dense(6), name='mrcnn_or_d5')(x)

    r_matrices = KL.TimeDistributed(
        KL.Lambda(lambda t: or_tools.compute_rotation_matrix_from_ortho6d(t)))(
            x)
    angles = KL.TimeDistributed(
        KL.Lambda(lambda x: or_tools.
                  compute_euler_angles_from_rotation_matrices(x)))(r_matrices)

    return r_matrices, angles
Esempio n. 4
0
def fpn_orientation_graph(rois,
                          feature_maps,
                          mrcnn_probs,
                          mrcnn_bbox,
                          image_meta,
                          pool_size,
                          train_bn=True):
    """Builds the computation graph of the feature pyramid network orientation
    head.

    ROI-aligned features go through three "valid" convolutions and two FC
    layers, are concatenated with the class probabilities and the flattened
    box deltas, and are then fed to angle_block once for each of three angles.
    The per-angle outputs are concatenated along axis 2.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    mrcnn_probs: classifier probabilities.
    mrcnn_bbox: Deltas to apply to proposal boxes. Must be 4-D; its last two
                axes are flattened before concatenation.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
               Must be at least 9 so the three "valid" convolutions leave at
               least one pixel for the first FC layer (14 reproduces the
               original fixed 6x6 FC kernel).
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns:
        logits: per-angle logits from angle_block, concatenated on axis 2
        probs: per-angle probabilities from angle_block, concatenated on axis 2
        res: third per-angle output of angle_block, concatenated on axis 2
             (presumably residuals -- TODO confirm against angle_block)

    Raises:
        ValueError: if pool_size is smaller than 9.
    """
    # The 5x5, 3x3 and 3x3 "valid" convolutions shrink each spatial side by
    # 4 + 2 + 2 = 8 pixels. The first FC layer must cover what is left so the
    # map collapses to 1x1 before the squeeze further down.
    fc_kernel = pool_size - 8
    if fc_kernel < 1:
        raise ValueError(
            "pool_size must be >= 9 for the orientation head, got %s"
            % (pool_size,))

    # ROI Pooling
    # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
    x = model.PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_orientation")([rois, image_meta] + feature_maps)

    # Three conv stages that differ only in kernel size and layer names.
    conv_stages = (
        ((5, 5), "mrcnn_orientation_conv1", 'mrcnn_orientation_bn1'),
        ((3, 3), "mrcnn_orientation_conv2", 'mrcnn_orientation_bn2'),
        ((3, 3), "mrcnn_orientation_conv3", 'mrcnn_orientation_bn3'),
    )
    for kernel, conv_name, bn_name in conv_stages:
        x = KL.TimeDistributed(KL.Conv2D(256, kernel, padding="valid"),
                               name=conv_name)(x)
        x = KL.TimeDistributed(model.BatchNorm(),
                               name=bn_name)(x, training=train_bn)
        x = KL.Activation('relu')(x)

    # Two 1024 FC layers (implemented with Conv2D for consistency)
    # First layer
    x = KL.TimeDistributed(KL.Conv2D(1024, (fc_kernel, fc_kernel),
                                     padding="valid"),
                           name="mrcnn_orientation_conv4")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_orientation_bn4')(x, training=train_bn)
    x = KL.Activation('relu')(x)
    # Second layer
    x = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)),
                           name="mrcnn_orientation_conv5")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_orientation_bn5')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    # Squeezed feature maps
    # [batch, num_rois, 1024]
    shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
                       name="pool_squeeze_orientation")(x)

    # Add class probabilities
    shared = KL.Concatenate(axis=2)([shared, mrcnn_probs])

    # Add detected bounding box, flattening its last two axes.
    # The static ROI count can be None when it is only known at run time;
    # Reshape rejects None, so let it infer that axis with -1 instead.
    s = K.int_shape(mrcnn_bbox)
    num_rois = s[1] if s[1] is not None else -1
    mrcnn_bbox = KL.Reshape((num_rois, s[2] * s[3]))(mrcnn_bbox)
    shared = KL.Concatenate(axis=2)([shared, mrcnn_bbox])

    # One angle_block per angle; collect each of its three outputs separately.
    logits = []
    probs = []
    res = []
    for angle in range(3):
        angle_logits, angle_probs, angle_res = angle_block(
            shared, angle, train_bn)
        logits.append(angle_logits)
        probs.append(angle_probs)
        res.append(angle_res)

    logits = KL.Concatenate(axis=2)(logits)
    probs = KL.Concatenate(axis=2)(probs)
    res = KL.Concatenate(axis=2)(res)

    return logits, probs, res