def fpn_mask_graph(rois, feature_maps, image_meta, pool_size, num_classes, train_bn=True):
    """Builds the computation graph of the mask head of Feature Pyramid Network.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
        coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
        [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns: Masks [batch, roi_count, height, width, num_classes]
    """
    # ROI Pooling. Shape: [batch, boxes, pool_height, pool_width, channels]
    x = modellib.PyramidROIAlign([pool_size, pool_size],
                                 name="roi_align_mask")([rois, image_meta] + feature_maps)

    # Four identical 3x3 conv -> BN -> ReLU blocks.
    for idx in (1, 2, 3, 4):
        x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                               name="mrcnn_mask_conv{}".format(idx))(x)
        x = KL.TimeDistributed(modellib.BatchNorm(),
                               name="mrcnn_mask_bn{}".format(idx))(x, training=train_bn)
        x = KL.Activation('relu')(x)

    # Upsample 2x, then predict a single sigmoid mask per ROI.
    x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"),
                           name="mrcnn_mask_deconv")(x)
    x = KL.TimeDistributed(KL.Conv2D(1, (1, 1), strides=1, activation="sigmoid"),
                           name="mrcnn_mask")(x)

    # Duplicate the single class-agnostic mask across the class axis so the
    # output depth matches num_classes (fg/bg detections share one mask).
    x = KL.Concatenate(axis=-1)([x] * num_classes)
    return x
def fpn_classifier_graph(rois, feature_maps, image_meta, pool_size, num_classes,
                         train_bn=True, fc_layers_size=1024):
    """Builds the computation graph of the feature pyramid network classifier
    and regressor heads.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
        coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
        [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers
    fc_layers_size: Width of the two fully connected layers

    Returns:
        logits: [N, NUM_CLASSES] classifier logits (before softmax)
        probs: [N, NUM_CLASSES] classifier probabilities
        bbox_deltas: [N, (dy, dx, log(dh), log(dw))] Deltas to apply to
            proposal boxes
    """
    # ROI Pooling. Shape: [batch, num_boxes, pool_height, pool_width, channels]
    pooled = modellib.PyramidROIAlign([pool_size, pool_size],
                                      name="roi_align_classifier")([rois, image_meta] + feature_maps)

    # Two FC layers, implemented as Conv2D for consistency.
    net = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
                             name="mrcnn_class_conv1")(pooled)
    net = KL.TimeDistributed(modellib.BatchNorm(), name='mrcnn_class_bn1')(net, training=train_bn)
    net = KL.Activation('relu')(net)
    net = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
                             name="mrcnn_class_conv2")(net)
    net = KL.TimeDistributed(modellib.BatchNorm(), name='mrcnn_class_bn2')(net, training=train_bn)
    net = KL.Activation('relu')(net)

    # Drop the 1x1 spatial dims: [batch, num_rois, fc_layers_size]
    shared = KL.Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2), name="pool_squeeze")(net)

    # Classifier head.
    mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
                                            name='mrcnn_class_logits')(shared)
    mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
                                     name="mrcnn_class")(mrcnn_class_logits)

    # BBox head: one class-agnostic set of deltas per ROI.
    deltas = KL.TimeDistributed(KL.Dense(4, activation='linear'),
                                name='mrcnn_bbox_fc')(shared)
    # Reshape to [batch, num_rois, 1, (dy, dx, log(dh), log(dw))].
    s = K.int_shape(deltas)
    deltas = KL.Reshape((s[1], 1, 4), name="mrcnn_bbox")(deltas)
    # Duplicate the class-agnostic deltas across the class axis so the output
    # depth matches num_classes (fg/bg detections share one set of deltas).
    mrcnn_bbox = KL.Concatenate(axis=-2)([deltas] * num_classes)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
def fpn_orientation_graph(rois, feature_maps, mrcnn_probs, mrcnn_bbox, image_meta,
                          pool_size, train_bn=True):
    """Builds the computation graph of the feature pyramid network orientation
    heads using a 6D rotation representation.

    NOTE(review): this module defines ``fpn_orientation_graph`` twice; a later
    definition with the same name shadows this one at import time.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
        coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
        [P2, P3, P4, P5]. Each has a different resolution.
    mrcnn_probs: classifier probabilities.
    mrcnn_bbox: Deltas to apply to proposal boxes
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns:
        r_matrices: [batch, 3, 3] rotation matrices
        angles: [batch, 3] rotation angles in Euler ZYX (radians)
    """
    net = model.PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_orientation")([rois, image_meta] + feature_maps)

    # Two 1024 FC layers (implemented with Conv2D for consistency).
    net = KL.TimeDistributed(KL.Conv2D(1024, (pool_size, pool_size), padding="valid"),
                             name="mrcnn_or_conv1")(net)
    net = KL.TimeDistributed(model.BatchNorm(), name='mrcnn_or_bn1')(net, training=train_bn)
    net = KL.Activation('relu')(net)
    net = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)), name="mrcnn_or_conv2")(net)
    net = KL.TimeDistributed(model.BatchNorm(), name='mrcnn_or_bn2')(net, training=train_bn)
    net = KL.Activation('relu')(net)

    # Squeeze the 1x1 spatial dims: [batch, num_rois, 1024].
    net = KL.Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2), name="pool_squeeze_or")(net)

    # Condition the head on the detection outputs: append the class
    # probabilities and the flattened box deltas to the pooled features.
    net = KL.Concatenate(axis=2)([net, mrcnn_probs])
    bbox_shape = K.int_shape(mrcnn_bbox)
    flat_bbox = KL.Reshape((bbox_shape[1], bbox_shape[2] * bbox_shape[3]))(mrcnn_bbox)
    net = KL.Concatenate(axis=2)([net, flat_bbox])

    # Two Dense(1024) blocks, each followed by LeakyReLU then BatchNorm.
    net = KL.TimeDistributed(KL.Dense(1024), name='mrcnn_or_d1')(net)
    net = KL.LeakyReLU(alpha=0.2)(net)
    net = KL.TimeDistributed(KL.BatchNormalization(), name='mrcnn_or_bn3')(net, training=train_bn)
    net = KL.TimeDistributed(KL.Dense(1024), name='mrcnn_or_d2')(net)
    net = KL.LeakyReLU(alpha=0.2)(net)
    net = KL.TimeDistributed(KL.BatchNormalization(), name='mrcnn_or_bn4')(net, training=train_bn)

    # Predict a 6D rotation representation per ROI, lift it to a rotation
    # matrix, then derive Euler angles from the matrix.
    net = KL.TimeDistributed(KL.Dense(6), name='mrcnn_or_d5')(net)
    r_matrices = KL.TimeDistributed(
        KL.Lambda(lambda t: or_tools.compute_rotation_matrix_from_ortho6d(t)))(net)
    angles = KL.TimeDistributed(
        KL.Lambda(lambda t: or_tools.compute_euler_angles_from_rotation_matrices(t)))(r_matrices)
    return r_matrices, angles
def fpn_orientation_graph(rois, feature_maps, mrcnn_probs, mrcnn_bbox, image_meta,
                          pool_size, train_bn=True):
    """Builds the computation graph of the feature pyramid network orientation
    heads (per-angle bin classification and regression).

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
        coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
        [P2, P3, P4, P5]. Each has a different resolution.
    mrcnn_probs: classifier probabilities.
    mrcnn_bbox: Deltas to apply to proposal boxes
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns:
        logits: bin classifier logits (before softmax) for the three Euler
            angles, concatenated along the last axis
        probs: bin classifier probabilities, concatenated along the last axis
        res: regressed per-bin residuals, concatenated along the last axis
        (exact depths depend on angle_block — confirm against its definition)
    """
    # ROI Pooling. Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
    net = model.PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_orientation")([rois, image_meta] + feature_maps)

    # Three "valid" conv blocks that progressively shrink the pooled patch.
    for idx, kernel in ((1, (5, 5)), (2, (3, 3)), (3, (3, 3))):
        net = KL.TimeDistributed(KL.Conv2D(256, kernel, padding="valid"),
                                 name="mrcnn_orientation_conv{}".format(idx))(net)
        net = KL.TimeDistributed(model.BatchNorm(),
                                 name="mrcnn_orientation_bn{}".format(idx))(net, training=train_bn)
        net = KL.Activation('relu')(net)

    # Two 1024 FC layers (implemented with Conv2D for consistency).
    net = KL.TimeDistributed(KL.Conv2D(1024, (6, 6), padding="valid"),
                             name="mrcnn_orientation_conv4")(net)
    net = KL.TimeDistributed(model.BatchNorm(),
                             name='mrcnn_orientation_bn4')(net, training=train_bn)
    net = KL.Activation('relu')(net)
    net = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)),
                             name="mrcnn_orientation_conv5")(net)
    net = KL.TimeDistributed(model.BatchNorm(),
                             name='mrcnn_orientation_bn5')(net, training=train_bn)
    net = KL.Activation('relu')(net)

    # Squeezed feature maps: [batch, num_rois, fc_layers_size]
    shared = KL.Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2),
                       name="pool_squeeze_orientation")(net)

    # Condition the head on the detection outputs: append the class
    # probabilities and the flattened box deltas.
    shared = KL.Concatenate(axis=2)([shared, mrcnn_probs])
    bbox_shape = K.int_shape(mrcnn_bbox)
    flat_bbox = KL.Reshape((bbox_shape[1], bbox_shape[2] * bbox_shape[3]))(mrcnn_bbox)
    shared = KL.Concatenate(axis=2)([shared, flat_bbox])

    # One bin classifier + residual regressor per Euler angle.
    per_angle = [angle_block(shared, angle, train_bn) for angle in range(3)]
    logits = KL.Concatenate(axis=2)([outputs[0] for outputs in per_angle])
    probs = KL.Concatenate(axis=2)([outputs[1] for outputs in per_angle])
    res = KL.Concatenate(axis=2)([outputs[2] for outputs in per_angle])

    return logits, probs, res