Example #1
# Imports assumed from the enclosing keras-maskrcnn code base (omitted in the
# original snippet); `backend` here is the package's own backend module, which
# provides overlap, crop_and_resize and transpose.
import keras
import keras_retinanet.backend
from keras_maskrcnn import backend

def compute_mask_loss(
    boxes,
    masks,
    annotations,
    masks_target,
    width,
    height,
    iou_threshold=0.5,
    mask_size=(28, 28)
):
    # compute overlap of boxes with annotations
    iou                  = backend.overlap(boxes, annotations)
    argmax_overlaps_inds = keras.backend.argmax(iou, axis=1)
    max_iou              = keras.backend.max(iou, axis=1)

    # keep only detections whose best IoU meets the threshold (default 0.5)
    indices              = keras_retinanet.backend.where(keras.backend.greater_equal(max_iou, iou_threshold))
    boxes                = keras_retinanet.backend.gather_nd(boxes, indices)
    masks                = keras_retinanet.backend.gather_nd(masks, indices)
    argmax_overlaps_inds = keras.backend.cast(keras_retinanet.backend.gather_nd(argmax_overlaps_inds, indices), 'int32')
    labels               = keras.backend.cast(keras.backend.gather(annotations[:, 4], argmax_overlaps_inds), 'int32')

    # convert boxes to the normalized [y1, x1, y2, x2] format expected by crop_and_resize
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    boxes = keras.backend.stack([
        y1 / (keras.backend.cast(height, dtype=keras.backend.floatx()) - 1),
        x1 / (keras.backend.cast(width, dtype=keras.backend.floatx()) - 1),
        (y2 - 1) / (keras.backend.cast(height, dtype=keras.backend.floatx()) - 1),
        (x2 - 1) / (keras.backend.cast(width, dtype=keras.backend.floatx()) - 1),
    ], axis=1)

    # crop and resize masks_target
    masks_target = keras.backend.expand_dims(masks_target, axis=3)  # append a fake channel dimension
    masks_target = backend.crop_and_resize(
        masks_target,
        boxes,
        argmax_overlaps_inds,
        mask_size
    )
    masks_target = masks_target[:, :, :, 0]  # remove fake channel dimension

    # gather the predicted masks using the annotation label
    masks = backend.transpose(masks, (0, 3, 1, 2))
    label_indices = keras.backend.stack([
        keras.backend.arange(keras.backend.shape(labels)[0]),
        labels
    ], axis=1)
    masks = keras_retinanet.backend.gather_nd(masks, label_indices)

    # compute mask loss
    mask_loss  = keras.backend.binary_crossentropy(masks_target, masks)
    normalizer = keras.backend.shape(masks)[0] * keras.backend.shape(masks)[1] * keras.backend.shape(masks)[2]
    normalizer = keras.backend.maximum(keras.backend.cast(normalizer, keras.backend.floatx()), 1)
    mask_loss  = keras.backend.sum(mask_loss) / normalizer

    return mask_loss
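
For orientation, a minimal, hypothetical driver for the function above, assuming the keras-maskrcnn and keras-retinanet packages are installed; the shapes follow the indexing inside the function (predicted boxes (N, 4), predicted masks (N, mask_h, mask_w, num_classes), annotations (M, 5) as x1, y1, x2, y2, label, and ground-truth masks (M, height, width)):

import numpy as np
import keras

# dummy data: one predicted box that exactly matches one annotation (IoU = 1)
boxes        = keras.backend.constant(np.array([[0., 0., 10., 10.]], dtype='float32'))
masks        = keras.backend.constant(np.random.rand(1, 28, 28, 2).astype('float32'))
annotations  = keras.backend.constant(np.array([[0., 0., 10., 10., 1.]], dtype='float32'))
masks_target = keras.backend.constant(np.random.rand(1, 200, 300).astype('float32'))

loss = compute_mask_loss(boxes, masks, annotations, masks_target, width=300, height=200)
print(keras.backend.eval(loss))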
Example #2
        # Inner function of a larger class method; `self`, `image_shape` and
        # `tf` (tensorflow) are assumed to be defined in the enclosing scope
        # (compare Example #3 for the full method).
        def _roi_align(args):
            boxes = args[0]
            scores = args[1]
            fpn = args[2]

            # compute which pyramid level to take features from
            target_levels = self.map_to_level(boxes)

            # process each pyramid level independently
            rois = []
            ordered_indices = []
            for i in range(len(fpn)):
                # select the boxes belonging to this pyramid level
                indices = keras_retinanet.backend.where(
                    keras.backend.equal(target_levels, i))
                ordered_indices.append(indices)

                level_boxes = keras_retinanet.backend.gather_nd(boxes, indices)
                fpn_shape = keras.backend.cast(keras.backend.shape(fpn[i]),
                                               dtype=keras.backend.floatx())

                # convert to expected format for crop_and_resize
                x1 = level_boxes[:, 0]
                y1 = level_boxes[:, 1]
                x2 = level_boxes[:, 2]
                y2 = level_boxes[:, 3]
                level_boxes = keras.backend.stack([
                    (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
                    (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
                    (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
                    (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
                ], axis=1)

                # append this level's rois to the list
                rois.append(backend.crop_and_resize(
                    keras.backend.expand_dims(fpn[i], axis=0),
                    level_boxes,
                    # TODO: Remove this tf.zeros workaround (https://github.com/tensorflow/tensorflow/issues/33787).
                    tf.zeros((keras.backend.shape(level_boxes)[0],), dtype='int32'),
                    self.crop_size
                ))

            # concatenate the rois into one blob
            rois = keras.backend.concatenate(rois, axis=0)

            # reorder rois back to original order
            indices = keras.backend.concatenate(ordered_indices, axis=0)
            rois = keras_retinanet.backend.scatter_nd(
                indices, rois,
                keras.backend.cast(keras.backend.shape(rois), 'int64'))

            return rois
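
The stack above first maps absolute image coordinates onto level i's feature map (dividing by the image size and multiplying by fpn_shape) and then divides by fpn_shape - 1, matching the normalized coordinate convention of tf.image.crop_and_resize (which backend.crop_and_resize presumably wraps). A quick plain-Python sanity check of the y1 term, with assumed numbers:

# hypothetical numbers: 800-pixel-high image, 100-pixel-high feature map
image_height = 800.0
fpn_height   = 100.0
y1           = 400.0  # absolute y coordinate, halfway down the image

normalized_y1 = (y1 / image_height * fpn_height) / (fpn_height - 1)
print(normalized_y1)  # ~0.505, i.e. roughly the middle of the feature map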
Example #3
    # `call` of an RoiAlign-style Keras layer; `self.map_to_level` and
    # `self.crop_size` are attributes of the enclosing class.
    def call(self, inputs, **kwargs):
        # TODO: Support batch_size > 1
        image_shape = keras.backend.cast(inputs[0], keras.backend.floatx())
        boxes       = keras.backend.stop_gradient(inputs[1][0])
        scores      = keras.backend.stop_gradient(inputs[2][0])
        fpn         = [keras.backend.stop_gradient(i[0]) for i in inputs[3:]]

        # compute which pyramid level to take features from
        target_levels = self.map_to_level(boxes)

        # process each pyramid level independently
        rois           = []
        ordered_indices = []
        for i in range(len(fpn)):
            # select the boxes belonging to this pyramid level
            indices = keras_retinanet.backend.where(keras.backend.equal(target_levels, i))
            ordered_indices.append(indices)

            level_boxes = keras_retinanet.backend.gather_nd(boxes, indices)
            fpn_shape   = keras.backend.cast(keras.backend.shape(fpn[i]), dtype=keras.backend.floatx())

            # convert to expected format for crop_and_resize
            x1 = level_boxes[:, 0]
            y1 = level_boxes[:, 1]
            x2 = level_boxes[:, 2]
            y2 = level_boxes[:, 3]
            level_boxes = keras.backend.stack([
                (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
                (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
                (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
                (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
            ], axis=1)

            # append this level's rois to the list
            rois.append(backend.crop_and_resize(
                keras.backend.expand_dims(fpn[i], axis=0),
                level_boxes,
                # keras.backend.zeros may fail here for a dynamic (tensor) shape;
                # Example #2 uses tf.zeros as a workaround.
                keras.backend.zeros((keras.backend.shape(level_boxes)[0],), dtype='int32'),
                self.crop_size
            ))

        # concatenate the rois into one blob
        rois = keras.backend.concatenate(rois, axis=0)

        # reorder rois back to original order
        indices = keras.backend.concatenate(ordered_indices, axis=0)
        rois    = keras_retinanet.backend.scatter_nd(indices, rois, keras.backend.cast(keras.backend.shape(rois), 'int64'))

        return keras.backend.expand_dims(rois, axis=0)
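
The final scatter_nd inverts the per-level shuffle: ordered_indices records the original position of every roi, so scattering the concatenated rois through those indices puts each one back where its box came from. A standalone sketch of the trick, assuming keras_retinanet.backend.scatter_nd behaves like tf.scatter_nd:

import tensorflow as tf

values   = tf.constant([[10.], [20.], [30.]])             # rois, concatenated level by level
indices  = tf.constant([[2], [0], [1]], dtype=tf.int64)   # original position of each roi
restored = tf.scatter_nd(indices, values, tf.shape(values, out_type=tf.int64))
# restored is [[20.], [30.], [10.]]: each roi is back at its original index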
Example #4
    # variant of Example #3 that first keeps only the top-scoring detections;
    # `self.top_k` is an attribute of the enclosing class.
    def call(self, inputs, **kwargs):
        # TODO: Support batch_size > 1
        image_shape    = keras.backend.cast(inputs[0], keras.backend.floatx())
        boxes          = keras.backend.stop_gradient(inputs[1][0])
        classification = keras.backend.stop_gradient(inputs[2][0])
        fpn            = [keras.backend.stop_gradient(i[0]) for i in inputs[3:]]

        # compute best scores for each detection
        scores = keras.backend.max(classification, axis=1)

        # select the top k for mask ROI computation
        _, indices     = keras_retinanet.backend.top_k(scores, k=keras.backend.minimum(self.top_k, keras.backend.shape(boxes)[0]), sorted=False)
        boxes          = keras.backend.gather(boxes, indices)
        classification = keras.backend.gather(classification, indices)

        # compute which pyramid level to take features from
        target_levels = self.map_to_level(boxes)

        # process each pyramid level independently
        rois                   = []
        ordered_boxes          = []
        ordered_classification = []
        for i in range(len(fpn)):
            # select the boxes and classification from this pyramid level
            indices = keras_retinanet.backend.where(keras.backend.equal(target_levels, i))

            level_boxes          = keras_retinanet.backend.gather_nd(boxes, indices)
            level_classification = keras_retinanet.backend.gather_nd(classification, indices)

            ordered_boxes.append(level_boxes)
            ordered_classification.append(level_classification)

            fpn_shape = keras.backend.cast(keras.backend.shape(fpn[i]), dtype=keras.backend.floatx())

            # convert to expected format for crop_and_resize
            x1 = level_boxes[:, 0]
            y1 = level_boxes[:, 1]
            x2 = level_boxes[:, 2]
            y2 = level_boxes[:, 3]
            level_boxes = keras.backend.stack([
                (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
                (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
                (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
                (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
            ], axis=1)

            # append this level's rois to the list
            rois.append(backend.crop_and_resize(
                keras.backend.expand_dims(fpn[i], axis=0),
                level_boxes,
                # keras.backend.zeros may fail here for a dynamic (tensor) shape;
                # Example #2 uses tf.zeros as a workaround.
                keras.backend.zeros((keras.backend.shape(level_boxes)[0],), dtype='int32'),
                self.crop_size
            ))

        # reassemble the boxes and classification in the new per-level order
        boxes          = keras.backend.concatenate(ordered_boxes, axis=0)
        classification = keras.backend.concatenate(ordered_classification, axis=0)

        # concatenate the rois into one blob
        rois = keras.backend.concatenate(rois, axis=0)
        return [
            keras.backend.expand_dims(boxes, axis=0),
            keras.backend.expand_dims(classification, axis=0),
            keras.backend.expand_dims(rois, axis=0)
        ]
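
This variant differs from Example #3 in two ways: it keeps only the self.top_k highest-scoring detections before computing rois, and instead of scattering the rois back into the original order it returns boxes and classification reordered to match the rois. The selection presumably wraps tf.nn.top_k; a standalone sketch:

import tensorflow as tf

scores = tf.constant([0.1, 0.9, 0.4, 0.7])
boxes  = tf.constant([[0., 0., 1., 1.],
                      [1., 1., 2., 2.],
                      [2., 2., 3., 3.],
                      [3., 3., 4., 4.]])

_, indices = tf.nn.top_k(scores, k=2, sorted=False)
selected   = tf.gather(boxes, indices)  # the two highest-scoring boxes (0.9 and 0.7)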
Example #5
    # `backend` and `keras_retinanet.backend` are assumed to be imported by the
    # enclosing module (as in Example #1).
    def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
        # split up the different predicted blobs
        boxes = y_pred[:, :, :4]
        masks = y_pred[:, :, 4:]

        # split up the different target blobs
        annotations = y_true[:, :, :5]
        width = keras.backend.cast(y_true[0, 0, 5], dtype='int32')
        height = keras.backend.cast(y_true[0, 0, 6], dtype='int32')
        masks_target = y_true[:, :, 7:]

        # reshape the masks back to their original size
        masks_target = keras.backend.reshape(
            masks_target,
            (keras.backend.shape(masks_target)[0], keras.backend.shape(masks_target)[1], height, width)
        )
        masks = keras.backend.reshape(
            masks,
            (keras.backend.shape(masks)[0], keras.backend.shape(masks)[1], mask_size[0], mask_size[1], -1)
        )

        # TODO: Fix batch_size > 1
        boxes = boxes[0]
        masks = masks[0]
        annotations = annotations[0]
        masks_target = masks_target[0]

        # compute overlap of boxes with annotations
        iou = backend.overlap(boxes, annotations)
        argmax_overlaps_inds = keras.backend.argmax(iou, axis=1)
        max_iou = keras.backend.max(iou, axis=1)

        # keep only detections whose best IoU meets the threshold (default 0.5)
        indices = keras_retinanet.backend.where(keras.backend.greater_equal(max_iou, iou_threshold))
        boxes = keras_retinanet.backend.gather_nd(boxes, indices)
        masks = keras_retinanet.backend.gather_nd(masks, indices)
        argmax_overlaps_inds = keras.backend.cast(keras_retinanet.backend.gather_nd(argmax_overlaps_inds, indices), 'int32')
        labels = keras.backend.cast(keras.backend.gather(annotations[:, 4], argmax_overlaps_inds), 'int32')

        # convert boxes to the normalized [y1, x1, y2, x2] format expected by crop_and_resize
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        boxes = keras.backend.stack([
            y1 / (keras.backend.cast(height, dtype=keras.backend.floatx()) - 1),
            x1 / (keras.backend.cast(width, dtype=keras.backend.floatx()) - 1),
            (y2 - 1) / (keras.backend.cast(height, dtype=keras.backend.floatx()) - 1),
            (x2 - 1) / (keras.backend.cast(width, dtype=keras.backend.floatx()) - 1),
        ], axis=1)

        # crop and resize masks_target
        masks_target = keras.backend.expand_dims(masks_target, axis=3)  # append a fake channel dimension
        masks_target = backend.crop_and_resize(masks_target, boxes, argmax_overlaps_inds, mask_size)
        masks_target = masks_target[:, :, :, 0]  # remove fake channel dimension

        # gather the predicted masks using the annotation label
        masks = backend.transpose(masks, (0, 3, 1, 2))
        label_indices = keras.backend.stack([
            keras.backend.arange(keras.backend.shape(labels)[0]),
            labels
        ], axis=1)
        masks = keras_retinanet.backend.gather_nd(masks, label_indices)

        # compute mask loss
        mask_loss = keras.backend.binary_crossentropy(masks_target, masks)
        normalizer = keras.backend.shape(masks)[0] * keras.backend.shape(masks)[1] * keras.backend.shape(masks)[2]
        normalizer = keras.backend.maximum(keras.backend.cast(normalizer, keras.backend.floatx()), 1)
        mask_loss = keras.backend.sum(mask_loss) / normalizer

        return mask_loss
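
The slicing at the top of _mask implies a packed y_true layout per annotation row: columns 0-4 hold the box and label, column 5 the image width, column 6 the height, and columns 7 onward the flattened ground-truth mask. A hypothetical packing helper for one image's targets (name and signature are illustrative, not part of the library):

import numpy as np

def pack_mask_targets(annotations, masks, width, height):
    # annotations: (M, 5) array of x1, y1, x2, y2, label
    # masks:       (M, height, width) ground-truth masks
    flat = masks.reshape(masks.shape[0], -1)                      # columns 7 onward
    meta = np.tile([[width, height]], (annotations.shape[0], 1))  # columns 5 and 6
    return np.concatenate([annotations, meta, flat], axis=1)      # (M, 7 + height * width)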