def compute_mask_loss(
    boxes,
    masks,
    annotations,
    masks_target,
    width,
    height,
    iou_threshold=0.5,
    mask_size=(28, 28)
):
    """Compute the binary cross-entropy loss between predicted and target masks.

    Every box is matched to the annotation it overlaps most. Boxes whose best
    IoU is at least ``iou_threshold`` are kept; for each of them the target
    mask of the matched annotation is cropped to the box, resized to
    ``mask_size`` and compared with the predicted mask that belongs to the
    matched annotation's label.

    Args:
        boxes: Predicted boxes, read column-wise as (x1, y1, x2, y2).
        masks: Predicted masks for the boxes. After filtering they are
            transposed with (0, 3, 1, 2), so presumably laid out as
            (num_boxes, mask_h, mask_w, num_classes) -- TODO confirm.
        annotations: Ground-truth annotations; column 4 is read as the label.
        masks_target: Ground-truth masks, indexed by annotation. A channel
            axis is appended at position 3 before cropping.
        width: Value used to normalise the x coordinates of the boxes.
        height: Value used to normalise the y coordinates of the boxes.
        iou_threshold: Minimum best-IoU for a box to contribute to the loss.
        mask_size: Target size handed to ``backend.crop_and_resize``.

    Returns:
        The summed binary cross-entropy divided by the number of elements in
        the selected predicted masks (clamped to at least 1).
    """
    K = keras.backend
    kr_backend = keras_retinanet.backend

    # match every box with its best-overlapping annotation
    overlaps = backend.overlap(boxes, annotations)
    best_annotation = K.argmax(overlaps, axis=1)
    best_overlap = K.max(overlaps, axis=1)

    # keep only the boxes with IoU >= iou_threshold
    keep = kr_backend.where(K.greater_equal(best_overlap, iou_threshold))
    kept_boxes = kr_backend.gather_nd(boxes, keep)
    kept_masks = kr_backend.gather_nd(masks, keep)
    best_annotation = K.cast(kr_backend.gather_nd(best_annotation, keep), 'int32')
    labels = K.cast(K.gather(annotations[:, 4], best_annotation), 'int32')

    # normalise the boxes and reorder them to (y1, x1, y2, x2)
    max_y = K.cast(height, dtype=K.floatx()) - 1
    max_x = K.cast(width, dtype=K.floatx()) - 1
    normalized_boxes = K.stack([
        kept_boxes[:, 1] / max_y,
        kept_boxes[:, 0] / max_x,
        (kept_boxes[:, 3] - 1) / max_y,
        (kept_boxes[:, 2] - 1) / max_x,
    ], axis=1)

    # crop and resize the targets; the channel axis only exists for the crop
    targets = K.expand_dims(masks_target, axis=3)
    targets = backend.crop_and_resize(
        targets,
        normalized_boxes,
        best_annotation,
        mask_size
    )
    targets = targets[:, :, :, 0]

    # pick, for every kept box, the predicted mask of its matched label
    class_first = backend.transpose(kept_masks, (0, 3, 1, 2))
    row_ids = K.arange(K.shape(labels)[0])
    predicted = kr_backend.gather_nd(
        class_first, K.stack([row_ids, labels], axis=1))

    # average the cross-entropy over all mask elements (at least 1)
    bce = K.binary_crossentropy(targets, predicted)
    predicted_shape = K.shape(predicted)
    element_count = predicted_shape[0] * predicted_shape[1] * predicted_shape[2]
    element_count = K.maximum(K.cast(element_count, K.floatx()), 1)
    return K.sum(bce) / element_count
def _roi_align(args):
    """Crop a fixed-size feature patch for every box from its pyramid level.

    NOTE(review): ``self`` (for ``map_to_level`` and ``crop_size``) and
    ``image_shape`` are not parameters; they are read from the enclosing
    scope, so this function presumably lives inside a layer method -- confirm.

    Args:
        args: Indexable container where ``args[0]`` holds the boxes, read
            column-wise as (x1, y1, x2, y2), and ``args[2]`` holds the
            sequence of feature pyramid levels. ``args[1]`` is not used.

    Returns:
        The cropped ROIs, scattered back so that row ``n`` corresponds to
        box ``n`` of the input.
    """
    boxes = args[0]
    fpn = args[2]

    # compute from which level to get features from
    target_levels = self.map_to_level(boxes)

    # image extents paired with the y and x coordinates respectively
    image_rows = image_shape[1]
    image_cols = image_shape[2]

    # process each pyramid level independently
    rois = []
    ordered_indices = []
    for level, features in enumerate(fpn):
        # select the boxes assigned to this pyramid level
        indices = keras_retinanet.backend.where(
            keras.backend.equal(target_levels, level))
        ordered_indices.append(indices)

        level_boxes = keras_retinanet.backend.gather_nd(boxes, indices)
        fpn_shape = keras.backend.cast(
            keras.backend.shape(features), dtype=keras.backend.floatx())

        # rescale to the feature map and reorder to the normalised
        # (y1, x1, y2, x2) format expected by crop_and_resize
        x1 = level_boxes[:, 0]
        y1 = level_boxes[:, 1]
        x2 = level_boxes[:, 2]
        y2 = level_boxes[:, 3]
        level_boxes = keras.backend.stack([
            (y1 / image_rows * fpn_shape[0]) / (fpn_shape[0] - 1),
            (x1 / image_cols * fpn_shape[1]) / (fpn_shape[1] - 1),
            (y2 / image_rows * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
            (x2 / image_cols * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
        ], axis=1)

        # every box crops from the single (expanded) feature map at index 0
        # TODO: Remove this workaround (https://github.com/tensorflow/tensorflow/issues/33787).
        box_indices = tf.zeros(
            (keras.backend.shape(level_boxes)[0], ), dtype='int32')

        # append the rois to the list of rois
        rois.append(
            backend.crop_and_resize(
                keras.backend.expand_dims(features, axis=0),
                level_boxes,
                box_indices,
                self.crop_size))

    # concatenate rois to one blob
    rois = keras.backend.concatenate(rois, axis=0)

    # reorder rois back to original order
    indices = keras.backend.concatenate(ordered_indices, axis=0)
    rois = keras_retinanet.backend.scatter_nd(
        indices, rois, keras.backend.cast(keras.backend.shape(rois), 'int64'))

    return rois
def call(self, inputs, **kwargs):
    """Crop a fixed-size feature patch for every box from its pyramid level.

    Only the first element of the batch is processed.

    Args:
        inputs: Sequence where ``inputs[0]`` is the image shape,
            ``inputs[1]`` the boxes (read column-wise as (x1, y1, x2, y2))
            and ``inputs[3:]`` the feature pyramid levels. ``inputs[2]`` is
            not used by this layer.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        The cropped ROIs in the same order as the input boxes, with a
        leading batch axis of size 1.
    """
    # TODO: Support batch_size > 1
    image_shape = keras.backend.cast(inputs[0], keras.backend.floatx())
    boxes = keras.backend.stop_gradient(inputs[1][0])
    fpn = [keras.backend.stop_gradient(features[0]) for features in inputs[3:]]

    # compute from which level to get features from
    target_levels = self.map_to_level(boxes)

    # image extents paired with the y and x coordinates respectively
    image_rows = image_shape[1]
    image_cols = image_shape[2]

    # process each pyramid level independently
    rois = []
    ordered_indices = []
    for level, features in enumerate(fpn):
        # select the boxes assigned to this pyramid level
        indices = keras_retinanet.backend.where(
            keras.backend.equal(target_levels, level))
        ordered_indices.append(indices)

        level_boxes = keras_retinanet.backend.gather_nd(boxes, indices)
        fpn_shape = keras.backend.cast(
            keras.backend.shape(features), dtype=keras.backend.floatx())

        # rescale to the feature map and reorder to the normalised
        # (y1, x1, y2, x2) format expected by crop_and_resize
        x1 = level_boxes[:, 0]
        y1 = level_boxes[:, 1]
        x2 = level_boxes[:, 2]
        y2 = level_boxes[:, 3]
        level_boxes = keras.backend.stack([
            (y1 / image_rows * fpn_shape[0]) / (fpn_shape[0] - 1),
            (x1 / image_cols * fpn_shape[1]) / (fpn_shape[1] - 1),
            (y2 / image_rows * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
            (x2 / image_cols * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
        ], axis=1)

        # every box crops from the single (expanded) feature map at index 0
        box_indices = keras.backend.zeros(
            (keras.backend.shape(level_boxes)[0],), dtype='int32')

        # append the rois to the list of rois
        rois.append(backend.crop_and_resize(
            keras.backend.expand_dims(features, axis=0),
            level_boxes,
            box_indices,
            self.crop_size
        ))

    # concatenate rois to one blob
    rois = keras.backend.concatenate(rois, axis=0)

    # reorder rois back to original order
    indices = keras.backend.concatenate(ordered_indices, axis=0)
    rois = keras_retinanet.backend.scatter_nd(
        indices, rois, keras.backend.cast(keras.backend.shape(rois), 'int64'))

    return keras.backend.expand_dims(rois, axis=0)
def call(self, inputs, **kwargs):
    """Select the top-scoring detections and crop their pyramid features.

    Only the first element of the batch is processed. The ``self.top_k``
    best-scoring boxes (or all of them, if there are fewer) are kept and
    grouped by pyramid level, and a patch of ``self.crop_size`` is cropped
    for each from the feature map of its level.

    Args:
        inputs: Sequence where ``inputs[0]`` is the image shape,
            ``inputs[1]`` the boxes (read column-wise as (x1, y1, x2, y2)),
            ``inputs[2]`` the classification scores and ``inputs[3:]`` the
            feature pyramid levels.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        A list ``[boxes, classification, rois]``, each with a leading batch
        axis of size 1. The rows are ordered by pyramid level rather than in
        the original detection order.
    """
    K = keras.backend
    kr_backend = keras_retinanet.backend

    # TODO: Support batch_size > 1
    image_shape = K.cast(inputs[0], K.floatx())
    boxes = K.stop_gradient(inputs[1][0])
    classification = K.stop_gradient(inputs[2][0])
    fpn = [K.stop_gradient(features[0]) for features in inputs[3:]]

    # the score of a detection is its best class score
    scores = K.max(classification, axis=1)

    # keep only the top k detections for mask ROI computation
    keep_count = K.minimum(self.top_k, K.shape(boxes)[0])
    _, top_indices = kr_backend.top_k(scores, k=keep_count, sorted=False)
    boxes = K.gather(boxes, top_indices)
    classification = K.gather(classification, top_indices)

    # compute from which level to get features from
    target_levels = self.map_to_level(boxes)

    # walk over the pyramid levels one at a time
    rois = []
    ordered_boxes = []
    ordered_classification = []
    for level, features in enumerate(fpn):
        # detections assigned to this pyramid level
        on_level = kr_backend.where(K.equal(target_levels, level))
        level_boxes = kr_backend.gather_nd(boxes, on_level)
        level_classification = kr_backend.gather_nd(classification, on_level)
        ordered_boxes.append(level_boxes)
        ordered_classification.append(level_classification)

        fpn_shape = K.cast(K.shape(features), dtype=K.floatx())

        # rescale to the feature map and reorder to the normalised
        # (y1, x1, y2, x2) format expected by crop_and_resize
        x1 = level_boxes[:, 0]
        y1 = level_boxes[:, 1]
        x2 = level_boxes[:, 2]
        y2 = level_boxes[:, 3]
        crop_boxes = K.stack([
            (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
            (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
            (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
            (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1),
        ], axis=1)

        # every box crops from the single (expanded) feature map at index 0
        box_indices = K.zeros((K.shape(crop_boxes)[0],), dtype='int32')
        rois.append(backend.crop_and_resize(
            K.expand_dims(features, axis=0),
            crop_boxes,
            box_indices,
            self.crop_size
        ))

    # reassemble boxes and scores in the per-level order used for the rois
    boxes = K.concatenate(ordered_boxes, axis=0)
    classification = K.concatenate(ordered_classification, axis=0)

    # concatenate rois to one blob
    rois = K.concatenate(rois, axis=0)

    return [
        K.expand_dims(boxes, axis=0),
        K.expand_dims(classification, axis=0),
        K.expand_dims(rois, axis=0),
    ]
def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
    """Compute the mask loss from packed target and prediction tensors.

    ``y_pred`` carries, along its last axis, 4 box coordinates followed by
    the flattened predicted masks. ``y_true`` carries 5 annotation values,
    then the width and height (read from the first annotation of the first
    batch item), then the flattened target masks. Only the first batch item
    contributes to the loss.

    Args:
        y_true: Packed targets, sliced as ``[:, :, :5]`` (annotations),
            ``[0, 0, 5]`` (width), ``[0, 0, 6]`` (height) and ``[:, :, 7:]``
            (flattened target masks).
        y_pred: Packed predictions, sliced as ``[:, :, :4]`` (boxes) and
            ``[:, :, 4:]`` (flattened masks).
        iou_threshold: Minimum best-IoU for a box to contribute to the loss.
        mask_size: Spatial size of the predicted masks; also the size the
            target masks are resized to.

    Returns:
        The summed binary cross-entropy divided by the number of elements in
        the selected predicted masks (clamped to at least 1).
    """
    K = keras.backend
    kr_backend = keras_retinanet.backend

    # split up the different predicted blobs
    pred_boxes = y_pred[:, :, :4]
    pred_masks = y_pred[:, :, 4:]

    # split up the different target blobs
    annotations = y_true[:, :, :5]
    width = K.cast(y_true[0, 0, 5], dtype='int32')
    height = K.cast(y_true[0, 0, 6], dtype='int32')
    target_masks = y_true[:, :, 7:]

    # restore the spatial layout of the flattened masks
    target_masks = K.reshape(
        target_masks,
        (K.shape(target_masks)[0], K.shape(target_masks)[1], height, width))
    pred_masks = K.reshape(
        pred_masks,
        (K.shape(pred_masks)[0], K.shape(pred_masks)[1],
         mask_size[0], mask_size[1], -1))

    # TODO: Fix batch_size > 1
    pred_boxes = pred_boxes[0]
    pred_masks = pred_masks[0]
    annotations = annotations[0]
    target_masks = target_masks[0]

    # match every box with its best-overlapping annotation
    overlaps = backend.overlap(pred_boxes, annotations)
    best_annotation = K.argmax(overlaps, axis=1)
    best_overlap = K.max(overlaps, axis=1)

    # keep only the boxes with IoU >= iou_threshold
    keep = kr_backend.where(K.greater_equal(best_overlap, iou_threshold))
    pred_boxes = kr_backend.gather_nd(pred_boxes, keep)
    pred_masks = kr_backend.gather_nd(pred_masks, keep)
    best_annotation = K.cast(kr_backend.gather_nd(best_annotation, keep), 'int32')
    labels = K.cast(K.gather(annotations[:, 4], best_annotation), 'int32')

    # normalise the boxes and reorder them to (y1, x1, y2, x2)
    max_y = K.cast(height, dtype=K.floatx()) - 1
    max_x = K.cast(width, dtype=K.floatx()) - 1
    normalized_boxes = K.stack([
        pred_boxes[:, 1] / max_y,
        pred_boxes[:, 0] / max_x,
        (pred_boxes[:, 3] - 1) / max_y,
        (pred_boxes[:, 2] - 1) / max_x,
    ], axis=1)

    # crop and resize the targets; the channel axis only exists for the crop
    target_masks = K.expand_dims(target_masks, axis=3)
    target_masks = backend.crop_and_resize(
        target_masks, normalized_boxes, best_annotation, mask_size)
    target_masks = target_masks[:, :, :, 0]

    # pick, for every kept box, the predicted mask of its matched label
    class_first = backend.transpose(pred_masks, (0, 3, 1, 2))
    row_ids = K.arange(K.shape(labels)[0])
    selected = kr_backend.gather_nd(
        class_first, K.stack([row_ids, labels], axis=1))

    # average the cross-entropy over all mask elements (at least 1)
    bce = K.binary_crossentropy(target_masks, selected)
    selected_shape = K.shape(selected)
    element_count = selected_shape[0] * selected_shape[1] * selected_shape[2]
    element_count = K.maximum(K.cast(element_count, K.floatx()), 1)
    return K.sum(bce) / element_count