def _postprocess(results, result_mask_info, output_height, output_width, mask_threshold=0.5):
    """
    Rescale raw TensorMask detections to the requested output resolution.

    Detectors usually see a resized copy of the image, so the predicted boxes
    and masks have to be mapped to the resolution the caller asks for.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` is the input resolution the detector saw.
            This object might be modified in-place. It does not carry a
            `pred_masks` field; the masks are supplied separately through
            `result_mask_info`.
        result_mask_info (list[Tensor], Boxes): a `(masks, anchors)` pair.
            `masks` is a list with one predicted-mask tensor per detection and
            `anchors` holds the anchors corresponding to those masks. The
            anchor tensor is scaled in-place when `masks` is non-empty.
        output_height, output_width: the desired output resolution.
        mask_threshold (float): forwarded as `threshold` to
            `_paste_mask_lists_in_image`.

    Returns:
        Instances: the detections in the output resolution. Only detections
            whose clipped box is non-empty are kept; `pred_masks` is set when
            masks were supplied.
    """
    input_size = results.image_size
    scale_x = output_width / input_size[1]
    scale_y = output_height / input_size[0]

    # Re-wrap the same fields under the new image size.
    resized = Instances((output_height, output_width), **results.get_fields())

    # Even columns are scaled by scale_x, odd columns by scale_y (in-place).
    boxes = resized.pred_boxes
    box_coords = boxes.tensor
    box_coords[:, 0::2] *= scale_x
    box_coords[:, 1::2] *= scale_y
    boxes.clip(resized.image_size)

    keep = boxes.nonempty()
    resized = resized[keep]

    masks, anchors = result_mask_info
    if not masks:
        # Nothing to paste; the result carries boxes only.
        return resized

    # Anchors must live in the same coordinate frame as the rescaled boxes.
    anchor_coords = anchors.tensor
    anchor_coords[:, 0::2] *= scale_x
    anchor_coords[:, 1::2] *= scale_y

    # Drop the masks belonging to detections that were filtered out above.
    kept_masks = []
    for is_kept, mask in zip(keep.tolist(), masks):
        if is_kept:
            kept_masks.append(mask)

    resized.pred_masks = _paste_mask_lists_in_image(
        kept_masks,
        anchors[keep],
        resized.image_size,
        threshold=mask_threshold,
    )
    return resized
def detector_postprocess(results, output_height, output_width, mask_threshold=0.5):
    """
    Resize the output instances.

    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the
            detector sees. This object might be modified in-place. It must
            contain either a `pred_boxes` or a `proposal_boxes` field.
        output_height, output_width: the desired output resolution.
        mask_threshold (float): forwarded as `threshold` to
            `paste_masks_in_image` when `pred_masks` is present.

    Returns:
        Instances: the resized output from the model, based on the output
            resolution. Only instances whose clipped box is non-empty are kept.

    Raises:
        ValueError: if `results` has neither `pred_boxes` nor
            `proposal_boxes`.
    """
    scale_x, scale_y = (
        output_width / results.image_size[1],
        output_height / results.image_size[0],
    )
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    else:
        # Without this branch `output_boxes` would be unbound and the call
        # below would fail with an opaque UnboundLocalError.
        raise ValueError(
            "Predictions must contain boxes: expected a 'pred_boxes' or "
            "'proposal_boxes' field."
        )

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = paste_masks_in_image(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        # Index 0 of the last axis is scaled by scale_x, index 1 by scale_y
        # (in-place).
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results