Beispiel #1
0
def get_box_ap_dict_graph(n_class_ids, gt_class_ids, gt_boxes, detection_boxes,
                          detection_classes, detection_scores):
    """
    Label every detection box of one image as true/false positive, per class.

    Class ids are taken to run from 1 to n_class_ids inclusive. For each class,
    the detections and ground-truth (gt) boxes of that class are compared by
    IoU. A detection is judged 'tp' when it is the highest-IoU detection for at
    least one gt box of the class AND its IoU with that same gt box is above
    0.5; every other detection of the class is judged 'fp'.

    n_class_ids: int. The number of classes.
    gt_class_ids: (max_instance_per_img)
    gt_boxes: (max_instance_per_img, [y1, x1, y2, x2])
    detection_boxes: (detection_max_instance, [y1, x1, y2, x2])
    detection_classes: (detection_max_instance, [class_id])
    detection_scores: (detection_max_instance, [score])

    return:
        class_ap_dict: {class_id: [[confidence, judge], ...]} where confidence
            is a Python float and judge is the string 'tp' or 'fp'. Every class
            id in 1..n_class_ids is present, possibly with an empty list.
    """
    class_ap_dict = {class_id: [] for class_id in range(1, n_class_ids + 1)}

    for class_id in range(1, n_class_ids + 1):
        gt_box = gt_boxes[gt_class_ids.eq(class_id)]  # (n_gt_box, 4)

        detection_index = detection_classes.eq(class_id)
        confidence = detection_scores[detection_index]  # (n_detection_box)
        detection_box = detection_boxes[
            detection_index]  # (n_detection_box, 4)

        if gt_box.shape[0] == 0 or detection_box.shape[0] == 0:
            # Nothing to match against (or nothing to match): no true positives.
            # Also avoids reducing over an empty dimension below.
            tp_index = set()
        else:
            overlaps = Utils.compute_overlaps(
                detection_box, gt_box)  # (n_detection_box, n_gt_box)
            # For every gt box take its best detection together with that
            # detection's IoU, so the 0.5 threshold is checked against the same
            # gt box the detection was matched to. Checking "is best for some
            # gt" and "has IoU > 0.5 with some gt" independently would let a
            # duplicate detection of an already-matched gt box count as 'tp'.
            best_iou, best_detection = overlaps.max(dim=0)  # (n_gt_box) each
            tp_index = set(best_detection[best_iou.gt(0.5)].cpu().tolist())

        # Append [confidence, judge] for this class_id. Move the scores to the
        # CPU once instead of once per element.
        confidence = confidence.cpu()
        for n in range(confidence.shape[0]):
            judge = 'tp' if n in tp_index else 'fp'
            class_ap_dict[class_id].append([confidence[n].item(), judge])
    return class_ap_dict
Beispiel #2
0
    def detection_targets_graph(self, proposals, gt_class_ids, gt_boxes,
                                gt_masks):
        """
        Subsample proposals for one image (i.e. one batch) by splitting positive and negative proposals,
        and build the class / box-delta / mask targets for the positive ones.

        proposals: (N, [y1, x1, y2, x2]). Proposals in normalized coordinates after ProposalLayer. Might be zero padded
                   if there are not enough proposals.
        gt_class_ids: (all_GT_instances). Class IDs.
        gt_boxes: (all_GT_instances, [y1, x1, y2, x2]). Ground truth boxes in normalized coordinates.
        gt_masks: (all_GT_instances, height, width). Ground truth masks.

        Reads self.positive_iou_threshold, self.roi_positive_ratio, self.train_rois_per_image and self.mask_shape.

        return:
            rois: (n, 4). Positive rois first, then negative rois, then zero padding up to
                  train_rois_per_image rows when fewer rois were selected.
            roi_gt_class_ids: (n). Class ids of the positive rois; zeros for negatives and padding.
            deltas: (n, 4). Box deltas of the positive rois; zeros for negatives and padding. Detached.
            roi_gt_masks_minibox: (n, mask_h, mask_w). Mask targets of the positive rois; zeros for negatives
                                  and padding. Detached.
        """
        # Remove zero padding, keeping class ids and masks aligned with the kept gt boxes.
        proposals, _ = Utils.trim_zero_graph(proposals)
        gt_boxes, non_zeros_ix = Utils.trim_zero_graph(gt_boxes)
        gt_class_ids = torch.index_select(gt_class_ids,
                                          dim=0,
                                          index=non_zeros_ix)
        gt_masks = torch.index_select(gt_masks, dim=0, index=non_zeros_ix)

        # Compute overlaps.
        overlaps = Utils.compute_overlaps(
            proposals, gt_boxes)  # (n_proposals, n_gt_boxes)

        # Determine positive and negative ROIs.
        # To every proposal, get the max IoU with all the gt boxes.
        proposal_iou_max, _ = torch.max(overlaps, dim=1)
        # Positive rois are those whose best IoU is strictly greater than the threshold.
        # Negative rois are all the others.
        positive_roi_bool = torch.gt(
            proposal_iou_max,
            torch.tensor([self.positive_iou_threshold],
                         dtype=proposal_iou_max.dtype,
                         device=proposal_iou_max.device))
        positive_ix = torch.nonzero(positive_roi_bool)
        negative_ix = torch.nonzero(~positive_roi_bool)

        # Subsample rois to make positive/all = roi_positive_ratio.
        # 1. Positive rois: at most roi_positive_ratio * train_rois_per_image of them.
        max_positive_count = int(self.roi_positive_ratio *
                                 self.train_rois_per_image)
        # TODO: Need shuffle on positive_ix and negative_ix before index selecting.
        positive_ix = positive_ix[0:max_positive_count].squeeze(1)
        # 2. Negative rois: (1/roi_positive_ratio - 1) * positive_count.
        # The count must come from the positives actually selected, not from the quota, because there may
        # be fewer positives than the quota; otherwise the positive/all ratio is not kept.
        positive_count = positive_ix.shape[0]
        negative_count = int(
            (1 / self.roi_positive_ratio - 1) * positive_count)
        negative_ix = negative_ix[0:negative_count].squeeze(1)
        # 3. Gather selected rois
        positive_rois = torch.index_select(proposals, dim=0, index=positive_ix)
        negative_rois = torch.index_select(proposals, dim=0, index=negative_ix)

        # Assign positive rois to corresponding GT boxes
        # positive overlaps: (n_positive, n_gt_boxes)
        positive_overlaps = torch.index_select(overlaps,
                                               dim=0,
                                               index=positive_ix)
        # roi_gt_box_assignment: (n_positive), best corresponding GT box ids of every ROI
        if positive_overlaps.shape[0] > 0:
            roi_gt_box_assignment = torch.argmax(positive_overlaps, dim=1).to(
                positive_overlaps.device)
        else:
            # No positive rois: use an empty index so the gathers below yield empty tensors.
            roi_gt_box_assignment = torch.tensor([], dtype=torch.int64).to(
                positive_overlaps.device)
        # roi_gt_boxes: (n_positive, 4). roi_gt_class_ids: (n_positive)
        roi_gt_boxes = torch.index_select(gt_boxes,
                                          dim=0,
                                          index=roi_gt_box_assignment)
        roi_gt_class_ids = torch.index_select(gt_class_ids,
                                              dim=0,
                                              index=roi_gt_box_assignment)

        # Compute deltas from positive_rois to roi_gt_boxes. (n_positive, 4)
        # TODO: BBOX_STD_DEV?
        deltas = Utils.compute_deltas(positive_rois, roi_gt_boxes)

        # Assign positive ROIs to corresponding GT masks, with an added channel
        # dimension: (n_positive, 1, height, width)
        permuted_gt_masks = torch.unsqueeze(gt_masks, dim=1)
        roi_gt_masks = torch.index_select(permuted_gt_masks,
                                          dim=0,
                                          index=roi_gt_box_assignment)

        # Get masks in roi boxes. (n_positive, mask_h, mask_w)
        # TODO: normalize_to_mini_mask?
        # NOTE(review): transform_coordianates presumably maps the normalized rois to the mask's
        # (height, width) coordinate frame - confirm against its definition.
        positive_rois_transformed = transform_coordianates(
            positive_rois, gt_masks.shape[1:])
        # Prefix every roi with the index of its own mask, giving rows of (index, 4 box coords):
        # roi i is cropped from roi_gt_masks[i].
        box_ids = torch.unsqueeze(torch.arange(0, roi_gt_masks.shape[0]),
                                  dim=1).to(roi_gt_masks.dtype).to(
                                      roi_gt_masks.device)
        positive_rois_transformed = torch.cat(
            [box_ids, positive_rois_transformed], dim=1)
        roi_gt_masks_minibox = ops.roi_align(roi_gt_masks,
                                             positive_rois_transformed,
                                             self.mask_shape)
        # Remove the extra dimension from masks.
        roi_gt_masks_minibox = torch.squeeze(roi_gt_masks_minibox, dim=1)
        # Round mask pixels (they have decimals because of RoiAlign) so GT masks are 0 or 1
        # to use with binary cross entropy loss.
        roi_gt_masks_minibox = torch.round(roi_gt_masks_minibox)

        # Append negative ROIs and pad zeros for negative ROIs' class ids, bbox deltas and masks.
        rois = torch.cat([positive_rois, negative_rois], dim=0)
        n_negative = negative_rois.shape[0]
        # Plain int: pad widths are sizes, not tensors.
        n_padding = max(self.train_rois_per_image - rois.shape[0], 0)
        # Targets only exist for positive rois, so they need zero rows for the negatives
        # as well as for the padding to stay aligned with `rois`.
        n_target_padding = n_padding + n_negative
        rois = torch.nn.functional.pad(rois, pad=[0, 0, 0, n_padding])
        roi_gt_class_ids = torch.nn.functional.pad(
            roi_gt_class_ids, pad=[0, n_target_padding])
        deltas = torch.nn.functional.pad(deltas,
                                         pad=[0, 0, 0, n_target_padding])
        roi_gt_masks_minibox = torch.nn.functional.pad(
            roi_gt_masks_minibox, pad=[0, 0, 0, 0, 0, n_target_padding])

        # TODO: require grad?
        deltas = deltas.detach()
        roi_gt_masks_minibox = roi_gt_masks_minibox.detach()
        return rois, roi_gt_class_ids, deltas, roi_gt_masks_minibox