Example #1
 def label_proposals(self, proposals, targets):
     proposals_with_gt = []
     self.num_boxes = np.min([len(x.proposal_boxes) for x in proposals])
     for proposals_per_image, targets_per_image in zip(proposals, targets):
         has_gt = len(targets_per_image) > 0
         _, indices = torch.sort(proposals_per_image.objectness_logits,
                                 descending=True)
         sampled_idxs = indices[:self.num_boxes]
         proposals_per_image = proposals_per_image[sampled_idxs]
         match_quality_matrix = pairwise_iou(
             targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
         matched_idxs, matched_labels = self.proposal_matcher(
             match_quality_matrix)
         gt_classes = self._label_proposals(matched_idxs, matched_labels,
                                            targets_per_image.gt_classes)
         proposals_per_image.gt_classes = gt_classes
         if has_gt:
             for (trg_name,
                  trg_value) in targets_per_image.get_fields().items():
                 if trg_name.startswith(
                         "gt_") and not proposals_per_image.has(trg_name):
                     proposals_per_image.set(trg_name,
                                             trg_value[matched_idxs])
         else:
             gt_boxes = Boxes(
                 targets_per_image.gt_boxes.tensor.new_zeros(
                     (len(sampled_idxs), 4)))
             proposals_per_image.gt_boxes = gt_boxes
         proposals_with_gt.append(proposals_per_image)
     return proposals_with_gt
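Example #1 follows the two-step pattern shared by most snippets in this collection: build an IoU matrix with pairwise_iou, then feed it to a matcher. A minimal, self-contained sketch of that pattern (the thresholds below are illustrative, not the ones used by this class):

import torch
from detectron2.structures import Boxes, pairwise_iou
from detectron2.modeling.matcher import Matcher

gt = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0]]))
proposals = Boxes(torch.tensor([[1.0, 1.0, 9.0, 9.0], [20.0, 20.0, 30.0, 30.0]]))

# Rows index ground-truth boxes, columns index proposals: shape (1, 2).
match_quality_matrix = pairwise_iou(gt, proposals)

matcher = Matcher(thresholds=[0.3, 0.7], labels=[0, -1, 1],
                  allow_low_quality_matches=False)
matched_idxs, matched_labels = matcher(match_quality_matrix)
# matched_idxs[j]  : index of the gt box assigned to proposal j
# matched_labels[j]: 1 = foreground, 0 = background, -1 = ignore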
Example #2
    def get_ground_truth(self,
                         anchors: List[Boxes],
                         gt_instances: List[Instances],
                         num_classes: int) -> Tuple[List[Tensor],
                                                    List[Tensor]]:
        """
        Extract the ground truth classes and boxes from a list of Instances objects.

        Args:
            anchors (List[Boxes]):          A list of #feature level Boxes. The Boxes contains
                                                anchors of this image on the specific feature
                                                level.
            gt_instances (List[Instances]): A list of N `Instances`s. The i-th `Instances`
                                                contains the ground-truth per-instance annotations
                                                for the i-th input image.
            num_classes (int):              The number of classes.

        Returns:
            gt_classes (List[Tensor]):          List of #img tensors. i-th element is a vector of
                                                    classes whose length is the total number of
                                                    anchors across all feature maps
                                                    (sum(Hi * Wi * A)).
                                                    Label values are in {-1, 0, ..., K}, where -1
                                                    means ignore and K means background.
            matched_gt_boxes (List[Tensor]):    i-th element is a Rx4 tensor, where R is the total
                                                    number of anchors across feature maps.
                                                    The values are the matched gt boxes for each
                                                    anchor.
                                                    Values are undefined for those anchors not
                                                    labeled as foreground.
        """
        anchors_boxes: Boxes = Boxes.cat(anchors)

        gt_classes: List[Tensor] = []
        matched_gt_boxes: List[Tensor] = []

        for gt_instance in gt_instances:
            match_quality_matrix: Tensor = pairwise_iou(gt_instance.gt_boxes,
                                                        anchors_boxes)
            matched_idxs, anchor_classes = self.anchor_matcher(match_quality_matrix)
            del match_quality_matrix

            if len(gt_instance) > 0:
                matched_gt_boxes_i: Tensor = gt_instance.gt_boxes.tensor[matched_idxs]

                gt_classes_i: Tensor = gt_instance.gt_classes[matched_idxs]

                # Anchors with class 0 are treated as background.
                gt_classes_i[anchor_classes == 0] = num_classes
                # Anchors with class -1 are ignored.
                gt_classes_i[anchor_classes == -1] = -1

            else:
                matched_gt_boxes_i = torch.zeros_like(anchors_boxes.tensor)
                gt_classes_i = torch.zeros_like(matched_idxs) + num_classes

            gt_classes.append(gt_classes_i)
            matched_gt_boxes.append(matched_gt_boxes_i)

        return gt_classes, matched_gt_boxes
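The {-1, 0, ..., K} convention described in the docstring above is typically consumed downstream through masking; a brief illustrative sketch, assuming K == num_classes:

# Illustrative use of the returned labels (not part of the original class):
gt_classes_img = gt_classes[0]                           # labels for image 0, shape (R,)
valid_mask = gt_classes_img >= 0                         # drop anchors labeled -1 (ignore)
fg_mask = valid_mask & (gt_classes_img != num_classes)   # foreground anchors only
num_foreground = fg_mask.sum().item()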
Example #3
    def get_ground_truth(self, anchors, targets):
        """
        Args:
            anchors (list[list[Boxes]]): a list of N=#image elements. Each is a
                list of #feature level Boxes. The Boxes contains anchors of
                this image on the specific feature level.
            targets (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.  Specify `targets` during training only.

        Returns:
            gt_classes (Tensor):
                An integer tensor of shape (N, R) storing ground-truth
                labels for each anchor.
                R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels.
                Anchors with an IoU with some target higher than the foreground threshold
                are assigned their corresponding label in the [0, K-1] range.
                Anchors whose IoU are below the background threshold are assigned
                the label "K". Anchors whose IoU are between the foreground and background
                thresholds are assigned a label "-1", i.e. ignore.
            gt_anchors_deltas (Tensor):
                Shape (N, R, 4).
                The last dimension represents ground-truth box2box transform
                targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box.
                The values in the tensor are meaningful only when the corresponding
                anchor is labeled as foreground.
        """
        gt_classes = []
        gt_anchors_deltas = []
        anchors = [Boxes.cat(anchors_i) for anchors_i in anchors]
        # list[Tensor(R, 4)], one for each image

        for anchors_per_image, targets_per_image in zip(anchors, targets):
            match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes,
                                                anchors_per_image)
            gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix)

            has_gt = len(targets_per_image) > 0
            if has_gt:
                # ground truth box regression
                matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs]
                gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas(
                    anchors_per_image.tensor, matched_gt_boxes.tensor)

                gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs]
                # Anchors with label 0 are treated as background.
                gt_classes_i[anchor_labels == 0] = self.num_classes
                # Anchors with label -1 are ignored.
                gt_classes_i[anchor_labels == -1] = -1
            else:
                gt_classes_i = torch.zeros_like(
                    gt_matched_idxs) + self.num_classes
                gt_anchors_reg_deltas_i = torch.zeros_like(
                    anchors_per_image.tensor)

            gt_classes.append(gt_classes_i)
            gt_anchors_deltas.append(gt_anchors_reg_deltas_i)

        return torch.stack(gt_classes), torch.stack(gt_anchors_deltas)
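Example #3 converts the matched boxes into regression targets via box2box_transform; a standalone sketch of that step using detectron2's Box2BoxTransform (the weights are illustrative):

import torch
from detectron2.modeling.box_regression import Box2BoxTransform

box2box = Box2BoxTransform(weights=(1.0, 1.0, 1.0, 1.0))
anchors = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
matched_gt = torch.tensor([[1.0, 1.0, 11.0, 11.0]])

deltas = box2box.get_deltas(anchors, matched_gt)   # (1, 4): dx, dy, dw, dh
recovered = box2box.apply_deltas(deltas, anchors)  # approximately matched_gt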
Example #4
    def get_ground_truth(self, centers: torch.Tensor, strides, init_boxes,
                         gt_instances):
        """
        Get gt according to the init box prediction.
        The labels for init boxes are generated by point-based distance matching,
        and the labels for refine boxes are generated from the init boxes in the same
        way as RetinaNet, where the init boxes are regarded as anchors.
        Args:
            centers: (X, 2), center coordinates for points in all feature levels.
            strides: (X), strides for each point in all feature levels.
            init_boxes: (N, X, 4), init box prediction.
            gt_instances (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.
        Returns:
            Tensor (N, X):
                Foreground/background label for init boxes. It is used to select positions
                where the init box regression loss is computed.
            Tensor (N, X, 4):
                Label for init boxes, will be masked by binary label above.
            Tensor (N, X):
                Classification label at all positions,
                including -1 for ignored positions, [0, self.num_classes - 1] for foreground positions,
                and self.num_classes for background positions.
            Tensor (N, X, 4):
                Label for refine boxes, only foreground positions are considered.
        """
        # the init_bbox uses point-based nearest assign, the refine_bbox uses IoU based assign
        init_objectness_labels = []
        init_bbox_labels = []
        cls_labels = []
        refine_bbox_labels = []
        for i, targets_per_image in enumerate(gt_instances):
            image_size = targets_per_image.image_size
            centers_invalid = (centers[:, 0] >= image_size[1]).logical_or(
                centers[:, 1] >= image_size[0])

            init_objectness_label, init_bbox_label = self.matcher(
                centers, strides, targets_per_image.gt_boxes)
            init_objectness_label[centers_invalid] = 0

            match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes,
                                                Boxes(init_boxes[i]))
            gt_matched_idxs, bbox_matched = self.bbox_matcher(
                match_quality_matrix)
            cls_label = targets_per_image.gt_classes[gt_matched_idxs]
            cls_label[bbox_matched == 0] = self.num_classes
            cls_label[centers_invalid] = -1
            refine_bbox_label = targets_per_image.gt_boxes[gt_matched_idxs]

            init_objectness_labels.append(init_objectness_label)
            init_bbox_labels.append(init_bbox_label)
            cls_labels.append(cls_label)
            refine_bbox_labels.append(refine_bbox_label.tensor)

        return torch.stack(init_objectness_labels), \
               torch.stack(init_bbox_labels), \
               torch.stack(cls_labels), \
               torch.stack(refine_bbox_labels)
Example #5
def match_pred_to_gt(gt_boxes, pred_boxes, iou_thresh):

    match_quality_matrix = pairwise_iou(pred_boxes, gt_boxes)
    matched_vals, matches = match_quality_matrix.max(dim=1)
    # keep predictions whose best IoU with a gt box reaches the threshold
    valid_pred_ids = matched_vals >= iou_thresh
    matched_pred_ids = np.where(valid_pred_ids)[0]

    return matched_pred_ids
Example #6
 def _match_annotations(self, image_annotations, image_predictions):
     # TODO: Evaluate the number of detected instances.
     prediction_boxes = Boxes.cat(_extract_instances_property(image_predictions, "bbox"))
     annotation_boxes = Boxes.cat(_extract_instances_property(image_annotations, "bbox"))
     match_quality_matrix = pairwise_iou(annotation_boxes, prediction_boxes)
     matched_idxs, matched_labels = self._bbox_matcher(match_quality_matrix)
     matched_image_annotations = [image_annotations[i] for i in matched_idxs]
     return matched_image_annotations, matched_labels
Example #7
    def test_pairwise_iou(self):
        boxes1, boxes2 = self.create_boxes()
        expected_ious = torch.tensor([
            [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
            [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
        ])

        ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2))
        self.assertTrue(torch.allclose(ious, expected_ious))
Example #8
    def bbox_targets(self,
                     candidate_bboxes,
                     gt_bboxes,
                     gt_labels,
                     pos_iou_thr=0.5,
                     neg_iou_thr=0.4,
                     gt_max_matching=True):
        """
        Target assignment: MaxIoU matching.
        Args:
            candidate_bboxes (Tensor): Nx4 candidate boxes (e.g. anchors) in XYXY format.
            gt_bboxes (Boxes): ground-truth boxes.
            gt_labels (Tensor): class index of each ground-truth box.
            pos_iou_thr (float): candidates whose max IoU is >= this value are foreground.
            neg_iou_thr (float): candidates whose max IoU is < this value are background.
            gt_max_matching (bool): if True, the best-overlapping candidate of each gt is
                also labeled foreground (low-quality matching).
        Returns:
            assigned_bboxes (Tensor): Nx4 matched gt box for each candidate (zeros for background).
            assigned_labels (Tensor): N class labels, with self.num_classes denoting background.
        """
        if candidate_bboxes.size(0) == 0 or gt_bboxes.tensor.size(0) == 0:
            raise ValueError('No gt or anchors')

        candidate_bboxes[:, 0].clamp_(min=0)
        candidate_bboxes[:, 1].clamp_(min=0)
        candidate_bboxes[:, 2].clamp_(min=0)
        candidate_bboxes[:, 3].clamp_(min=0)

        num_candidates = candidate_bboxes.size(0)

        overlaps = pairwise_iou(Boxes(candidate_bboxes), gt_bboxes)
        assigned_labels = overlaps.new_full((overlaps.size(0), ),
                                            self.num_classes,
                                            dtype=torch.long)

        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        max_overlaps, argmax_overlaps = overlaps.max(dim=1)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0)

        bg_inds = max_overlaps < neg_iou_thr
        assigned_labels[bg_inds] = self.num_classes

        fg_inds = max_overlaps >= pos_iou_thr
        assigned_labels[fg_inds] = gt_labels[argmax_overlaps[fg_inds]]

        if gt_max_matching:
            fg_inds = torch.nonzero(overlaps == gt_max_overlaps)[:, 0]
            assigned_labels[fg_inds] = gt_labels[argmax_overlaps[fg_inds]]

        assigned_bboxes = overlaps.new_zeros((num_candidates, 4))

        fg_inds = (assigned_labels >= 0) & (assigned_labels !=
                                            self.num_classes)
        assigned_bboxes[fg_inds] = gt_bboxes.tensor[argmax_overlaps[fg_inds]]

        return assigned_bboxes, assigned_labels
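The two max reductions in Example #8 read along different axes of the IoU matrix; a tiny sketch to make the dims concrete (values are made up):

import torch

# overlaps[i, j] = IoU(candidate i, gt j); 3 candidates, 2 gt boxes
overlaps = torch.tensor([[0.10, 0.60],
                         [0.70, 0.20],
                         [0.05, 0.30]])
max_overlaps, argmax_overlaps = overlaps.max(dim=1)        # per candidate: its best gt
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0)  # per gt: its best candidate
# max_overlaps    -> tensor([0.60, 0.70, 0.30]), shape (num_candidates,)
# gt_max_overlaps -> tensor([0.70, 0.60]),       shape (num_gts,)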
Example #9
def _build_graph(boxes, iou_threshold):
    """Build graph based on box IoU"""
    # overlaps = box_utils.bbox_overlaps(
    # boxes.astype(dtype=np.float32, copy=False),
    # boxes.astype(dtype=np.float32, copy=False))
    overlaps = pairwise_iou(Boxes(boxes), Boxes(boxes))
    overlaps = overlaps.data.cpu().numpy()

    return (overlaps > iou_threshold).astype(np.float32)
Example #10
def match_gt_to_pred(gt_boxes, pred_boxes, iou_thresh):

    match_quality_matrix = pairwise_iou(gt_boxes, pred_boxes)
    matched_vals, matches = match_quality_matrix.max(dim=1)
    valid_gt_ids = matched_vals > iou_thresh
    matched_gt_ids = np.where(valid_gt_ids)[0]
    matched_pred_ids = matches[valid_gt_ids]

    return matched_gt_ids, matched_pred_ids
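A possible way to use match_gt_to_pred above for a quick recall estimate; the boxes and the 0.5 threshold are illustrative:

import torch
from detectron2.structures import Boxes

gt_boxes = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]]))
pred_boxes = Boxes(torch.tensor([[1.0, 1.0, 9.0, 9.0]]))

matched_gt_ids, matched_pred_ids = match_gt_to_pred(gt_boxes, pred_boxes, iou_thresh=0.5)
recall = len(matched_gt_ids) / max(len(gt_boxes), 1)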
Example #11
    def label_anchors(self, anchors, gt_instances):
        """
        Args:
            anchors (list[Boxes]): A list of #feature level Boxes.
                The Boxes contains anchors of this image on the specific feature level.
            gt_instances (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.

        Returns:
            list[Tensor]:
                List of #img tensors. i-th element is a vector of labels whose length is
                the total number of anchors across all feature maps (sum(Hi * Wi * A)).
                Label values are in {-1, 0, ..., K}, where -1 means ignore and K means background.
            list[Tensor]:
                i-th element is a Rx4 tensor, where R is the total number of anchors across
                feature maps. The values are the matched gt boxes for each anchor.
                Values are undefined for those anchors not labeled as foreground.
        """
        anchors = Boxes.cat(anchors)  # Rx4

        gt_labels, gt_labels_1, gt_labels_2 = [], [], []  #change

        matched_gt_boxes = []
        for gt_per_image in gt_instances:
            match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors)
            matched_idxs, anchor_labels = self.anchor_matcher(
                match_quality_matrix)
            del match_quality_matrix

            if len(gt_per_image) > 0:
                matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs]

                gt_labels_i = gt_per_image.gt_classes[matched_idxs]
                gt_labels_i_1 = gt_per_image.gt_classes_1[matched_idxs]
                gt_labels_i_2 = gt_per_image.gt_classes_2[matched_idxs]
                # Anchors with label 0 are treated as background.
                gt_labels_i[anchor_labels == 0] = self.num_classes
                gt_labels_i_1[anchor_labels == 0] = 3
                gt_labels_i_2[anchor_labels == 0] = 3
                # Anchors with label -1 are ignored.
                gt_labels_i[anchor_labels == -1] = -1
                gt_labels_i_1[anchor_labels == -1] = -1
                gt_labels_i_2[anchor_labels == -1] = -1
            else:
                matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
                gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes
                gt_labels_i_1 = torch.zeros_like(matched_idxs) + 3
                gt_labels_i_2 = torch.zeros_like(matched_idxs) + 3

            gt_labels.append(gt_labels_i)
            gt_labels_1.append(gt_labels_i_1)
            gt_labels_2.append(gt_labels_i_2)
            matched_gt_boxes.append(matched_gt_boxes_i)

        return gt_labels, gt_labels_1, gt_labels_2, matched_gt_boxes
Example #12
    def get_ground_truth(self, points: torch.Tensor, init_boxes, gt_instances):
        object_sizes_of_interest = [
            [-1, 64],
            [64, 128],
            [128, 256],
            [256, 512],
            [512, INF],
        ]
        expanded_object_sizes_of_interest = []
        for l, points_per_level in enumerate(points):
            object_sizes_of_interest_per_level = \
                points_per_level.new_tensor(object_sizes_of_interest[l])
            expanded_object_sizes_of_interest.append(
                object_sizes_of_interest_per_level[None].expand(len(points_per_level), -1)
            )
        expanded_object_sizes_of_interest = torch.cat(expanded_object_sizes_of_interest, dim=0)

        init_gt_classes, init_reg_targets = compute_targets_for_locations(
            points, gt_instances, expanded_object_sizes_of_interest,
            self.fpn_strides, self.center_sampling_radius, self.num_classes
        )

        centers = torch.cat(points, 0)  # [X,2]

        cls_labels = []
        refine_bbox_labels = []
        for i, targets_per_image in enumerate(gt_instances):
            image_size = targets_per_image.image_size
            centers_invalid = (centers[:, 0] >= image_size[1]).logical_or(
                centers[:, 1] >= image_size[0])

            match_quality_matrix = pairwise_iou(
                targets_per_image.gt_boxes,
                Boxes(init_boxes[i]))
            gt_matched_idxs, bbox_matched = self.bbox_matcher(match_quality_matrix)
            cls_label = targets_per_image.gt_classes[gt_matched_idxs]
            cls_label[bbox_matched == 0] = self.num_classes
            cls_label[centers_invalid] = -1
            refine_bbox_label = targets_per_image.gt_boxes[gt_matched_idxs]

            # change bbox to ltrb
            refine_bbox_label = refine_bbox_label.tensor  # [X,4]
            xs, ys = centers[:, 0], centers[:, 1]
            l = xs - refine_bbox_label[:, 0]
            t = ys - refine_bbox_label[:, 1]
            r = refine_bbox_label[:, 2] - xs
            b = refine_bbox_label[:, 3] - ys
            refine_bbox_label = torch.stack([l, t, r, b], dim=1)

            cls_labels.append(cls_label)
            refine_bbox_labels.append(refine_bbox_label)

        refine_gt_classes = torch.stack(cls_labels)
        refine_reg_targets = torch.stack(refine_bbox_labels)

        return init_gt_classes, init_reg_targets, refine_gt_classes, refine_reg_targets
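For reference, the (l, t, r, b) encoding built at the end of the loop above can be inverted back to XYXY boxes; a minimal sketch (tensor names follow the code above, shown for illustration only):

# Inverse of the ltrb encoding: recover XYXY boxes from centers + distances.
x1 = centers[:, 0] - refine_bbox_label[:, 0]
y1 = centers[:, 1] - refine_bbox_label[:, 1]
x2 = centers[:, 0] + refine_bbox_label[:, 2]
y2 = centers[:, 1] + refine_bbox_label[:, 3]
boxes_xyxy = torch.stack([x1, y1, x2, y2], dim=1)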
Example #13
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, "batch size should be equal to 1"
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals["gt_boxes"]
    gt_labels = proposals["gt_classes"]
    gt_scores = proposals["gt_scores"]
    # overlaps = box_utils.bbox_overlaps(
    # all_rois.astype(dtype=np.float32, copy=False),
    # gt_boxes.astype(dtype=np.float32, copy=False))

    overlaps = pairwise_iou(Boxes(all_rois), Boxes(gt_boxes))
    overlaps = overlaps.data.cpu().numpy()

    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    # fg_inds = np.where(max_overlaps >= cfg_TRAIN_FG_THRESH)[0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg_TRAIN_FG_THRESH)[0]

    ig_inds = np.where(max_overlaps < cfg_TRAIN_BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    img_cls_loss_weights = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_probs = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_labels = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    pc_count = np.zeros(gt_boxes.shape[0], dtype=np.int32)

    for i in range(gt_boxes.shape[0]):
        po_index = np.where(gt_assignment == i)[0]
        img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
        pc_labels[i] = gt_labels[i, 0]
        pc_count[i] = len(po_index)
        pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return (
        labels,
        cls_loss_weights,
        gt_assignment,
        pc_labels,
        pc_probs,
        pc_count,
        img_cls_loss_weights,
    )
Example #14
    def label_anchors(self, anchors, gt_instances):
        """
        Args:
            anchors (list[Boxes]): A list of #feature level Boxes.
                The Boxes contains anchors of this image on the specific feature level.
            gt_instances (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.

        Returns:
            list[Tensor]:
                List of #img tensors. i-th element is a vector of labels whose length is
                the total number of anchors across all feature maps (sum(Hi * Wi * A)).
                Label values are in {-1, 0, ..., K}, where -1 means ignore and K means background.
            list[Tensor]:
                i-th element is a Rx4 tensor, where R is the total number of anchors across
                feature maps. The values are the matched gt boxes for each anchor.
                Values are undefined for those anchors not labeled as foreground.
        """
        anchors = Boxes.cat(anchors)  # Rx4
        num_anchors = anchors.tensor.shape[0]

        gt_labels, matched_gt_boxes, matched_gt_marks, matched_gt_marks_labels = [[] for _ in range(4)]

        for gt_per_image in gt_instances:
            match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors)
            matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix)
            del match_quality_matrix

            if len(gt_per_image) > 0:
                matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs]

                matched_gt_marks_iv = gt_per_image.gt_keypoints.tensor[matched_idxs]
                matched_gt_marks_i = matched_gt_marks_iv[:, :, :2].flatten(1)
                matched_gt_marks_labels_i = matched_gt_marks_iv[:, :, 2].flatten(1)
                matched_gt_marks_labels_i, _ = torch.min(matched_gt_marks_labels_i, dim=1)

                gt_labels_i = gt_per_image.gt_classes[matched_idxs]
                # Anchors with label 0 are treated as background.
                gt_labels_i[anchor_labels == 0] = self.num_classes
                # Anchors with label -1 are ignored.
                gt_labels_i[anchor_labels == -1] = -1
            else:
                matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
                gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes
                matched_gt_marks_i = torch.zeros(num_anchors, self.num_landmark * 2).to(self.device)
                matched_gt_marks_labels_i = torch.zeros(num_anchors).to(self.device)

            gt_labels.append(gt_labels_i)
            matched_gt_boxes.append(matched_gt_boxes_i)
            matched_gt_marks.append(matched_gt_marks_i)
            matched_gt_marks_labels.append(matched_gt_marks_labels_i)

        return gt_labels, matched_gt_boxes, matched_gt_marks, matched_gt_marks_labels
Example #15
    def get_transform(self, img, boxes):
        """
        Args:
            img (ndarray): of shape HxWxC(RGB). The array can be of type uint8
                in range [0, 255], or floating point in range [0, 255].
            boxes: Nx4 array-like of bounding boxes in absolute XYXY format
                (top-left and bottom-right corners).
        """
        sample_mode = (1, *self.min_ious, 0)
        h, w = img.shape[:2]

        boxes = torch.tensor(boxes)

        while True:
            mode = np.random.choice(sample_mode)
            if mode == 1:
                return NoOpTransform()

            min_iou = mode
            for _ in range(50):
                new_w = np.random.uniform(self.min_crop_size * w, w)
                new_h = np.random.uniform(self.min_crop_size * h, h)

                # h / w in [0.5, 2]
                if new_h / new_w < 0.5 or new_h / new_w > 2:
                    continue

                left = np.random.uniform(w - new_w)
                top = np.random.uniform(h - new_h)

                patch = torch.tensor([left, top, left + new_w, top + new_h],
                                     dtype=torch.int)

                overlaps = pairwise_iou(Boxes(patch.view(-1, 4)),
                                        Boxes(boxes.view(-1, 4)))

                if overlaps.min() < min_iou:
                    continue

                # center of boxes should inside the crop img
                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) *
                        (center[:, 0] < patch[2]) * (center[:, 1] < patch[3]))
                if not mask.any():
                    continue
                return CropTransform(int(left), int(top), int(new_w),
                                     int(new_h))
Example #16
def pairwise_tracker(pred1, pred2):
    boxes1 = pred1.get('pred_boxes')
    boxes2 = pred2.get('pred_boxes')

    categories1 = pred1.get('pred_classes')
    categories2 = pred2.get('pred_classes')

    boxes_overlaps = pairwise_iou(boxes1, boxes2)

    objects_overlaps = (categories1[:, None]
                        == categories2[None, :]) * boxes_overlaps

    return objects_overlaps
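One way to turn the class-masked IoU matrix from pairwise_tracker into frame-to-frame assignments is a greedy per-row argmax; a sketch (a Hungarian assignment would be the stricter alternative, and the 0.5 threshold is illustrative):

overlaps = pairwise_tracker(pred1, pred2)   # (N1, N2)
best_iou, best_j = overlaps.max(dim=1)
assigned = best_iou > 0.5
# For each i with assigned[i] == True, track pred1[i] continues as pred2[best_j[i]].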
Example #17
    def label_and_sample_proposals(self,
                                   proposals: List[Instances],
                                   targets: List[Instances],
                                   branch: str = "") -> List[Instances]:
        gt_boxes = [x.gt_boxes for x in targets]
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

        proposals_with_gt = []

        num_fg_samples = []
        num_bg_samples = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            match_quality_matrix = pairwise_iou(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
            matched_idxs, matched_labels = self.proposal_matcher(
                match_quality_matrix)
            sampled_idxs, gt_classes = self._sample_proposals(
                matched_idxs, matched_labels, targets_per_image.gt_classes)

            proposals_per_image = proposals_per_image[sampled_idxs]
            proposals_per_image.gt_classes = gt_classes

            if has_gt:
                sampled_targets = matched_idxs[sampled_idxs]
                for (trg_name,
                     trg_value) in targets_per_image.get_fields().items():
                    if trg_name.startswith(
                            "gt_") and not proposals_per_image.has(trg_name):
                        proposals_per_image.set(trg_name,
                                                trg_value[sampled_targets])
            else:
                gt_boxes = Boxes(
                    targets_per_image.gt_boxes.tensor.new_zeros(
                        (len(sampled_idxs), 4)))
                proposals_per_image.gt_boxes = gt_boxes

            num_bg_samples.append(
                (gt_classes == self.num_classes).sum().item())
            num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
            proposals_with_gt.append(proposals_per_image)

        storage = get_event_storage()
        storage.put_scalar("roi_head/num_target_fg_samples_" + branch,
                           np.mean(num_fg_samples))
        storage.put_scalar("roi_head/num_target_bg_samples_" + branch,
                           np.mean(num_bg_samples))

        return proposals_with_gt
Example #18
def test(cfg, data_loader_iter, anchors, matcher, raw_matcher):
    batched_inputs = next(data_loader_iter)
    gt_instances = [x["instances"].to(device) for x in batched_inputs]
    gt_boxes = [x.gt_boxes for x in gt_instances]
    image_sizes = [x.image_size for x in gt_instances]
    del gt_instances

    for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes):
        match_quality_matrix = pairwise_iou(gt_boxes_i, anchors)
        raw_matched_idxs, raw_gt_labels_i = raw_matcher(match_quality_matrix)
        matched_idxs, gt_labels_i = matcher(match_quality_matrix)

        import pdb
        pdb.set_trace()
Example #19
    def _filter_positive_proposals(
        self,
        proposal_boxes: Boxes,
        scores: torch.Tensor,
        gt_boxes: Boxes,
        gt_classes: torch.Tensor,
    ) -> Tuple[Boxes, torch.Tensor]:
        """Filter for desired targets for the DAG algo

        Parameters
        ----------
        proposal_boxes : Boxes
            Proposal boxes directly from RPN
        scores : torch.Tensor
            Softmaxed scores for each proposal box
        gt_boxes : Boxes
            Ground truth boxes
        gt_classes : torch.Tensor
            Ground truth classes

        Returns
        -------
        Tuple[Boxes, torch.Tensor]
            filtered_target_boxes, corresponding_class_labels
        """
        n_proposals = len(proposal_boxes)

        proposal_gt_ious = pairwise_iou(proposal_boxes, gt_boxes)

        # For each proposal_box, pair with a gt_box, i.e. find gt_box with highest IoU
        # IoU with paired gt_box, idx of paired gt_box
        paired_ious, paired_gt_idx = proposal_gt_ious.max(dim=1)

        # Filter for IoUs > 0.1
        iou_cond = paired_ious > 0.1

        # Filter for score of proposal > 0.1
        # Get class of paired gt_box
        gt_classes_repeat = gt_classes.repeat(n_proposals, 1)
        paired_gt_classes = gt_classes_repeat[torch.arange(n_proposals),
                                              paired_gt_idx]
        # Get scores of corresponding class
        paired_scores = scores[torch.arange(n_proposals), paired_gt_classes]
        score_cond = paired_scores > 0.1

        # Filter for positive proposals and their corresponding gt labels
        cond = iou_cond & score_cond

        return proposal_boxes[cond], paired_gt_classes[cond].to(self.device)
Example #20
    def _match_and_label_boxes(self, proposals, stage, targets):
        """
        Match proposals with groundtruth using the matcher at the given stage.
        Label the proposals as foreground or background based on the match.

        Args:
            proposals (list[Instances]): One Instances for each image, with
                the field "proposal_boxes".
            stage (int): the current stage
            targets (list[Instances]): the ground truth instances

        Returns:
            list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes"
        """
        num_fg_samples, num_bg_samples = [], []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            match_quality_matrix = pairwise_iou(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
            # proposal_labels are 0 or 1
            matched_idxs, proposal_labels = self.proposal_matchers[stage](
                match_quality_matrix)
            if len(targets_per_image) > 0:
                gt_classes = targets_per_image.gt_classes[matched_idxs]
                # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
                gt_classes[proposal_labels == 0] = self.num_classes
                gt_boxes = targets_per_image.gt_boxes[matched_idxs]
            else:
                gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
                gt_boxes = Boxes(
                    targets_per_image.gt_boxes.tensor.new_zeros(
                        (len(proposals_per_image), 4)))
            proposals_per_image.gt_classes = gt_classes
            proposals_per_image.gt_boxes = gt_boxes

            num_fg_samples.append((proposal_labels == 1).sum().item())
            num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])

        # Log the number of fg/bg samples in each stage
        storage = get_event_storage()
        storage.put_scalar(
            "stage{}/roi_head/num_fg_samples".format(stage),
            sum(num_fg_samples) / len(num_fg_samples),
        )
        storage.put_scalar(
            "stage{}/roi_head/num_bg_samples".format(stage),
            sum(num_bg_samples) / len(num_bg_samples),
        )
        return proposals
Example #21
    def rep_box_loss(self):
        # get positive (foreground) proposals (P+ in RepLoss paper)
        bg_class_ind = self.pred_class_logits.shape[1] - 1
        fg_inds = torch.nonzero((self.gt_classes >= 0)
                                & (self.gt_classes < bg_class_ind)).squeeze(1)

        # IOUs here also deal with regressed boxes
        boxes = Boxes(self.predict_boxes_all())

        # set of regressed boxes of positive proposals
        fg_boxes = boxes[fg_inds]

        # index of ground truth box for each positive proposal
        fg_gt_inds = self.gt_box_inds[fg_inds]

        # rep box loss: sum of smooth_ln of IoUs between regressed boxes
        # assigned to different GT targets (per the Repulsion Loss paper)
        num_gts = torch.max(self.gt_box_inds) + 1
        device = self.pred_proposal_deltas.device
        sum = torch.tensor(0.0, device=device)
        num_examples = torch.tensor(0.0, device=device)
        for i in range(num_gts):
            boxes_i = fg_boxes[fg_gt_inds == i]
            for j in range(num_gts):
                boxes_j = fg_boxes[fg_gt_inds == j]
                if i != j:
                    iou_matrix = pairwise_iou(boxes_i, boxes_j)
                    losses = smooth_ln(iou_matrix, sigma=self.rep_box_sigma)
                    sum += torch.sum(losses)
                    num_examples += 1.0

        # every i,j was counted twice
        sum /= 2.0
        num_examples /= 2.0

        if self.d2_normalize:
            # if 'Detectron2 loss' enabled:
            # as in FastRCNNOutputs:smooth_l1_loss, divide by total examples instead of total
            # foreground examples to weight each foreground example the same
            loss_rep_box = sum / self.gt_classes.numel()
        elif num_examples > 0:
            loss_rep_box = sum / num_examples
        else:
            loss_rep_box = sum  # = 0.0

        # print("loss_rep_box", loss_rep_box)
        return loss_rep_box
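smooth_ln is not shown in Example #21; a sketch of the smoothed-ln function as defined in the Repulsion Loss paper, which is presumably what is intended here:

import math
import torch

def smooth_ln(x, sigma=0.5):
    # -ln(1 - x) for x <= sigma, continued linearly beyond sigma so the
    # gradient stays bounded as the IoU approaches 1.
    return torch.where(
        x <= sigma,
        -torch.log(1 - x),
        (x - sigma) / (1 - sigma) - math.log(1 - sigma),
    )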
Example #22
    def forward_cmil(self, C, D, proposals):
        """
        Args:
            C, D: per-region score matrices with one row per box; C is normalized with a
                softmax over classes and D with a softmax over the regions of each image.
            proposals (list[Instances] or None): proposals per image, used to split C and D
                into per-image chunks and to build per-image proposal IoU matrices.

        Returns:
            Tensor: merged classification scores MC produced by roi_merge.
            Tensor: merged detection scores MD produced by roi_merge.
            Tensor: per-region scores after the two softmax normalizations, same shape as C.
            Tensor: zero box regression deltas of shape (N, num_bbox_reg_classes * box_dim).
        """

        if proposals is None:
            scores = F.softmax(C, dim=1) * F.softmax(D, dim=0)
        elif len(proposals) == 1:
            scores = F.softmax(C, dim=1) * F.softmax(D, dim=0)
        else:
            num_preds_per_image = [len(p) for p in proposals]
            scores = cat(
                [
                    F.softmax(c, dim=1) * F.softmax(d, dim=0)
                    for c, d in zip(
                        C.split(num_preds_per_image, dim=0), D.split(num_preds_per_image, dim=0)
                    )
                ],
                dim=0,
            )

        proposal_deltas = torch.zeros(
            scores.shape[0],
            self.num_bbox_reg_classes * self.box_dim,
            dtype=scores.dtype,
            device=scores.device,
            requires_grad=False,
        )

        # num_preds_per_image = [len(p) for p in proposals]
        rois_obn_score = torch.sum(scores, dim=1, keepdim=True)
        # rois_obn_score = torch.clamp(rois_obn_score, min=1e-6, max=1.0 - 1e-6)

        assert proposals
        J = cat([pairwise_iou(p.proposal_boxes, p.proposal_boxes) for p in proposals], dim=0)

        MC, MD = self.roi_merge(rois_obn_score.cpu(), J.cpu(), C.cpu(), D.cpu())
        return MC.to(C.device), MD.to(D.device), scores, proposal_deltas
Example #23
    def find_correct_detections(self, detections, ground_truths):
        detected_bbxs = detections['instances'].get('pred_boxes')
        gt_cls_ids = [
            self.internal_dataset_mapping[gt['category_id']]
            for gt in ground_truths
        ]
        gt_cls_ids = torch.tensor(gt_cls_ids).to(detected_bbxs.device)

        # To recheck and use the following condition for efficiency
        # if len(detected_bbxs)==0 or len(ground_truths)==0 or set(gt_cls_ids.tolist())==set([-1]):
        if len(detected_bbxs) == 0 or len(ground_truths) == 0:
            correct = torch.zeros((len(detected_bbxs), ), dtype=torch.bool)
            return correct

        pred_classes = detections['instances'].get('pred_classes')

        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in ground_truths if obj["iscrowd"] == 0
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)
        gt_boxes = Boxes(gt_boxes).to(detected_bbxs.device)
        gt_ann_id = [gt['id'] for gt in ground_truths]
        gt_ann_id = torch.tensor(gt_ann_id).to(detected_bbxs.device)

        correct = torch.ones(len(detections['instances']), dtype=torch.bool)
        overlaps = pairwise_iou(detected_bbxs, gt_boxes)
        max_iou, max_iou_indx = torch.max(overlaps, dim=-1)

        correct[max_iou < 0.5] = False
        correct[gt_cls_ids[max_iou_indx] != pred_classes] = False

        # Mark duplicate detections as incorrect
        # navigate through all detections and assign them to a specific annotation/class id
        detected_anns = []
        correct = correct.tolist()
        for i, (g_ann, correct_status) in enumerate(
                zip(gt_ann_id[max_iou_indx].tolist(), correct)):
            if g_ann in detected_anns:
                if correct_status: correct[i] = False
            else:
                if correct_status: detected_anns.append(g_ann)

        correct = torch.tensor(correct, dtype=torch.bool)
        return correct
Example #24
    def get_ground_truth(self, points: torch.Tensor, strides, init_boxes,
                         gt_instances):
        centers = torch.cat(points, 0)
        strides = torch.cat(strides, 0)

        init_objectness_labels = []
        init_bbox_labels = []
        cls_labels = []
        refine_bbox_labels = []
        center_scores = []
        for i, targets_per_image in enumerate(gt_instances):
            image_size = targets_per_image.image_size
            centers_invalid = (centers[:, 0] >= image_size[1]).logical_or(
                centers[:, 1] >= image_size[0])

            init_objectness_label, init_bbox_label = self.matcher(
                centers, strides, targets_per_image.gt_boxes)
            init_objectness_label[centers_invalid] = 0

            match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes,
                                                Boxes(init_boxes[i]))
            max_qualities, _ = match_quality_matrix.max(1)
            max_qualities = torch.clamp(max_qualities, min=1e-5)
            center_score, _ = (match_quality_matrix /
                               max_qualities[:, None]).max(0)
            gt_matched_idxs, bbox_matched = self.bbox_matcher(
                match_quality_matrix)
            cls_label = targets_per_image.gt_classes[gt_matched_idxs]
            cls_label[bbox_matched == 0] = self.num_classes
            cls_label[centers_invalid] = -1
            refine_bbox_label = targets_per_image.gt_boxes[gt_matched_idxs]

            init_objectness_labels.append(init_objectness_label)
            init_bbox_labels.append(init_bbox_label)
            cls_labels.append(cls_label)
            refine_bbox_labels.append(refine_bbox_label.tensor)
            center_scores.append(center_score)

        init_objectness_labels = torch.stack(init_objectness_labels)
        init_bbox_labels = torch.stack(init_bbox_labels)
        refine_gt_classes = torch.stack(cls_labels)
        refine_reg_targets = torch.stack(refine_bbox_labels)
        center_scores = torch.stack(center_scores)

        return init_objectness_labels, init_bbox_labels, refine_gt_classes, refine_reg_targets, center_scores
Example #25
    def _get_ground_truth(self):
        """
        Returns:
            gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
                total number of anchors in image i (i.e., len(anchors[i])). Label values are
                in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
        """
        gt_objectness_logits = []
        gt_anchor_deltas = []
        # Concatenate anchors from all feature maps into a single Boxes per image
        anchors = [BUABoxes.cat(anchors_i) for anchors_i in self.anchors]
        for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes,
                                                       anchors, self.gt_boxes):
            """
            image_size_i: (h, w) for the i-th image
            anchors_i: anchors for i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """
            match_quality_matrix = pairwise_iou(gt_boxes_i, anchors_i)
            matched_idxs, gt_objectness_logits_i = self.anchor_matcher(
                match_quality_matrix)

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors_i.inside_box(
                    image_size_i, self.boundary_threshold)
                gt_objectness_logits_i[~anchors_inside_image] = -1

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
            else:
                # TODO wasted computation for ignored boxes
                matched_gt_boxes = gt_boxes_i[matched_idxs]
                gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                    anchors_i.tensor, matched_gt_boxes.tensor)

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)

        return gt_objectness_logits, gt_anchor_deltas
Example #26
def get_nonzeroiou_unionboxes(boxes1, boxes2):

    iou = pairwise_iou(boxes1, boxes2)
    non_zero = (iou > 0).nonzero()
    union_boxes = []
    for i in range(non_zero.shape[0]):
        pre_union_boxes, _ = get_union_box(
            Boxes(boxes1.tensor[non_zero[i][0]:non_zero[i][0] + 1]),
            Boxes(boxes2.tensor[non_zero[i][1]:non_zero[i][1] + 1]))
        union_boxes.append(pre_union_boxes.tensor)
    if union_boxes:
        union_boxes = torch.cat(union_boxes, dim=0)
        second_boxes = boxes2.tensor[torch.sum(iou > 0, dim=0) > 0]
    else:
        device = boxes1.tensor.device
        union_boxes = torch.zeros(0, 4).to(dtype=torch.float32, device=device)
        second_boxes = torch.zeros(0, 4).to(dtype=torch.float32, device=device)

    return Boxes(union_boxes), Boxes(second_boxes)
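get_union_box is not defined in Example #26; a plausible sketch of the enclosing-box helper it is assumed to be (hypothetical, and the second return value is unused above):

import torch
from detectron2.structures import Boxes

def get_union_box(boxes1, boxes2):
    # Hypothetical helper: smallest axis-aligned box enclosing each pair of boxes.
    t1, t2 = boxes1.tensor, boxes2.tensor
    union = torch.stack([
        torch.min(t1[:, 0], t2[:, 0]),
        torch.min(t1[:, 1], t2[:, 1]),
        torch.max(t1[:, 2], t2[:, 2]),
        torch.max(t1[:, 3], t2[:, 3]),
    ], dim=1)
    return Boxes(union), None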
Example #27
    def test_pairwise_iou(self):
        boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]])

        boxes2 = torch.tensor([
            [0.0, 0.0, 1.0, 1.0],
            [0.0, 0.0, 0.5, 1.0],
            [0.0, 0.0, 1.0, 0.5],
            [0.0, 0.0, 0.5, 0.5],
            [0.5, 0.5, 1.0, 1.0],
            [0.5, 0.5, 1.5, 1.5],
        ])

        expected_ious = torch.tensor([
            [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
            [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
        ])

        ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2))

        self.assertTrue(torch.allclose(ious, expected_ious))
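The last expected value in the tests above comes directly from the IoU definition; a quick check of the arithmetic:

# boxes1 row [0, 0, 1, 1] vs boxes2 row [0.5, 0.5, 1.5, 1.5], both of area 1:
inter = 0.5 * 0.5                      # overlap is a 0.5 x 0.5 square
union = 1.0 + 1.0 - inter              # area1 + area2 - intersection
assert abs(inter / union - 0.25 / (2 - 0.25)) < 1e-9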
Example #28
def drop_duplicates(outputs):
    instances = [i for i in range(len(outputs["instances"]))]
    intersect_box = []

    for i in range(len(outputs["instances"].pred_boxes)):
        bboxes_1 = outputs["instances"].pred_boxes[i]
        for j in range(len(outputs["instances"].pred_boxes)):
            bboxes_2 = outputs["instances"].pred_boxes[j]
            if i != j:
                iou = structures.pairwise_iou(bboxes_1, bboxes_2)
                if iou > 0.3:
                    if (outputs["instances"].scores[i] <
                            outputs["instances"].scores[j]):
                        if i not in intersect_box:
                            intersect_box.append(i)

    for intersect in intersect_box:
        instances.remove(intersect)

    return instances
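The nested loop in Example #28 calls pairwise_iou once per pair of boxes; the same filtering can be done with a single call over all boxes. A rough vectorized equivalent (same 0.3 threshold; keeps the higher-scoring box of each overlapping pair):

import torch
from detectron2.structures import pairwise_iou

def drop_duplicates_vectorized(outputs, iou_thresh=0.3):
    boxes = outputs["instances"].pred_boxes
    scores = outputs["instances"].scores
    iou = pairwise_iou(boxes, boxes)
    iou.fill_diagonal_(0)                       # ignore self-overlap
    # Drop a box if it overlaps a strictly higher-scoring box.
    lower_score = scores[:, None] < scores[None, :]
    drop = ((iou > iou_thresh) & lower_score).any(dim=1)
    return torch.nonzero(~drop, as_tuple=True)[0].tolist()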
Example #29
    def label_and_sample_long_term(self, proposals, targets):
        """
        See :class:`StROIHeads.label_and_sample_proposals`.
        """

        gt_boxes = [x.gt_boxes for x in targets]
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

        targets_reference_frame = targets[0]  # == targets_per_image
        proposals_reference_frame = proposals[0]  # == proposals_per_image

        num_gts = len(targets_reference_frame)
        match_quality_matrix = pairwise_iou(
            targets_reference_frame.gt_boxes,
            proposals_reference_frame.proposal_boxes)
        matched_idxs, matched_labels = self.long_term_proposal_matcher(
            match_quality_matrix)

        sampled_idxs = list(range(self.longterm_proposals - num_gts)) + list(
            range(
                len(proposals_reference_frame) - num_gts,
                len(proposals_reference_frame)))
        proposals_reference_frame = proposals_reference_frame[sampled_idxs]

        assert num_gts
        # We index all the attributes of targets that start with "gt_"
        sampled_targets = matched_idxs[sampled_idxs]
        matched_labels = matched_labels[sampled_idxs]

        for (trg_name,
             trg_value) in targets_reference_frame.get_fields().items():
            if trg_name.startswith(
                    "gt_") and not proposals_reference_frame.has(trg_name):
                proposals_reference_frame.set(trg_name,
                                              trg_value[sampled_targets])

        mask = matched_labels == 0
        proposals_reference_frame.gt_id_track[mask] = -1

        return proposals_reference_frame
Example #30
    def _merge_overlapping(
        boxes: Boxes,
        classes: torch.LongTensor,
        relation_indexes: torch.LongTensor,
        nms_threshold: float,
    ):
        # Boxes are candidate for merging if their IoU is above a threshold
        iou_above_thres = pairwise_iou(boxes, boxes) > nms_threshold

        # Also, they have to belong to the same class to be candidates.
        # Here we treat "person subj" and "person obj" as two
        # separate classes, to avoid merging cases of "person hugs person"
        # where the two people have high overlap but must remain separate
        obj_idx = relation_indexes[1]
        obj_is_person = classes[obj_idx] == 0
        classes_tmp = classes.clone()
        classes_tmp[obj_idx[obj_is_person]] = -1
        same_class = classes_tmp[:, None] == classes_tmp[None, :]

        candidates = iou_above_thres & same_class

        keep = []
        visited = torch.full((len(boxes), ), False, dtype=torch.bool)
        relation_indexes = relation_indexes.clone()

        for old_box_idx, skip in enumerate(visited):
            if skip:
                continue
            new_box_idx = len(keep)
            keep.append(old_box_idx)

            matches = torch.nonzero(candidates[old_box_idx, :] & ~visited,
                                    as_tuple=True)[0]
            visited[matches] = True

            rel_idx_to_fix = torch.any(
                relation_indexes[:, :, None] == matches[None, None, :], dim=2)
            relation_indexes[rel_idx_to_fix] = new_box_idx

        return boxes[keep], classes[keep], relation_indexes