Esempio n. 1
0
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        """Build sampled anchor labels and regression offsets for a batch.

        The per-level anchors are concatenated once, then every image in the
        batch is matched against them with ``self.matcher`` and encoded with
        ``self.box_coder``. Labels are passed through ``layers.sample_labels``
        twice (first for value ``1``, then for value ``0``) so that the total
        budget is ``cfg.num_sample_anchors``.

        Args:
            anchors_list: sequence of anchor tensors, concatenated on axis 0.
            batched_gt_boxes: padded ground-truth boxes, indexed as
                ``[image, box, field]``; the first four fields are passed to
                the IoU and encoding routines.
            batched_num_gts: per-image count of valid rows in
                ``batched_gt_boxes``.

        Returns:
            A ``(labels, offsets)`` tuple, each concatenated over images on
            axis 0 and detached from the graph.
        """
        all_anchors = F.concat(anchors_list, axis=0)
        per_image_labels = []
        per_image_offsets = []

        batch_size = batched_gt_boxes.shape[0]
        for img_idx in range(batch_size):
            # Drop the padding rows of this image.
            valid_boxes = batched_gt_boxes[img_idx, :batched_num_gts[img_idx]]

            iou_matrix = layers.get_iou(valid_boxes[:, :4], all_anchors)
            matched_gt_idx, anchor_labels = self.matcher(iou_matrix)

            anchor_offsets = self.box_coder.encode(
                all_anchors, valid_boxes[matched_gt_idx, :4])

            # First pass: limit the entries labelled 1.
            # NOTE(review): presumably surplus entries are rewritten to -1
            # (the last argument) -- confirm against layers.sample_labels.
            sample_budget = self.cfg.num_sample_anchors
            positive_quota = int(sample_budget *
                                 self.cfg.positive_anchor_ratio)
            anchor_labels = layers.sample_labels(
                anchor_labels, positive_quota, 1, -1)

            # Second pass: whatever budget the positives left over goes to
            # the entries labelled 0.
            kept_positive = (anchor_labels == 1).sum().astype(np.int32)
            negative_quota = sample_budget - kept_positive
            anchor_labels = layers.sample_labels(
                anchor_labels, negative_quota, 0, -1)

            per_image_labels.append(anchor_labels)
            per_image_offsets.append(anchor_offsets)

        merged_labels = F.concat(per_image_labels, axis=0)
        merged_offsets = F.concat(per_image_offsets, axis=0)
        return merged_labels.detach(), merged_offsets.detach()
Esempio n. 2
0
    def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
        """Sample RoIs per image and build their labels and box targets.

        In evaluation mode (``self.training`` is false) the proposals are
        returned untouched together with two ``None`` placeholders.

        Args:
            rpn_rois: proposals whose column 0 holds the image index and whose
                columns 1..4 hold the box passed to IoU / encoding.
            im_info: per-image metadata; column 4 is read as the number of
                valid rows in ``gt_boxes`` for that image.
            gt_boxes: padded ground truth indexed ``[image, box, field]``;
                fields 0..3 are the box and field 4 is used as the label.

        Returns:
            ``(rois, labels, bbox_targets)`` concatenated over images on
            axis 0 and detached, or ``(rpn_rois, None, None)`` when not
            training.
        """
        if not self.training:
            return rpn_rois, None, None

        sampled_rois = []
        sampled_labels = []
        sampled_targets = []

        # Process every image of the batch independently.
        for img_idx in range(gt_boxes.shape[0]):
            valid_count = im_info[img_idx, 4].astype("int32")
            img_gt = gt_boxes[img_idx, :valid_count, :]

            # Ground-truth boxes are appended to the proposals, prefixed with
            # the image index so they share the [batch_id, x1, y1, x2, y2]
            # layout of rpn_rois.
            img_id_column = F.full((img_gt.shape[0], 1), img_idx)
            gt_as_rois = F.concat([img_id_column, img_gt[:, :4]], axis=1)
            belongs_to_img = rpn_rois[:, 0] == img_idx
            candidates = F.concat([rpn_rois[belongs_to_img], gt_as_rois])

            iou_matrix = layers.get_iou(candidates[:, 1:5], img_gt)

            best_iou = iou_matrix.max(axis=1)
            best_gt = F.argmax(iou_matrix, axis=1).astype("int32")
            roi_labels = img_gt[best_gt, 4]

            # Foreground: high IoU with a ground truth of non-negative label.
            is_fg = (best_iou >= self.cfg.fg_threshold) & (roi_labels >= 0)
            # Background: IoU inside [bg_threshold_low, bg_threshold_high).
            is_bg = ((best_iou >= self.cfg.bg_threshold_low)
                     & (best_iou < self.cfg.bg_threshold_high))

            fg_quota = int(self.cfg.num_rois * self.cfg.fg_ratio)
            fg_pick = layers.sample_mask_from_labels(is_fg, fg_quota, 1)
            # Background fills whatever the sampled foreground left unused.
            bg_quota = int(self.cfg.num_rois - fg_pick.sum())
            bg_pick = layers.sample_mask_from_labels(is_bg, bg_quota, 1)

            # Everything that was not picked as foreground gets label 0.
            roi_labels = roi_labels * fg_pick

            picked = fg_pick + bg_pick
            _, picked_idx = F.cond_take(picked == 1, picked)
            # Cap the number of kept RoIs to bound memory use.
            cap = min(self.cfg.num_rois, picked_idx.shape[0])
            picked_idx = picked_idx[:cap]

            roi_labels = roi_labels[picked_idx].astype("int32")
            kept_rois = candidates[picked_idx]
            matched_gt = img_gt[best_gt[picked_idx], :4]
            roi_targets = self.box_coder.encode(kept_rois[:, 1:5], matched_gt)
            roi_targets = roi_targets.reshape(-1, 4)

            sampled_rois.append(kept_rois)
            sampled_labels.append(roi_labels)
            sampled_targets.append(roi_targets)

        all_rois = F.concat(sampled_rois, axis=0)
        all_labels = F.concat(sampled_labels, axis=0)
        all_targets = F.concat(sampled_targets, axis=0)
        return all_rois.detach(), all_labels.detach(), all_targets.detach()
Esempio n. 3
0
    def get_ground_truth(self, anchors, batched_gt_boxes, batched_valid_gt_box_number):
        """Assign a class label and a box-regression target to every anchor.

        Runs once per image for ``self.batch_size`` images. Each anchor is
        matched to the ground-truth box with the highest IoU and classified by
        thresholds from ``self.cfg``: below ``negative_thresh`` it becomes
        background (class 0), at or above ``positive_thresh`` it takes the
        class of its matched box, and in between it is marked ``-1``.

        Args:
            anchors: anchor boxes; ``anchors.shape[0]`` is the anchor count.
            batched_gt_boxes: padded ground truth indexed
                ``[image, box, field]``; fields 0..3 are the box, field 4 is
                read as the class.
            batched_valid_gt_box_number: per-image number of valid rows in
                ``batched_gt_boxes``.

        Returns:
            ``(labels_cat, bbox_targets)``, each with a new leading axis per
            image, concatenated on axis 0 and wrapped in ``F.zero_grad``.
        """
        total_anchors = anchors.shape[0]
        labels_cat_list = []
        bbox_targets_list = []

        for b_id in range(self.batch_size):
            # Strip the padding rows of this image.
            gt_boxes = batched_gt_boxes[b_id, : batched_valid_gt_box_number[b_id]]

            # IoU matrix with anchors on axis 0 and ground-truth boxes on axis 1.
            overlaps = layers.get_iou(anchors, gt_boxes[:, :4])
            argmax_overlaps = F.argmax(overlaps, axis=1)

            # Pick, for every anchor row, the IoU at its argmax column,
            # i.e. the best IoU of that anchor.
            max_overlaps = overlaps.ai[
                F.linspace(0, total_anchors - 1, total_anchors).astype(np.int32),
                argmax_overlaps,
            ]

            # Tri-state label built arithmetically from comparison masks
            # (relies on the comparisons yielding 0/1 values):
            #   start at -1 everywhere,
            #   -> 0 where max IoU <  negative_thresh,
            #   -> 1 where max IoU >= positive_thresh,
            #   stays -1 in between.
            labels = mge.tensor([-1]).broadcast(total_anchors)
            labels = labels * (max_overlaps >= self.cfg.negative_thresh)
            labels = labels * (max_overlaps < self.cfg.positive_thresh) + (
                max_overlaps >= self.cfg.positive_thresh
            )

            # Regression targets are encoded for every anchor against its
            # best-matching box, regardless of the label.
            bbox_targets = self.box_coder.encode(
                anchors, gt_boxes.ai[argmax_overlaps, :4]
            )

            # Class of the matched box, then zeroed where labels is ~0 ...
            labels_cat = gt_boxes.ai[argmax_overlaps, 4]
            labels_cat = labels_cat * (1.0 - F.less_equal(F.abs(labels), 1e-5))
            # ... and forced to -1 where labels is ~-1.
            ignore_mask = F.less_equal(F.abs(labels + 1), 1e-5)
            labels_cat = labels_cat * (1 - ignore_mask) - ignore_mask

            # assign low_quality boxes
            # For each ground-truth box, the anchor with the highest IoU
            # (argmax over axis 0) is overwritten with that box's class and
            # targets, whatever the thresholds decided above.
            # NOTE(review): `set_ai(value)[index]` looks like the legacy
            # MegEngine indexed-assignment form -- confirm its semantics.
            if self.cfg.allow_low_quality:
                gt_argmax_overlaps = F.argmax(overlaps, axis=0)
                labels_cat = labels_cat.set_ai(gt_boxes[:, 4])[gt_argmax_overlaps]
                matched_low_bbox_targets = self.box_coder.encode(
                    anchors.ai[gt_argmax_overlaps, :], gt_boxes[:, :4]
                )
                bbox_targets = bbox_targets.set_ai(matched_low_bbox_targets)[
                    gt_argmax_overlaps, :
                ]

            # Add a leading axis so the per-image results concatenate into a batch.
            labels_cat_list.append(F.add_axis(labels_cat, 0))
            bbox_targets_list.append(F.add_axis(bbox_targets, 0))

        return (
            F.zero_grad(F.concat(labels_cat_list, axis=0)),
            F.zero_grad(F.concat(bbox_targets_list, axis=0)),
        )
Esempio n. 4
0
    def get_ground_truth(self, anchors, batched_gt_boxes, batched_num_gts):
        """Compute per-anchor class labels and box offsets for each image.

        Every image is matched against ``anchors`` with ``self.matcher``.
        Anchors the matcher labels ``1`` receive the class stored in field 4
        of their matched ground-truth box; all other matcher labels are kept
        as they are. Offsets are encoded for every anchor.

        Args:
            anchors: anchor tensor shared by all images.
            batched_gt_boxes: padded ground truth indexed
                ``[image, box, field]``.
            batched_num_gts: per-image count of valid ground-truth rows.

        Returns:
            ``(labels, offsets)`` stacked over images on a new axis 0 and
            detached from the graph.
        """
        per_image_labels = []
        per_image_offsets = []

        num_images = batched_gt_boxes.shape[0]
        for img_idx in range(num_images):
            valid_boxes = batched_gt_boxes[img_idx, :batched_num_gts[img_idx]]

            iou_matrix = layers.get_iou(valid_boxes[:, :4], anchors)
            matched_gt_idx, anchor_labels = self.matcher(iou_matrix)
            matched_boxes = valid_boxes[matched_gt_idx]

            # Replace the generic "positive" marker with the actual class id.
            is_positive = anchor_labels == 1
            anchor_labels[is_positive] = (
                matched_boxes[is_positive, 4].astype(np.int32))
            anchor_offsets = self.box_coder.encode(anchors,
                                                   matched_boxes[:, :4])

            per_image_labels.append(anchor_labels)
            per_image_offsets.append(anchor_offsets)

        stacked_labels = F.stack(per_image_labels, axis=0)
        stacked_offsets = F.stack(per_image_offsets, axis=0)
        return stacked_labels.detach(), stacked_offsets.detach()
Esempio n. 5
0
 def per_level_gt(self,
                  gt_boxes,
                  im_info,
                  anchors,
                  allow_low_quality_matches=True):
     """Label one set of anchors against one image's ground-truth boxes.

     Each anchor starts as ``cfg.ignore_label``, becomes ``0`` when its best
     IoU is below ``cfg.rpn_negative_overlap`` and ``1`` when its best IoU
     reaches ``cfg.rpn_positive_overlap``. With
     ``allow_low_quality_matches`` the best anchor of every ground-truth box
     is additionally forced to be positive for that box.

     Args:
         gt_boxes: ground-truth rows for a single image; the first four
             columns are used as the box.
         im_info: metadata of that image; element 4 is used as the number
             of valid rows in ``gt_boxes``.
         anchors: anchor boxes to label.
         allow_low_quality_matches: when true, guarantee that every
             ground-truth box has at least its best anchor as a positive.

     Returns:
         ``(labels, bbox_targets)`` for all anchors.
     """
     ignore_label = self.cfg.ignore_label
     # get the gt boxes
     valid_gt_boxes = gt_boxes[:im_info[4], :]
     # compute the iou matrix
     # (anchors on axis 0, ground-truth boxes on axis 1)
     overlaps = layers.get_iou(anchors, valid_gt_boxes[:, :4])
     # match the dtboxes
     a_shp0 = anchors.shape[0]
     max_overlaps = F.max(overlaps, axis=1)
     argmax_overlaps = F.argmax(overlaps, axis=1)
     # all ignore
     labels = mge.ones(a_shp0).astype("int32") * ignore_label
     # set negative ones
     # (multiplying by the comparison mask turns anchors below the negative
     # threshold into 0 and leaves the rest at ignore_label)
     labels = labels * (max_overlaps >= self.cfg.rpn_negative_overlap)
     # set positive ones
     fg_mask = max_overlaps >= self.cfg.rpn_positive_overlap
     const_one = mge.tensor(1.0)
     if allow_low_quality_matches:
         # make sure that max iou of gt matched
         # For every ground-truth column take its best anchor (argmax over
         # axis 0), point that anchor at the box and overwrite its max IoU
         # with 1.0 before the foreground mask is recomputed.
         # NOTE(review): `set_ai(value)[index]` and `shapeof` look like the
         # legacy symbolic MegEngine API -- confirm their semantics.
         gt_argmax_overlaps = F.argmax(overlaps, axis=0)
         num_valid_boxes = valid_gt_boxes.shapeof(0)
         gt_id = F.linspace(0, num_valid_boxes - 1,
                            num_valid_boxes).astype("int32")
         argmax_overlaps = argmax_overlaps.set_ai(gt_id)[gt_argmax_overlaps]
         max_overlaps = max_overlaps.set_ai(
             const_one.broadcast(num_valid_boxes))[gt_argmax_overlaps]
         fg_mask = max_overlaps >= self.cfg.rpn_positive_overlap
     # set positive ones
     # (gather the indices where fg_mask holds and write 1 at those positions)
     _, fg_mask_ind = F.cond_take(fg_mask == 1, fg_mask)
     labels = labels.set_ai(const_one.broadcast(
         fg_mask_ind.shapeof(0)))[fg_mask_ind]
     # compute the targets
     # (every anchor is encoded against its assigned ground-truth box)
     bbox_targets = self.box_coder.encode(
         anchors, valid_gt_boxes.ai[argmax_overlaps, :4])
     return labels, bbox_targets
Esempio n. 6
0
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        """Assign labels, box offsets and centerness targets to anchor points.

        For every image and every ground-truth box, ``cfg.anchor_topk``
        candidates are selected per pyramid level by center distance. A
        candidate is kept as foreground when its IoU reaches the box-specific
        threshold ``mean + std`` of the candidate IoUs and the point lies
        inside the box. Each anchor is then matched to the box with the
        highest remaining IoU; anchors without any match get label 0.

        Args:
            anchors_list: one anchor tensor per pyramid level, aligned with
                ``cfg.stride``. NOTE(review): the code treats each as
                two-column center points (it builds boxes from
                ``point -/+ stride * anchor_scale / 2``) -- confirm.
            batched_gt_boxes: padded ground truth indexed
                ``[image, box, field]``; fields 0..3 are the box, field 4 is
                the class.
            batched_num_gts: per-image count of valid ground-truth rows.

        Returns:
            ``(labels, offsets, ctrness)`` stacked over images on a new
            axis 0 and detached from the graph.
        """
        labels_list = []
        offsets_list = []
        ctrness_list = []

        all_level_anchors = F.concat(anchors_list, axis=0)
        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]
            # Box centers depend only on the ground truth, so compute them
            # once per image instead of once per pyramid level.
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2

            ious = []
            candidate_idxs = []
            # Offset of the current level inside the concatenated anchors.
            base = 0
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                # IoU between each box and a square of side
                # stride * anchor_scale centered on each anchor point.
                ious.append(
                    layers.get_iou(
                        gt_boxes[:, :4],
                        F.concat([
                            anchors_i - stride * self.cfg.anchor_scale / 2,
                            anchors_i + stride * self.cfg.anchor_scale / 2,
                        ],
                                 axis=1)))
                # Euclidean distance between every box center and every
                # anchor point of this level.
                distances = F.sqrt(
                    F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2,
                          axis=2))
                # NOTE(review): F.topk is called without `descending`, which
                # presumably selects the smallest distances -- confirm.
                _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
                candidate_idxs.append(base + topk_idxs)
                base += anchors_i.shape[0]
            ious = F.concat(ious, axis=1)
            candidate_idxs = F.concat(candidate_idxs, axis=1)

            # Adaptive per-box threshold: mean + std of the candidate IoUs.
            candidate_ious = F.gather(ious, 1, candidate_idxs)
            ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                        F.std(candidate_ious, axis=1, keepdims=True))
            # Foreground = is a candidate AND passes the adaptive threshold.
            is_foreground = F.scatter(
                F.zeros(ious.shape), 1, candidate_idxs,
                F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

            # A point is inside a box when the smallest of its encoded
            # offsets to that box is positive.
            is_in_boxes = F.min(self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
                                axis=2) > 0

            # Invalidate every (box, anchor) pair that failed either test so
            # that the argmax below only picks valid matches.
            ious[~is_foreground] = -1
            ious[~is_in_boxes] = -1

            match_indices = F.argmax(ious, axis=0)
            gt_boxes_matched = gt_boxes[match_indices]
            anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

            labels = gt_boxes_matched[:, 4].astype(np.int32)
            # Anchors whose best IoU is still the -1 sentinel matched nothing.
            labels[anchor_max_iou == -1] = 0
            offsets = self.point_coder.encode(all_level_anchors,
                                              gt_boxes_matched[:, :4])

            # Centerness: sqrt(min(l, r) / max(l, r) * min(t, b) / max(t, b)),
            # with each ratio clipped at 0.
            left_right = offsets[:, [0, 2]]
            top_bottom = offsets[:, [1, 3]]
            ctrness = F.sqrt(
                F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                       lower=0) *
                F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                       lower=0))

            labels_list.append(labels)
            offsets_list.append(offsets)
            ctrness_list.append(ctrness)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
            F.stack(ctrness_list, axis=0).detach(),
        )
Esempio n. 7
0
    def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes,
                   im_info):
        # pylint: disable=too-many-statements
        """Compute the positive-bag and negative-bag losses for a batch.

        For every ground-truth box a bag of ``cfg.bucket_size`` anchors with
        the highest IoU is built. The positive loss rewards bags in which at
        least one anchor both classifies and localises the object well. The
        negative loss penalises confident predictions that are not explained
        by any ground-truth box.

        Args:
            anchors: anchor boxes shared by all images.
            pred_logits: classification logits indexed
                ``[image, anchor, class]``.
            pred_offsets: predicted box offsets indexed ``[image, anchor, 4]``.
            gt_boxes: padded ground truth indexed ``[image, box, field]``;
                fields 0..3 are the box, field 4 is the class id with 0
                reserved for background.
            im_info: per-image metadata; column 4 is the number of valid
                ground-truth rows.

        Returns:
            A dict with the keys ``"total_loss"``, ``"pos_loss"`` and
            ``"neg_loss"``.
        """
        def positive_bag_loss(logits, axis=1):
            """Return ``-log`` of the weighted mean of ``logits`` over ``axis``.

            Each entry is weighted by ``1 / (1 - logit)``, normalised over
            ``axis``, so the highest-scoring entries dominate the mean.
            """
            # NOTE(review): a score of exactly 1.0 makes this denominator
            # zero; consider clamping it if that can occur in practice.
            weight = 1.0 / (1.0 - logits)
            weight /= weight.sum(axis=axis, keepdims=True)
            # Reduce over the same axis the weights were normalised on
            # (this used to be hard-coded to 1, ignoring `axis`).
            bag_prob = (weight * logits).sum(axis=axis)
            return -layers.safelog(bag_prob)

        def negative_bag_loss(logits, gamma):
            """Return the focal-style penalty ``p**gamma * -log(1 - p)``."""
            return (logits**gamma) * (-layers.safelog(1.0 - logits))

        pred_scores = F.sigmoid(pred_logits)
        box_prob_list = []
        positive_losses = []
        clamp_eps = 1e-7
        bucket_size = self.cfg.bucket_size

        for bid in range(im_info.shape[0]):
            # Valid ground-truth rows of this image.
            boxes_info = gt_boxes[bid, :im_info[bid, 4].astype("int32")]
            # id 0 is used for background classes, so -1 first
            labels = boxes_info[:, 4].astype("int32") - 1

            # Localisation quality of the *predicted* boxes; detached so the
            # resulting probabilities act as constants.
            pred_box = self.box_coder.decode(anchors,
                                             pred_offsets[bid]).detach()
            overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()
            # Rescale IoU linearly from [thresh1, per-box max IoU] to [0, 1];
            # thresh2 is kept strictly above thresh1 to avoid dividing by 0.
            thresh1 = self.cfg.box_iou_threshold
            thresh2 = F.clip(overlaps.max(axis=1, keepdims=True),
                             lower=thresh1 + clamp_eps,
                             upper=1.0)
            gt_pred_prob = F.clip((overlaps - thresh1) / (thresh2 - thresh1),
                                  lower=0,
                                  upper=1.0)

            # Per-(anchor, class) probability that the prediction is
            # explained by some ground-truth box of this image.
            image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
            # guarantee that nonzero_idx is not empty
            if gt_pred_prob.max() > clamp_eps:
                _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
                # since nonzeros is only 1 dim, use num_anchor to get real indices
                num_anchors = gt_pred_prob.shape[1]
                anchors_idx = nonzero_idx % num_anchors
                gt_idx = nonzero_idx // num_anchors
                image_boxes_prob[anchors_idx,
                                 labels[gt_idx]] = gt_pred_prob[gt_idx,
                                                                anchors_idx]

            box_prob_list.append(image_boxes_prob)

            # construct bags for objects
            # (the bucket_size anchors with the highest IoU per box)
            match_quality_matrix = layers.get_iou(boxes_info[:, :4],
                                                  anchors).detach()
            num_gt = match_quality_matrix.shape[0]
            _, matched_idx = F.topk(
                match_quality_matrix,
                k=bucket_size,
                descending=True,
                no_sort=True,
            )

            matched_idx = matched_idx.detach()
            matched_idx_flatten = matched_idx.reshape(-1)
            # One class index per (box, bag slot), used to pick the score of
            # the box's own class from every anchor in its bag.
            gather_idx = labels.reshape(-1, 1)
            gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))

            gather_src = pred_scores[bid, matched_idx_flatten]
            gather_src = gather_src.reshape(num_gt, bucket_size, -1)
            matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

            # Regression targets of every bag anchor against its own box.
            topk_anchors = anchors[matched_idx_flatten]
            boxes_broad_cast = F.broadcast_to(
                F.expand_dims(boxes_info[:, :4], axis=1),
                (num_gt, bucket_size, 4)).reshape(-1, 4)

            matched_offsets = self.box_coder.encode(topk_anchors,
                                                    boxes_broad_cast)

            reg_loss = layers.smooth_l1_loss(
                pred_offsets[bid, matched_idx_flatten],
                matched_offsets,
                beta=self.cfg.smooth_l1_beta).sum(
                    axis=-1) * self.cfg.reg_loss_weight
            # Turn the regression loss into a localisation score in (0, 1].
            matched_reg_scores = F.exp(-reg_loss)

            # Bag score = classification score * localisation score.
            positive_losses.append(
                positive_bag_loss(matched_score *
                                  matched_reg_scores.reshape(-1, bucket_size),
                                  axis=1))

        # Normalise by the number of ground-truth boxes in the batch
        # (at least 1 to avoid dividing by zero).
        num_foreground = im_info[:, 4].sum()
        pos_loss = F.concat(positive_losses).sum() / F.maximum(
            1.0, num_foreground)
        box_probs = F.stack(box_prob_list, axis=0)

        # Scores already explained by a ground-truth box are discounted by
        # (1 - box_probs) before the negative penalty is applied.
        neg_loss = negative_bag_loss(
            pred_scores *
            (1 - box_probs), self.cfg.focal_loss_gamma).sum() / F.maximum(
                1.0, num_foreground * bucket_size)

        alpha = self.cfg.focal_loss_alpha
        pos_loss = pos_loss * alpha
        neg_loss = neg_loss * (1 - alpha)
        loss_dict = {
            "total_loss": pos_loss + neg_loss,
            "pos_loss": pos_loss,
            "neg_loss": neg_loss,
        }
        return loss_dict
0
    def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
        """Sample RoIs per image and build labels and box targets.

        In evaluation mode (``self.training`` is false) the proposals are
        returned unchanged together with two ``None`` placeholders. In
        training mode ``cfg.batch_per_gpu`` images are processed; ground-truth
        boxes are appended to the proposals, each RoI is matched to a
        ground-truth box (using a separate "ignore" IoU where it wins), and
        foreground / background RoIs are sampled.

        Args:
            rpn_rois: proposals whose column 0 is the image index and columns
                1..4 the box.
            im_info: per-image metadata; column 4 is used as the number of
                valid rows in ``gt_boxes``.
            gt_boxes: padded ground truth indexed ``[image, box, field]``;
                fields 0..3 are the box, field 4 is the label.

        Returns:
            ``(rois, labels, bbox_targets)`` concatenated over images on
            axis 0 and wrapped in ``F.zero_grad``, or
            ``(rpn_rois, None, None)`` when not training.
        """
        if not self.training:
            return rpn_rois, None, None

        return_rois = []
        return_labels = []
        return_bbox_targets = []

        # get per image proposals and gt_boxes
        for bid in range(self.cfg.batch_per_gpu):
            num_valid_boxes = im_info[bid, 4]
            gt_boxes_per_img = gt_boxes[bid, :num_valid_boxes, :]
            # Column of image indices so the ground-truth boxes can be laid
            # out like the proposals.
            batch_inds = mge.ones((gt_boxes_per_img.shapeof(0), 1)) * bid
            # if config.proposal_append_gt:
            # (ground-truth boxes are always appended here)
            gt_rois = F.concat([batch_inds, gt_boxes_per_img[:, :4]], axis=1)
            # Select the proposals that belong to this image.
            batch_roi_mask = rpn_rois[:, 0] == bid
            _, batch_roi_inds = F.cond_take(batch_roi_mask == 1, batch_roi_mask)
            # all_rois : [batch_id, x1, y1, x2, y2]
            all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois])

            # Two IoU matrices are returned when return_ignore=True.
            # NOTE(review): presumably the first is against regular boxes and
            # the second against ignore-labelled boxes -- confirm in
            # layers.get_iou.
            overlaps_normal, overlaps_ignore = layers.get_iou(
                all_rois[:, 1:5], gt_boxes_per_img, return_ignore=True,
            )

            max_overlaps_normal = overlaps_normal.max(axis=1)
            gt_assignment_normal = F.argmax(overlaps_normal, axis=1)

            max_overlaps_ignore = overlaps_ignore.max(axis=1)
            gt_assignment_ignore = F.argmax(overlaps_ignore, axis=1)

            # Use the "ignore" match only for RoIs that are not foreground by
            # the normal IoU and whose ignore IoU is strictly larger.
            ignore_assign_mask = (max_overlaps_normal < self.cfg.fg_threshold) * (
                max_overlaps_ignore > max_overlaps_normal
            )
            # Arithmetic select between the two variants using the mask
            # (relies on the mask being 0/1 valued).
            max_overlaps = (
                max_overlaps_normal * (1 - ignore_assign_mask)
                + max_overlaps_ignore * ignore_assign_mask
            )
            gt_assignment = (
                gt_assignment_normal * (1 - ignore_assign_mask)
                + gt_assignment_ignore * ignore_assign_mask
            )
            gt_assignment = gt_assignment.astype("int32")
            labels = gt_boxes_per_img.ai[gt_assignment, 4]

            # ---------------- get the fg/bg labels for each roi ---------------#
            # Foreground: IoU >= fg_threshold and the matched label is not the
            # ignore label.
            fg_mask = (max_overlaps >= self.cfg.fg_threshold) * (
                labels != self.cfg.ignore_label
            )
            # Background: IoU inside [bg_threshold_low, bg_threshold_high).
            bg_mask = (max_overlaps < self.cfg.bg_threshold_high) * (
                max_overlaps >= self.cfg.bg_threshold_low
            )

            # NOTE(review): not cast to int, so this may be fractional --
            # confirm _bernoulli_sample_masks accepts that.
            num_fg_rois = self.cfg.num_rois * self.cfg.fg_ratio

            fg_inds_mask = self._bernoulli_sample_masks(fg_mask, num_fg_rois, 1)
            # Background gets whatever budget the sampled foreground left.
            num_bg_rois = self.cfg.num_rois - fg_inds_mask.sum()
            bg_inds_mask = self._bernoulli_sample_masks(bg_mask, num_bg_rois, 1)

            # RoIs not sampled as foreground get label 0.
            labels = labels * fg_inds_mask

            keep_mask = fg_inds_mask + bg_inds_mask
            _, keep_inds = F.cond_take(keep_mask == 1, keep_mask)
            # Cap the number of kept RoIs at cfg.num_rois to bound memory use.
            keep_inds = keep_inds[: F.minimum(self.cfg.num_rois, keep_inds.shapeof(0))]
            # labels
            labels = labels.ai[keep_inds].astype("int32")
            rois = all_rois.ai[keep_inds]
            # Regression targets of the kept RoIs against their matched boxes.
            target_boxes = gt_boxes_per_img.ai[gt_assignment.ai[keep_inds], :4]
            bbox_targets = self.box_coder.encode(rois[:, 1:5], target_boxes)
            bbox_targets = bbox_targets.reshape(-1, 4)

            return_rois.append(rois)
            return_labels.append(labels)
            return_bbox_targets.append(bbox_targets)

        return (
            F.zero_grad(F.concat(return_rois, axis=0)),
            F.zero_grad(F.concat(return_labels, axis=0)),
            F.zero_grad(F.concat(return_bbox_targets, axis=0)),
        )