Exemple #1
0
    def __call__(self, bboxes, gt_bboxes):
        """Compute the weighted IoU matching cost between predictions and GTs.

        Args:
            bboxes (Tensor): Predicted boxes with unnormalized coordinates
                (x1, y1, x2, y2). Shape [num_query, 4].
            gt_bboxes (Tensor): Ground truth boxes with unnormalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].

        Returns:
            torch.Tensor: Negated pairwise overlaps scaled by ``self.weight``.
        """
        # Pairwise (not aligned) overlaps: one row per prediction, one
        # column per ground-truth box.
        pairwise_overlaps = bbox_overlaps(
            bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)
        # A higher overlap must give a lower cost. The constant offset of
        # "1 - IoU" does not affect the matching, so it is left out.
        return (-pairwise_overlaps) * self.weight
def fast_nms(multi_bboxes,
             multi_scores,
             multi_coeffs,
             score_thr,
             iou_thr,
             top_k,
             max_num=-1):
    """Fast NMS as introduced in `YOLACT <https://arxiv.org/abs/1904.02689>`_.

    Unlike traditional NMS, a detection that has itself been suppressed may
    still suppress others. This lets every detection be accepted or rejected
    independently, so the whole procedure is expressed with batched matrix
    operations.

    Args:
        multi_bboxes (Tensor): Boxes, one row per detection. The selected
            rows are viewed as (#class, top_k, 4), so 4 columns are expected.
        multi_scores (Tensor): shape (n, #class+1). The last column
            (background) is dropped before any processing.
        multi_coeffs (Tensor): Mask coefficients, one row per detection,
            gathered with the same row indices as the boxes.
        score_thr (float): Detections whose score is not above this value
            are discarded.
        iou_thr (float): A detection is discarded when its IoU with any
            higher-scored detection of the same class exceeds this value.
        top_k (int): Number of highest-scored detections kept per class
            before suppression.
        max_num (int): If positive, upper bound on the number of detections
            returned across all classes. Default: -1 (no limit).

    Returns:
        tuple: (bboxes, labels, coefficients). ``bboxes`` has shape (k, 5)
            with the score in the last column, ``labels`` holds the 0-based
            class index of each returned detection, and ``coefficients``
            holds the matching rows of ``multi_coeffs``.
    """
    # Drop the background column and go class-major: [#class, n].
    class_scores = multi_scores[:, :-1].t()
    class_scores, order = class_scores.sort(1, descending=True)

    # Per class, keep only the top_k best-scored candidates.
    order = order[:, :top_k].contiguous()
    class_scores = class_scores[:, :top_k]  # [#class, topk]
    num_classes, num_dets = order.size()
    flat_order = order.view(-1)
    boxes = multi_bboxes[flat_order, :].view(num_classes, num_dets, 4)
    coeffs = multi_coeffs[flat_order, :].view(num_classes, num_dets, -1)

    # [#class, topk, topk]; after the strict upper-triangle mask, column j
    # only contains overlaps with detections ranked before j.
    pairwise_iou = bbox_overlaps(boxes, boxes)
    pairwise_iou.triu_(diagonal=1)
    max_iou_with_better, _ = pairwise_iou.max(dim=1)

    # A detection survives when no better-ranked one overlaps it too much
    # and its own score clears the threshold.
    keep = (max_iou_with_better <= iou_thr) & (class_scores > score_thr)

    # Class index of every surviving detection.
    class_ids = torch.arange(
        num_classes, device=boxes.device)[:, None].expand_as(keep)
    class_ids = class_ids[keep]

    boxes = boxes[keep]
    coeffs = coeffs[keep]
    kept_scores = class_scores[keep]

    # Rank survivors across all classes and optionally truncate.
    kept_scores, rank = kept_scores.sort(0, descending=True)
    if max_num > 0:
        rank = rank[:max_num]
        kept_scores = kept_scores[:max_num]

    cls_dets = torch.cat([boxes[rank], kept_scores[:, None]], dim=1)
    return cls_dets, class_ids[rank], coeffs[rank]
Exemple #3
0
    def loss(self,
             cls_scores,
             bbox_preds,
             iou_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W)
            iou_preds (list[Tensor]): iou_preds for each scale
                level with shape (N, num_anchors * 1, H, W)
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (list[Tensor] | None): Specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: Dictionary with the keys ``loss_cls``,
                ``loss_bbox`` and ``loss_iou``.
        """

        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels

        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_anchors(featmap_sizes,
                                                        img_metas,
                                                        device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels,
        )
        (labels, labels_weight, bboxes_target, bboxes_weight, pos_inds,
         pos_gt_index) = cls_reg_targets

        # Regroup the per-level predictions into one 2-D tensor per image.
        cls_scores = levels_to_images(cls_scores)
        cls_scores = [
            item.reshape(-1, self.cls_out_channels) for item in cls_scores
        ]
        bbox_preds = levels_to_images(bbox_preds)
        bbox_preds = [item.reshape(-1, 4) for item in bbox_preds]
        iou_preds = levels_to_images(iou_preds)
        iou_preds = [item.reshape(-1, 1) for item in iou_preds]

        # Per-image loss of every candidate positive from the first
        # assignment; it drives the re-assignment below.
        pos_losses_list, = multi_apply(self.get_pos_loss, anchor_list,
                                       cls_scores, bbox_preds, labels,
                                       labels_weight, bboxes_target,
                                       bboxes_weight, pos_inds)

        with torch.no_grad():
            # Only the re-assigned labels and the positive counts are used
            # afterwards; the two weight outputs are discarded.
            labels, _, _, num_pos = multi_apply(
                self.paa_reassign,
                pos_losses_list,
                labels,
                labels_weight,
                bboxes_weight,
                pos_inds,
                pos_gt_index,
                anchor_list,
            )
            num_pos = sum(num_pos)

        # Keep the real positive count in ``num_pos`` so that the
        # "no positives" branch below stays reachable; the fallback value is
        # only needed to avoid a zero normaliser for the classification loss.
        cls_avg_factor = num_pos if num_pos else len(img_metas)

        # convert all tensor list to a flatten tensor
        cls_scores = torch.cat(cls_scores, 0).view(-1, cls_scores[0].size(-1))
        bbox_preds = torch.cat(bbox_preds, 0).view(-1, bbox_preds[0].size(-1))
        iou_preds = torch.cat(iou_preds, 0).view(-1, iou_preds[0].size(-1))
        labels = torch.cat(labels, 0).view(-1)
        flatten_anchors = torch.cat(
            [torch.cat(item, 0) for item in anchor_list])
        labels_weight = torch.cat(labels_weight, 0).view(-1)
        bboxes_target = torch.cat(bboxes_target,
                                  0).view(-1, bboxes_target[0].size(-1))

        # Labels inside [0, num_classes) mark positive anchors.
        pos_inds_flatten = ((labels >= 0)
                            &
                            (labels < self.num_classes)).nonzero().reshape(-1)

        losses_cls = self.loss_cls(cls_scores,
                                   labels,
                                   labels_weight,
                                   avg_factor=cls_avg_factor)
        if num_pos:
            pos_bbox_pred = self.bbox_coder.decode(
                flatten_anchors[pos_inds_flatten],
                bbox_preds[pos_inds_flatten])
            pos_bbox_target = bboxes_target[pos_inds_flatten]
            # IoU between each decoded prediction and its own target serves
            # both as the IoU-branch target and as the box-loss weight.
            iou_target = bbox_overlaps(pos_bbox_pred.detach(),
                                       pos_bbox_target,
                                       is_aligned=True)
            losses_iou = self.loss_centerness(iou_preds[pos_inds_flatten],
                                              iou_target.unsqueeze(-1),
                                              avg_factor=num_pos)
            # NOTE(review): ``eps`` is not defined in this method; it has to
            # be provided by the enclosing module.
            losses_bbox = self.loss_bbox(pos_bbox_pred,
                                         pos_bbox_target,
                                         iou_target.clamp(min=eps),
                                         avg_factor=iou_target.sum())
        else:
            # No positives: emit zero losses that still depend on the
            # predictions.
            losses_iou = iou_preds.sum() * 0
            losses_bbox = bbox_preds.sum() * 0

        return dict(loss_cls=losses_cls,
                    loss_bbox=losses_bbox,
                    loss_iou=losses_iou)
Exemple #4
0
    def score_voting(self, det_bboxes, det_labels, mlvl_bboxes,
                     mlvl_nms_scores, score_thr):
        """Refine every box that survived NMS by score voting.

        Each remaining detection is replaced by a weighted average of the
        pre-NMS candidate boxes of the same class that overlap it; the
        detection keeps its own score.

        Args:
            det_bboxes (Tensor): Remaining boxes after NMS procedure,
                with shape (k, 5), each dimension means
                (x1, y1, x2, y2, score).
            det_labels (Tensor): 0-based labels of the remaining boxes. It is
                used as a row mask over ``det_bboxes`` (presumably shape
                (k,) - confirm against the caller).
            mlvl_bboxes (Tensor): All boxes before the NMS procedure,
                with shape (num_anchors, 4).
            mlvl_nms_scores (Tensor): The scores of all boxes which is used
                in the NMS procedure, with shape (num_anchors, num_class)
            score_thr (float): The score threshold of bboxes.

        Returns:
            tuple: Usually returns a tuple containing voting results.

                - det_bboxes_voted (Tensor): Boxes after score voting, one
                    row (x1, y1, x2, y2, score) per voted detection,
                    grouped by ascending class index.
                - det_labels_voted (Tensor): 1-D tensor with the label of
                    each row of ``det_bboxes_voted``.

            When nothing is voted, two empty tensors are returned.
        """
        # (anchor index, class index) of every score above the threshold.
        candidate_mask = mlvl_nms_scores > score_thr
        candidate_mask_nozeros = candidate_mask.nonzero()
        candidate_inds = candidate_mask_nozeros[:, 0]
        candidate_labels = candidate_mask_nozeros[:, 1]
        candidate_bboxes = mlvl_bboxes[candidate_inds]
        candidate_scores = mlvl_nms_scores[candidate_mask]
        det_bboxes_voted = []
        det_labels_voted = []
        for cls in range(self.cls_out_channels):
            candidate_cls_mask = candidate_labels == cls
            if not candidate_cls_mask.any():
                continue
            candidate_cls_scores = candidate_scores[candidate_cls_mask]
            candidate_cls_bboxes = candidate_bboxes[candidate_cls_mask]
            det_cls_mask = det_labels == cls
            det_cls_bboxes = det_bboxes[det_cls_mask].view(
                -1, det_bboxes.size(-1))
            det_candidate_ious = bbox_overlaps(det_cls_bboxes[:, :4],
                                               candidate_cls_bboxes)
            for det_ind in range(len(det_cls_bboxes)):
                single_det_ious = det_candidate_ious[det_ind]
                # Only candidates that overlap the detection take part.
                pos_ious_mask = single_det_ious > 0.01
                pos_ious = single_det_ious[pos_ious_mask]
                pos_bboxes = candidate_cls_bboxes[pos_ious_mask]
                pos_scores = candidate_cls_scores[pos_ious_mask]
                # Voting weight: Gaussian-like kernel on (1 - IoU) times
                # the candidate score.
                pis = (torch.exp(-(1 - pos_ious)**2 / 0.025) *
                       pos_scores)[:, None]
                # NOTE(review): if no candidate passes the IoU floor the
                # division below is 0 / 0.
                voted_box = torch.sum(pis * pos_bboxes, dim=0) / torch.sum(
                    pis, dim=0)
                voted_score = det_cls_bboxes[det_ind][-1:][None, :]
                det_bboxes_voted.append(
                    torch.cat((voted_box[None, :], voted_score), dim=1))
                det_labels_voted.append(cls)

        if not det_bboxes_voted:
            # torch.cat() rejects an empty list; hand back empty results with
            # the dtype/device of the inputs instead of raising.
            return (det_bboxes.new_zeros((0, det_bboxes.size(-1))),
                    det_labels.new_zeros((0, )))

        det_bboxes_voted = torch.cat(det_bboxes_voted, dim=0)
        det_labels_voted = det_labels.new_tensor(det_labels_voted)
        return det_bboxes_voted, det_labels_voted
Exemple #5
0
    def get_pos_loss(self, anchors, cls_score, bbox_pred, label, label_weight,
                     bbox_target, bbox_weight, pos_inds):
        """Calculate loss of all potential positive samples obtained from first
        match process.

        Only the regression loss contributes to the result.

        Args:
            anchors (list[Tensor]): Anchors of each scale.
            cls_score (Tensor): Box scores of single image with shape
                (num_anchors, num_classes). Only used to build the empty
                result when there are no positive samples.
            bbox_pred (Tensor): Box energies / deltas of single image
                with shape (num_anchors, 4)
            label (Tensor): classification target of each anchor with
                shape (num_anchors,). Currently unused; kept for interface
                compatibility.
            label_weight (Tensor): Classification loss weight of each
                anchor with shape (num_anchors). Currently unused; kept for
                interface compatibility.
            bbox_target (dict): Regression target of each anchor with
                shape (num_anchors, 4).
            bbox_weight (Tensor): Bbox weight of each anchor with shape
                (num_anchors, 4).
            pos_inds (Tensor): Index of all positive samples got from
                first assign process.

        Returns:
            tuple[Tensor]: One-element tuple holding the unreduced loss of
                the positive samples in a single image.
        """
        if not len(pos_inds):
            return cls_score.new([]),

        # Decode the predicted deltas of the positive anchors into boxes.
        anchors_all_level = torch.cat(anchors, 0)
        pos_anchors = anchors_all_level[pos_inds]
        pos_bbox_pred = self.bbox_coder.decode(pos_anchors,
                                               bbox_pred[pos_inds])
        pos_bbox_target = bbox_target[pos_inds]
        pos_bbox_weight = bbox_weight[pos_inds]

        # reduction_override='none' requests the unreduced loss.
        loss_bbox = self.loss_bbox(pos_bbox_pred,
                                   pos_bbox_target,
                                   pos_bbox_weight,
                                   avg_factor=self.loss_cls.loss_weight,
                                   reduction_override='none')

        return loss_bbox,
    def forward_test(self, img, img_metas, proposals=None, rescale=False):
        """Run both wrapped detectors and fuse the scores of their detections.

        Both models in ``self.models`` are evaluated on the same input. For
        the first image and first class only, every pair of detections with
        non-zero IoU is described by (IoU, score of model 0, score of
        model 1) and passed to ``self.fusion``; the result replaces the score
        column of the first model's detections.

        Args:
            img: Input forwarded unchanged to both models.
            img_metas: Meta information forwarded unchanged to both models.
            proposals: Unused.
            rescale: Unused; both models are always called with
                ``rescale=True``.

        Returns:
            tuple: ``(o, x1)`` where ``o`` holds, per image and per class,
                the concatenated detections of both models (with their
                original scores) and ``x1`` is the output of the first model
                whose ``x1[0][0][:, 4]`` has been overwritten by the fused
                scores.
        """
        # TODO: Check this
        with torch.no_grad():
            x1 = self.models[0](img, img_metas, return_loss=False, rescale=True)
            x2 = self.models[1](img, img_metas, return_loss=False, rescale=True)
        # np.concatenate copies, so ``o`` is unaffected by the in-place score
        # update performed on ``x1`` at the end of this method.
        o = [[np.concatenate(r_c) for r_c in zip(*r_img)]
             for r_img in zip(x1, x2)]

        # TODO: Add center distance

        # Detections of the first class of the first image; columns 0-3 are
        # used as the box and column 4 as the score.
        dets_1 = torch.tensor(x1[0][0])
        dets_2 = torch.tensor(x2[0][0])
        K = x1[0][0].shape[0]
        N = x2[0][0].shape[0]
        num_feats = 3
        # TODO: build this buffer with torch.zeros to avoid numpy
        T = np.zeros((K, N, num_feats), dtype=np.float32)

        # T[k, n] = (IoU(k, n), score of detection k, score of detection n)
        overlaps = bbox_overlaps(dets_1[:, :4], dets_2[:, :4])
        scores_1 = dets_1[:, 4].unsqueeze(1).repeat((1, N))
        scores_2 = dets_2[:, 4].unsqueeze(1).repeat((1, K)).T
        T[:, :, 0] = overlaps
        T[:, :, 1] = scores_1
        T[:, :, 2] = scores_2

        T = torch.tensor(T).cuda()
        # Only pairs with non-zero IoU are handed to the fusion module.
        non_empty_indices = torch.nonzero(T[:, :, 0], as_tuple=True)
        non_empty_elements = T[non_empty_indices[0], non_empty_indices[1], :]

        # Shape [1, 3, 1, #non-zero]
        non_empty_elements_T = non_empty_elements.permute(1, 0)
        non_empty_elements_T = non_empty_elements_T.unsqueeze(1).unsqueeze(0)

        T_out = torch.zeros((1, K, N)).cuda()

        new_scores = self.fusion(non_empty_elements_T, T_out, non_empty_indices)
        # Detach before converting: a tensor that tracks gradients cannot be
        # turned into a numpy array.
        x1[0][0][:, 4] = new_scores.detach().cpu().numpy()

        return o, x1
    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None,
                      **kwargs):
        """Compute the training loss of the score-fusion module.

        Both models in ``self.models`` are run without gradient tracking.
        For the first image and first class, the 1000 best-scored detections
        of each model are kept, every pair of detections with non-zero IoU
        is described by (IoU, score of model 0, score of model 1) and passed
        to ``self.fusion``. The first model's detections are then assigned
        and sampled against the ground truth to build the targets for
        ``self.loss``.

        Args:
            img: Input image(s); wrapped in a list and forwarded to both
                models.
            img_metas (list): Meta information; its length is used as the
                number of images.
            gt_bboxes (list): Ground-truth boxes, indexed per image.
            gt_labels (list): Ground-truth labels, indexed per image.
            gt_bboxes_ignore (list | None): Per-image boxes to ignore;
                ``None`` is expanded to one ``None`` per image.
            gt_masks: Unused.
            proposals: Unused.
            **kwargs: Unused.

        Returns:
            dict: The entries returned by ``self.loss``.
        """
        # TODO: Check this
        with torch.no_grad():
            x1 = self.models[0]([img], [img_metas], return_loss=False, rescale=True)
            x2 = self.models[1]([img], [img_metas], return_loss=False, rescale=True)

        # TODO: Add center distance

        # TODO: This part (tensor preparation) is very slow
        # Keep the 1000 best-scored detections of each model, sorted by
        # descending score.
        x1[0][0] = x1[0][0][x1[0][0][:, 4].argsort()][::-1][:1000].copy()
        x2[0][0] = x2[0][0][x2[0][0][:, 4].argsort()][::-1][:1000].copy()

        dets_1 = torch.tensor(x1[0][0])
        dets_2 = torch.tensor(x2[0][0])
        K = x1[0][0].shape[0]
        N = x2[0][0].shape[0]
        num_feats = 3
        # TODO: build this buffer with torch.zeros (float16) to avoid numpy
        T = np.zeros((K, N, num_feats), dtype=np.float32)

        # T[k, n] = (IoU(k, n), score of detection k, score of detection n)
        overlaps = bbox_overlaps(dets_1[:, :4], dets_2[:, :4])
        scores_1 = dets_1[:, 4].unsqueeze(1).repeat((1, N))
        scores_2 = dets_2[:, 4].unsqueeze(1).repeat((1, K)).T
        T[:, :, 0] = overlaps
        T[:, :, 1] = scores_1
        T[:, :, 2] = scores_2

        T = torch.tensor(T).cuda().half()

        # A detection of model 0 that overlaps nothing would vanish from the
        # non-zero selection below; mark its last column with IoU = -1 and
        # second score = -1 so that it still yields one entry.
        non_overlapping_dets = ~overlaps.sum(dim=1).bool()
        T[non_overlapping_dets, -1, 0] = -1   # IoU -1
        T[non_overlapping_dets, -1, -1] = -1  # Score 2nd -1

        non_empty_indices = torch.nonzero(T[:, :, 0], as_tuple=True)
        non_empty_elements = T[non_empty_indices[0], non_empty_indices[1], :]

        # Shape [1, 3, 1, #non-zero]
        non_empty_elements_T = non_empty_elements.permute(1, 0)
        non_empty_elements_T = non_empty_elements_T.unsqueeze(1).unsqueeze(0)

        T_out = torch.zeros((1, K, N)).cuda().half()

        new_scores = self.fusion(non_empty_elements_T, T_out, non_empty_indices)

        # TODO: write the fused scores back into the detections:
        #   x1[0][0][:, 4] = new_scores.cpu().detach().numpy()

        bboxes = x1     # [# images, #n_classes, # n_boxes]
        losses = dict()

        num_imgs = len(img_metas)
        if gt_bboxes_ignore is None:
            gt_bboxes_ignore = [None for _ in range(num_imgs)]
        sampling_results = []
        for i in range(num_imgs):
            # Upload the first-class detections once and share them between
            # the assigner and the sampler.
            img_dets = torch.tensor(bboxes[i][0]).cuda()
            assign_result = self.assigner.assign(
                img_dets, gt_bboxes[i], gt_bboxes_ignore[i], gt_labels[i])
            sampling_result = self.sampler.sample(
                assign_result,
                img_dets,
                gt_bboxes[i],)
            sampling_results.append(sampling_result)

        bbox_targets = self.get_targets(sampling_results, gt_bboxes,
                                        gt_labels, rcnn_train_cfg=None)

        # The loss is fed with the complement of the fused scores.
        cls_score = 1 - new_scores
        loss_bbox = self.loss(cls_score, *bbox_targets)
        losses.update(loss_bbox)

        return losses