Example 1
 def loss_single(self, cls_score, bbox_pred, rois, labels, label_weights,
                 bbox_targets, bbox_weights, num_total_samples, cfg):
     # classification loss
     if self.with_cls:
         labels = labels.reshape(-1)
         label_weights = label_weights.reshape(-1)
         cls_score = cls_score.permute(0, 2, 3,
                                       1).reshape(-1, self.cls_out_channels)
         loss_cls = self.loss_cls(cls_score,
                                  labels,
                                  label_weights,
                                  avg_factor=num_total_samples)
     # regression loss
     bbox_targets = bbox_targets.reshape(-1, 4)
     bbox_weights = bbox_weights.reshape(-1, 4)
     bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
     if self.use_iou_reg:
         # convert delta to bbox
         rois = rois.reshape(-1, 4)
         bbox_pred = delta2bbox(rois, bbox_pred, self.target_means,
                                self.target_stds)
         bbox_targets = delta2bbox(rois, bbox_targets, self.target_means,
                                   self.target_stds)
     loss_reg = self.loss_bbox(bbox_pred,
                               bbox_targets,
                               bbox_weights,
                               avg_factor=num_total_samples)
     if self.with_cls:
         return loss_cls, loss_reg
     return None, loss_reg
Example 2
    def regress_by_class(self, rois, label, bbox_pred, img_meta):
        """Regress the bbox for the predicted class. Used in Cascade R-CNN.

        Args:
            rois (Tensor): shape (n, 4) or (n, 5)
            label (Tensor): shape (n, )
            bbox_pred (Tensor): shape (n, 4*(#class+1)) or (n, 4)
            img_meta (dict): Image meta info.

        Returns:
            Tensor: Regressed bboxes, the same shape as input rois.
        """
        assert rois.size(1) == 4 or rois.size(1) == 5, repr(rois.shape)

        if not self.reg_class_agnostic:
            label = label * 4
            inds = torch.stack((label, label + 1, label + 2, label + 3), 1)
            bbox_pred = torch.gather(bbox_pred, 1, inds)
        assert bbox_pred.size(1) == 4

        if rois.size(1) == 4:
            new_rois = delta2bbox(rois, bbox_pred, self.target_means,
                                  self.target_stds, img_meta['img_shape'])
        else:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_meta['img_shape'])
            new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)

        return new_rois
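Every snippet in this collection decodes (dx, dy, dw, dh) regression deltas with delta2bbox. For reference, here is a minimal self-contained sketch of the usual mmdetection-style decoding these examples rely on; the function name is illustrative, and the +1 width/height convention follows the older codebases quoted here:

import math
import torch

def delta2bbox_sketch(rois, deltas, means=(0., 0., 0., 0.),
                      stds=(1., 1., 1., 1.), max_shape=None,
                      wh_ratio_clip=16 / 1000):
    # undo the target normalization applied during training
    denorm = deltas * deltas.new_tensor(stds) + deltas.new_tensor(means)
    dx, dy, dw, dh = denorm.unbind(dim=-1)
    # clamp dw/dh so exp() cannot produce degenerate boxes
    max_ratio = abs(math.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)
    # centers and sizes of the reference boxes (legacy +1 convention)
    cx = (rois[:, 0] + rois[:, 2]) * 0.5
    cy = (rois[:, 1] + rois[:, 3]) * 0.5
    w = rois[:, 2] - rois[:, 0] + 1.0
    h = rois[:, 3] - rois[:, 1] + 1.0
    # shift the centers and scale the sizes
    gx, gy = cx + w * dx, cy + h * dy
    gw, gh = w * dw.exp(), h * dh.exp()
    bboxes = torch.stack([gx - (gw - 1) * 0.5, gy - (gh - 1) * 0.5,
                          gx + (gw - 1) * 0.5, gy + (gh - 1) * 0.5], dim=-1)
    if max_shape is not None:  # clip to the image, as the img_shape argument does
        bboxes[:, 0::2] = bboxes[:, 0::2].clamp(min=0, max=max_shape[1] - 1)
        bboxes[:, 1::2] = bboxes[:, 1::2].clamp(min=0, max=max_shape[0] - 1)
    return bboxes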
Example 3
    def loss(self,
             rpn_rois,
             cls_score,
             bbox_pred,
             labels,
             label_weights,
             bbox_targets,
             bbox_weights,
             reduction_override=None):
        losses = dict()
        pos_inds = labels > 0
        pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), 4)[pos_inds]
        if len(pos_bbox_pred) > 0:
            losses['loss_bbox'] = self.loss_bbox(
                pos_bbox_pred,
                bbox_targets[pos_inds],
                bbox_weights[pos_inds],
                avg_factor=bbox_targets.size(0),
                reduction_override=reduction_override)

        avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)

        all_boxes = delta2bbox(rpn_rois[:, 1:], bbox_pred, self.target_means,
                               self.target_stds, None)
        bboxes = all_boxes[pos_inds]
        labels = labels.float()
        if len(bboxes) > 0:
            gtbboxes = delta2bbox(rpn_rois[:, 1:], bbox_targets,
                                  self.target_means, self.target_stds,
                                  None)[pos_inds]
            iou_target = bbox_overlaps(bboxes,
                                       gtbboxes,
                                       'iou',
                                       is_aligned=True)
            labels[pos_inds] = iou_target

        losses['loss_cls'] = self.loss_cls(
            cls_score,
            labels.view(-1, 1),
            label_weights.view(-1, 1),
            avg_factor=avg_factor,
            reduction_override=reduction_override)

        pred_bboxes = torch.cat([all_boxes, cls_score], dim=-1)

        if 'loss_bbox' in losses.keys():
            return dict(loss_siamese_rpn_cls=losses['loss_cls'],
                        loss_siamese_rpn_bbox=losses['loss_bbox']), \
                   pred_bboxes
        else:
            return dict(loss_siamese_rpn_cls=losses['loss_cls'],
                        loss_siamese_rpn_bbox=losses['loss_cls'].new_zeros(
                            losses['loss_cls'].shape)), \
                   pred_bboxes
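Example 3 replaces the hard classification targets of positive samples with IoU scores computed by bbox_overlaps(..., is_aligned=True). A minimal sketch of that aligned (row-to-row) IoU, assuming plain x1y1x2y2 boxes without the legacy +1 offsets:

import torch

def aligned_iou_sketch(boxes1, boxes2, eps=1e-6):
    # boxes1, boxes2: (N, 4); row i of boxes1 is compared with row i of boxes2
    lt = torch.max(boxes1[:, :2], boxes2[:, :2])  # intersection top-left
    rb = torch.min(boxes1[:, 2:], boxes2[:, 2:])  # intersection bottom-right
    wh = (rb - lt).clamp(min=0)                   # zero where boxes do not overlap
    inter = wh[:, 0] * wh[:, 1]
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    return inter / (area1 + area2 - inter + eps)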
Example 4
    def loss_single(self, anchors, cls_score, bbox_pred, centerness, labels,
                    label_weights, bbox_targets, num_total_samples, cfg):

        anchors = anchors.reshape(-1, 4)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
        centerness = centerness.permute(0, 2, 3, 1).reshape(-1)
        bbox_targets = bbox_targets.reshape(-1, 4)
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)

        # classification loss
        loss_cls = self.loss_cls(cls_score,
                                 labels,
                                 label_weights,
                                 avg_factor=num_total_samples)

        pos_inds = torch.nonzero(labels).squeeze(1)

        if len(pos_inds) > 0:
            pos_bbox_targets = bbox_targets[pos_inds]
            pos_bbox_pred = bbox_pred[pos_inds]
            pos_anchors = anchors[pos_inds]
            pos_centerness = centerness[pos_inds]

            centerness_targets = self.centerness_target(
                pos_anchors, pos_bbox_targets)
            pos_decode_bbox_pred = delta2bbox(pos_anchors, pos_bbox_pred,
                                              self.target_means,
                                              self.target_stds)
            pos_decode_bbox_targets = delta2bbox(pos_anchors, pos_bbox_targets,
                                                 self.target_means,
                                                 self.target_stds)

            # regression loss
            loss_bbox = self.loss_bbox(pos_decode_bbox_pred,
                                       pos_decode_bbox_targets,
                                       weight=centerness_targets,
                                       avg_factor=1.0)

            # centerness loss
            loss_centerness = self.loss_centerness(
                pos_centerness,
                centerness_targets,
                avg_factor=num_total_samples)

        else:
            loss_bbox = bbox_pred.sum() * 0
            loss_centerness = centerness.sum() * 0
            centerness_targets = torch.tensor(0).cuda()

        return loss_cls, loss_bbox, loss_centerness, centerness_targets.sum()
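Examples 4 and 5 weight their IoU regression loss with self.centerness_target(pos_anchors, pos_bbox_targets). A sketch of the usual ATSS-style definition, assuming the ground-truth boxes have already been decoded from their deltas (gts in x1y1x2y2):

import torch

def centerness_target_sketch(anchors, gts):
    # distances from each anchor center to the four sides of its GT box
    cx = (anchors[:, 0] + anchors[:, 2]) * 0.5
    cy = (anchors[:, 1] + anchors[:, 3]) * 0.5
    l_, t_ = cx - gts[:, 0], cy - gts[:, 1]
    r_, b_ = gts[:, 2] - cx, gts[:, 3] - cy
    left_right = torch.stack([l_, r_], dim=1)
    top_bottom = torch.stack([t_, b_], dim=1)
    # 1.0 when the anchor sits at the GT center, decaying towards the borders
    return torch.sqrt(
        (left_right.min(dim=1)[0] / left_right.max(dim=1)[0]) *
        (top_bottom.min(dim=1)[0] / top_bottom.max(dim=1)[0]))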
Example 5
    def loss_single(self, anchors, cls_score, bbox_pred, centerness, labels,
                    label_weights, bbox_targets, num_total_samples):

        anchors = anchors.reshape(-1, 4)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
        centerness = centerness.permute(0, 2, 3, 1).reshape(-1)
        bbox_targets = bbox_targets.reshape(-1, 4)
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)

        # classification loss
        pos_inds = (labels > 0).nonzero().squeeze(1)
        num_pos = len(pos_inds)
        loss_cls = sigmoid_focal_loss(cls_score, labels, self.train_cfg.gamma,
                                      self.train_cfg.alpha,
                                      'none').sum()[None] / (num_pos + 2)
        if len(pos_inds) > 0:
            pos_bbox_targets = bbox_targets[pos_inds]
            pos_bbox_pred = bbox_pred[pos_inds]
            pos_anchors = anchors[pos_inds]
            pos_centerness = centerness[pos_inds]

            centerness_targets = self.centerness_target(
                pos_anchors, pos_bbox_targets)
            pos_decode_bbox_pred = delta2bbox(pos_anchors, pos_bbox_pred,
                                              self.target_means,
                                              self.target_stds)
            pos_decode_bbox_targets = delta2bbox(pos_anchors, pos_bbox_targets,
                                                 self.target_means,
                                                 self.target_stds)

            # centerness weighted iou loss
            loss_bbox = self.loss_bbox(pos_decode_bbox_pred,
                                       pos_decode_bbox_targets,
                                       weight=centerness_targets,
                                       avg_factor=1.0)

            # centerness loss
            loss_centerness = F.binary_cross_entropy_with_logits(
                pos_centerness, centerness_targets, reduction='mean')[None]

        else:
            loss_bbox = bbox_pred.sum() * 0
            loss_centerness = centerness.sum() * 0
            centerness_targets = torch.tensor(0).cuda()

        return loss_cls, loss_bbox, loss_centerness, centerness_targets.sum()
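Example 5 calls sigmoid_focal_loss(cls_score, labels, gamma, alpha, 'none') and then normalizes by roughly the number of positives. For reference, a minimal sketch of that loss, assuming the label convention of these snippets (0 = background, foreground classes are 1-based, hence the labels - 1):

import torch
import torch.nn.functional as F

def sigmoid_focal_loss_sketch(logits, labels, gamma=2.0, alpha=0.25):
    # logits: (N, num_classes); labels: (N,) int64 with 0 = background and
    # 1..num_classes = foreground, hence the labels - 1 below
    targets = logits.new_zeros(logits.shape)
    pos = labels > 0
    targets[pos, labels[pos] - 1] = 1.0
    p = logits.sigmoid()
    pt = p * targets + (1 - p) * (1 - targets)        # prob. of the true class
    alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
    ce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
    return alpha_t * (1 - pt) ** gamma * ce           # 'none' reduction, (N, C)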
Example 6
def get_bboxes_single(cls_scores, bbox_preds, priors, img_shape, scale_factor,
                      cfg, rescale, cls_out_channels, use_sigmoid_cls,
                      target_means, target_stds):
    cls_scores = cls_scores.view(-1, cls_out_channels)
    bbox_preds = bbox_preds.view(-1, 4)
    priors = priors.view(-1, 4)
    nms_pre = cfg.get('nms_pre', -1)
    if nms_pre > 0 and cls_scores.shape[0] > nms_pre:
        if use_sigmoid_cls:
            max_scores, _ = cls_scores.max(dim=1)
        else:
            max_scores, _ = cls_scores[:, 1:].max(dim=1)
        _, topk_inds = max_scores.topk(nms_pre)
        priors = priors[topk_inds, :]
        bbox_preds = bbox_preds[topk_inds, :]
        cls_scores = cls_scores[topk_inds, :]
    mlvl_bboxes = delta2bbox(priors, bbox_preds, target_means, target_stds,
                             img_shape)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    if use_sigmoid_cls:
        padding = cls_scores.new_zeros(cls_scores.shape[0], 1)
        cls_scores = torch.cat([padding, cls_scores], dim=1)
    det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, cls_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
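Most detection-head examples here end with multiclass_nms(bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img). A minimal sketch of what such a helper does, with cfg.nms collapsed to a plain IoU threshold and torchvision's nms standing in for the per-class suppression; returned labels are 0-based foreground classes, matching the column-0-is-background convention used above:

import torch
from torchvision.ops import nms

def multiclass_nms_sketch(bboxes, scores, score_thr, iou_thr, max_per_img):
    # bboxes: (N, 4); scores: (N, num_classes) with column 0 = background
    det_bboxes, det_labels = [], []
    for cls in range(1, scores.size(1)):        # skip the background column
        mask = scores[:, cls] > score_thr
        if not mask.any():
            continue
        cls_boxes, cls_scores = bboxes[mask], scores[mask, cls]
        keep = nms(cls_boxes, cls_scores, iou_thr)   # per-class suppression
        det_bboxes.append(
            torch.cat([cls_boxes[keep], cls_scores[keep, None]], dim=1))
        det_labels.append(cls_scores.new_full((keep.numel(),), cls - 1,
                                              dtype=torch.long))
    if not det_bboxes:
        return bboxes.new_zeros((0, 5)), bboxes.new_zeros((0,), dtype=torch.long)
    det_bboxes, det_labels = torch.cat(det_bboxes), torch.cat(det_labels)
    if det_bboxes.size(0) > max_per_img:        # keep the global top-k by score
        inds = det_bboxes[:, 4].argsort(descending=True)[:max_per_img]
        det_bboxes, det_labels = det_bboxes[inds], det_labels[inds]
    return det_bboxes, det_labels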
Example 7
 def loss_shape_single(self, shape_pred, bbox_anchors, bbox_gts,
                       anchor_weights, anchor_total_num):
     shape_pred = shape_pred.permute(0, 2, 3, 1).contiguous().view(
         -1, 2)  # shape-only loss: first reshape the predictions to [[w, h], ...]
     bbox_anchors = bbox_anchors.contiguous().view(-1, 4)  # reshape the anchor boxes
     bbox_gts = bbox_gts.contiguous().view(-1, 4)  # reshape the GT boxes
     anchor_weights = anchor_weights.contiguous().view(-1, 4)  # per-anchor weights
     bbox_deltas = bbox_anchors.new_full(bbox_anchors.size(),
                                         0)  # all-zero tensor the same size as the anchors
     bbox_deltas[:, 2:] += shape_pred  # put the predictions in the w, h slots
     # filter out negative samples (zero-weight anchors) to speed up
     # weighted_bounded_iou_loss
     inds = torch.nonzero(anchor_weights[:, 0] > 0).squeeze(1)
     bbox_deltas_ = bbox_deltas[inds]
     bbox_anchors_ = bbox_anchors[inds]
     bbox_gts_ = bbox_gts[inds]
     anchor_weights_ = anchor_weights[inds]
     pred_anchors_ = delta2bbox(bbox_anchors_,
                                bbox_deltas_,
                                self.anchoring_means,
                                self.anchoring_stds,
                                wh_ratio_clip=1e-6)  # decode the actually predicted boxes
     loss_shape = self.loss_shape(
         pred_anchors_,
         bbox_gts_,
         anchor_weights_,
         avg_factor=anchor_total_num)  # loss between predicted anchors and GT
     return loss_shape
Example 8
    def get_adaptive_anchors_single(self,
                                    squares,
                                    shape_pred,
                                    center_pred,
                                    use_center_filter=False):
        """
        Args:
            square (tensor): Squares of a single level.
            shape_pred (tensor): Shape predections of a single level.
            center_pred (tensor): center predections of a single level.
            use_center_filter (list[tensor]): Use center filter or not.

        Returns:
            tuple
        """
        # calculate centeration filtering mask
        center_pred = center_pred.sigmoid().detach()
        if use_center_filter:
            center_mask = center_pred >= self.center_filter_thr
        else:
            center_mask = center_pred >= 0.0
        mask = center_mask.permute(1, 2, 0).expand(-1, -1, self.num_anchors)
        mask = mask.contiguous().view(-1)
        # calculate adaptive anchors
        squares = squares[mask]
        anchor_deltas = shape_pred.permute(1, 2, 0).contiguous().view(
            -1, 2).detach()[mask]
        bbox_deltas = anchor_deltas.new_full(squares.size(), 0)
        bbox_deltas[:, 2:] = anchor_deltas
        adaptive_anchors = delta2bbox(squares,
                                      bbox_deltas,
                                      self.anchoring_means,
                                      self.anchoring_stds,
                                      wh_ratio_clip=1e-6)
        return adaptive_anchors, mask
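Examples 7, 8, and 12 share one trick: a predicted (w, h) shape is promoted to a full (dx, dy, dw, dh) delta whose x/y entries stay zero, so the generic delta decoder resizes each square anchor around its fixed center. A tiny usage sketch, reusing the hypothetical delta2bbox_sketch from after Example 2:

import torch

squares = torch.tensor([[10., 10., 50., 50.]])  # (N, 4) base anchors
wh_pred = torch.tensor([[0.2, -0.1]])           # (N, 2) predicted (dw, dh)
deltas = squares.new_zeros(squares.size())      # zero x/y offsets
deltas[:, 2:] = wh_pred                         # only width/height change
# centers are preserved; wh_ratio_clip=1e-6 effectively disables the clamp
anchors = delta2bbox_sketch(squares, deltas, wh_ratio_clip=1e-6)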
Example 9
def get_det_bboxes(j,
                   rois,
                   scores,
                   bbox_pred,
                   img_shape,
                   scale_factor,
                   rescale=False,
                   cfg=None):
    target_means = [0., 0., 0., 0.]
    target_stds = [0.033, 0.033, 0.067, 0.067]

    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, target_means,
                            target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)

    if rescale:
        bboxes /= scale_factor

    if cfg is None:
        return bboxes, scores
    else:
        det_bboxes, det_labels = multiclass_nms(bboxes, scores,
                                                cfg.score_thr, cfg.nms,
                                                cfg.max_per_img)

        return [j, det_bboxes, det_labels]
Example 10
 def loss(self,
          cls_score,
          bbox_pred,
          labels,
          label_weights,
          bbox_targets,
          bbox_weights,
          pos_boxes,
          reduction_override=None):
     losses = dict()
     if cls_score is not None:
         avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)
         losses['loss_cls'] = self.loss_cls(
             cls_score,
             labels,
             label_weights,
             avg_factor=avg_factor,
             reduction_override=reduction_override)
         losses['acc'] = accuracy(cls_score, labels)
     if bbox_pred is not None:
         pos_inds = labels > 0
         if self.reg_class_agnostic:
             pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), 4)[pos_inds]
         else:
             pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), -1,
                                            4)[pos_inds, labels[pos_inds]]
         pos_bbox_pred = delta2bbox(pos_boxes, pos_bbox_pred,
                                    self.target_means, self.target_stds)
         losses['loss_bbox'] = self.loss_bbox(
             pos_bbox_pred,
             bbox_targets[pos_inds],
             bbox_weights[pos_inds],
             avg_factor=bbox_targets.size(0),
             reduction_override=reduction_override)
     return losses
Example 11
 def _get_det_bboxes_single(self,
                            cls_scores,
                            bbox_preds,
                            mlvl_anchors,
                            img_shape,
                            scale_factor,
                            cfg,
                            rescale=False):
     assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
     mlvl_proposals = []
     mlvl_scores = []
     for cls_score, bbox_pred, anchors in zip(cls_scores, bbox_preds,
                                              mlvl_anchors):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         cls_score = cls_score.permute(1, 2, 0).contiguous().view(
             -1, self.cls_out_channels)
         scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).contiguous().view(-1, 4)
         proposals = delta2bbox(anchors, bbox_pred, self.target_means,
                                self.target_stds, img_shape)
         mlvl_proposals.append(proposals)
         mlvl_scores.append(scores)
     mlvl_proposals = torch.cat(mlvl_proposals)
     if rescale:
         mlvl_proposals /= mlvl_proposals.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     det_bboxes, det_labels = multiclass_nms(mlvl_proposals, mlvl_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels
Example 12
 def loss_shape_single(
     self, shape_pred, bbox_anchors, bbox_gts, anchor_weights, anchor_total_num
 ):
     shape_pred = shape_pred.permute(0, 2, 3, 1).contiguous().view(-1, 2)
     bbox_anchors = bbox_anchors.contiguous().view(-1, 4)
     bbox_gts = bbox_gts.contiguous().view(-1, 4)
     anchor_weights = anchor_weights.contiguous().view(-1, 4)
     bbox_deltas = bbox_anchors.new_full(bbox_anchors.size(), 0)
     bbox_deltas[:, 2:] += shape_pred
     # filter out negative samples to speed-up weighted_bounded_iou_loss
     inds = torch.nonzero(anchor_weights[:, 0] > 0).squeeze(1)
     bbox_deltas_ = bbox_deltas[inds]
     bbox_anchors_ = bbox_anchors[inds]
     bbox_gts_ = bbox_gts[inds]
     anchor_weights_ = anchor_weights[inds]
     pred_anchors_ = delta2bbox(
         bbox_anchors_,
         bbox_deltas_,
         self.anchoring_means,
         self.anchoring_stds,
         wh_ratio_clip=1e-6,
     )
     loss_shape = self.loss_shape(
         pred_anchors_, bbox_gts_, anchor_weights_, avg_factor=anchor_total_num
     )
     return loss_shape
Example 13
    def get_track_bboxes(self,
                         rois,
                         cls_score,
                         bbox_pred,
                         img_shape,
                         scale_factor,
                         rescale=False,
                         cfg=None):
        if self.use_sigmoid_cls:
            scores = torch.sigmoid(
                cls_score) if cls_score is not None else None
        else:
            scores = F.softmax(
                cls_score,
                dim=1)[:, 1][:, None] if cls_score is not None else None
        if bbox_pred is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
                bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
                bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)

        if rescale:
            bboxes /= scale_factor

        if cfg is None:
            bboxes = torch.cat([bboxes, scores], dim=1)
            return bboxes
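        # NOTE: when cfg is not None, control falls off the end of this method
        # and it implicitly returns None; callers appear to rely on cfg=None.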
Example 14
    def get_det_bboxes(self,
                       rois,
                       cls_score,
                       bbox_pred,
                       img_shape,
                       scale_factor,
                       rescale=False,
                       cfg=None,
                       im_info=None,
                       ):
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))
        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None

        if bbox_pred is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:]
            # TODO: add clip here

        if rescale:
            bboxes /= scale_factor
            # for ct img clip black
            crop, im_scale = im_info
            crop = crop.cuda().float()
            bboxes = map_box_back(bboxes, crop[2], crop[0], im_scale)

        if cfg is None:
            return bboxes, scores
        else:
            det_bboxes, det_labels = multiclass_nms(
                bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img)

            return det_bboxes, det_labels
Example 15
    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          centernesses,
                          mlvl_anchors,
                          img_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_logits = []
        mlvl_centerness = []
        for cls_score, bbox_pred, centerness, anchors in zip(
                cls_scores, bbox_preds, centernesses, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

            logits = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
            scores = logits.sigmoid()
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * centerness[:, None]).max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                logits = logits[topk_inds, :]
                centerness = centerness[topk_inds]

            bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                                self.target_stds, img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_logits.append(logits)
            mlvl_centerness.append(centerness)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)

        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_logits = torch.cat(mlvl_logits)
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
        mlvl_centerness = torch.cat(mlvl_centerness)

        det_bboxes, det_labels, det_logits = multiclass_nms_with_logits(
            mlvl_bboxes,
            mlvl_scores,
            mlvl_logits,
            cfg.score_thr,
            cfg.nms,
            cfg.max_per_img,
            score_factors=mlvl_centerness)
        return det_bboxes, det_labels, det_logits
Example 16
    def get_det_bboxes(self,
                       rois,
                       cls_score,
                       bbox_pred,
                       img_shape,
                       scale_factor,
                       rescale=False,
                       cfg=None):
        # if isinstance(cls_score, list):
        #     cls_score = sum(cls_score) / float(len(cls_score))
        # scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
        scores = cls_score
        if bbox_pred is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
                bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
                bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)

        if rescale:
            if isinstance(scale_factor, float):
                bboxes /= scale_factor
            else:
                bboxes /= torch.from_numpy(scale_factor).to(bboxes.device)

        if cfg is None:
            return bboxes, scores
        else:
            det_bboxes, det_labels = multiclass_nms(bboxes, scores,
                                                    cfg.score_thr, cfg.nms,
                                                    cfg.max_per_img)

            return det_bboxes, det_labels
Example 17
    def get_bboxes_single_stage1(self,
                                 cls_score_list,
                                 bbox_pred_list,
                                 mlvl_anchors,
                                 img_shape,
                                 scale_factor,
                                 cfg,
                                 rescale=False):
        """
        Transform anchors to refined bboxes
        """
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
        mlvl_bboxes = []
        for cls_score, bbox_pred, anchors in zip(cls_score_list,
                                                 bbox_pred_list, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)

            bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                                self.target_stds, img_shape)
            mlvl_bboxes.append(bboxes)
        # mlvl_bboxes = torch.cat(mlvl_bboxes)
        # mlvl_bboxes is still a per-level list here, so rescale each level
        if rescale:
            mlvl_bboxes = [b / b.new_tensor(scale_factor) for b in mlvl_bboxes]
        return mlvl_bboxes
Example 18
    def get_det_bboxes(self,
                       rois,
                       cls_score,
                       bbox_pred,
                       img_shape,
                       scale_factor,
                       rescale=False,
                       cfg=None):
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))
        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None

        if bbox_pred is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:]
            # TODO: add clip here

        if rescale:
            bboxes /= scale_factor

        if cfg is None:
            return bboxes, scores
        else:
            det_bboxes, det_labels = multiclass_nms(bboxes, scores,
                                                    cfg.score_thr, cfg.nms,
                                                    cfg.max_per_img)

            return det_bboxes, det_labels
Example 19
    def get_det_bboxes(
            self,
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            roi_feats=None,  # newly added argument: used to get the feature map for each predicted box
            rescale=False,
            cfg=None):
        cls_score_for_load = cls_score
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))
        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None

        if bbox_pred is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
                bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
                bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)

        if rescale:
            if isinstance(scale_factor, float):
                bboxes /= scale_factor
            else:
                scale_factor = torch.from_numpy(scale_factor).to(bboxes.device)
                bboxes = (bboxes.view(bboxes.size(0), -1, 4) /
                          scale_factor).view(bboxes.size()[0], -1)

        if cfg is None:
            return bboxes, scores
        else:
            # NMS
            # bboxes shape: num_boxes * 8
            # scores shape: num_boxes * 2
            # det_bboxes shape: num_kept_after_NMS * 5
            # det_labels shape: num_kept_after_NMS * 1
            det_bboxes, det_labels = multiclass_nms(
                bboxes,
                scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                rois=rois,
                cls_score=cls_score_for_load,
                bbox_pred=bbox_pred,
                roi_feats=roi_feats  # newly added argument: used to get the feature map for each predicted box
            )
            # print("------------------------------------bbox_head.py--------------------------------------------------")
            # print("===bboxes:", bboxes.shape)
            # print("===scores:", scores.shape)
            # print("===det_bboxes:", det_bboxes.shape)
            # print("===det_labels:", det_labels.shape)
            # print("--------------------------------------------------------------------------------------")

            return det_bboxes, det_labels
Example 20
    def predict_weights(self, cls_score, bbox_pred, labels, label_weights, bbox_targets, bbox_weights, anchors,
                        loss_cls, loss_bbox):
        labels = labels.reshape(-1, )
        pos_inds = labels > 0
        positive_score = cls_score[pos_inds, labels[pos_inds] - 1].sigmoid()
        pos_pred = bbox_pred[pos_inds]
        pos_proposals = anchors[pos_inds]
        pos_bbox = delta2bbox(pos_proposals, pos_pred, means=self.target_means, stds=self.target_stds)
        pos_targets = bbox_targets[pos_inds]
        gt_bboxes = delta2bbox(pos_proposals, pos_targets, means=self.target_means, stds=self.target_stds)
        ious = bbox_overlaps(gt_bboxes, pos_bbox, is_aligned=True).view(-1, )
        total_ious = ious.new_full((pos_inds.numel(),), 0.0)
        total_ious[pos_inds] = ious
        total_scores = positive_score.new_full((pos_inds.numel(),), 0.0)
        total_scores[pos_inds] = positive_score
        uncertainty_prediction = self.uncertainty_predictor(
            total_ious,
            total_scores,
            loss_cls.sum(dim=1).detach().data,
            loss_bbox.detach().data
        )
        losses = dict()
        uncertainty_prediction_cls = uncertainty_prediction[:, 0]
        uncertainty_prediction_reg = uncertainty_prediction[:, 1]
        uncertainty_prediction_cls = torch.clamp(uncertainty_prediction_cls, min=self.cls_prediction_min,
                                                 max=self.cls_prediction_max)
        uncertainty_prediction_reg = torch.clamp(uncertainty_prediction_reg, min=self.reg_prediction_min,
                                                 max=self.reg_prediction_max)
        uncertainty_prediction_cls = torch.ones_like(
                uncertainty_prediction_cls) * uncertainty_prediction_cls.mean()
        losses.update({
            "loss_uncertainty_cls":
            uncertainty_prediction_cls.sum() / uncertainty_prediction_cls.numel()
            * self.uncertainty_cls_weight
        })
        losses.update({
            "loss_uncertainty_reg":
            uncertainty_prediction_reg[pos_inds].mean()
            * self.uncertainty_reg_weight
        })

        uncertainty_prediction_reg = torch.exp(-1. * uncertainty_prediction_reg)
        uncertainty_prediction_cls = torch.exp(-1. * uncertainty_prediction_cls)
        losses.update({
            "cls_prediction_pos": uncertainty_prediction_cls[pos_inds].mean(),
            "cls_prediction_neg": uncertainty_prediction_cls[~pos_inds].mean(),
            "cls_prediction_reg": uncertainty_prediction_reg[pos_inds].mean(),
        })
        bbox_weights = bbox_weights.detach().data * uncertainty_prediction_reg.view(-1, 1)
        label_weights = label_weights.detach().data * uncertainty_prediction_cls.view(-1, 1)
        return label_weights, bbox_weights, losses
Example 21
 def get_bboxes_single(self,
                       cls_score_list,
                       bbox_pred_list,
                       embed_feats_list,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
     mlvl_bboxes = []
     mlvl_scores = []
     mlvl_feats = []
     feat_chans = embed_feats_list[0].size(1)
     for cls_score, bbox_pred, feat, anchors in zip(cls_score_list,
                                                    bbox_pred_list,
                                                    embed_feats_list,
                                                    mlvl_anchors):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         # Feat: [#A, C, H, W] -> [#A*H*W, C]
         feat = feat.permute(0, 2, 3, 1).reshape(-1, feat_chans)
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             # Get maximum scores for foreground classes.
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 max_scores, _ = scores[:, 1:].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             anchors = anchors[topk_inds, :]
             bbox_pred = bbox_pred[topk_inds, :]
             scores = scores[topk_inds, :]
             feat = feat[topk_inds, :]
         bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                             self.target_stds, img_shape)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
         mlvl_feats.append(feat)
     mlvl_bboxes = torch.cat(mlvl_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     mlvl_feats = torch.cat(mlvl_feats)
     if self.use_sigmoid_cls:
         # Add a dummy background class to the front when using sigmoid
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
     det_bboxes, det_labels, det_feats = multiclass_nms_with_feat(
         mlvl_bboxes, mlvl_scores, mlvl_feats, cfg.score_thr, cfg.nms,
         cfg.max_per_img)
     return det_bboxes, det_labels, det_feats
Example 22
 def get_bboxes_single(self,
                       cls_score_list,
                       bbox_pred_list,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       gt_bboxes,
                       gt_labels,
                       rescale=False,
                       parent_scores=None):
     """
     Transform outputs for a single batch item into labeled boxes.
     """
     assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
     mlvl_bboxes = []
     mlvl_scores = []
     for idx, (cls_score, bbox_pred, anchors) in enumerate(
             zip(cls_score_list, bbox_pred_list, mlvl_anchors)):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             if parent_scores is not None and self.use_forest:
                 scores = self.get_forest_based_score(
                     cls_score, parent_scores[idx])
             else:
                 scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             # Get maximum scores for foreground classes.
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 max_scores, _ = scores[:, 1:].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             anchors = anchors[topk_inds, :]
             bbox_pred = bbox_pred[topk_inds, :]
             scores = scores[topk_inds, :]
         bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                             self.target_stds, img_shape)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
     mlvl_bboxes = torch.cat(mlvl_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     if self.use_sigmoid_cls:
         # Add a dummy background class to the front when using sigmoid
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
     det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels
Example 23
 def get_bboxes_single(
     self,
     cls_scores,
     bbox_preds,
     mlvl_anchors,
     img_shape,
     scale_factor,
     cfg,
     rescale=False,
 ):
     mlvl_proposals = []
     for idx in range(len(cls_scores)):
         rpn_cls_score = cls_scores[idx]
         rpn_bbox_pred = bbox_preds[idx]
         assert (rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:])
         anchors = mlvl_anchors[idx]
         rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
         if self.use_sigmoid_cls:
             rpn_cls_score = rpn_cls_score.reshape(-1)
             scores = rpn_cls_score.sigmoid()
         else:
             rpn_cls_score = rpn_cls_score.reshape(-1, 2)
             scores = rpn_cls_score.softmax(dim=1)[:, 1]
         rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
             _, topk_inds = scores.topk(cfg.nms_pre)
             rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
             anchors = anchors[topk_inds, :]
             scores = scores[topk_inds]
         proposals = delta2bbox(
             anchors,
             rpn_bbox_pred,
             self.target_means,
             self.target_stds,
             img_shape,
         )
         if cfg.min_bbox_size > 0:
             w = proposals[:, 2] - proposals[:, 0] + 1
             h = proposals[:, 3] - proposals[:, 1] + 1
              valid_inds = torch.nonzero((w >= cfg.min_bbox_size)
                                         & (h >= cfg.min_bbox_size)).squeeze()
             proposals = proposals[valid_inds, :]
             scores = scores[valid_inds]
         proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
         proposals, _ = nms(proposals, cfg.nms_thr)
         proposals = proposals[:cfg.nms_post, :]
         mlvl_proposals.append(proposals)
     proposals = torch.cat(mlvl_proposals, 0)
     if cfg.nms_across_levels:
         proposals, _ = nms(proposals, cfg.nms_thr)
         proposals = proposals[:cfg.max_num, :]
     else:
         scores = proposals[:, 4]
         num = min(cfg.max_num, proposals.shape[0])
         _, topk_inds = scores.topk(num)
         proposals = proposals[topk_inds, :]
     return proposals
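Examples 23, 24, 29, and 30 all pass score-augmented proposals through nms(proposals, thr), which returns the kept (x1, y1, x2, y2, score) rows plus their indices. A minimal pure-PyTorch sketch of that greedy suppression (a reference implementation, not the CUDA op these repos actually use):

import torch

def nms_sketch(dets, iou_thr):
    # dets: (N, 5) rows of (x1, y1, x2, y2, score)
    x1, y1, x2, y2, scores = dets.unbind(dim=1)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)   # legacy +1 convention
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        # IoU of the current top-scoring box with all remaining boxes
        xx1 = torch.max(x1[order[1:]], x1[i])
        yy1 = torch.max(y1[order[1:]], y1[i])
        xx2 = torch.min(x2[order[1:]], x2[i])
        yy2 = torch.min(y2[order[1:]], y2[i])
        w = (xx2 - xx1 + 1).clamp(min=0)
        h = (yy2 - yy1 + 1).clamp(min=0)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thr]   # drop boxes that overlap too much
    keep = torch.tensor(keep, dtype=torch.long, device=dets.device)
    return dets[keep], keep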
Example 24
 def get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     mlvl_proposals = []
      # iterate over each feature map's anchors and decode them back to image coordinates
     for idx in range(len(cls_scores)):
         rpn_cls_score = cls_scores[idx]
         rpn_bbox_pred = bbox_preds[idx]
         assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
         anchors = mlvl_anchors[idx]
         rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
         if self.use_sigmoid_cls:
             rpn_cls_score = rpn_cls_score.reshape(-1)
             scores = rpn_cls_score.sigmoid()
         else:
             rpn_cls_score = rpn_cls_score.reshape(-1, 2)
             scores = rpn_cls_score.softmax(dim=1)[:, 1]
         rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
          # out of the tens of thousands of boxes, keep the nms_pre (e.g. 2000) highest-scoring
         if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
             _, topk_inds = scores.topk(cfg.nms_pre)
             rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
             anchors = anchors[topk_inds, :]
             scores = scores[topk_inds]
          # convert the deltas into real bbox coordinates
         proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                                self.target_stds, img_shape)
         if cfg.min_bbox_size > 0:
             w = proposals[:, 2] - proposals[:, 0] + 1
             h = proposals[:, 3] - proposals[:, 1] + 1
             valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                        (h >= cfg.min_bbox_size)).squeeze()
             proposals = proposals[valid_inds, :]
             scores = scores[valid_inds]
          # NMS filtering
          proposals = torch.cat([proposals, scores.unsqueeze(-1)],
                                dim=-1)  # append the score, giving (x1, y1, x2, y2, score) for NMS
          proposals, _ = nms(proposals, cfg.nms_thr)
          proposals = proposals[:cfg.nms_post, :]
          mlvl_proposals.append(proposals)  # collect the boxes kept from each feature map
      # concatenate the proposal boxes from all levels
     proposals = torch.cat(mlvl_proposals, 0)
     if cfg.nms_across_levels:
         proposals, _ = nms(proposals, cfg.nms_thr)
         proposals = proposals[:cfg.max_num, :]
     else:
          # otherwise cut the proposals down to max_num (e.g. 2000) by
          # classification score (if already fewer, this only sorts them)
          scores = proposals[:, 4]
          num = min(cfg.max_num, proposals.shape[0])  # number of proposals to keep
          _, topk_inds = scores.topk(num)  # rank by score
         proposals = proposals[topk_inds, :]
     return proposals
Example 25
 def get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       obj_reps,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     # TODO:
     assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) == len(
         obj_reps)
     mlvl_bboxes = []
     mlvl_scores = []
     mlvl_reps = []
     for cls_score, bbox_pred, obj_rep, anchors in zip(
             cls_scores, bbox_preds, obj_reps, mlvl_anchors):
         assert cls_score.size()[-2:] == bbox_pred.size(
         )[-2:] == obj_rep.size()[-2:]
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         obj_rep = obj_rep.permute(1, 2, 0).reshape(-1,
                                                    self.rep_channels * 2)
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 max_scores, _ = scores[:, 1:].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             anchors = anchors[topk_inds, :]
             bbox_pred = bbox_pred[topk_inds, :]
             scores = scores[topk_inds, :]
             obj_rep = obj_rep[topk_inds, :]
         bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                             self.target_stds, img_shape)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
         mlvl_reps.append(obj_rep)
     mlvl_bboxes = torch.cat(mlvl_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(mlvl_scores)
     if self.use_sigmoid_cls:
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
     mlvl_reps = torch.cat(mlvl_reps)
     det_bboxes, det_labels, det_reps = multiclass_nms_with_extra(
         mlvl_bboxes, mlvl_scores, mlvl_reps, cfg.score_thr, cfg.nms,
         cfg.max_per_img)
     return det_bboxes, det_labels, det_reps
Example 26
    def get_bboxes(self,
                   n_batches,
                   rois,
                   cls_scores,
                   bbox_preds,
                   target_metas,
                   cfg=None,):
        assert len(cls_scores) == 1 and len(bbox_preds) == 1 and len(target_metas) == 1
        cls_scores = cls_scores[0]
        bbox_preds = bbox_preds[0]
        target_metas = target_metas[0]
        bboxes_list = [[] for _ in range(n_batches)]
        scores_list = [[] for _ in range(n_batches)]
        if isinstance(cls_scores, list):
            cls_scores = sum(cls_scores) / float(len(cls_scores))
        scores = cls_scores
        if bbox_preds is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_preds, self.target_means,
                                self.target_stds, None)
        else:
            bboxes = rois[:, 1:].clone()

        for i in range(n_batches):
            inds = torch.nonzero(rois[:, 0] == i).view(-1)
            bboxes_list[i] = bboxes[inds, :]
            img_shape = target_metas[inds[0]]['img_shape']
            bboxes_list[i][:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
            bboxes_list[i][:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)
            scores_list[i] = scores[inds, :]

        bboxes = bboxes_list
        scores = scores_list
        '''
        for roi, cls_score, bbox_pred, target_meta in zip(rois, cls_scores, bbox_preds, target_metas):
            roi = roi.view(1, -1)
            cls_score = cls_score.view(1, -1)
            bbox_pred = bbox_pred.view(1, -1)            

            if isinstance(cls_score, list):
                cls_score = sum(cls_score) / float(len(cls_score))
            #scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
            scores = cls_score
            if bbox_pred is not None:
                bboxes = delta2bbox(roi[:, 1:], bbox_pred, self.target_means,
                                    self.target_stds, img_shape)
            else:
                bboxes = roi[:, 1:].clone()
                if img_shape is not None:
                    bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
                    bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)
            bboxes_list[int(roi[0, 0])].append(bboxes)
            scores_list[int(roi[0, 0])].append(scores)
        bboxes = [torch.cat(bboxes, dim=0) for bboxes in bboxes_list]
        scores = [torch.cat(scores, dim=0) for scores in scores_list]
        '''
        return bboxes, scores
Example 27
    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          bbox_embeds,
                          mlvl_anchors,
                          img_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_embeds = []
        # no need to filter out any embeds.
        for cls_score, bbox_pred, bbox_embed, anchors in zip(
                cls_scores, bbox_preds, bbox_embeds, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)

            bbox_pred = bbox_pred.permute(1, 2, 0).contiguous().reshape(-1, 4)
            bbox_embed = bbox_embed.permute(1, 2, 0).contiguous().reshape(
                -1, self.num_classes * self.inst_embeds)
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    max_scores, _ = scores[:, 1:].max(dim=1)
                _, topk_inds = max_scores.topk(cfg.nms_pre)
                bbox_pred = bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds, :]
                bbox_embed = bbox_embed[topk_inds, :]

            bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                                self.target_stds, img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_embeds.append(bbox_embed)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            # do not use scale_factor during training.
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_embeds = torch.cat(mlvl_embeds)
        if self.use_sigmoid_cls:
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
        det_bboxes, det_labels, det_embeds = self.multiclass_nms_emb(
            mlvl_bboxes, mlvl_scores, mlvl_embeds, cfg.score_thr, cfg.nms,
            cfg.max_per_img)
        return det_bboxes, det_labels, det_embeds
Example 28
    def get_first_det_bboxes(self, rois, bbox_pred, img_shape):
        if bbox_pred is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
                bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)
                bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)

        return bboxes
Example 29
 def get_bboxes_single(
         self,
          cls_scores,  # per-feature-level results, so this also serves FPN
          bbox_preds,  # the first dim of cls_scores/bbox_preds/mlvl_anchors indexes the feature level
          mlvl_anchors,  # e.g. with 3 levels, cls_scores[0]/bbox_preds[0]/mlvl_anchors[0] go together
          img_shape,  # and likewise cls_scores[1]/bbox_preds[1]/mlvl_anchors[1], and so on
         scale_factor,
         cfg,
         rescale=False):
     mlvl_proposals = []
     for idx in range(len(cls_scores)):
         rpn_cls_score = cls_scores[idx]
         rpn_bbox_pred = bbox_preds[idx]
         assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
         anchors = mlvl_anchors[idx]
         rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
         if self.use_sigmoid_cls:
             rpn_cls_score = rpn_cls_score.reshape(-1)
             scores = rpn_cls_score.sigmoid()
         else:
             rpn_cls_score = rpn_cls_score.reshape(-1, 2)
             scores = rpn_cls_score.softmax(dim=1)[:, 1]
         rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
          # nms_pre: before NMS, keep the nms_pre highest-confidence anchors
          if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
              _, topk_inds = scores.topk(
                  cfg.nms_pre)  # indices of the nms_pre highest-scoring anchors
             rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
             anchors = anchors[topk_inds, :]
             scores = scores[topk_inds]
          # apply the deltas to the selected anchors to get proposals (x1, y1, x2, y2)
         proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                                self.target_stds, img_shape)
         if cfg.min_bbox_size > 0:
             w = proposals[:, 2] - proposals[:, 0] + 1
             h = proposals[:, 3] - proposals[:, 1] + 1
             valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                        (h >= cfg.min_bbox_size)).squeeze()
             proposals = proposals[valid_inds, :]
             scores = scores[valid_inds]
         proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
          proposals, _ = nms(proposals, cfg.nms_thr)  # run NMS with threshold nms_thr
          proposals = proposals[:cfg.nms_post, :]  # keep the top nms_post proposals by confidence
         mlvl_proposals.append(proposals)
     proposals = torch.cat(mlvl_proposals, 0)
     if cfg.nms_across_levels:
         proposals, _ = nms(proposals, cfg.nms_thr)
         proposals = proposals[:cfg.max_num, :]
     else:
         scores = proposals[:, 4]
         num = min(cfg.max_num, proposals.shape[0])
         _, topk_inds = scores.topk(num)
         proposals = proposals[topk_inds, :]
     return proposals
Example 30
 def _get_proposals_single(self, rpn_cls_scores, rpn_bbox_preds,
                           mlvl_anchors, img_shape, coo_num, cfg):
     mlvl_proposals = []
     for idx in range(len(rpn_cls_scores)):
         rpn_cls_score = rpn_cls_scores[idx]
         rpn_bbox_pred = rpn_bbox_preds[idx]
         assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
         anchors = mlvl_anchors[idx]
         if self.use_sigmoid_cls:
             rpn_cls_score = rpn_cls_score.permute(1, 2,
                                                   0).contiguous().view(-1)
             rpn_cls_prob = rpn_cls_score.sigmoid()
             scores = rpn_cls_prob
         else:
             rpn_cls_score = rpn_cls_score.permute(1, 2,
                                                   0).contiguous().view(
                                                       -1, 2)
             rpn_cls_prob = F.softmax(rpn_cls_score, dim=1)
             scores = rpn_cls_prob[:, 1]
         rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).contiguous().view(
             -1, coo_num)
         _, order = scores.sort(0, descending=True)
         if cfg.nms_pre > 0:
             order = order[:cfg.nms_pre]
             rpn_bbox_pred = rpn_bbox_pred[order, :]
             anchors = anchors[order, :]
             scores = scores[order]
          if coo_num == 4:
              proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                                     self.target_stds, img_shape)
          elif coo_num == 8:
              proposals = delta2bbox_8_coo(anchors, rpn_bbox_pred, img_shape)
         w = proposals[:, 2] - proposals[:, 0] + 1
         h = proposals[:, 3] - proposals[:, 1] + 1
         valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                    (h >= cfg.min_bbox_size)).squeeze()
         proposals = proposals[valid_inds, :]
         scores = scores[valid_inds]
         proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
         proposals, _ = nms(proposals, cfg.nms_thr)
         proposals = proposals[:cfg.nms_post, :]
         mlvl_proposals.append(proposals)
     proposals = torch.cat(mlvl_proposals, 0)
     if cfg.nms_across_levels:
         proposals, _ = nms(proposals, cfg.nms_thr)
         proposals = proposals[:cfg.max_num, :]
     else:
         scores = proposals[:, 4]
         _, order = scores.sort(0, descending=True)
         num = min(cfg.max_num, proposals.shape[0])
         order = order[:num]
         proposals = proposals[order, :]
     return proposals