Example #1
    def sim_loss(self, bboxes_list, nongt_inds_list, rel_feats_list):
        sim_losses = []
        num_img = len(bboxes_list)
        sim_avg_factor = 0.
        for img_id in range(num_img):
            # bbox pre-processing for each image
            gt_bboxes, nongt_bboxes, valid = self._bboxes_preprocess(
                bboxes_list[img_id], nongt_inds_list[img_id])
            if not valid:
                sim_losses.append(self._zero_loss(rel_feats_list, img_id))
                continue
            num_gts = gt_bboxes.size(0)
            num_nongts = nongt_bboxes.size(0)

            # nongt positive indexes
            nongt_iou_mat = bbox_overlaps(nongt_bboxes, gt_bboxes)
            nongt_iof_mat = bbox_overlaps(nongt_bboxes, gt_bboxes, mode='iof')
            nongt_max_iou, nongt_argmax_iou = nongt_iou_mat.max(dim=1)
            nongt_iof = nongt_iof_mat[torch.arange(nongt_bboxes.size(0)),
                                      nongt_argmax_iou]
            nongt_pt_inds = (nongt_iof >= self.min_iof) & (
                nongt_max_iou >= 0.1) & (nongt_max_iou < 0.5)
            nongt_pos_inds = (nongt_max_iou >= 0.5) | nongt_pt_inds

            # similarity matrix
            sim_mat_list = []
            for rel_feat in rel_feats_list:
                rel_norm = F.normalize(rel_feat[img_id], dim=1)
                nongt_rel_norm = F.normalize(
                    rel_feat[img_id][nongt_inds_list[img_id], :], dim=1)
                nongt_rel_norm = nongt_rel_norm.permute(1, 0).contiguous()
                sim_mat = torch.einsum(
                    'nc,ck->nk', [rel_norm, nongt_rel_norm]).unsqueeze(dim=0)
                sim_mat_list.append(sim_mat)

            # instance-wise contrastive loss
            for gt_id in range(num_gts):
                pos_inds, neg_inds, valid = self._get_pos_neg_inds(
                    gt_id, num_nongts, nongt_pos_inds, nongt_argmax_iou)
                if not valid:
                    sim_losses.append(self._zero_loss(rel_feats_list, img_id))
                    continue
                for sim_mat in sim_mat_list:
                    try:
                        sim_loss = self.contrastive_loss(
                            sim_mat=sim_mat[:, gt_id, :],
                            pos_inds=pos_inds,
                            neg_inds=neg_inds)
                        sim_avg_factor += 1.
                        sim_losses.append(sim_loss)
                    except Exception:
                        # fall back to a zero loss if the contrastive term
                        # cannot be computed for this instance
                        sim_losses.append(
                            self._zero_loss(rel_feats_list, img_id))
                        continue
        return torch.stack(sim_losses).sum().reshape(-1) / (sim_avg_factor +
                                                            1e-3)
Example #2
def iou_loss(pred, target, linear=False, mode='log', eps=1e-6):
    """IoU loss.

    Computing the IoU loss between a set of predicted bboxes and target bboxes.
    The loss is calculated as negative log of IoU.

    Args:
        pred (torch.Tensor): Predicted bboxes of format (x1, y1, x2, y2),
            shape (n, 4).
        target (torch.Tensor): Corresponding gt bboxes, shape (n, 4).
        linear (bool, optional): If True, use linear scale of loss instead of
            log scale. Default: False.
        mode (str): Loss scaling mode, including "linear", "square", and "log".
            Default: 'log'
        eps (float): Eps to avoid log(0).

    Return:
        torch.Tensor: Loss tensor.
    """
    assert mode in ['linear', 'square', 'log']
    if linear:
        mode = 'linear'
        warnings.warn('DeprecationWarning: Setting "linear=True" in '
                      'iou_loss is deprecated, please use "mode=`linear`" '
                      'instead.')
    ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
    if mode == 'linear':
        loss = 1 - ious
    elif mode == 'square':
        loss = 1 - ious**2
    elif mode == 'log':
        loss = -ious.log()
    else:
        raise NotImplementedError
    return loss
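
A quick usage sketch (assuming the module's own imports of torch, warnings and mmdet's bbox_overlaps are in place): an exact match gives zero loss under every mode, while the log scale penalizes a weak overlap hardest.

import torch

pred = torch.tensor([[0., 0., 10., 10.],
                     [0., 0., 10., 10.]])
target = torch.tensor([[0., 0., 10., 10.],    # exact match, IoU = 1
                       [5., 5., 15., 15.]])   # IoU = 25 / 175 ≈ 0.143

for mode in ('linear', 'square', 'log'):
    print(mode, iou_loss(pred, target, mode=mode))
# linear -> tensor([0.0000, 0.8571])
# square -> tensor([0.0000, 0.9796])
# log    -> tensor([0.0000, 1.9459])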
Example #3
    def test_forward(self, proposals, prop_bboxes, asso_probs, cfg,
                     vanish_frames, prev_ids):
        # valid tracklet indices; prepend 0 for new objects at the beginning.
        valid_t_idxs = torch.nonzero(
            vanish_frames < cfg.long_term_frames).squeeze(1) + 1
        valid_t_idxs = torch.cat(
            (torch.tensor([0], dtype=torch.long,
                          device=valid_t_idxs.device), valid_t_idxs))
        # Similarity with Softmax
        asso_scores = torch.zeros_like(asso_probs)
        asso_scores[:, valid_t_idxs] = F.softmax(asso_probs[:, valid_t_idxs],
                                                 dim=1)  # [N_det, N_emb + 1]
        asso_scores *= (asso_scores > cfg.asso_score_thre).float()
        # Overlaps
        overlaps = torch.zeros_like(asso_scores)
        valid_prop = prop_bboxes[:, -1] > cfg.prop_score_thre
        prop_bboxes = prop_bboxes[valid_prop, :]
        prev_ids = prev_ids[valid_prop]
        prop_overlaps = bbox_overlaps(proposals[:, :4], prop_bboxes[:, :4])
        overlaps[:, prev_ids + 1] = prop_overlaps
        overlaps *= (overlaps > cfg.prop_overlap_thre).float()

        if self.affinity == 'overlap':
            return overlaps
        elif self.affinity == 'similarity':
            return asso_scores
        elif self.affinity == 'all':
            return overlaps + asso_scores
Example #4
    def kmeans_anchors(self):
        self.logger.info(
            f'Start clustering {self.num_anchors} YOLO anchors with K-means...')
        bboxes = self.get_zero_center_bbox_tensor()
        cluster_center_idx = torch.randint(
            0, bboxes.shape[0], (self.num_anchors, )).to(self.device)

        assignments = torch.zeros((bboxes.shape[0], )).to(self.device)
        cluster_centers = bboxes[cluster_center_idx]
        if self.num_anchors == 1:
            cluster_centers = self.kmeans_maximization(bboxes, assignments,
                                                       cluster_centers)
            anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()
            anchors = sorted(anchors, key=lambda x: x[0] * x[1])
            return anchors

        prog_bar = mmcv.ProgressBar(self.iters)
        for i in range(self.iters):
            converged, assignments = self.kmeans_expectation(
                bboxes, assignments, cluster_centers)
            if converged:
                self.logger.info(f'K-means process has converged at iter {i}.')
                break
            cluster_centers = self.kmeans_maximization(bboxes, assignments,
                                                       cluster_centers)
            prog_bar.update()
        print('\n')
        avg_iou = bbox_overlaps(bboxes,
                                cluster_centers).max(1)[0].mean().item()

        anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()
        anchors = sorted(anchors, key=lambda x: x[0] * x[1])
        self.logger.info(f'Anchor clustering finished. Average IoU: {avg_iou}')

        return anchors
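
The loop above delegates to kmeans_expectation and kmeans_maximization, which are not shown. A minimal sketch of what they might look like (hypothetical implementations, using IoU as the cluster affinity, consistent with how avg_iou is measured above):

    def kmeans_expectation(self, bboxes, assignments, cluster_centers):
        # E-step: reassign every box to its highest-IoU center and report
        # whether the assignment stopped changing.
        ious = bbox_overlaps(bboxes, cluster_centers)
        new_assignments = ious.argmax(dim=1)
        converged = (new_assignments == assignments).all()
        return converged, new_assignments

    def kmeans_maximization(self, bboxes, assignments, cluster_centers):
        # M-step: move every center to the mean box of its cluster; empty
        # clusters keep their previous center.
        new_centers = torch.zeros_like(cluster_centers)
        for i in range(cluster_centers.shape[0]):
            mask = assignments == i
            if mask.sum():
                new_centers[i, :] = bboxes[mask].mean(0)
            else:
                new_centers[i, :] = cluster_centers[i, :]
        return new_centers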
Example #5
def Diou_loss(pred, target, eps=1e-3):
    """
    cal DIOU of two boxes or batch boxes
    Computing the DIoU loss between a set of predicted bboxes and target bboxes.
    Args:
        pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
            shape (n, 4).
        target (Tensor): Corresponding gt bboxes, shape (n, 4).
        eps (float): Eps to avoid log(0).

    Return:
        Tensor: Loss tensor.
    """
    # enclosing (outer) box of each pred/target pair
    outer_left_top = torch.min(pred[:, :2], target[:, :2])
    outer_right_down = torch.max(pred[:, 2:], target[:, 2:])
    outer = outer_right_down - outer_left_top
    outer_diagonal_line = (outer[:, 0]**2 + outer[:, 1]**2).clamp(min=eps)

    # squared distance between box centers
    pred_ctr = (pred[:, :2] + pred[:, 2:]) * 0.5
    target_ctr = (target[:, :2] + target[:, 2:]) * 0.5
    ctr_dis = (pred_ctr[:, 0] - target_ctr[:, 0])**2 + \
              (pred_ctr[:, 1] - target_ctr[:, 1])**2

    # DIoU = IoU - normalized center distance
    ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
    dious = ious - ctr_dis / outer_diagonal_line
    loss = 1 - dious
    return loss
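
A worked call, hand-checked: for [0, 0, 10, 10] against [2, 2, 12, 12] the IoU is 64/136 ≈ 0.471, the squared center distance is 8 and the squared outer diagonal is 288, so the loss is 1 - (0.471 - 8/288) ≈ 0.557.

import torch

pred = torch.tensor([[0., 0., 10., 10.]])
target = torch.tensor([[2., 2., 12., 12.]])
print(Diou_loss(pred, target))  # tensor([0.5572])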
Example #6
def merge_results(result1, result2, mode='inter'):
    if not isinstance(result1, np.ndarray):
        result1 = np.array(result1)
    if not isinstance(result2, np.ndarray):
        result2 = np.array(result2)
    if mode == 'inter':
        # keep result1 boxes that overlap some result2 box with IoU > 0.7
        ious = bbox_overlaps(result1, result2)  # shape (n, k)
        max_iou = np.max(ious, axis=1)
        picks = np.where(max_iou > 0.7)
        return picks
    raise NotImplementedError(f'Unsupported mode: {mode}')
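
For example, with the numpy-based bbox_overlaps this keeps exactly the result1 boxes that some result2 box covers with IoU above 0.7 (note np.where returns a tuple of index arrays):

import numpy as np

result1 = [[0., 0., 10., 10.], [20., 20., 30., 30.]]
result2 = [[1., 1., 10., 10.]]
print(merge_results(result1, result2))  # (array([0]),)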
Example #7
    def update_memo(self, ids, bboxes, embeds, labels, frame_id):
        tracklet_inds = ids > -1

        # update memo
        for id, bbox, embed, label in zip(ids[tracklet_inds],
                                          bboxes[tracklet_inds],
                                          embeds[tracklet_inds],
                                          labels[tracklet_inds]):
            id = int(id)
            if id in self.tracklets.keys():
                velocity = (bbox - self.tracklets[id]['bbox']) / (
                    frame_id - self.tracklets[id]['last_frame'])
                self.tracklets[id]['bbox'] = bbox
                self.tracklets[id]['embed'] = (
                    1 - self.memo_momentum
                ) * self.tracklets[id]['embed'] + self.memo_momentum * embed
                self.tracklets[id]['last_frame'] = frame_id
                self.tracklets[id]['label'] = label
                self.tracklets[id]['velocity'] = (
                    self.tracklets[id]['velocity'] *
                    self.tracklets[id]['acc_frame'] +
                    velocity) / (self.tracklets[id]['acc_frame'] + 1)
                self.tracklets[id]['acc_frame'] += 1
            else:
                self.tracklets[id] = dict(bbox=bbox,
                                          embed=embed,
                                          label=label,
                                          last_frame=frame_id,
                                          velocity=torch.zeros_like(bbox),
                                          acc_frame=0)

        backdrop_inds = torch.nonzero(ids == -1, as_tuple=False).squeeze(1)
        ious = bbox_overlaps(bboxes[backdrop_inds, :-1], bboxes[:, :-1])
        for i, ind in enumerate(backdrop_inds):
            if (ious[i, :ind] > self.nms_backdrop_iou_thr).any():
                backdrop_inds[i] = -1
        backdrop_inds = backdrop_inds[backdrop_inds > -1]

        self.backdrops.insert(
            0,
            dict(bboxes=bboxes[backdrop_inds],
                 embeds=embeds[backdrop_inds],
                 labels=labels[backdrop_inds]))

        # pop memo
        invalid_ids = []
        for k, v in self.tracklets.items():
            if frame_id - v['last_frame'] >= self.memo_tracklet_frames:
                invalid_ids.append(k)
        for invalid_id in invalid_ids:
            self.tracklets.pop(invalid_id)

        if len(self.backdrops) > self.memo_backdrop_frames:
            self.backdrops.pop()
Example #8
def get_iou(pr, gt):
    if pr.shape[1] == 5:
        pr = t.FloatTensor(pr)[:, 1:]
    else:
        pr = t.FloatTensor(pr)

    gt = t.FloatTensor(gt)[:, 1:]
    ious = bbox_overlaps(pr, gt)
    v, idx = t.max(ious, dim=1)

    return v.view(-1)
Example #9
def avg_iou_cost(anchor_params, bboxes):
    assert len(anchor_params) % 2 == 0
    anchor_whs = torch.tensor([
        [w, h] for w, h in zip(anchor_params[::2], anchor_params[1::2])
    ]).to(bboxes.device, dtype=bboxes.dtype)
    anchor_boxes = bbox_cxcywh_to_xyxy(
        torch.cat([torch.zeros_like(anchor_whs), anchor_whs], dim=1))
    ious = bbox_overlaps(bboxes, anchor_boxes)
    max_ious, _ = ious.max(1)
    cost = 1 - max_ious.mean().item()
    return cost
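
Because avg_iou_cost returns a plain Python scalar, it can be handed directly to a black-box optimizer. A minimal sketch with scipy's differential_evolution (the toy boxes, bounds and budget are illustrative; bbox_cxcywh_to_xyxy and bbox_overlaps are assumed to be imported from mmdet):

import torch
from scipy.optimize import differential_evolution

# zero-centered gt boxes (x1, y1, x2, y2)
bboxes = torch.tensor([[-16., -16., 16., 16.],
                       [-32., -24., 32., 24.],
                       [-64., -48., 64., 48.]])

result = differential_evolution(
    avg_iou_cost,
    bounds=[(1, 256)] * 6,  # three anchors -> six (w, h) parameters
    args=(bboxes, ),
    maxiter=100,
    seed=0)
print(result.x.reshape(-1, 2))     # optimized anchor (w, h) pairs
print('avg IoU:', 1 - result.fun)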
Example #10
    def loss(self,
             rpn_rois,
             cls_score,
             bbox_pred,
             labels,
             label_weights,
             bbox_targets,
             bbox_weights,
             reduction_override=None):
        losses = dict()
        pos_inds = labels > 0
        pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), 4)[pos_inds]
        if len(pos_bbox_pred) > 0:
            losses['loss_bbox'] = self.loss_bbox(
                pos_bbox_pred,
                bbox_targets[pos_inds],
                bbox_weights[pos_inds],
                avg_factor=bbox_targets.size(0),
                reduction_override=reduction_override)

        avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)

        all_boxes = delta2bbox(rpn_rois[:, 1:], bbox_pred, self.target_means,
                               self.target_stds, None)
        bboxes = all_boxes[pos_inds]
        labels = labels.float()
        if len(bboxes) > 0:
            gtbboxes = delta2bbox(rpn_rois[:, 1:], bbox_targets,
                                  self.target_means, self.target_stds,
                                  None)[pos_inds]
            iou_target = bbox_overlaps(bboxes,
                                       gtbboxes,
                                       'iou',
                                       is_aligned=True)
            labels[pos_inds] = iou_target

        losses['loss_cls'] = self.loss_cls(
            cls_score,
            labels.view(-1, 1),
            label_weights.view(-1, 1),
            avg_factor=avg_factor,
            reduction_override=reduction_override)

        pred_bboxes = torch.cat([all_boxes, cls_score], dim=-1)

        if 'loss_bbox' in losses.keys():
            return dict(loss_siamese_rpn_cls=losses['loss_cls'],
                        loss_siamese_rpn_bbox=losses['loss_bbox']), \
                   pred_bboxes
        else:
            return dict(loss_siamese_rpn_cls=losses['loss_cls'],
                        loss_siamese_rpn_bbox=losses['loss_cls'].new_zeros(
                            losses['loss_cls'].shape)), \
                   pred_bboxes
Example #11
def assign_gt_single(det_bbox, gt_bbox, pos_iou_thr=0.5):
    bboxes = det_bbox[:, :4]
    overlaps = bbox_overlaps(gt_bbox, bboxes)
    # for each gt, find the best-overlapping prediction and its IoU
    max_overlaps, argmax_overlaps = overlaps.max(dim=1)
    num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
    assigned_gt_inds = overlaps.new_full((num_gts, ), -1, dtype=torch.long)
    # assign positive: above positive IoU threshold
    pos_inds = max_overlaps >= pos_iou_thr
    assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds]
    return assigned_gt_inds
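
Note the assignment is gt-centric: the output has one entry per gt box holding the index of its best detection, or -1 when nothing reaches the threshold. For example:

import torch

det_bbox = torch.tensor([[0., 0., 10., 10., 0.9],
                         [50., 50., 60., 60., 0.8]])
gt_bbox = torch.tensor([[1., 1., 10., 10.],
                        [100., 100., 110., 110.]])
print(assign_gt_single(det_bbox, gt_bbox))  # tensor([ 0, -1])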
Example #12
    def loss_single(self, cls_score, pts_pred_init, pts_pred_refine, labels,
                    label_weights, bbox_gt_init, bbox_weights_init,
                    bbox_gt_refine, bbox_weights_refine, stride,
                    num_total_samples_init, num_total_samples_refine):
        # classification loss
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        cls_score = cls_score.contiguous()
        # points loss
        bbox_gt_init = bbox_gt_init.reshape(-1, 4)
        bbox_weights_init = bbox_weights_init.reshape(-1, 4)
        bbox_pred_init = self.points2bbox(pts_pred_init.reshape(
            -1, 2 * self.num_points),
                                          y_first=False)
        bbox_gt_refine = bbox_gt_refine.reshape(-1, 4)
        bbox_weights_refine = bbox_weights_refine.reshape(-1, 4)
        bbox_pred_refine = self.points2bbox(pts_pred_refine.reshape(
            -1, 2 * self.num_points),
                                            y_first=False)
        normalize_term = self.point_base_scale * stride
        loss_pts_init = self.loss_bbox_init(bbox_pred_init / normalize_term,
                                            bbox_gt_init / normalize_term,
                                            bbox_weights_init,
                                            avg_factor=num_total_samples_init)
        loss_pts_refine = self.loss_bbox_refine(
            bbox_pred_refine / normalize_term,
            bbox_gt_refine / normalize_term,
            bbox_weights_refine,
            avg_factor=num_total_samples_refine)
        if self.use_vfl:
            pos_inds = ((labels >= 0)
                        & (labels < self.num_classes)).nonzero().reshape(-1)
            pos_labels = labels[pos_inds]
            ious = bbox_overlaps(bbox_pred_refine.detach(),
                                 bbox_gt_refine.detach(),
                                 is_aligned=True)
            pos_ious = ious[pos_inds]
            cls_iou_targets = torch.zeros_like(cls_score)
            cls_iou_targets[pos_inds, pos_labels] = pos_ious
            loss_cls = self.loss_cls(cls_score,
                                     cls_iou_targets,
                                     label_weights.unsqueeze(1),
                                     avg_factor=num_total_samples_refine)
        else:
            loss_cls = self.loss_cls(cls_score,
                                     labels,
                                     label_weights,
                                     avg_factor=num_total_samples_refine)

        return loss_cls, loss_pts_init, loss_pts_refine
Example #13
def giou_loss(pred, target, eps=1e-7):
    r"""`Generalized Intersection over Union: A Metric and A Loss for Bounding
    Box Regression <https://arxiv.org/abs/1902.09630>`_.
    Args:
        pred (torch.Tensor): Predicted bboxes of format (x1, y1, x2, y2),
            shape (n, 4).
        target (torch.Tensor): Corresponding gt bboxes, shape (n, 4).
        eps (float): Eps to avoid division by zero.
    Return:
        Tensor: Loss tensor.
    """
    gious = bbox_overlaps(pred, target, mode='giou', is_aligned=True, eps=eps)
    loss = 1 - gious
    return loss
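
Unlike the plain IoU loss, GIoU stays informative for disjoint boxes, where IoU is flat at zero. A hand-checked example:

import torch

pred = torch.tensor([[0., 0., 10., 10.]])
target = torch.tensor([[15., 0., 25., 10.]])  # no overlap at all
# GIoU = IoU - (enclosing - union) / enclosing = 0 - 50 / 250 = -0.2
print(giou_loss(pred, target))  # tensor([1.2000])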
Example #14
def box_voting(top_dets, all_dets, thresh, scoring_method='ID', beta=1.0):
    """Apply bounding-box voting to refine `top_dets` by voting with `all_dets`.
    See: https://arxiv.org/abs/1505.01749. Optional score averaging (not in the
    referenced  paper) can be applied by setting `scoring_method` appropriately.
    """
    # top_dets is [N, 5] each row is [x1 y1 x2 y2, sore]
    # all_dets is [N, 5] each row is [x1 y1 x2 y2, sore]
    top_dets_out = top_dets.copy()
    top_boxes = top_dets[:, :4]
    all_boxes = all_dets[:, :4]
    all_scores = all_dets[:, 4]
    top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes)
    for k in range(top_dets_out.shape[0]):
        inds_to_vote = np.where(top_to_all_overlaps[k] >= thresh)[0]
        boxes_to_vote = all_boxes[inds_to_vote, :]
        ws = all_scores[inds_to_vote]
        top_dets_out[k, :4] = np.average(boxes_to_vote, axis=0, weights=ws)
        if scoring_method == 'ID':
            # Identity, nothing to do
            pass
        elif scoring_method == 'TEMP_AVG':
            # Average probabilities (considered as P(detected class) vs.
            # P(not the detected class)) after smoothing with a temperature
            # hyperparameter.
            P = np.vstack((ws, 1.0 - ws))
            P_max = np.max(P, axis=0)
            X = np.log(P / P_max)
            X_exp = np.exp(X / beta)
            P_temp = X_exp / np.sum(X_exp, axis=0)
            P_avg = P_temp[0].mean()
            top_dets_out[k, 4] = P_avg
        elif scoring_method == 'AVG':
            # Combine new probs from overlapping boxes
            top_dets_out[k, 4] = ws.mean()
        elif scoring_method == 'IOU_AVG':
            P = ws
            ws = top_to_all_overlaps[k, inds_to_vote]
            P_avg = np.average(P, weights=ws)
            top_dets_out[k, 4] = P_avg
        elif scoring_method == 'GENERALIZED_AVG':
            P_avg = np.mean(ws**beta)**(1.0 / beta)
            top_dets_out[k, 4] = P_avg
        elif scoring_method == 'QUASI_SUM':
            top_dets_out[k, 4] = ws.sum() / float(len(ws))**beta
        else:
            raise NotImplementedError(
                'Unknown scoring method {}'.format(scoring_method))

    return top_dets_out
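
A small numpy example (bbox_overlaps here is assumed to be the numpy variant, since the indexing below is pure numpy): the surviving box's coordinates become the score-weighted mean of every detection overlapping it by at least `thresh`.

import numpy as np

top_dets = np.array([[0., 0., 10., 10., 0.9]])
all_dets = np.array([[0., 0., 10., 10., 0.9],
                     [1., 1., 11., 11., 0.6],     # IoU ≈ 0.7, gets a vote
                     [40., 40., 50., 50., 0.3]])  # too far away, ignored
print(box_voting(top_dets, all_dets, thresh=0.5, scoring_method='AVG'))
# [[ 0.4   0.4  10.4  10.4   0.75]]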
Example #15
def iou_loss(pred, target, eps=1e-6):
    """IoU loss.
    Computing the IoU loss between a set of predicted bboxes and target bboxes.
    The loss is calculated as negative log of IoU.
    Args:
        pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
            shape (n, 4).
        target (Tensor): Corresponding gt bboxes, shape (n, 4).
        eps (float): Eps to avoid log(0).
    Return:
        Tensor: Loss tensor.
    """
    ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
    loss = -ious.log()
    return loss
Example #16
    def get_bbox_prob_and_overlap(self, points, bbox_preds, gt_bboxes):

        bbox_targets = bbox2distance(points,
                                     gt_bboxes[:, None, :].repeat(
                                         1, points.shape[1], 1),
                                     norm=self.distance_norm)
        bbox_prob = self.loss_bbox(bbox_preds,
                                   bbox_targets,
                                   reduction_override='none').neg().exp()

        pred_boxes = distance2bbox(points, bbox_preds, norm=self.distance_norm)
        bbox_overlap = bbox_overlaps(
            gt_bboxes[:, None, :].expand_as(pred_boxes),
            pred_boxes,
            is_aligned=True)

        return bbox_prob, bbox_overlap
Example #17
    def predict_weights(self, cls_score, bbox_pred, labels, label_weights,
                        bbox_targets, bbox_weights, anchors, loss_cls,
                        loss_bbox):
        labels = labels.reshape(-1)
        pos_inds = labels > 0
        positive_score = cls_score[pos_inds, labels[pos_inds] - 1].sigmoid()
        pos_pred = bbox_pred[pos_inds]
        pos_proposals = anchors[pos_inds]
        pos_bbox = delta2bbox(pos_proposals, pos_pred,
                              means=self.target_means, stds=self.target_stds)
        pos_targets = bbox_targets[pos_inds]
        gt_bboxes = delta2bbox(pos_proposals, pos_targets,
                               means=self.target_means, stds=self.target_stds)
        ious = bbox_overlaps(gt_bboxes, pos_bbox, is_aligned=True).view(-1)
        total_ious = ious.new_full((pos_inds.numel(), ), 0.0)
        total_ious[pos_inds] = ious
        total_scores = positive_score.new_full((pos_inds.numel(), ), 0.0)
        total_scores[pos_inds] = positive_score
        uncertainty_prediction = self.uncertainty_predictor(
            total_ious,
            total_scores,
            loss_cls.sum(dim=1).detach().data,
            loss_bbox.detach().data
        )
        losses = dict()
        uncertainty_prediction_cls = uncertainty_prediction[:, 0]
        uncertainty_prediction_reg = uncertainty_prediction[:, 1]
        uncertainty_prediction_cls = torch.clamp(
            uncertainty_prediction_cls,
            min=self.cls_prediction_min,
            max=self.cls_prediction_max)
        uncertainty_prediction_reg = torch.clamp(
            uncertainty_prediction_reg,
            min=self.reg_prediction_min,
            max=self.reg_prediction_max)
        uncertainty_prediction_cls = torch.ones_like(
            uncertainty_prediction_cls) * uncertainty_prediction_cls.mean()
        losses.update({
            'loss_uncertainty_cls':
            uncertainty_prediction_cls.mean() * self.uncertainty_cls_weight
        })
        losses.update({
            'loss_uncertainty_reg':
            uncertainty_prediction_reg[pos_inds].mean() *
            self.uncertainty_reg_weight
        })

        uncertainty_prediction_reg = torch.exp(-1. * uncertainty_prediction_reg)
        uncertainty_prediction_cls = torch.exp(-1. * uncertainty_prediction_cls)
        losses.update({
            "cls_prediction_pos": uncertainty_prediction_cls[pos_inds].mean(),
            "cls_prediction_neg": uncertainty_prediction_cls[~pos_inds].mean(),
            "cls_prediction_reg": uncertainty_prediction_reg[pos_inds].mean(),
        })
        bbox_weights = bbox_weights.detach().data * \
            uncertainty_prediction_reg.view(-1, 1)
        label_weights = label_weights.detach().data * \
            uncertainty_prediction_cls.view(-1, 1)
        return label_weights, bbox_weights, losses
Example #18
    def get_bbox_prob_and_overlap(self, anchors, bbox_preds, gt_bboxes):
        bbox_targets = bbox2delta(
            anchors,
            gt_bboxes[:, None, :].expand_as(anchors),
            self.target_means,
            self.target_stds
        )
        bbox_prob = self.loss_bbox(
            bbox_preds, bbox_targets,
            reduction_override='none').sum(dim=-1).neg().exp()

        pred_boxes = delta2bbox(
            anchors,
            bbox_preds,
            self.target_means,
            self.target_stds
        )
        bbox_overlap = bbox_overlaps(
            gt_bboxes[:, None, :].expand_as(pred_boxes),
            pred_boxes,
            is_aligned=True)

        return bbox_prob, bbox_overlap
Example #19
    def assign_ids(self,
                   ids,
                   det_bboxes,
                   weight_iou_with_det_scores=False,
                   match_iou_thr=0.5):
        """Assign ids.

        Args:
            ids (list[int]): Tracking ids.
            det_bboxes (Tensor): of shape (N, 5)
            weight_iou_with_det_scores (bool, optional): Whether using
                detection scores to weight IOU which is used for matching.
                Defaults to False.
            match_iou_thr (float, optional): Matching threshold.
                Defaults to 0.5.

        Returns:
            tuple(ndarray): Matched indices from the linear assignment:
                ``row[i]`` is the detection matched to track ``i`` and
                ``col[j]`` the track matched to detection ``j``; -1 means
                unmatched.
        """
        # get track_bboxes
        track_bboxes = np.zeros((0, 4))
        for id in ids:
            track_bboxes = np.concatenate(
                (track_bboxes, self.tracks[id].mean[:4][None]), axis=0)
        track_bboxes = torch.from_numpy(track_bboxes).to(det_bboxes)
        track_bboxes = bbox_cxcyah_to_xyxy(track_bboxes)

        # compute distance
        ious = bbox_overlaps(track_bboxes, det_bboxes[:, :4])
        if weight_iou_with_det_scores:
            ious *= det_bboxes[:, 4][None]
        dists = (1 - ious).cpu().numpy()

        # bipartite match
        if dists.size > 0:
            cost, row, col = lap.lapjv(
                dists, extend_cost=True, cost_limit=1 - match_iou_thr)
        else:
            row = np.zeros(len(ids)).astype(np.int32) - 1
            col = np.zeros(len(det_bboxes)).astype(np.int32) - 1
        return row, col
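
The bipartite matching comes from the `lap` package. A toy call showing the convention the code relies on: row[i] is the detection column matched to track i, and any pairing whose cost exceeds cost_limit is left at -1.

import numpy as np
import lap  # pip install lap

dists = np.array([[0.1, 0.9],
                  [0.8, 0.2]])
cost, row, col = lap.lapjv(dists, extend_cost=True, cost_limit=1 - 0.5)
print(row, col)  # [0 1] [0 1]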
Example #20
def box_filter(boxes, must_overlap=False):
    """ Only include boxes that overlap as possible relations. 
    If no overlapping boxes, use all of them."""
    n_cands = boxes.shape[0]

    overlaps = bbox_overlaps(torch.from_numpy(boxes),
                             torch.from_numpy(boxes)) > 0
    overlaps = overlaps.numpy()
    np.fill_diagonal(overlaps, 0)

    # np.bool was removed in NumPy 1.24; the builtin bool is equivalent
    all_possib = np.ones_like(overlaps, dtype=bool)
    np.fill_diagonal(all_possib, 0)

    if must_overlap:
        possible_boxes = np.column_stack(np.where(overlaps))

        if possible_boxes.size == 0:
            possible_boxes = np.column_stack(np.where(all_possib))
    else:
        possible_boxes = np.column_stack(np.where(all_possib))
    return possible_boxes
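
For instance, with three boxes of which only the first two intersect, must_overlap=True keeps just the two ordered pairs between them:

import numpy as np
import torch

boxes = np.array([[0., 0., 10., 10.],
                  [5., 5., 15., 15.],
                  [50., 50., 60., 60.]])
print(box_filter(boxes, must_overlap=True))
# [[0 1]
#  [1 0]]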
Example #21
    def get_match_score(self, bboxes, labels, prev_bboxes, prev_labels,
                        similarity_logits):
        """Get the match score.

        Args:
            bboxes (torch.Tensor): of shape (num_current_bboxes, 5) in
                [tl_x, tl_y, br_x, br_y, score] format. Denoting the detection
                bboxes of current frame.
            labels (torch.Tensor): of shape (num_current_bboxes, )
            prev_bboxes (torch.Tensor): of shape (num_previous_bboxes, 5) in
                [tl_x, tl_y, br_x, br_y, score] format. Denoting the
                detection bboxes of previous frame.
            prev_labels (torch.Tensor): of shape (num_previous_bboxes, )
            similarity_logits (torch.Tensor): of shape (num_current_bboxes,
                num_previous_bboxes + 1). Denoting the similarity logits from
                track head.

        Returns:
            torch.Tensor: The matching score of shape (num_current_bboxes,
            num_previous_bboxes + 1)
        """
        similarity_scores = similarity_logits.softmax(dim=1)

        ious = bbox_overlaps(bboxes[:, :4], prev_bboxes[:, :4])
        iou_dummy = ious.new_zeros(ious.shape[0], 1)
        ious = torch.cat((iou_dummy, ious), dim=1)

        label_deltas = (labels.view(-1, 1) == prev_labels).float()
        label_deltas_dummy = label_deltas.new_ones(label_deltas.shape[0], 1)
        label_deltas = torch.cat((label_deltas_dummy, label_deltas), dim=1)

        match_score = similarity_scores.log()
        match_score += self.match_weights['det_score'] * \
            bboxes[:, 4].view(-1, 1).log()
        match_score += self.match_weights['iou'] * ious
        match_score += self.match_weights['det_label'] * label_deltas

        return match_score
Example #22
def iou_loss(pred, target, linear=False, eps=1e-6):
    """IoU loss.

    Computing the IoU loss between a set of predicted bboxes and target bboxes.
    The loss is calculated as negative log of IoU.

    Args:
        pred (torch.Tensor): Predicted bboxes of format (x1, y1, x2, y2),
            shape (n, 4).
        target (torch.Tensor): Corresponding gt bboxes, shape (n, 4).
        linear (bool, optional): If True, use linear scale of loss instead of
            log scale. Default: False.
        eps (float): Eps to avoid log(0).

    Return:
        torch.Tensor: Loss tensor.
    """
    ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)
    if linear:
        loss = 1 - ious
    else:
        loss = -ious.log()
    return loss
Example #23
    def loss_single(self, cls_score, bbox_pred, labels, label_weights, level,
                    bbox_targets, bbox_weights, num_total_samples, cfg):

        # generate anchors
        anchors = self.anchor_generators[level].grid_anchors(
            self.featmap_sizes[level], self.anchor_strides[level])
        anchors = anchors.repeat(2, 1)

        # classification loss
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)

        # regression loss
        bbox_targets = bbox_targets.reshape(-1, 4)
        bbox_weights = bbox_weights.reshape(-1, 4)
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
        if cfg.get('is_iou', False):
            # decode predictions and measure their IoU with the targets
            bbox = delta2bbox(anchors, bbox_pred, self.target_means,
                              self.target_stds)
            ious = bbox_overlaps(bbox, bbox_targets, is_aligned=True)
            loss_cls = self.loss_cls(cls_score,
                                     labels,
                                     label_weights,
                                     avg_factor=num_total_samples,
                                     ious=ious)
            loss_bbox = self.loss_bbox(
                bbox_pred,
                bbox_targets,
                bbox_weights,
                avg_factor=num_total_samples)
        else:
            loss_cls = self.loss_cls(
                cls_score, labels, label_weights, avg_factor=num_total_samples)
            loss_bbox = self.loss_bbox(
                bbox_pred,
                bbox_targets,
                bbox_weights,
                avg_factor=num_total_samples)
        return loss_cls, loss_bbox
Example #24
def eval_recalls(gts, proposals, proposal_nums=None, iou_thrs=None):
    """Calculate recalls.

    Args:
        gts (list or ndarray): a list of arrays of shape (n, 4).
        proposals (list or ndarray): a list of arrays of shape (k, 4) or
            (k, 5).
        proposal_nums (int or list of int or ndarray): top-N proposals.
        iou_thrs (float or list or ndarray): IoU thresholds.
    Returns:
        ndarray: recalls of different ious and proposal nums
    """

    img_num = len(gts)
    assert img_num == len(proposals)

    proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)

    all_ious = []
    for i in range(img_num):
        if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:
            scores = proposals[i][:, 4]
            sort_idx = np.argsort(scores)[::-1]
            img_proposal = proposals[i][sort_idx, :]
        else:
            img_proposal = proposals[i]

        prop_num = min(img_proposal.shape[0], proposal_nums[-1])
        if gts[i] is None or gts[i].shape[0] == 0:
            ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)
        else:
            ious = bbox_overlaps(torch.tensor(gts[i]),
                                 torch.tensor(img_proposal[:prop_num, :4]))
            ious = ious.data.numpy()
        all_ious.append(ious)
    all_ious = np.array(all_ious, dtype=object)  # per-image arrays are ragged
    recalls = _recalls(all_ious, proposal_nums, iou_thrs)
    return recalls
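
A minimal call (this assumes the module's own helpers set_recall_param and _recalls, which are not shown above). The result is a (num_proposal_nums, num_iou_thrs) recall matrix:

import numpy as np

gts = [np.array([[0., 0., 10., 10.]])]
proposals = [np.array([[30., 30., 40., 40., 0.9],
                       [0., 0., 10., 10., 0.8]])]
print(eval_recalls(gts, proposals, proposal_nums=[1, 2], iou_thrs=[0.5]))
# [[0.]
#  [1.]]  -> the gt is only recovered once the top-2 proposals are kept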
Example #25
    def get_roi_mask(self, cls_scores, img_metas, gt_bboxes, phi=0.5):
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        from mmdet.core import bbox_overlaps
        with torch.no_grad():
            anchor_list, _ = self.get_anchors(featmap_sizes, img_metas)
            mask_batch = []
            for batch in range(len(gt_bboxes)):
                mask_level = []
                target_lvls = self._map_roi_levels(gt_bboxes[batch],
                                                   len(anchor_list[batch]))
                for level in range(len(anchor_list[batch])):
                    gt_level = gt_bboxes[batch][target_lvls == level]
                    h, w = featmap_sizes[level][0], featmap_sizes[level][1]
                    mask_per_img = torch.zeros([h, w],
                                               dtype=torch.double).cuda()
                    if gt_level.shape[0] > 0:
                        IoU_map = bbox_overlaps(anchor_list[batch][level],
                                                gt_level)
                        max_iou, _ = torch.max(IoU_map, dim=0)
                        IoU_map = IoU_map.view(h, w, self.num_anchors, -1)
                        for ins in range(gt_level.shape[0]):
                            max_iou_per_gt = max_iou[ins] * phi
                            mask_per_gt = torch.sum(
                                IoU_map[:, :, :, ins] > max_iou_per_gt, dim=2)
                            mask_per_img += mask_per_gt
                        mask_per_img = (mask_per_img > 0).double()
                    mask_level.append(mask_per_img)
                mask_batch.append(mask_level)
            mask_batch_level = []
            for i in range(len(mask_batch[0])):
                tmp = []
                for batch in range(len(mask_batch)):
                    tmp.append(mask_batch[batch][i])
                mask_batch_level.append(torch.stack(tmp, dim=0))

        return mask_batch_level
Example #26
    def loss(self,
             cls_scores,
             bbox_preds,
             centernesses,
             cof_preds,
             feat_masks,
             gt_bboxes,
             gt_labels,
             img_metas,
             cfg,
             gt_bboxes_ignore=None,
             gt_masks_list=None):
        assert len(cls_scores) == len(bbox_preds) == len(centernesses)
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        all_level_points, all_level_strides = self.get_points(featmap_sizes, bbox_preds[0].dtype, bbox_preds[0].device)
        labels, bbox_targets, label_list, bbox_targets_list, gt_inds = self.fcos_target(all_level_points,
                                                                                        gt_bboxes, gt_labels)
        # decode detections and ground truth
        det_bboxes = []
        det_targets = []
        num_levels = len(bbox_preds)

        for img_id in range(len(img_metas)):
            bbox_pred_list = [
                bbox_preds[i][img_id].permute(1, 2, 0).reshape(-1, 4).detach() for i in range(num_levels)
            ]
            bbox_target_list = bbox_targets_list[img_id]

            bboxes = []
            targets = []
            for i in range(len(bbox_pred_list)):
                bbox_pred = bbox_pred_list[i]
                bbox_target = bbox_target_list[i]
                points = all_level_points[i]
                bboxes.append(distance2bbox(points, bbox_pred))
                targets.append(distance2bbox(points, bbox_target))

            bboxes = torch.cat(bboxes, dim=0)
            targets = torch.cat(targets, dim=0)

            det_bboxes.append(bboxes)
            det_targets.append(targets)
        gt_masks = []
        for i in range(len(gt_labels)):
            gt_label = gt_labels[i]
            gt_masks.append(
                torch.from_numpy(
                    np.array(gt_masks_list[i][:gt_label.shape[0]],
                             dtype=np.float32)).to(gt_label.device))

        num_imgs = cls_scores[0].size(0)
        # flatten cls_scores, bbox_preds and centerness
        flatten_cls_scores = [
            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_bbox_preds = [
            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
            for bbox_pred in bbox_preds
        ]
        flatten_centerness = [
            centerness.permute(0, 2, 3, 1).reshape(-1)
            for centerness in centernesses
        ]
        flatten_cls_scores = torch.cat(flatten_cls_scores)
        flatten_bbox_preds = torch.cat(flatten_bbox_preds)
        flatten_centerness = torch.cat(flatten_centerness)
        flatten_labels = torch.cat(labels)
        flatten_bbox_targets = torch.cat(bbox_targets)
        # repeat points to align with bbox_preds
        flatten_points = torch.cat(
            [points.repeat(num_imgs, 1) for points in all_level_points])
        flatten_strides = torch.cat([
            strides.view(-1, 1).repeat(num_imgs, 1)
            for strides in all_level_strides
        ])

        pos_inds = flatten_labels.nonzero().reshape(-1)
        num_pos = len(pos_inds)
        loss_cls = self.loss_cls(
            flatten_cls_scores, flatten_labels,
            avg_factor=num_pos + num_imgs)  # avoid num_pos is 0

        pos_bbox_preds = flatten_bbox_preds[pos_inds]
        pos_centerness = flatten_centerness[pos_inds]

        if num_pos > 0:
            pos_bbox_targets = flatten_bbox_targets[pos_inds]
            pos_centerness_targets = self.centerness_target(pos_bbox_targets)
            pos_points = flatten_points[pos_inds]
            pos_strides = flatten_strides[pos_inds]
            pos_decoded_bbox_preds = distance2bbox(
                pos_points, pos_bbox_preds / pos_strides)
            pos_decoded_target_preds = distance2bbox(
                pos_points, pos_bbox_targets / pos_strides)
            # centerness weighted iou loss
            loss_bbox = self.loss_bbox(
                pos_decoded_bbox_preds,
                pos_decoded_target_preds,
                weight=pos_centerness_targets,
                avg_factor=pos_centerness_targets.sum())
            loss_centerness = self.loss_centerness(pos_centerness,
                                                   pos_centerness_targets)
        else:
            loss_bbox = pos_bbox_preds.sum()
            loss_centerness = pos_centerness.sum()

        # mask loss
        flatten_cls_scores1 = [
            cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1,
                                                  self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_cls_scores1 = torch.cat(flatten_cls_scores1, dim=1)

        flatten_cof_preds = [
            cof_pred.permute(0, 2, 3, 1).reshape(cof_pred.shape[0], -1, 32 * 4)
            for cof_pred in cof_preds
        ]

        loss_mask = 0
        loss_iou = 0
        num_iou = 0.1  # small initial value to avoid dividing by zero later
        flatten_cof_preds = torch.cat(flatten_cof_preds, dim=1)
        for i in range(num_imgs):
            labels = torch.cat(
                [labels_level.flatten() for labels_level in label_list[i]])
            bbox_dt = det_bboxes[i] / 2
            bbox_dt = bbox_dt.detach()
            pos_inds = (labels > 0).nonzero().view(-1)
            cof_pred = flatten_cof_preds[i][pos_inds]
            img_mask = feat_masks[i]
            mask_h = img_mask.shape[1]
            mask_w = img_mask.shape[2]
            idx_gt = gt_inds[i]
            bbox_dt = bbox_dt[pos_inds, :4]


            area = (bbox_dt[:, 2] - bbox_dt[:, 0]) * (
                bbox_dt[:, 3] - bbox_dt[:, 1])
            bbox_dt = bbox_dt[area > 1.0, :]
            idx_gt = idx_gt[area > 1.0]
            cof_pred = cof_pred[area > 1.0]
            if bbox_dt.shape[0] == 0:
                # zero-valued loss keeps the graph connected when nothing
                # survives the area filter
                loss_mask += area.sum() * 0
                continue

            bbox_gt = gt_bboxes[i]
            cls_score = flatten_cls_scores1[
                i, pos_inds, labels[pos_inds] - 1].sigmoid().detach()
            cls_score = cls_score[area > 1.0]
            pos_inds = pos_inds[area > 1.0]
            ious = bbox_overlaps(bbox_gt[idx_gt] / 2, bbox_dt, is_aligned=True)
            with torch.no_grad():
                weighting = cls_score * ious
                weighting = weighting / (torch.sum(weighting) +
                                         0.0001) * len(weighting)

            gt_mask = F.interpolate(gt_masks[i].unsqueeze(0), scale_factor=0.5, mode='bilinear', align_corners=False).squeeze(0)

            shape = np.minimum(feat_masks[i].shape, gt_mask.shape)
            gt_mask_new = gt_mask.new_zeros(gt_mask.shape[0], mask_h, mask_w)
            gt_mask_new[:gt_mask.shape[0], :shape[1], :shape[2]] = gt_mask[:gt_mask.shape[0], :shape[1], :shape[2]]
            gt_mask_new = gt_mask_new.gt(0.5).float()

            gt_mask_new = torch.index_select(gt_mask_new,0,idx_gt).permute(1, 2, 0).contiguous()

            # spp
            img_mask1 = img_mask.permute(1, 2, 0)
            pos_masks00 = torch.sigmoid(img_mask1 @ cof_pred[:, 0:32].t())
            pos_masks01 = torch.sigmoid(img_mask1 @ cof_pred[:, 32:64].t())
            pos_masks10 = torch.sigmoid(img_mask1 @ cof_pred[:, 64:96].t())
            pos_masks11 = torch.sigmoid(img_mask1 @ cof_pred[:, 96:128].t())
            pred_masks = torch.stack([pos_masks00, pos_masks01, pos_masks10, pos_masks11], dim=0)
            pred_masks = self.crop_cuda(pred_masks, bbox_dt)
            gt_mask_crop = self.crop_gt_cuda(gt_mask_new, bbox_dt)
            # pred_masks, gt_mask_crop = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11, bbox_dt,
            #                                       gt_mask_new)

            pre_loss = F.binary_cross_entropy(pred_masks, gt_mask_crop, reduction='none')
            pos_get_csize = center_size(bbox_dt)
            gt_box_width = pos_get_csize[:, 2]
            gt_box_height = pos_get_csize[:, 3]
            pre_loss = pre_loss.sum(dim=(0, 1)) / (
                gt_box_width * gt_box_height * pos_get_csize.shape[0])
            loss_mask += torch.sum(pre_loss * weighting.detach())

            if self.rescoring_flag:
                pos_labels = labels[pos_inds] - 1
                input_iou = pred_masks.detach().unsqueeze(0).permute(3, 0, 1, 2)
                pred_iou = self.convs_scoring(input_iou)
                pred_iou = self.relu(self.mask_scoring(pred_iou))
                pred_iou = F.max_pool2d(pred_iou, kernel_size=pred_iou.size()[2:]).squeeze(-1).squeeze(-1)
                pred_iou = pred_iou[range(pred_iou.size(0)), pos_labels]
                with torch.no_grad():
                    mask_pred = (pred_masks > 0.4).float()
                    mask_pred_areas = mask_pred.sum((0, 1))
                    overlap_areas = (mask_pred * gt_mask_new).sum((0, 1))
                    gt_full_areas = gt_mask_new.sum((0, 1))
                    iou_targets = overlap_areas / (mask_pred_areas + gt_full_areas - overlap_areas + 0.1)

                    iou_weights = ((iou_targets > 0.1) & (iou_targets <= 1.0) & (gt_full_areas >= 10 * 10)).float()

                loss_iou += self.loss_iou(pred_iou.view(-1, 1), iou_targets.view(-1, 1), iou_weights.view(-1, 1))
                num_iou += torch.sum(iou_weights.detach())
        loss_mask = loss_mask / num_imgs
        if self.rescoring_flag:
            # float() also covers the corner case where num_iou was never
            # incremented and is still a plain Python float
            loss_iou = loss_iou * 10 / float(num_iou)
            return dict(
                loss_cls=loss_cls,
                loss_bbox=loss_bbox,
                loss_centerness=loss_centerness,
                loss_mask=loss_mask,
                loss_iou=loss_iou)
        else:
            return dict(
                loss_cls=loss_cls,
                loss_bbox=loss_bbox,
                loss_centerness=loss_centerness,
                loss_mask=loss_mask)
Example #27
def isr_p(cls_score,
          bbox_pred,
          bbox_targets,
          rois,
          sampling_results,
          loss_cls,
          bbox_coder,
          k=2,
          bias=0,
          num_class=80):
    """Importance-based Sample Reweighting (ISR_P), positive part.

    Args:
        cls_score (Tensor): Predicted classification scores.
        bbox_pred (Tensor): Predicted bbox deltas.
        bbox_targets (tuple[Tensor]): A tuple of bbox targets, which are
            labels, label_weights, bbox_targets and bbox_weights,
            respectively.
        rois (Tensor): Anchors (single_stage) in shape (n, 4) or RoIs
            (two_stage) in shape (n, 5).
        sampling_results (obj): Sampling results.
        loss_cls (func): Classification loss func of the head.
        bbox_coder (obj): BBox coder of the head.
        k (float): Power of the non-linear mapping.
        bias (float): Shift of the non-linear mapping.
        num_class (int): Number of classes, default: 80.

    Return:
        tuple([Tensor]): labels, imp_based_label_weights, bbox_targets,
            bbox_target_weights
    """

    labels, label_weights, bbox_targets, bbox_weights = bbox_targets
    pos_label_inds = ((labels >= 0) &
                      (labels < num_class)).nonzero().reshape(-1)
    pos_labels = labels[pos_label_inds]

    # if no positive samples, return the original targets
    num_pos = float(pos_label_inds.size(0))
    if num_pos == 0:
        return labels, label_weights, bbox_targets, bbox_weights

    # merge pos_assigned_gt_inds of per image to a single tensor
    gts = list()
    last_max_gt = 0
    for i in range(len(sampling_results)):
        gt_i = sampling_results[i].pos_assigned_gt_inds
        gts.append(gt_i + last_max_gt)
        if len(gt_i) != 0:
            last_max_gt = gt_i.max() + 1
    gts = torch.cat(gts)
    assert len(gts) == num_pos

    cls_score = cls_score.detach()
    bbox_pred = bbox_pred.detach()

    # For single stage detectors, rois here indicate anchors, in shape (N, 4)
    # For two stage detectors, rois are in shape (N, 5)
    if rois.size(-1) == 5:
        pos_rois = rois[pos_label_inds][:, 1:]
    else:
        pos_rois = rois[pos_label_inds]

    if bbox_pred.size(-1) > 4:
        bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)
        pos_delta_pred = bbox_pred[pos_label_inds, pos_labels].view(-1, 4)
    else:
        pos_delta_pred = bbox_pred[pos_label_inds].view(-1, 4)

    # compute iou of the predicted bbox and the corresponding GT
    pos_delta_target = bbox_targets[pos_label_inds].view(-1, 4)
    pos_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_pred)
    target_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_target)
    ious = bbox_overlaps(pos_bbox_pred, target_bbox_pred, is_aligned=True)

    pos_imp_weights = label_weights[pos_label_inds]
    # Two steps to compute IoU-HLR. Samples are first sorted by IoU locally,
    # then sorted again within the same-rank group
    max_l_num = pos_labels.bincount().max()
    for label in pos_labels.unique():
        l_inds = (pos_labels == label).nonzero().view(-1)
        l_gts = gts[l_inds]
        for t in l_gts.unique():
            t_inds = l_inds[l_gts == t]
            t_ious = ious[t_inds]
            _, t_iou_rank_idx = t_ious.sort(descending=True)
            _, t_iou_rank = t_iou_rank_idx.sort()
            ious[t_inds] += max_l_num - t_iou_rank.float()
        l_ious = ious[l_inds]
        _, l_iou_rank_idx = l_ious.sort(descending=True)
        _, l_iou_rank = l_iou_rank_idx.sort()  # IoU-HLR
        # linearly map HLR to label weights
        pos_imp_weights[l_inds] *= (max_l_num - l_iou_rank.float()) / max_l_num

    pos_imp_weights = (bias + pos_imp_weights * (1 - bias)).pow(k)

    # normalize to make the new weighted loss value equal to the original loss
    pos_loss_cls = loss_cls(cls_score[pos_label_inds],
                            pos_labels,
                            reduction_override='none')
    if pos_loss_cls.dim() > 1:
        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds][:,
                                                                        None]
        new_pos_loss_cls = pos_loss_cls * pos_imp_weights[:, None]
    else:
        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds]
        new_pos_loss_cls = pos_loss_cls * pos_imp_weights
    pos_loss_cls_ratio = ori_pos_loss_cls.sum() / new_pos_loss_cls.sum()
    pos_imp_weights = pos_imp_weights * pos_loss_cls_ratio
    label_weights[pos_label_inds] = pos_imp_weights

    bbox_targets = labels, label_weights, bbox_targets, bbox_weights
    return bbox_targets
Example #28
    def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,
                    bbox_targets, stride, soft_targets, num_total_samples):
        """Compute loss of a single scale level.

        Args:
            anchors (Tensor): Box reference for each scale level with shape
                (N, num_total_anchors, 4).
            cls_score (Tensor): Cls and quality joint scores for each scale
                level has shape (N, num_classes, H, W).
            bbox_pred (Tensor): Box distribution logits for each scale
                level with shape (N, 4*(n+1), H, W), n is max value of integral
                set.
            labels (Tensor): Labels of each anchors with shape
                (N, num_total_anchors).
            label_weights (Tensor): Label weights of each anchor with shape
                (N, num_total_anchors)
            bbox_targets (Tensor): BBox regression targets of each anchor
                with shape (N, num_total_anchors, 4).
            stride (tuple): Stride in this scale level.
            soft_targets (Tensor): Soft (teacher) box distribution logits,
                assumed to have the same layout as ``bbox_pred``.
            num_total_samples (int): Number of positive samples that is
                reduced over all GPUs.

        Returns:
            tuple[Tensor]: Loss components and the summed weight targets.
        """
        assert stride[0] == stride[1], 'h stride is not equal to w stride!'
        anchors = anchors.reshape(-1, 4)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        bbox_pred = bbox_pred.permute(0, 2, 3,
                                      1).reshape(-1, 4 * (self.reg_max + 1))
        soft_targets = soft_targets.permute(0, 2, 3,
                                            1).reshape(-1,
                                                       4 * (self.reg_max + 1))

        bbox_targets = bbox_targets.reshape(-1, 4)
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)

        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
        bg_class_ind = self.num_classes
        pos_inds = ((labels >= 0)
                    & (labels < bg_class_ind)).nonzero().squeeze(1)
        score = label_weights.new_zeros(labels.shape)

        if len(pos_inds) > 0:
            pos_bbox_targets = bbox_targets[pos_inds]
            pos_bbox_pred = bbox_pred[pos_inds]
            pos_anchors = anchors[pos_inds]
            pos_anchor_centers = self.anchor_center(pos_anchors) / stride[0]

            weight_targets = cls_score.detach().sigmoid()
            weight_targets = weight_targets.max(dim=1)[0][pos_inds]
            pos_bbox_pred_corners = self.integral(pos_bbox_pred)
            pos_decode_bbox_pred = distance2bbox(pos_anchor_centers,
                                                 pos_bbox_pred_corners)
            pos_decode_bbox_targets = pos_bbox_targets / stride[0]
            score[pos_inds] = bbox_overlaps(pos_decode_bbox_pred.detach(),
                                            pos_decode_bbox_targets,
                                            is_aligned=True)
            pred_corners = pos_bbox_pred.reshape(-1, self.reg_max + 1)
            pos_soft_targets = soft_targets[pos_inds]
            soft_corners = pos_soft_targets.reshape(-1, self.reg_max + 1)

            target_corners = bbox2distance(pos_anchor_centers,
                                           pos_decode_bbox_targets,
                                           self.reg_max).reshape(-1)

            # regression loss
            loss_bbox = self.loss_bbox(pos_decode_bbox_pred,
                                       pos_decode_bbox_targets,
                                       weight=weight_targets,
                                       avg_factor=1.0)

            # dfl loss
            loss_dfl = self.loss_dfl(pred_corners,
                                     target_corners,
                                     weight=weight_targets[:, None].expand(
                                         -1, 4).reshape(-1),
                                     avg_factor=4.0)

            # ld loss
            loss_ld = self.loss_ld(pred_corners,
                                   soft_corners,
                                   weight=weight_targets[:, None].expand(
                                       -1, 4).reshape(-1),
                                   avg_factor=4.0)

        else:
            loss_ld = bbox_pred.sum() * 0
            loss_bbox = bbox_pred.sum() * 0
            loss_dfl = bbox_pred.sum() * 0
            weight_targets = bbox_pred.new_tensor(0)

        # cls (qfl) loss
        loss_cls = self.loss_cls(cls_score, (labels, score),
                                 weight=label_weights,
                                 avg_factor=num_total_samples)

        return loss_cls, loss_bbox, loss_dfl, loss_ld, weight_targets.sum()
Example #29
 def assign_ids(self, x, prop_bboxes, asso_probs, det_bboxes, det_labels,
                img_meta, rescale):
     """Integrate matching score through
         1. Association score with softmax
         2. Semantic consistence
         3. Spatial overlap (Only in consective frames)
         4. Detection confidence
     """
     cfg = self.test_cfg.track
     # id init
     ids = torch.zeros_like(det_bboxes[:, 1]).long() - 1
     # Get semantic consistency, as a flag for assigning ids.
     cat_same = (self.labels == det_labels.view(-1, 1)).float()
     cat_dummy = cat_same.new_ones(cat_same.size(0), 1)
     cat_same = torch.cat((cat_dummy, cat_same), dim=1)
     # calculate feature appearance similarity
     valid_t_idxs = torch.nonzero(
         self.vanish_frames < cfg.long_term_frames).squeeze(1) + 1
     valid_t_idxs = torch.cat(
         (torch.tensor([0], dtype=torch.long,
                       device=valid_t_idxs.device), valid_t_idxs))
     if asso_probs.size(-1) < cat_same.size(-1):
         # Sigmoid or Cosine
         assert asso_probs.size(-1) + 1 == cat_same.size(-1)
         asso_scores = torch.zeros_like(cat_same)
         if asso_probs.max() > 1.0:
             # values above 1 must be raw logits; squash them with sigmoid
             asso_scores[:, 1:] = torch.sigmoid(asso_probs)
         asso_scores[:, 0] += 0.5
     else:
         # Softmax
         assert asso_probs.size(-1) == cat_same.size(-1)
         asso_scores = torch.zeros_like(cat_same)
         asso_scores[:,
                     valid_t_idxs] = F.softmax(asso_probs[:, valid_t_idxs],
                                               dim=1)  # [N_det, N_emb + 1]
         asso_scores *= (asso_scores > cfg.asso_score_thre).float()
     # get overlaps under short-term tracking
     overlaps = torch.zeros_like(cat_same)
     valid_prop = prop_bboxes[:, -1] > cfg.prop_score_thre
     prop_bboxes = prop_bboxes[valid_prop, :]
     prev_ids = self.prev_ids[valid_prop]
     prop_overlaps = bbox_overlaps(det_bboxes[:, :4], prop_bboxes[:, :4])
     overlaps[:, prev_ids + 1] = prop_overlaps
     overlaps *= (overlaps > cfg.prop_overlap_thre).float()
     # short-term matching according to overlaps
     short_scores = overlaps * cat_same
     if cfg.clean_before_short_assign:
         valid_dets = det_bboxes[:, -1] > cfg.new_obj_score_thre
         valid_dets = valid_dets.view(-1, 1).repeat(1, short_scores.size(1))
         short_scores = short_scores * valid_dets.float()
     if prev_ids.shape[0] > 0 and (short_scores > 0).any():
         t2d_idxs = max_matching(short_scores[:, prev_ids + 1])
         is_match = t2d_idxs >= 0
         t2d_idxs = t2d_idxs[is_match]
         prev_ids = prev_ids[is_match]
         ids[t2d_idxs.tolist()] = prev_ids
         if cfg.prop_fn:
             raise NotImplementedError
     if cfg.use_reid:
         # long-term association
         valid_dets = ids < 0
         if cfg.clean_before_long_assign:
             valid_dets *= det_bboxes[:, -1] > cfg.new_obj_score_thre
         long_scores = asso_scores * cat_same
         valid_embeds = self.vanish_frames < cfg.long_term_frames
         if prev_ids.shape[0] > 0:
             valid_embeds[prev_ids] = 0
         long_scores[:, 1:] *= valid_embeds.float().view(1, -1)
         if valid_dets.any() and (long_scores[valid_dets, :] > 0).any():
             valid_d_idxs = torch.nonzero(valid_dets == 1).squeeze(1)
             d2t_idxs = max_matching(long_scores[valid_d_idxs, :].t()) - 1
             is_match = d2t_idxs >= 0
             ids[valid_d_idxs[is_match]] = d2t_idxs[is_match]
     # new objects
     valid_dets = ids < 0
     valid_dets *= det_bboxes[:, -1] > cfg.new_obj_score_thre
     valid_idxs = torch.nonzero(valid_dets > 0).squeeze(1).tolist()
     if len(valid_idxs) > 0:
         for i, valid_idx in enumerate(valid_idxs):
             ids[valid_idx] = self.embeddings.size(0) + i
         new_track_bboxes = det_bboxes[valid_idxs, :]
         new_track_labels = det_labels[valid_idxs]
         vanish_frames = torch.zeros_like(new_track_labels)
         bbox_embeds, track_embeds = self.get_new_embeds(
             x, new_track_bboxes, img_meta, rescale)
         self.update(type='contact',
                     embeddings=track_embeds,
                     bbox_embeds=bbox_embeds,
                     tracklet_scores=new_track_bboxes[:, -1].detach().clone(),
                     bboxes=new_track_bboxes.detach().clone(),
                     labels=new_track_labels,
                     vanish_frames=vanish_frames)
     return ids
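
The `max_matching` helper called above is not shown in this example. A hedged sketch of the assumed behavior — a maximum-score bipartite matching that returns, for each column of the score matrix, the matched row index or -1 — using SciPy's `linear_sum_assignment` as a stand-in solver:

import torch
from scipy.optimize import linear_sum_assignment

def max_matching(score_mat):
    # Sketch (assumption): solve a maximum-score assignment over a
    # (num_rows, num_cols) score matrix and return a LongTensor of length
    # num_cols holding each column's matched row index, or -1 when its
    # best assignment has a non-positive score.
    rows, cols = linear_sum_assignment(-score_mat.detach().cpu().numpy())
    out = torch.full((score_mat.size(1),), -1,
                     dtype=torch.long, device=score_mat.device)
    for r, c in zip(rows, cols):
        if score_mat[r, c] > 0:
            out[c] = r
    return out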
    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None):
        self.global_step += 1
        x = self.extract_feat(img)

        losses = dict()

        # RPN forward and loss
        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                          self.train_cfg.rpn)
            rpn_losses = self.rpn_head.loss(
                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
            losses.update(rpn_losses)
            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
        else:
            proposal_list = proposals

        # assign gts and sample proposals
        if self.with_bbox or self.with_mask:
            bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
            bbox_sampler = build_sampler(
                self.train_cfg.rcnn.sampler, context=self)
            num_imgs = img.size(0)
            if gt_bboxes_ignore is None:
                gt_bboxes_ignore = [None for _ in range(num_imgs)]
            sampling_results = []
            for i in range(num_imgs):
                assign_result = bbox_assigner.assign(
                    proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],
                    gt_labels[i])
                sampling_result = bbox_sampler.sample(
                    assign_result,
                    proposal_list[i],
                    gt_bboxes[i],
                    gt_labels[i],
                    feats=[lvl_feat[i][None] for lvl_feat in x])
                sampling_results.append(sampling_result)

        # bbox head forward and loss
        if self.with_bbox:
            rois = bbox2roi([res.bboxes for res in sampling_results])
            # TODO: a more flexible way to decide which feature maps to use
            bbox_feats = self.bbox_roi_extractor(
                x[:self.bbox_roi_extractor.num_inputs], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)
            cls_score, bbox_pred = self.bbox_head(bbox_feats)

            bbox_targets = self.bbox_head.get_target(
                sampling_results, gt_bboxes, gt_labels, self.train_cfg.rcnn)

            # start creating features for input
            pos_inds = bbox_targets[0] > 0
            cls_score_post_softmax = cls_score.softmax(dim=1)
            pos_probs_single_item = cls_score_post_softmax[pos_inds, bbox_targets[0][pos_inds]]
            pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), -1,
                                           4)[pos_inds, bbox_targets[0][pos_inds]]
            # zero out NaN values in the regression predictions
            pos_bbox_pred[torch.isnan(pos_bbox_pred)] = 0
            pos_gts = torch.cat([k.pos_gt_bboxes for k in sampling_results], dim=0)
            pos_proposal = torch.cat([k.pos_bboxes for k in sampling_results], dim=0)
            target_means = self.bbox_head.target_means
            target_stds = self.bbox_head.target_stds
            pos_bbox = delta2bbox(pos_proposal, pos_bbox_pred, means=target_means, stds=target_stds)
            pos_ious = bbox_overlaps(pos_gts, pos_bbox, is_aligned=True)
            pos_ious = pos_ious.view(-1)

            total_ious = pos_ious.new_full((pos_inds.numel(),), 0.0)
            total_ious[pos_inds] = pos_ious
            total_probs_single_item = pos_probs_single_item.new_full((pos_inds.numel(),), 0.0)
            total_probs_single_item[pos_inds] = pos_probs_single_item

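            # compute the current (unweighted) losses once, detached, so
            # they can be fed to the uncertainty predictor as features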
            with torch.no_grad():
                loss_bbox_as_features = self.bbox_head.loss(cls_score, bbox_pred,
                                                            *bbox_targets, reduction_override="none")
                cls_loss_as_feature = loss_bbox_as_features["loss_cls"].detach()
                bbox_loss_as_feature = loss_bbox_as_features["loss_bbox"].detach()

                if not self.share_roi_extractor:
                    pos_rois = bbox2roi(
                        [res.pos_bboxes for res in sampling_results])
                    mask_feats = self.mask_roi_extractor(
                        x[:self.mask_roi_extractor.num_inputs], pos_rois)
                    if self.with_shared_head:
                        mask_feats = self.shared_head(mask_feats)
                else:
                    mask_feats = bbox_feats[pos_inds]
                mask_pred = self.mask_head(mask_feats)
                mask_targets = self.mask_head.get_target(
                    sampling_results, gt_masks, self.train_cfg.rcnn)
                pos_labels = torch.cat(
                    [res.pos_gt_labels for res in sampling_results])
                loss_mask = self.mask_head.loss(mask_pred, mask_targets,
                                                pos_labels, sample_weights=None)
                loss_mask = loss_mask.mean(dim=1).mean(dim=1).view(-1, 1)

                losses.update({
                    "pos_cls_loose_value": cls_loss_as_feature[pos_inds].mean(),
                    "neg_cls_loose_value": cls_loss_as_feature[~pos_inds].mean(),
                    "reg_loose_value": bbox_loss_as_feature.sum(dim=1).mean(),
                })
                bbox_loss_as_feature_full = bbox_loss_as_feature.new_zeros(
                    (pos_inds.numel(), bbox_loss_as_feature.shape[1]))
                bbox_loss_as_feature_full[pos_inds] = bbox_loss_as_feature

                loss_mask_as_feature_full = loss_mask.new_zeros(
                    (pos_inds.numel(), 1))
                loss_mask_as_feature_full[pos_inds] = loss_mask

            uncertainty_prediction = self.uncertainty_predictor(
                total_ious.detach(), total_probs_single_item.detach(),
                cls_loss_as_feature, bbox_loss_as_feature_full,
                loss_mask_as_feature_full)
            uncertainty_prediction_cls = uncertainty_prediction[:, 0]
            uncertainty_prediction_reg = uncertainty_prediction[:, 1]
            uncertainty_prediction_mask = uncertainty_prediction[:, 2]

            uncertainty_prediction_cls = torch.clamp(uncertainty_prediction_cls, min=self.cls_prediction_min,
                                                     max=self.cls_prediction_max)
            uncertainty_prediction_reg = torch.clamp(uncertainty_prediction_reg, min=self.reg_prediction_min,
                                                     max=self.reg_prediction_max)
            uncertainty_prediction_mask = torch.clamp(uncertainty_prediction_mask, min=self.reg_prediction_min,
                                                      max=self.reg_prediction_max)
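            # collapse the per-sample cls uncertainties to their positive /
            # negative group means, so every sample in a group shares the
            # same learned weight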
            negative_avg = uncertainty_prediction_cls[~pos_inds].mean()
            uncertainty_prediction_cls[~pos_inds] = torch.ones_like(
                uncertainty_prediction_cls[~pos_inds]) * negative_avg
            positive_avg = uncertainty_prediction_cls[pos_inds].mean()
            uncertainty_prediction_cls[pos_inds] = torch.ones_like(
                uncertainty_prediction_cls[pos_inds]) * positive_avg
            losses.update({
                "loss_uncertainty_cls_pos":
                uncertainty_prediction_cls[pos_inds].mean() *
                self.uncertainty_cls_weight,
                "loss_uncertainty_cls_neg":
                uncertainty_prediction_cls[~pos_inds].mean() *
                self.negative_regularization,
                "loss_uncertainty_reg":
                uncertainty_prediction_reg[pos_inds].mean() *
                self.uncertainty_reg_weight,
                "loss_uncertainty_mask":
                uncertainty_prediction_mask[pos_inds].mean() *
                self.uncertainty_mask_weight,
            })
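            # Kendall/Gal-style weighting: each predicted log-uncertainty u
            # becomes a loss weight exp(-u); the mean-u regularizers added
            # to `losses` above keep the network from inflating u to zero
            # out its losses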
            uncertainty_prediction_reg = torch.exp(-1. * uncertainty_prediction_reg)
            uncertainty_prediction_cls = torch.exp(-1. * uncertainty_prediction_cls)
            uncertainty_prediction_mask = torch.exp(-1. * uncertainty_prediction_mask)

            losses.update({
                "cls_prediction_pos": uncertainty_prediction_cls[pos_inds].mean(),
                "cls_prediction_neg": uncertainty_prediction_cls[~pos_inds].mean(),
                "cls_prediction_reg": uncertainty_prediction_reg[pos_inds].mean(),
            })
            uncertainty_prediction_mask_pos = uncertainty_prediction_mask[pos_inds]

            bbox_targets_weighted = [m.detach() for m in bbox_targets]
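            # re-weight the sampled targets: index 3 holds the bbox weights
            # (scaled by the regression factor) and index 1 the label
            # weights (scaled by the classification factor)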
            bbox_targets_weighted[3] = bbox_targets_weighted[3] * uncertainty_prediction_reg.view(-1, 1)
            bbox_targets_weighted[1] = bbox_targets_weighted[1] * uncertainty_prediction_cls.view(-1, )
            loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
                                            *bbox_targets_weighted)
            losses.update(loss_bbox)

        # mask head forward and loss
        if self.with_mask:
            if not self.share_roi_extractor:
                pos_rois = bbox2roi(
                    [res.pos_bboxes for res in sampling_results])
                mask_feats = self.mask_roi_extractor(
                    x[:self.mask_roi_extractor.num_inputs], pos_rois)
                if self.with_shared_head:
                    mask_feats = self.shared_head(mask_feats)
            else:
                pos_inds = []
                device = bbox_feats.device
                for res in sampling_results:
                    pos_inds.append(
                        torch.ones(
                            res.pos_bboxes.shape[0],
                            device=device,
                            dtype=torch.bool))
                    pos_inds.append(
                        torch.zeros(
                            res.neg_bboxes.shape[0],
                            device=device,
                            dtype=torch.bool))
                pos_inds = torch.cat(pos_inds)
                mask_feats = bbox_feats[pos_inds]
            mask_pred = self.mask_head(mask_feats)

            mask_targets = self.mask_head.get_target(
                sampling_results, gt_masks, self.train_cfg.rcnn)
            pos_labels = torch.cat(
                [res.pos_gt_labels for res in sampling_results])
            loss_mask = self.mask_head.loss(mask_pred, mask_targets,
                                            pos_labels,
                                            sample_weights=uncertainty_prediction_mask_pos)
            losses.update(loss_mask)
        return losses
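
The `self.uncertainty_predictor` module consumed in `forward_train` is likewise not part of this example. A minimal sketch under stated assumptions — a small MLP mapping the per-RoI scalar features (IoU, class probability, detached loss values) to three log-uncertainty scores for cls / reg / mask:

import torch
import torch.nn as nn

class UncertaintyPredictor(nn.Module):
    # Hypothetical stand-in for `self.uncertainty_predictor`. Per-RoI
    # inputs: IoU (1) + class prob (1) + cls loss (1) + per-coordinate
    # bbox loss (4) + mask loss (1) = 8 scalars; output: three
    # log-uncertainty scores (cls, reg, mask).

    def __init__(self, in_dim=8, hidden=64):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(in_dim, hidden), nn.ReLU(inplace=True),
            nn.Linear(hidden, 3))

    def forward(self, ious, probs, cls_loss, bbox_loss, mask_loss):
        feats = torch.cat([ious.view(-1, 1), probs.view(-1, 1),
                           cls_loss.view(-1, 1), bbox_loss, mask_loss],
                          dim=1)
        return self.mlp(feats)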