def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, stride, num_total_samples, cfg):
    """Compute QFL, bbox-regression and DFL losses for one scale level.

    Args:
        anchors (Tensor): Anchor boxes; reshaped to (-1, 4).
        cls_score (Tensor): Joint cls/quality logits of shape
            (N, num_classes, H, W).
        bbox_pred (Tensor): Box distribution logits of shape
            (N, 4*(reg_max+1), H, W).
        labels (Tensor): Per-anchor labels. Non-zero entries are treated
            as positives in this variant.
        label_weights (Tensor): Per-anchor label weights.
        bbox_targets (Tensor): Per-anchor bbox targets (x1, y1, x2, y2).
        stride (int): Stride of this level; targets and anchor centers are
            normalized by it before decoding.
        num_total_samples (int): Averaging factor for the QFL loss.
        cfg: Unused here; kept for interface compatibility with callers.

    Returns:
        tuple: (loss_qfl, loss_bbox, loss_dfl, weight_targets.sum()).
    """
    anchors = anchors.reshape(-1, 4)
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(
        -1, 4 * (self.reg_max + 1))
    bbox_targets = bbox_targets.reshape(-1, 4)
    labels = labels.reshape(-1)
    label_weights = label_weights.reshape(-1)

    # NOTE(review): positives are "label != 0" here, unlike the sibling
    # loss_single that uses 0 <= label < num_classes — confirm the label
    # convention of this head's target assigner.
    pos_inds = torch.nonzero(labels, as_tuple=False).squeeze(1)
    score = label_weights.new_zeros(labels.shape)

    if len(pos_inds) > 0:
        pos_bbox_targets = bbox_targets[pos_inds]
        pos_bbox_pred = bbox_pred[pos_inds]  # (n, 4 * (reg_max + 1))
        pos_anchors = anchors[pos_inds]
        norm_anchor_center = self.anchor_center(pos_anchors) / stride
        pos_bbox_pred_distance = self.distribution_project(pos_bbox_pred)
        pos_decode_bbox_pred = distance2bbox(norm_anchor_center,
                                             pos_bbox_pred_distance)
        pos_decode_bbox_targets = pos_bbox_targets / stride
        target_ltrb = bbox2distance(norm_anchor_center,
                                    pos_decode_bbox_targets,
                                    self.reg_max).reshape(-1)
        # Quality score: IoU between the decoded prediction and the target.
        score[pos_inds] = self.iou_target(pos_decode_bbox_pred.detach(),
                                          pos_decode_bbox_targets)
        # Classification confidence weights the regression losses.
        weight_targets = \
            cls_score.detach().sigmoid().max(dim=1)[0][pos_inds]

        # regression loss
        loss_bbox = self.loss_bbox(
            pos_decode_bbox_pred,
            pos_decode_bbox_targets,
            weight=weight_targets,
            avg_factor=1.0)

        pred_ltrb = pos_bbox_pred.reshape(-1, self.reg_max + 1)
        # dfl loss
        loss_dfl = self.loss_dfl(
            pred_ltrb,
            target_ltrb,
            weight=weight_targets[:, None].expand(-1, 4).reshape(-1),
            avg_factor=4.0)
    else:
        loss_bbox = bbox_pred.sum() * 0
        loss_dfl = bbox_pred.sum() * 0
        # Device-agnostic zero: the previous torch.tensor(0).cuda()
        # hard-coded the device and crashed on CPU-only runs.
        weight_targets = bbox_pred.new_tensor(0)

    # qfl loss
    loss_qfl = self.loss_qfl(
        cls_score, labels, score, avg_factor=num_total_samples)
    return loss_qfl, loss_bbox, loss_dfl, weight_targets.sum()
def get_bbox_prob_and_overlap(self, points, bbox_preds, gt_bboxes):
    """Return per-point bbox likelihoods and IoU overlaps against GT boxes.

    The likelihood is ``exp(-loss_bbox)`` of the predicted distances versus
    the (l, t, r, b) targets derived from ``gt_bboxes``; the overlap is the
    aligned IoU between the decoded predictions and the GT boxes.
    """
    gt_per_point = gt_bboxes[:, None, :].repeat(1, points.shape[1], 1)
    ltrb_targets = bbox2distance(points, gt_per_point,
                                 norm=self.distance_norm)
    raw_loss = self.loss_bbox(bbox_preds, ltrb_targets,
                              reduction_override='none')
    bbox_prob = torch.exp(-raw_loss)
    decoded_boxes = distance2bbox(points, bbox_preds,
                                  norm=self.distance_norm)
    bbox_overlap = bbox_overlaps(
        gt_bboxes[:, None, :].expand_as(decoded_boxes),
        decoded_boxes,
        is_aligned=True)
    return bbox_prob, bbox_overlap
def transform_bbox_targets(self, decoded_bboxes, mlvl_points, num_imgs):
    """Convert per-level (x1, y1, x2, y2) targets into (l, t, r, b) form.

    Args:
        decoded_bboxes (list[Tensor]): Per-level regression targets in
            (x1, y1, x2, y2) format.
        mlvl_points (list[Tensor]): Per-level points, each of shape
            (num_points, 2).
        num_imgs (int): Number of images in the batch; points are tiled
            once per image to match the flattened targets.

    Returns:
        list[Tensor]: Per-level regression targets in (l, t, r, b) format.
    """
    # TODO: Re-implemented in Class PointCoder
    assert len(decoded_bboxes) == len(mlvl_points)
    tiled_points = (pts.repeat(num_imgs, 1) for pts in mlvl_points)
    return [
        bbox2distance(pts, boxes)
        for pts, boxes in zip(tiled_points, decoded_bboxes)
    ]
def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, stride, soft_targets, num_total_samples):
    """Compute cls (QFL), bbox, DFL and LD losses for one scale level.

    Args:
        anchors (Tensor): Box reference for each scale level with shape
            (N, num_total_anchors, 4).
        cls_score (Tensor): Cls and quality joint scores for each scale
            level, shape (N, num_classes, H, W).
        bbox_pred (Tensor): Box distribution logits for each scale level
            with shape (N, 4*(n+1), H, W), n being the max of the integral
            set.
        labels (Tensor): Labels of each anchor, (N, num_total_anchors).
        label_weights (Tensor): Label weights, (N, num_total_anchors).
        bbox_targets (Tensor): BBox regression targets of each anchor,
            shape (N, num_total_anchors, 4).
        stride (tuple): Stride of this scale level (h, w); must be square.
        soft_targets (Tensor): Teacher box distribution logits used by the
            localization-distillation loss, same layout as ``bbox_pred``.
        num_total_samples (int): Number of positive samples, reduced over
            all GPUs.

    Returns:
        tuple: (loss_cls, loss_bbox, loss_dfl, loss_ld,
            weight_targets.sum()).
    """
    assert stride[0] == stride[1], 'h stride is not equal to w stride!'
    anchors = anchors.reshape(-1, 4)
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    reg_channels = 4 * (self.reg_max + 1)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, reg_channels)
    soft_targets = soft_targets.permute(0, 2, 3, 1).reshape(-1, reg_channels)
    bbox_targets = bbox_targets.reshape(-1, 4)
    labels = labels.reshape(-1)
    label_weights = label_weights.reshape(-1)

    # FG cat_id: [0, num_classes - 1]; BG cat_id: num_classes.
    bg_class_ind = self.num_classes
    is_pos = (labels >= 0) & (labels < bg_class_ind)
    pos_inds = is_pos.nonzero().squeeze(1)
    score = label_weights.new_zeros(labels.shape)

    if len(pos_inds) > 0:
        pos_bbox_targets = bbox_targets[pos_inds]
        pos_bbox_pred = bbox_pred[pos_inds]
        centers = self.anchor_center(anchors[pos_inds]) / stride[0]

        # Classification confidence acts as the regression loss weight.
        weight_targets = cls_score.detach().sigmoid().max(dim=1)[0][pos_inds]

        decoded_pred = distance2bbox(centers, self.integral(pos_bbox_pred))
        decoded_targets = pos_bbox_targets / stride[0]
        # Quality score: aligned IoU between decoded boxes and targets.
        score[pos_inds] = bbox_overlaps(
            decoded_pred.detach(), decoded_targets, is_aligned=True)

        pred_corners = pos_bbox_pred.reshape(-1, self.reg_max + 1)
        soft_corners = soft_targets[pos_inds].reshape(-1, self.reg_max + 1)
        target_corners = bbox2distance(
            centers, decoded_targets, self.reg_max).reshape(-1)
        corner_weights = weight_targets[:, None].expand(-1, 4).reshape(-1)

        # regression loss
        loss_bbox = self.loss_bbox(decoded_pred,
                                   decoded_targets,
                                   weight=weight_targets,
                                   avg_factor=1.0)
        # dfl loss
        loss_dfl = self.loss_dfl(pred_corners,
                                 target_corners,
                                 weight=corner_weights,
                                 avg_factor=4.0)
        # ld (localization distillation) loss against the teacher corners
        loss_ld = self.loss_ld(pred_corners,
                               soft_corners,
                               weight=corner_weights,
                               avg_factor=4.0)
    else:
        loss_ld = bbox_pred.sum() * 0
        loss_bbox = bbox_pred.sum() * 0
        loss_dfl = bbox_pred.sum() * 0
        weight_targets = bbox_pred.new_tensor(0)

    # cls (qfl) loss
    loss_cls = self.loss_cls(cls_score, (labels, score),
                             weight=label_weights,
                             avg_factor=num_total_samples)
    return loss_cls, loss_bbox, loss_dfl, loss_ld, weight_targets.sum()
def loss_single(self, anchors, cls_score, bbox_pred, kps_pred, labels,
                label_weights, bbox_targets, kps_targets, kps_weights,
                stride, num_total_samples):
    """Compute cls, bbox, DFL and keypoint losses for one scale level.

    Args:
        anchors (Tensor): Box reference for each scale level with shape
            (N, num_total_anchors, 4).
        cls_score (Tensor): Cls and quality joint scores for each scale
            level, shape (N, num_classes, H, W).
        bbox_pred (Tensor): Box distribution logits (N, 4*(n+1), H, W)
            when ``self.use_dfl``; plain distances (N, 4, H, W) otherwise.
        kps_pred (Tensor): Keypoint predictions, (N, NK*2, H, W).
        labels (Tensor): Labels of each anchor, (N, num_total_anchors).
        label_weights (Tensor): Label weights, (N, num_total_anchors).
        bbox_targets (Tensor): BBox regression targets of each anchor,
            shape (N, num_total_anchors, 4).
        kps_targets (Tensor): Keypoint targets (absolute coordinates).
        kps_weights (Tensor): Keypoint weights.
        stride (tuple): Stride of this scale level (h, w); must be square.
        num_total_samples (int): Number of positive samples, reduced over
            all GPUs.

    Returns:
        tuple: (loss_cls, loss_bbox, loss_dfl, loss_kps,
            weight_targets.sum()).
    """
    assert stride[0] == stride[1], 'h stride is not equal to w stride!'
    # When False, positive quality targets are set to 1 instead of the IoU.
    use_qscore = True
    anchors = anchors.reshape(-1, 4)
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    if not self.use_dfl:
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    else:
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(
            -1, 4 * (self.reg_max + 1))
    bbox_targets = bbox_targets.reshape(-1, 4)
    labels = labels.reshape(-1)
    label_weights = label_weights.reshape(-1)
    if self.use_kps:
        kps_pred = kps_pred.permute(0, 2, 3, 1).reshape(-1, self.NK * 2)
        kps_targets = kps_targets.reshape((-1, self.NK * 2))
        kps_weights = kps_weights.reshape((-1, self.NK * 2))

    # FG cat_id: [0, num_classes - 1], BG cat_id: num_classes
    bg_class_ind = self.num_classes
    pos_inds = ((labels >= 0) & (labels < bg_class_ind)).nonzero().squeeze(1)
    score = label_weights.new_zeros(labels.shape)

    if len(pos_inds) > 0:
        pos_bbox_targets = bbox_targets[pos_inds]
        pos_bbox_pred = bbox_pred[pos_inds]
        pos_anchors = anchors[pos_inds]
        pos_anchor_centers = self.anchor_center(pos_anchors) / stride[0]

        # Classification confidence weights the regression losses.
        weight_targets = cls_score.detach().sigmoid()
        weight_targets = weight_targets.max(dim=1)[0][pos_inds]

        pos_decode_bbox_targets = pos_bbox_targets / stride[0]
        if self.use_dfl:
            pos_bbox_pred_corners = self.integral(pos_bbox_pred)
            pos_decode_bbox_pred = distance2bbox(pos_anchor_centers,
                                                 pos_bbox_pred_corners)
        else:
            pos_decode_bbox_pred = distance2bbox(pos_anchor_centers,
                                                 pos_bbox_pred)

        if self.use_kps:
            pos_kps_targets = kps_targets[pos_inds]
            pos_kps_pred = kps_pred[pos_inds]
            pos_kps_weights = kps_weights.max(
                dim=1)[0][pos_inds] * weight_targets
            pos_kps_weights = pos_kps_weights.reshape((-1, 1))
            # Keypoint targets are encoded as offsets from the anchor
            # center in stride-normalized coordinates; predictions are
            # compared directly in that encoded space.
            pos_decode_kps_targets = kps2distance(
                pos_anchor_centers, pos_kps_targets / stride[0])
            pos_decode_kps_pred = pos_kps_pred

        if use_qscore:
            score[pos_inds] = bbox_overlaps(pos_decode_bbox_pred.detach(),
                                            pos_decode_bbox_targets,
                                            is_aligned=True)
        else:
            score[pos_inds] = 1.0

        # regression loss
        loss_bbox = self.loss_bbox(pos_decode_bbox_pred,
                                   pos_decode_bbox_targets,
                                   weight=weight_targets,
                                   avg_factor=1.0)

        if self.use_kps:
            # loss_kps_std scales both sides so the loss magnitude is
            # comparable to the bbox loss.
            loss_kps = self.loss_kps(
                pos_decode_kps_pred * self.loss_kps_std,
                pos_decode_kps_targets * self.loss_kps_std,
                weight=pos_kps_weights,
                avg_factor=1.0)
        else:
            loss_kps = kps_pred.sum() * 0

        # dfl loss
        if self.use_dfl:
            pred_corners = pos_bbox_pred.reshape(-1, self.reg_max + 1)
            target_corners = bbox2distance(pos_anchor_centers,
                                           pos_decode_bbox_targets,
                                           self.reg_max).reshape(-1)
            loss_dfl = self.loss_dfl(
                pred_corners,
                target_corners,
                weight=weight_targets[:, None].expand(-1, 4).reshape(-1),
                avg_factor=4.0)
        else:
            loss_dfl = bbox_pred.sum() * 0
    else:
        loss_bbox = bbox_pred.sum() * 0
        loss_dfl = bbox_pred.sum() * 0
        loss_kps = kps_pred.sum() * 0
        # Device-agnostic zero: the previous torch.tensor(0).cuda()
        # hard-coded the device and crashed on CPU-only runs.
        weight_targets = bbox_pred.new_tensor(0)

    loss_cls = self.loss_cls(cls_score, (labels, score),
                             weight=label_weights,
                             avg_factor=num_total_samples)
    return loss_cls, loss_bbox, loss_dfl, loss_kps, weight_targets.sum()