def loss_single(self, cls_score, bbox_pred, rois, labels, label_weights,
                bbox_targets, bbox_weights, num_total_samples, cfg):
    """Compute classification and regression losses for one feature level.

    Returns:
        tuple: (loss_cls or None, loss_reg). loss_cls is None when the head
        has no classification branch (``self.with_cls`` is False).
    """
    # classification loss
    if self.with_cls:
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        # (N, C, H, W) -> (N*H*W, num_classes)
        cls_score = cls_score.permute(0, 2, 3, 1).reshape(
            -1, self.cls_out_channels)
        loss_cls = self.loss_cls(cls_score, labels, label_weights,
                                 avg_factor=num_total_samples)
    # regression loss
    bbox_targets = bbox_targets.reshape(-1, 4)
    bbox_weights = bbox_weights.reshape(-1, 4)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    if self.use_iou_reg:
        # convert delta to bbox: IoU-based losses need decoded boxes,
        # so both predictions and targets are decoded against the rois
        rois = rois.reshape(-1, 4)
        bbox_pred = delta2bbox(rois, bbox_pred, self.target_means,
                               self.target_stds)
        bbox_targets = delta2bbox(rois, bbox_targets, self.target_means,
                                  self.target_stds)
    loss_reg = self.loss_bbox(bbox_pred, bbox_targets, bbox_weights,
                              avg_factor=num_total_samples)
    if self.with_cls:
        return loss_cls, loss_reg
    return None, loss_reg
def regress_by_class(self, rois, label, bbox_pred, img_meta):
    """Regress the bbox for the predicted class. Used in Cascade R-CNN.

    Args:
        rois (Tensor): shape (n, 4) or (n, 5)
        label (Tensor): shape (n, )
        bbox_pred (Tensor): shape (n, 4*(#class+1)) or (n, 4)
        img_meta (dict): Image meta info.

    Returns:
        Tensor: Regressed bboxes, the same shape as input rois.
    """
    assert rois.size(1) == 4 or rois.size(1) == 5, repr(rois.shape)
    if not self.reg_class_agnostic:
        # pick the 4 deltas belonging to each sample's predicted class
        label = label * 4
        inds = torch.stack((label, label + 1, label + 2, label + 3), 1)
        bbox_pred = torch.gather(bbox_pred, 1, inds)
    assert bbox_pred.size(1) == 4
    if rois.size(1) == 4:
        new_rois = delta2bbox(rois, bbox_pred, self.target_means,
                              self.target_stds, img_meta['img_shape'])
    else:
        # first column is the batch index; keep it attached to the output
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_meta['img_shape'])
        new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)
    return new_rois
def loss(self, rpn_rois, cls_score, bbox_pred, labels, label_weights,
         bbox_targets, bbox_weights, reduction_override=None):
    """Siamese-RPN loss: bbox loss on positives, IoU-aware cls loss.

    The classification target for positive samples is replaced by the IoU
    between the decoded prediction and the decoded ground truth.

    Returns:
        tuple: (dict of losses, pred_bboxes) where pred_bboxes is the
        decoded boxes concatenated with the raw classification scores.
    """
    losses = dict()
    pos_inds = labels > 0
    pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), 4)[pos_inds]
    if len(pos_bbox_pred) > 0:
        losses['loss_bbox'] = self.loss_bbox(
            pos_bbox_pred,
            bbox_targets[pos_inds],
            bbox_weights[pos_inds],
            avg_factor=bbox_targets.size(0),
            reduction_override=reduction_override)
    avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)
    all_boxes = delta2bbox(rpn_rois[:, 1:], bbox_pred, self.target_means,
                           self.target_stds, None)
    bboxes = all_boxes[pos_inds]
    labels = labels.float()
    if len(bboxes) > 0:
        # replace hard labels of positives with the prediction/GT IoU
        gtbboxes = delta2bbox(rpn_rois[:, 1:], bbox_targets,
                              self.target_means, self.target_stds,
                              None)[pos_inds]
        iou_target = bbox_overlaps(bboxes, gtbboxes, 'iou', is_aligned=True)
        labels[pos_inds] = iou_target
    losses['loss_cls'] = self.loss_cls(
        cls_score,
        labels.view(-1, 1),
        label_weights.view(-1, 1),
        avg_factor=avg_factor,
        reduction_override=reduction_override)
    pred_bboxes = torch.cat([all_boxes, cls_score], dim=-1)
    if 'loss_bbox' in losses.keys():
        return dict(loss_siamese_rpn_cls=losses['loss_cls'],
                    loss_siamese_rpn_bbox=losses['loss_bbox']), \
            pred_bboxes
    else:
        # no positives: emit a zero bbox loss so the returned keys are stable
        return dict(loss_siamese_rpn_cls=losses['loss_cls'],
                    loss_siamese_rpn_bbox=losses['loss_cls'].new_zeros(
                        losses['loss_cls'].shape)), \
            pred_bboxes
def loss_single(self, anchors, cls_score, bbox_pred, centerness, labels,
                label_weights, bbox_targets, num_total_samples, cfg):
    """Compute cls, bbox and centerness losses for one feature level.

    Returns:
        tuple: (loss_cls, loss_bbox, loss_centerness, centerness_targets_sum)
    """
    anchors = anchors.reshape(-1, 4)
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    centerness = centerness.permute(0, 2, 3, 1).reshape(-1)
    bbox_targets = bbox_targets.reshape(-1, 4)
    labels = labels.reshape(-1)
    label_weights = label_weights.reshape(-1)
    # classification loss
    loss_cls = self.loss_cls(cls_score, labels, label_weights,
                             avg_factor=num_total_samples)
    pos_inds = torch.nonzero(labels).squeeze(1)
    if len(pos_inds) > 0:
        pos_bbox_targets = bbox_targets[pos_inds]
        pos_bbox_pred = bbox_pred[pos_inds]
        pos_anchors = anchors[pos_inds]
        pos_centerness = centerness[pos_inds]
        centerness_targets = self.centerness_target(
            pos_anchors, pos_bbox_targets)
        pos_decode_bbox_pred = delta2bbox(pos_anchors, pos_bbox_pred,
                                          self.target_means,
                                          self.target_stds)
        pos_decode_bbox_targets = delta2bbox(pos_anchors, pos_bbox_targets,
                                             self.target_means,
                                             self.target_stds)
        # regression loss, weighted by centerness of each positive sample
        loss_bbox = self.loss_bbox(pos_decode_bbox_pred,
                                   pos_decode_bbox_targets,
                                   weight=centerness_targets,
                                   avg_factor=1.0)
        # centerness loss
        loss_centerness = self.loss_centerness(
            pos_centerness, centerness_targets,
            avg_factor=num_total_samples)
    else:
        # no positives: keep zero losses attached to the graph
        loss_bbox = bbox_pred.sum() * 0
        loss_centerness = centerness.sum() * 0
        # device-agnostic zero; the previous `torch.tensor(0).cuda()`
        # crashed on CPU-only runs and ignored the input tensors' device
        centerness_targets = bbox_targets.new_tensor(0.)
    return loss_cls, loss_bbox, loss_centerness, centerness_targets.sum()
def loss_single(self, anchors, cls_score, bbox_pred, centerness, labels,
                label_weights, bbox_targets, num_total_samples):
    """Compute focal cls, IoU bbox and centerness losses for one level.

    Returns:
        tuple: (loss_cls, loss_bbox, loss_centerness, centerness_targets_sum)
    """
    anchors = anchors.reshape(-1, 4)
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    centerness = centerness.permute(0, 2, 3, 1).reshape(-1)
    bbox_targets = bbox_targets.reshape(-1, 4)
    labels = labels.reshape(-1)
    label_weights = label_weights.reshape(-1)
    # classification loss: focal loss normalized by (num_pos + 2)
    pos_inds = (labels > 0).nonzero().squeeze(1)
    num_pos = len(pos_inds)
    loss_cls = sigmoid_focal_loss(cls_score, labels, self.train_cfg.gamma,
                                  self.train_cfg.alpha,
                                  'none').sum()[None] / (num_pos + 2)
    if len(pos_inds) > 0:
        pos_bbox_targets = bbox_targets[pos_inds]
        pos_bbox_pred = bbox_pred[pos_inds]
        pos_anchors = anchors[pos_inds]
        pos_centerness = centerness[pos_inds]
        centerness_targets = self.centerness_target(
            pos_anchors, pos_bbox_targets)
        pos_decode_bbox_pred = delta2bbox(pos_anchors, pos_bbox_pred,
                                          self.target_means,
                                          self.target_stds)
        pos_decode_bbox_targets = delta2bbox(pos_anchors, pos_bbox_targets,
                                             self.target_means,
                                             self.target_stds)
        # centerness weighted iou loss
        loss_bbox = self.loss_bbox(pos_decode_bbox_pred,
                                   pos_decode_bbox_targets,
                                   weight=centerness_targets,
                                   avg_factor=1.0)
        # centerness loss
        loss_centerness = F.binary_cross_entropy_with_logits(
            pos_centerness, centerness_targets, reduction='mean')[None]
    else:
        # no positives: keep zero losses attached to the graph
        loss_bbox = bbox_pred.sum() * 0
        loss_centerness = centerness.sum() * 0
        # device-agnostic zero; the previous `torch.tensor(0).cuda()`
        # crashed on CPU-only runs and ignored the input tensors' device
        centerness_targets = bbox_targets.new_tensor(0.)
    return loss_cls, loss_bbox, loss_centerness, centerness_targets.sum()
def get_bboxes_single(cls_scores, bbox_preds, priors, img_shape, scale_factor,
                      cfg, rescale, cls_out_channels, use_sigmoid_cls,
                      target_means, target_stds):
    """Decode single-image predictions into labeled detections via NMS."""
    scores = cls_scores.view(-1, cls_out_channels)
    deltas = bbox_preds.view(-1, 4)
    anchors = priors.view(-1, 4)
    nms_pre = cfg.get('nms_pre', -1)
    if 0 < nms_pre < scores.shape[0]:
        # rank anchors by their best foreground score and keep the top-k
        fg_scores = scores if use_sigmoid_cls else scores[:, 1:]
        max_scores, _ = fg_scores.max(dim=1)
        _, keep = max_scores.topk(nms_pre)
        anchors, deltas, scores = anchors[keep, :], deltas[keep, :], scores[keep, :]
    mlvl_bboxes = delta2bbox(anchors, deltas, target_means, target_stds,
                             img_shape)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    if use_sigmoid_cls:
        # sigmoid heads have no background column; prepend a dummy one
        padding = scores.new_zeros(scores.shape[0], 1)
        scores = torch.cat([padding, scores], dim=1)
    return multiclass_nms(mlvl_bboxes, scores, cfg.score_thr, cfg.nms,
                          cfg.max_per_img)
def loss_shape_single(self, shape_pred, bbox_anchors, bbox_gts, anchor_weights, anchor_total_num): shape_pred = shape_pred.permute(0, 2, 3, 1).contiguous().view( -1, 2) #单独对于shape的损失,先转话为[[w,h],[w,h] bbox_anchors = bbox_anchors.contiguous().view(-1, 4) #调整BOXES的形状 bbox_gts = bbox_gts.contiguous().view(-1, 4) #调整GT的形状 anchor_weights = anchor_weights.contiguous().view(-1, 4) #权重 bbox_deltas = bbox_anchors.new_full(bbox_anchors.size(), 0) #大小和anchors一样的全是0的张量 bbox_deltas[:, 2:] += shape_pred #将预测值放到其w,h位置 # filter out negative samples to speed-up weighted_bounded_iou_loss,对一些负样本利用权值进行过滤 inds = torch.nonzero(anchor_weights[:, 0] > 0).squeeze(1) bbox_deltas_ = bbox_deltas[inds] bbox_anchors_ = bbox_anchors[inds] bbox_gts_ = bbox_gts[inds] anchor_weights_ = anchor_weights[inds] pred_anchors_ = delta2bbox(bbox_anchors_, bbox_deltas_, self.anchoring_means, self.anchoring_stds, wh_ratio_clip=1e-6) #生成真正预测出的BOXES loss_shape = self.loss_shape( pred_anchors_, bbox_gts_, anchor_weights_, avg_factor=anchor_total_num) #根据预测anchor和GT计算两者的损失 return loss_shape
def get_adaptive_anchors_single(self, squares, shape_pred, center_pred, use_center_filter=False): """ Args: square (tensor): Squares of a single level. shape_pred (tensor): Shape predections of a single level. center_pred (tensor): center predections of a single level. use_center_filter (list[tensor]): Use center filter or not. Returns: tuple """ # calculate centeration filtering mask center_pred = center_pred.sigmoid().detach() if use_center_filter: center_mask = center_pred >= self.center_filter_thr else: center_mask = center_pred >= 0.0 mask = center_mask.permute(1, 2, 0).expand(-1, -1, self.num_anchors) mask = mask.contiguous().view(-1) # calculate adaptive anchors squares = squares[mask] anchor_deltas = shape_pred.permute(1, 2, 0).contiguous().view( -1, 2).detach()[mask] bbox_deltas = anchor_deltas.new_full(squares.size(), 0) bbox_deltas[:, 2:] = anchor_deltas adaptive_anchors = delta2bbox(squares, bbox_deltas, self.anchoring_means, self.anchoring_stds, wh_ratio_clip=1e-6) return adaptive_anchors, mask
def get_det_bboxes(j, rois, scores, bbox_pred, img_shape, scale_factor,
                   rescale=False, cfg=None):
    """Decode rois + deltas into detections for worker index ``j``.

    Returns (bboxes, scores) when ``cfg`` is None, otherwise
    ``[j, det_bboxes, det_labels]`` after NMS.
    """
    # BUG FIX: the original had a trailing comma after target_means, which
    # made it a 1-tuple ``([0., 0., 0., 0.],)`` instead of a list of means.
    target_means = [0., 0., 0., 0.]
    target_stds = [0.033, 0.033, 0.067, 0.067]
    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, target_means,
                            target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # BUG FIX: `bboxes[:, [0, 2]]` (advanced indexing) returns a
            # copy, so the in-place clamp_ was a silent no-op; step slicing
            # returns a view and clamps the x/y coordinates for real.
            bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)
    if rescale:
        bboxes /= scale_factor
    if cfg is None:
        return bboxes, scores
    else:
        det_bboxes, det_labels = multiclass_nms(bboxes, scores,
                                                cfg.score_thr, cfg.nms,
                                                cfg.max_per_img)
        return [j, det_bboxes, det_labels]
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
         bbox_weights, pos_boxes, reduction_override=None):
    """Compute cls and bbox losses; bbox loss is on decoded positive boxes.

    Returns:
        dict: may contain 'loss_cls', 'acc' and 'loss_bbox'.
    """
    losses = dict()
    if cls_score is not None:
        avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)
        losses['loss_cls'] = self.loss_cls(
            cls_score,
            labels,
            label_weights,
            avg_factor=avg_factor,
            reduction_override=reduction_override)
        losses['acc'] = accuracy(cls_score, labels)
    if bbox_pred is not None:
        pos_inds = labels > 0
        if self.reg_class_agnostic:
            pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), 4)[pos_inds]
        else:
            # select the deltas of each positive sample's own class
            pos_bbox_pred = bbox_pred.view(
                bbox_pred.size(0), -1, 4)[pos_inds, labels[pos_inds]]
        # decode before the loss; assumes bbox_targets for positives are
        # already in decoded (box) form — TODO confirm against the targets
        pos_bbox_pred = delta2bbox(pos_boxes, pos_bbox_pred,
                                   self.target_means, self.target_stds)
        losses['loss_bbox'] = self.loss_bbox(
            pos_bbox_pred,
            bbox_targets[pos_inds],
            bbox_weights[pos_inds],
            avg_factor=bbox_targets.size(0),
            reduction_override=reduction_override)
    return losses
def _get_det_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors,
                           img_shape, scale_factor, cfg, rescale=False):
    """Decode multi-level predictions of one image and run NMS."""
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
    all_proposals, all_scores = [], []
    for level_cls, level_reg, level_anchors in zip(cls_scores, bbox_preds,
                                                   mlvl_anchors):
        assert level_cls.size()[-2:] == level_reg.size()[-2:]
        flat_cls = level_cls.permute(1, 2, 0).contiguous().view(
            -1, self.cls_out_channels)
        level_scores = flat_cls.softmax(-1)
        flat_reg = level_reg.permute(1, 2, 0).contiguous().view(-1, 4)
        decoded = delta2bbox(level_anchors, flat_reg, self.target_means,
                             self.target_stds, img_shape)
        all_proposals.append(decoded)
        all_scores.append(level_scores)
    mlvl_proposals = torch.cat(all_proposals)
    if rescale:
        # map boxes back to the original image scale
        mlvl_proposals /= mlvl_proposals.new_tensor(scale_factor)
    mlvl_scores = torch.cat(all_scores)
    det_bboxes, det_labels = multiclass_nms(mlvl_proposals, mlvl_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def loss_shape_single(self, shape_pred, bbox_anchors, bbox_gts,
                      anchor_weights, anchor_total_num):
    """Anchor-shape loss for a single level (Guided Anchoring)."""
    flat_shape = shape_pred.permute(0, 2, 3, 1).contiguous().view(-1, 2)
    flat_anchors = bbox_anchors.contiguous().view(-1, 4)
    flat_gts = bbox_gts.contiguous().view(-1, 4)
    flat_weights = anchor_weights.contiguous().view(-1, 4)
    # only w/h are predicted; keep the center deltas at zero
    deltas = flat_anchors.new_full(flat_anchors.size(), 0)
    deltas[:, 2:] += flat_shape
    # filter out negative samples to speed-up weighted_bounded_iou_loss
    keep = torch.nonzero(flat_weights[:, 0] > 0).squeeze(1)
    decoded = delta2bbox(flat_anchors[keep], deltas[keep],
                         self.anchoring_means, self.anchoring_stds,
                         wh_ratio_clip=1e-6)
    return self.loss_shape(decoded, flat_gts[keep], flat_weights[keep],
                           avg_factor=anchor_total_num)
def get_track_bboxes(self, rois, cls_score, bbox_pred, img_shape,
                     scale_factor, rescale=False, cfg=None):
    """Decode tracking rois into boxes concatenated with their scores.

    Returns a (n, 5) tensor [x1, y1, x2, y2, score] when ``cfg`` is None.
    NOTE(review): when ``cfg`` is not None the function falls through and
    implicitly returns None — looks unintended; confirm against callers.
    """
    if self.use_sigmoid_cls:
        scores = torch.sigmoid(
            cls_score) if cls_score is not None else None
    else:
        scores = F.softmax(
            cls_score, dim=1)[:, 1][:, None] if cls_score is not None else None
    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # BUG FIX: `bboxes[:, [0, 2]]` (advanced indexing) returns a
            # copy, so the in-place clamp_ was a silent no-op; step slicing
            # returns a view and clamps the coordinates for real.
            bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)
    if rescale:
        bboxes /= scale_factor
    if cfg is None:
        bboxes = torch.cat([bboxes, scores], dim=1)
        return bboxes
def get_det_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   img_shape,
                   scale_factor,
                   rescale=False,
                   cfg=None,
                   im_info=None,
                   ):
    """Decode rois into detections, mapping boxes back through the CT crop.

    ``im_info`` is expected to be a (crop, im_scale) pair — TODO confirm
    the exact crop layout expected by ``map_box_back``.
    """
    if isinstance(cls_score, list):
        # average scores from multiple heads / augmentations
        cls_score = sum(cls_score) / float(len(cls_score))
    scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:]
        # TODO: add clip here
    if rescale:
        bboxes /= scale_factor
    # for ct img clip black
    crop, im_scale = im_info
    crop = crop.cuda().float()  # NOTE(review): hard-codes CUDA — confirm
    bboxes = map_box_back(bboxes, crop[2], crop[0], im_scale)
    if cfg is None:
        return bboxes, scores
    else:
        det_bboxes, det_labels = multiclass_nms(
            bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
        return det_bboxes, det_labels
def get_bboxes_single(self, cls_scores, bbox_preds, centernesses,
                      mlvl_anchors, img_shape, scale_factor, cfg,
                      rescale=False):
    """Decode one image's multi-level outputs into detections with logits.

    Returns:
        tuple: (det_bboxes, det_labels, det_logits) after centerness-aware
        multiclass NMS.
    """
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_logits = []
    mlvl_centerness = []
    for cls_score, bbox_pred, centerness, anchors in zip(
            cls_scores, bbox_preds, centernesses, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        logits = cls_score.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels)
        scores = logits.sigmoid()
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # rank by centerness-weighted score and keep the top-k
            max_scores, _ = (scores * centerness[:, None]).max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            logits = logits[topk_inds, :]
            centerness = centerness[topk_inds]
        bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                            self.target_stds, img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_logits.append(logits)
        mlvl_centerness.append(centerness)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    mlvl_logits = torch.cat(mlvl_logits)
    # sigmoid head has no background column; prepend a dummy one for NMS
    padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
    mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
    mlvl_centerness = torch.cat(mlvl_centerness)
    det_bboxes, det_labels, det_logits = multiclass_nms_with_logits(
        mlvl_bboxes, mlvl_scores, mlvl_logits, cfg.score_thr, cfg.nms,
        cfg.max_per_img, score_factors=mlvl_centerness)
    return det_bboxes, det_labels, det_logits
def get_det_bboxes(self, rois, cls_score, bbox_pred, img_shape,
                   scale_factor, rescale=False, cfg=None):
    """Decode rois into detections; scores are used as-is (no softmax).

    Returns (bboxes, scores) when ``cfg`` is None, otherwise the NMS output.
    """
    # if isinstance(cls_score, list):
    #     cls_score = sum(cls_score) / float(len(cls_score))
    # scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
    scores = cls_score
    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # BUG FIX: `bboxes[:, [0, 2]]` (advanced indexing) returns a
            # copy, so the in-place clamp_ was a silent no-op; step slicing
            # returns a view and clamps the coordinates for real.
            bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)
    if rescale:
        if isinstance(scale_factor, float):
            bboxes /= scale_factor
        else:
            bboxes /= torch.from_numpy(scale_factor).to(bboxes.device)
    if cfg is None:
        return bboxes, scores
    else:
        det_bboxes, det_labels = multiclass_nms(bboxes, scores,
                                                cfg.score_thr, cfg.nms,
                                                cfg.max_per_img)
        return det_bboxes, det_labels
def get_bboxes_single_stage1(self, cls_score_list, bbox_pred_list,
                             mlvl_anchors, img_shape, scale_factor, cfg,
                             rescale=False):
    """Transform anchors to refined bboxes (first stage of refinement).

    Returns:
        list[Tensor]: decoded boxes, one tensor per level.
    """
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
    mlvl_bboxes = []
    for cls_score, bbox_pred, anchors in zip(cls_score_list,
                                             bbox_pred_list, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        cls_score = cls_score.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels)
        # scores are computed but unused here; kept for parity with the
        # other stages — TODO confirm they can be dropped
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                            self.target_stds, img_shape)
        mlvl_bboxes.append(bboxes)
    # mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        # BUG FIX: mlvl_bboxes is a Python list (the cat above is commented
        # out), so `mlvl_bboxes /= mlvl_bboxes.new_tensor(...)` raised at
        # runtime; rescale each level's tensor instead.
        mlvl_bboxes = [
            bboxes / bboxes.new_tensor(scale_factor)
            for bboxes in mlvl_bboxes
        ]
    return mlvl_bboxes
def get_det_bboxes(self, rois, cls_score, bbox_pred, img_shape,
                   scale_factor, rescale=False, cfg=None):
    """Decode rois into detections; softmax scores, optional NMS."""
    # average when a list of logits (e.g. from multiple heads) is given
    if isinstance(cls_score, list):
        cls_score = sum(cls_score) / float(len(cls_score))
    scores = None if cls_score is None else F.softmax(cls_score, dim=1)
    if bbox_pred is None:
        bboxes = rois[:, 1:]
        # TODO: add clip here
    else:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    if rescale:
        bboxes /= scale_factor
    if cfg is None:
        return bboxes, scores
    det_bboxes, det_labels = multiclass_nms(bboxes, scores, cfg.score_thr,
                                            cfg.nms, cfg.max_per_img)
    return det_bboxes, det_labels
def get_det_bboxes(
        self,
        rois,
        cls_score,
        bbox_pred,
        img_shape,
        scale_factor,
        roi_feats=None,  # extra arg: lets NMS return each box's feature map
        rescale=False,
        cfg=None):
    """Decode rois into detections, optionally passing roi features to NMS.

    Returns (bboxes, scores) when ``cfg`` is None, otherwise the output of
    ``multiclass_nms`` (which here also receives rois / raw scores / deltas
    / roi_feats so the selected boxes' inputs can be recovered).
    """
    cls_score_for_load = cls_score
    if isinstance(cls_score, list):
        cls_score = sum(cls_score) / float(len(cls_score))
    scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # BUG FIX: `bboxes[:, [0, 2]]` (advanced indexing) returns a
            # copy, so the in-place clamp_ was a silent no-op; step slicing
            # returns a view and clamps every x/y column for real (boxes
            # may have 4*k columns here, one quadruple per class).
            bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
            bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)
    if rescale:
        if isinstance(scale_factor, float):
            bboxes /= scale_factor
        else:
            scale_factor = torch.from_numpy(scale_factor).to(bboxes.device)
            bboxes = (bboxes.view(bboxes.size(0), -1, 4) /
                      scale_factor).view(bboxes.size()[0], -1)
    if cfg is None:
        return bboxes, scores
    else:
        # NMS suppression
        # bboxes shape: num_boxes * 8
        # scores shape: num_boxes * 2
        # det_bboxes shape: num_kept_after_nms * 5
        # det_labels shape: num_kept_after_nms * 1
        det_bboxes, det_labels = multiclass_nms(
            bboxes,
            scores,
            cfg.score_thr,
            cfg.nms,
            cfg.max_per_img,
            rois=rois,
            cls_score=cls_score_for_load,
            bbox_pred=bbox_pred,
            roi_feats=roi_feats  # extra arg: feature map of each kept box
        )
        return det_bboxes, det_labels
def predict_weights(self, cls_score, bbox_pred, labels, label_weights,
                    bbox_targets, bbox_weights, anchors, loss_cls,
                    loss_bbox):
    """Predict per-sample uncertainty and reweight cls/bbox loss weights.

    Returns:
        tuple: (label_weights, bbox_weights, losses) where the weights are
        the inputs scaled by exp(-uncertainty) and losses carries the
        uncertainty regularizers plus monitoring scalars.
    """
    labels = labels.reshape(-1, )
    pos_inds = labels > 0
    # score of each positive sample's own class (labels are 1-based)
    postive_score = cls_score[pos_inds, labels[pos_inds] - 1].sigmoid()
    pos_pred = bbox_pred[pos_inds]
    pos_proposals = anchors[pos_inds]
    pos_bbox = delta2bbox(pos_proposals, pos_pred, means=self.target_means,
                          stds=self.target_stds)
    pos_targets = bbox_targets[pos_inds]
    gt_bboxes = delta2bbox(pos_proposals, pos_targets,
                           means=self.target_means, stds=self.target_stds)
    ious = bbox_overlaps(gt_bboxes, pos_bbox, is_aligned=True).view(-1, )
    # scatter positive IoUs / scores back into full-length vectors
    total_ious = ious.new_full((pos_inds.numel(),), 0.0)
    total_ious[pos_inds] = ious
    total_scores = postive_score.new_full((pos_inds.numel(),), 0.0)
    total_scores[pos_inds] = postive_score
    uncertainty_prediction = self.uncertainty_predictor(
        total_ious, total_scores, loss_cls.sum(dim=1).detach().data,
        loss_bbox.detach().data)
    losses = dict()
    uncertainty_prediction_cls = uncertainty_prediction[:, 0]
    uncertainty_prediction_reg = uncertainty_prediction[:, 1]
    uncertainty_prediction_cls = torch.clamp(uncertainty_prediction_cls,
                                             min=self.cls_prediction_min,
                                             max=self.cls_prediction_max)
    uncertainty_prediction_reg = torch.clamp(uncertainty_prediction_reg,
                                             min=self.reg_prediction_min,
                                             max=self.reg_prediction_max)
    # collapse the cls uncertainty to its mean for every sample
    uncertainty_prediction_cls = torch.ones_like(
        uncertainty_prediction_cls) * uncertainty_prediction_cls.mean()
    losses.update({
        "loss_uncertainty_cls":
            uncertainty_prediction_cls.sum() /
            uncertainty_prediction_cls.numel() * self.uncertainty_cls_weight})
    losses.update({
        "loss_uncertainty_reg":
            uncertainty_prediction_reg[pos_inds].mean() *
            self.uncertainty_reg_weight})
    # convert uncertainties to multiplicative weights in (0, 1]
    uncertainty_prediction_reg = torch.exp(-1. * uncertainty_prediction_reg)
    uncertainty_prediction_cls = torch.exp(-1. * uncertainty_prediction_cls)
    # monitoring scalars (not optimized directly)
    losses.update({
        "cls_prediction_pos": uncertainty_prediction_cls[pos_inds].mean(),
        "cls_prediction_neg": uncertainty_prediction_cls[~pos_inds].mean(),
        "cls_prediction_reg": uncertainty_prediction_reg[pos_inds].mean(),
    })
    bbox_weights = bbox_weights.detach().data * \
        uncertainty_prediction_reg.view(-1, 1)
    label_weights = label_weights.detach().data * \
        uncertainty_prediction_cls.view(-1, 1)
    return label_weights, bbox_weights, losses
def get_bboxes_single(self, cls_score_list, bbox_pred_list,
                      embed_feats_list, mlvl_anchors, img_shape,
                      scale_factor, cfg, rescale=False):
    """Decode one image's predictions, carrying embedding features through NMS.

    Returns:
        tuple: (det_bboxes, det_labels, det_feats)
    """
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_feats = []
    feat_chans = embed_feats_list[0].size(1)
    for cls_score, bbox_pred, feat, anchors in zip(cls_score_list,
                                                   bbox_pred_list,
                                                   embed_feats_list,
                                                   mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        cls_score = cls_score.permute(1, 2,
                                      0).reshape(-1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # Feat: [#A, C, H, W] -> [#A*H*W, C]
        feat = feat.permute(0, 2, 3, 1).reshape(-1, feat_chans)
        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # Get maximum scores for foreground classes.
            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(dim=1)
            else:
                max_scores, _ = scores[:, 1:].max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            feat = feat[topk_inds, :]
        bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                            self.target_stds, img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_feats.append(feat)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    mlvl_feats = torch.cat(mlvl_feats)
    if self.use_sigmoid_cls:
        # Add a dummy background class to the front when using sigmoid
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
    det_bboxes, det_labels, det_feats = multiclass_nms_with_feat(
        mlvl_bboxes, mlvl_scores, mlvl_feats, cfg.score_thr, cfg.nms,
        cfg.max_per_img)
    return det_bboxes, det_labels, det_feats
def get_bboxes_single(self, cls_score_list, bbox_pred_list, mlvl_anchors,
                      img_shape, scale_factor, cfg, gt_bboxes, gt_labels,
                      rescale=False, parent_scores=None):
    """ Transform outputs for a single batch item into labeled boxes.

    NOTE(review): ``gt_bboxes`` and ``gt_labels`` are unused in this body;
    presumably kept for interface compatibility — verify against callers.
    """
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
    mlvl_bboxes = []
    mlvl_scores = []
    for idx, (cls_score, bbox_pred, anchors) in enumerate(
            zip(cls_score_list, bbox_pred_list, mlvl_anchors)):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        cls_score = cls_score.permute(1, 2,
                                      0).reshape(-1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            if parent_scores is not None and self.use_forest:
                # combine with the class-hierarchy (forest) parent scores
                scores = self.get_forest_based_score(
                    cls_score, parent_scores[idx])
            else:
                scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # Get maximum scores for foreground classes.
            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(dim=1)
            else:
                max_scores, _ = scores[:, 1:].max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
        bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                            self.target_stds, img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    if self.use_sigmoid_cls:
        # Add a dummy background class to the front when using sigmoid
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
    det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def get_bboxes_single(
        self,
        cls_scores,
        bbox_preds,
        mlvl_anchors,
        img_shape,
        scale_factor,
        cfg,
        rescale=False,
):
    """Generate RPN proposals for one image from multi-level outputs."""
    mlvl_proposals = []
    for level, cls_out in enumerate(cls_scores):
        reg_out = bbox_preds[level]
        assert cls_out.size()[-2:] == reg_out.size()[-2:]
        anchors = mlvl_anchors[level]
        cls_out = cls_out.permute(1, 2, 0)
        # objectness score per anchor
        if self.use_sigmoid_cls:
            scores = cls_out.reshape(-1).sigmoid()
        else:
            scores = cls_out.reshape(-1, 2).softmax(dim=1)[:, 1]
        deltas = reg_out.permute(1, 2, 0).reshape(-1, 4)
        # keep only the top-scoring anchors before decoding
        if 0 < cfg.nms_pre < scores.shape[0]:
            _, keep = scores.topk(cfg.nms_pre)
            deltas = deltas[keep, :]
            anchors = anchors[keep, :]
            scores = scores[keep]
        proposals = delta2bbox(
            anchors,
            deltas,
            self.target_means,
            self.target_stds,
            img_shape,
        )
        # drop degenerate boxes below the minimum size
        if cfg.min_bbox_size > 0:
            widths = proposals[:, 2] - proposals[:, 0] + 1
            heights = proposals[:, 3] - proposals[:, 1] + 1
            valid = torch.nonzero((widths >= cfg.min_bbox_size)
                                  & (heights >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid, :]
            scores = scores[valid]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        mlvl_proposals.append(proposals[:cfg.nms_post, :])
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        # one more NMS pass over the merged levels
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        # otherwise just keep the highest-scoring max_num proposals
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, keep = scores.topk(num)
        proposals = proposals[keep, :]
    return proposals
def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, img_shape, scale_factor, cfg, rescale=False): mlvl_proposals = [] # 遍历每一张特征图得到的anchors进行坐标还原 for idx in range(len(cls_scores)): rpn_cls_score = cls_scores[idx] rpn_bbox_pred = bbox_preds[idx] assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] anchors = mlvl_anchors[idx] rpn_cls_score = rpn_cls_score.permute(1, 2, 0) if self.use_sigmoid_cls: rpn_cls_score = rpn_cls_score.reshape(-1) scores = rpn_cls_score.sigmoid() else: rpn_cls_score = rpn_cls_score.reshape(-1, 2) scores = rpn_cls_score.softmax(dim=1)[:, 1] rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4) # 按照得分从上万个里面选取2000个最高得分的box if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre: _, topk_inds = scores.topk(cfg.nms_pre) rpn_bbox_pred = rpn_bbox_pred[topk_inds, :] anchors = anchors[topk_inds, :] scores = scores[topk_inds] # 偏移转换为真实的bbox作标 proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, self.target_stds, img_shape) if cfg.min_bbox_size > 0: w = proposals[:, 2] - proposals[:, 0] + 1 h = proposals[:, 3] - proposals[:, 1] + 1 valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size)).squeeze() proposals = proposals[valid_inds, :] scores = scores[valid_inds] # NMS筛选 proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) #将得加入xywh中成为xywhc,便于NMS proposals, _ = nms(proposals, cfg.nms_thr) proposals = proposals[:cfg.nms_post, :] mlvl_proposals.append(proposals) #每张特征图选完得到的box都加到这里 # 全部的proposal box连接到一起 proposals = torch.cat(mlvl_proposals, 0) if cfg.nms_across_levels: proposals, _ = nms(proposals, cfg.nms_thr) proposals = proposals[:cfg.max_num, :] else: # proposal再按照分类得分压缩到2000(如果本身就小于2000就只排序) scores = proposals[:, 4] num = min(cfg.max_num, proposals.shape[0]) # proposal的数目 _, topk_inds = scores.topk(num) # 按照score排序 proposals = proposals[topk_inds, :] return proposals
def get_bboxes_single(self, cls_scores, bbox_preds, obj_reps, mlvl_anchors,
                      img_shape, scale_factor, cfg, rescale=False):
    """Decode one image's predictions, carrying object representations
    through NMS.

    Returns:
        tuple: (det_bboxes, det_labels, det_reps)
    """
    # TODO:
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) == len(
        obj_reps)
    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_reps = []
    for cls_score, bbox_pred, obj_rep, anchors in zip(
            cls_scores, bbox_preds, obj_reps, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size(
        )[-2:] == obj_rep.size()[-2:]
        cls_score = cls_score.permute(1, 2,
                                      0).reshape(-1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        obj_rep = obj_rep.permute(1, 2, 0).reshape(-1,
                                                   self.rep_channels * 2)
        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # rank by best foreground score and keep the top-k
            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(dim=1)
            else:
                max_scores, _ = scores[:, 1:].max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            obj_rep = obj_rep[topk_inds, :]
        bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                            self.target_stds, img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_reps.append(obj_rep)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    if self.use_sigmoid_cls:
        # sigmoid head has no background column; prepend a dummy one
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
    mlvl_reps = torch.cat(mlvl_reps)
    det_bboxes, det_labels, det_reps = multiclass_nms_with_extra(
        mlvl_bboxes, mlvl_scores, mlvl_reps, cfg.score_thr, cfg.nms,
        cfg.max_per_img)
    return det_bboxes, det_labels, det_reps
def get_bboxes(self, n_batches, rois, cls_scores, bbox_preds, target_metas, cfg=None,): assert len(cls_scores)==1 and len(bbox_preds)==1 and len(target_metas)==1 cls_scores = cls_scores[0] bbox_preds = bbox_preds[0] target_metas = target_metas[0] bboxes_list = [[] for _ in range(n_batches)] scores_list = [[] for _ in range(n_batches)] if isinstance(cls_scores, list): cls_scores = sum(cls_scores) / float(len(cls_scores)) scores = cls_scores if bbox_preds is not None: bboxes = delta2bbox(rois[:, 1:], bbox_preds, self.target_means, self.target_stds, None) else: bboxes = rois[:, 1:].clone() for i in range(n_batches): inds = torch.nonzero(rois[:, 0]==i).view(-1) bboxes_list[i] = bboxes[inds, :] img_shape = target_metas[inds[0]]['img_shape'] bboxes_list[i][:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1) bboxes_list[i][:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1) scores_list[i] = scores[inds, :] bboxes = bboxes_list scores = scores_list ''' for roi, cls_score, bbox_pred, target_meta in zip(rois, cls_scores, bbox_preds, target_metas): roi = roi.view(1, -1) cls_score = cls_score.view(1, -1) bbox_pred = bbox_pred.view(1, -1) if isinstance(cls_score, list): cls_score = sum(cls_score) / float(len(cls_score)) #scores = F.softmax(cls_score, dim=1) if cls_score is not None else None scores = cls_score if bbox_pred is not None: bboxes = delta2bbox(roi[:, 1:], bbox_pred, self.target_means, self.target_stds, img_shape) else: bboxes = roi[:, 1:].clone() if img_shape is not None: bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1) bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1) bboxes_list[int(roi[0, 0])].append(bboxes) scores_list[int(roi[0, 0])].append(scores) bboxes = [torch.cat(bboxes, dim=0) for bboxes in bboxes_list] scores = [torch.cat(scores, dim=0) for scores in scores_list] ''' return bboxes, scores
def get_bboxes_single(self, cls_scores, bbox_preds, bbox_embeds, mlvl_anchors,
                      img_shape, scale_factor, cfg, rescale=False):
    """Convert per-level predictions of one image into final detections.

    Decodes boxes level by level, keeps the ``cfg.nms_pre`` best candidates
    per level, and forwards the per-anchor embeddings unchanged through the
    embedding-aware multi-class NMS (no embedding filtering is needed).

    Returns:
        tuple: (det_bboxes, det_labels, det_embeds).
    """
    bbox_acc, score_acc, embed_acc = [], [], []
    for score_map, delta_map, embed_map, anchors in zip(
            cls_scores, bbox_preds, bbox_embeds, mlvl_anchors):
        assert score_map.size()[-2:] == delta_map.size()[-2:]
        flat = score_map.permute(1, 2, 0).reshape(-1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = flat.sigmoid()
        else:
            scores = flat.softmax(-1)
        deltas = delta_map.permute(1, 2, 0).contiguous().reshape(-1, 4)
        embeds = embed_map.permute(1, 2, 0).contiguous().reshape(
            -1, self.num_classes * self.inst_embeds)
        keep_n = cfg.nms_pre
        if 0 < keep_n < scores.shape[0]:
            # rank by best foreground score (softmax column 0 is background)
            if self.use_sigmoid_cls:
                ranking = scores.max(dim=1)[0]
            else:
                ranking = scores[:, 1:].max(dim=1)[0]
            _, keep = ranking.topk(keep_n)
            deltas = deltas[keep, :]
            anchors = anchors[keep, :]
            scores = scores[keep, :]
            embeds = embeds[keep, :]
        bbox_acc.append(
            delta2bbox(anchors, deltas, self.target_means, self.target_stds,
                       img_shape))
        score_acc.append(scores)
        embed_acc.append(embeds)
    all_bboxes = torch.cat(bbox_acc)
    if rescale:
        # map back to the original image resolution; not used in training
        all_bboxes /= all_bboxes.new_tensor(scale_factor)
    all_scores = torch.cat(score_acc)
    all_embeds = torch.cat(embed_acc)
    if self.use_sigmoid_cls:
        # sigmoid heads lack a background column; prepend a zero one
        pad = all_scores.new_zeros(all_scores.shape[0], 1)
        all_scores = torch.cat([pad, all_scores], dim=1)
    det_bboxes, det_labels, det_embeds = self.multiclass_nms_emb(
        all_bboxes, all_scores, all_embeds, cfg.score_thr, cfg.nms,
        cfg.max_per_img)
    return det_bboxes, det_labels, det_embeds
def get_first_det_bboxes(self, rois, bbox_pred, img_shape):
    """Decode first-stage deltas into boxes, clipped to the image.

    Args:
        rois (Tensor): (n, 5) proposals; column 0 is the batch index.
        bbox_pred (Tensor | None): (n, 4) deltas; when None the input RoI
            coordinates are returned (clipped) unchanged.
        img_shape (tuple | None): (h, w, ...) used for clipping, or None
            to skip clipping.

    Returns:
        Tensor: (n, 4) decoded boxes.
    """
    if bbox_pred is not None:
        bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                            self.target_stds, img_shape)
    else:
        bboxes = rois[:, 1:].clone()
    if img_shape is not None:
        # BUG FIX: `bboxes[:, [0, 2]]` (advanced indexing) yields a copy,
        # so the original in-place clamp never modified `bboxes`. Strided
        # slices are views, so these clamps take effect: columns 0/2 are
        # x, columns 1/3 are y.
        bboxes[:, 0::2].clamp_(min=0, max=img_shape[1] - 1)
        bboxes[:, 1::2].clamp_(min=0, max=img_shape[0] - 1)
    return bboxes
def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, img_shape,
                      scale_factor, cfg, rescale=False):
    """Generate RPN proposals for a single image.

    Each index of ``cls_scores`` / ``bbox_preds`` / ``mlvl_anchors``
    addresses one feature level: with 3 FPN levels, cls_scores[0] pairs
    with bbox_preds[0] and mlvl_anchors[0], and so on.

    Args:
        cls_scores (list[Tensor]): per-level objectness maps.
        bbox_preds (list[Tensor]): per-level delta maps (4, H, W).
        mlvl_anchors (list[Tensor]): per-level anchors, (H*W*A, 4).
        img_shape (tuple): image shape used to clip decoded proposals.
        scale_factor: unused here; kept for interface compatibility.
        cfg: proposal config (nms_pre, nms_post, nms_thr, min_bbox_size,
            nms_across_levels, max_num).
        rescale (bool): unused here; kept for interface compatibility.

    Returns:
        Tensor: (k, 5) proposals as (x1, y1, x2, y2, score).
    """
    mlvl_proposals = []
    for idx in range(len(cls_scores)):
        rpn_cls_score = cls_scores[idx]
        rpn_bbox_pred = bbox_preds[idx]
        assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
        anchors = mlvl_anchors[idx]
        rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(-1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(-1, 2)
            scores = rpn_cls_score.softmax(dim=1)[:, 1]
        rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        # before NMS, keep only the nms_pre highest-confidence anchors
        if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
            _, topk_inds = scores.topk(cfg.nms_pre)
            rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
            anchors = anchors[topk_inds, :]
            scores = scores[topk_inds]
        # decode the selected deltas into (x1, y1, x2, y2) proposals
        proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,
                               self.target_stds, img_shape)
        if cfg.min_bbox_size > 0:
            # drop degenerate boxes smaller than min_bbox_size
            w = proposals[:, 2] - proposals[:, 0] + 1
            h = proposals[:, 3] - proposals[:, 1] + 1
            valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &
                                       (h >= cfg.min_bbox_size)).squeeze()
            proposals = proposals[valid_inds, :]
            scores = scores[valid_inds]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)  # per-level NMS
        proposals = proposals[:cfg.nms_post, :]  # keep nms_post best
        mlvl_proposals.append(proposals)
    proposals = torch.cat(mlvl_proposals, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        scores = proposals[:, 4]
        num = min(cfg.max_num, proposals.shape[0])
        _, topk_inds = scores.topk(num)
        proposals = proposals[topk_inds, :]
    # BUG FIX: the original ended with a bare `return` followed by a stray
    # `proposals` statement on the next line, so the method returned None.
    return proposals
def _get_proposals_single(self, rpn_cls_scores, rpn_bbox_preds, mlvl_anchors,
                          img_shape, coo_num, cfg):
    """Build RPN proposals for one image from multi-level predictions.

    ``coo_num`` selects the box parameterisation: 4 for the standard
    axis-aligned deltas (decoded by ``delta2bbox``), 8 for the variant
    decoded by ``delta2bbox_8_coo``.

    Returns:
        Tensor: (k, 5) proposals as (x1, y1, x2, y2, score).
    """
    per_level = []
    for cls_map, reg_map, anchors in zip(rpn_cls_scores, rpn_bbox_preds,
                                         mlvl_anchors):
        assert cls_map.size()[-2:] == reg_map.size()[-2:]
        if self.use_sigmoid_cls:
            logits = cls_map.permute(1, 2, 0).contiguous().view(-1)
            scores = logits.sigmoid()
        else:
            logits = cls_map.permute(1, 2, 0).contiguous().view(-1, 2)
            # column 1 is the foreground probability
            scores = F.softmax(logits, dim=1)[:, 1]
        deltas = reg_map.permute(1, 2, 0).contiguous().view(-1, coo_num)
        # rank all anchors by score, then keep the nms_pre best (if set)
        _, order = scores.sort(0, descending=True)
        if cfg.nms_pre > 0:
            order = order[:cfg.nms_pre]
        deltas = deltas[order, :]
        anchors = anchors[order, :]
        scores = scores[order]
        if coo_num == 4:
            proposals = delta2bbox(anchors, deltas, self.target_means,
                                   self.target_stds, img_shape)
        elif coo_num == 8:
            proposals = delta2bbox_8_coo(anchors, deltas, img_shape)
        # discard boxes smaller than min_bbox_size on either side
        widths = proposals[:, 2] - proposals[:, 0] + 1
        heights = proposals[:, 3] - proposals[:, 1] + 1
        valid = torch.nonzero((widths >= cfg.min_bbox_size) &
                              (heights >= cfg.min_bbox_size)).squeeze()
        proposals = proposals[valid, :]
        scores = scores[valid]
        proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
        proposals, _ = nms(proposals, cfg.nms_thr)
        per_level.append(proposals[:cfg.nms_post, :])
    proposals = torch.cat(per_level, 0)
    if cfg.nms_across_levels:
        proposals, _ = nms(proposals, cfg.nms_thr)
        proposals = proposals[:cfg.max_num, :]
    else:
        keep_num = min(cfg.max_num, proposals.shape[0])
        _, order = proposals[:, 4].sort(0, descending=True)
        proposals = proposals[order[:keep_num], :]
    return proposals