class RegressLoss(nn.Module):
    """Box-regression loss evaluated on anchors matched to ground-truth boxes.

    For every image, anchors are matched to the annotated boxes by rotated
    IoU, the matched ground truth is encoded relative to its anchor with
    ``BoxCoder.encode`` and the configured criterion is applied to the
    positive anchors only.

    Args:
        func: Name of the criterion: ``'smooth'`` (``smooth_l1_loss``),
            ``'mse'`` (``F.mse_loss``) or ``'balanced'``
            (``balanced_l1_loss``).

    Raises:
        NotImplementedError: If ``func`` is none of the names above.
    """

    def __init__(self, func='smooth'):
        super(RegressLoss, self).__init__()
        self.box_coder = BoxCoder()
        # The attribute keeps its historical spelling ("criteron") in case
        # code outside this class reads it.
        if func == 'smooth':
            self.criteron = smooth_l1_loss
        elif func == 'mse':
            self.criteron = F.mse_loss
        elif func == 'balanced':
            self.criteron = balanced_l1_loss
        else:
            raise NotImplementedError

    def forward(self, regressions, anchors, annotations, iou_thres=0.5):
        """Return the regression loss averaged over the images of a batch.

        Args:
            regressions: Predicted offsets, indexed ``[image, anchor, :]``.
            anchors: Anchor boxes, indexed ``[image, anchor, :]``.
            annotations: Ground truth, indexed ``[image, box, :]``. Rows whose
                last column equals ``-1`` are ignored; all columns but the
                last are handed to the IoU routines as the box.
                (presumably the last column is the class label - TODO confirm)
            iou_thres: Minimum rotated IoU for an anchor to count as positive.

        Returns:
            The mean of the per-image losses, reduced with ``keepdim=True``.
            Images without valid annotations or without positive anchors
            contribute a zero.
        """
        # Follow the inputs' device instead of hard-coding CUDA so the loss
        # also runs on CPU-only hosts (assumes all inputs share one device).
        device = regressions.device
        losses = []

        for j in range(regressions.shape[0]):
            regression = regressions[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]

            if bbox_annotation.shape[0] == 0:
                losses.append(
                    torch.zeros((), dtype=torch.float32, device=device))
                continue

            # Overlap of the enclosing squares is passed to rbox_overlaps as
            # its "indicator" argument.
            indicator = bbox_overlaps(
                min_area_square(anchors[j, :, :]),
                min_area_square(bbox_annotation[:, :-1]),
            )
            overlaps = rbox_overlaps(
                anchors[j, :, :].cpu().numpy(),
                bbox_annotation[:, :-1].cpu().numpy(),
                indicator.cpu().numpy(),
                thresh=1e-1,
            )
            if not torch.is_tensor(overlaps):
                overlaps = torch.from_numpy(overlaps).to(device)

            # MaxIoU assigner: an anchor is positive when its best IoU
            # reaches the threshold ...
            iou_max, iou_argmax = torch.max(overlaps, dim=1)
            positive_indices = torch.ge(iou_max, iou_thres)
            # ... and every ground-truth box whose best IoU stays below the
            # threshold still claims its best-overlapping anchor.
            max_gt, argmax_gt = overlaps.max(0)
            unmatched_gt = max_gt < iou_thres
            if unmatched_gt.any():
                positive_indices[argmax_gt[unmatched_gt]] = True

            assigned_annotations = bbox_annotation[iou_argmax, :]
            if positive_indices.sum() > 0:
                all_rois = anchors[j, positive_indices, :]
                gt_boxes = assigned_annotations[positive_indices, :]
                targets = self.box_coder.encode(all_rois, gt_boxes)
                losses.append(
                    self.criteron(regression[positive_indices, :], targets))
            else:
                losses.append(
                    torch.zeros((), dtype=torch.float32, device=device))

        return torch.stack(losses).mean(dim=0, keepdim=True)
class RetinaNet(nn.Module):
    """Single-stage detector: ResNet backbone, FPN and cls / reg heads.

    In training mode ``forward`` returns a dict with ``'loss_cls'`` and
    ``'loss_reg'``; in eval mode it returns ``[scores, classes, boxes]``
    produced by ``decoder``.

    Args:
        backbone: One of the keys of ``_BACKBONES``.
        hyps: Mapping that must contain ``'num_classes'``; the heads are built
            with ``int(hyps['num_classes']) + 1`` classes
            (presumably the extra class is background - TODO confirm).

    Raises:
        ValueError: If ``hyps`` is ``None``.
        NotImplementedError: If ``backbone`` is not supported.
    """

    # backbone name -> (torchvision ``models`` constructor name,
    #                   channel counts of the three feature maps fed to FPN)
    _BACKBONES = {
        'res34': ('resnet34', [128, 256, 512]),
        'res50': ('resnet50', [512, 1024, 2048]),
        'res101': ('resnet101', [512, 1024, 2048]),
        'res152': ('resnet152', [512, 1024, 2048]),
        'resnext50': ('resnext50_32x4d', [512, 1024, 2048]),
    }

    def __init__(self, backbone='res50', hyps=None):
        super(RetinaNet, self).__init__()
        if hyps is None:
            # Fail with a readable message instead of
            # "'NoneType' object is not subscriptable".
            raise ValueError("hyps must be a mapping containing 'num_classes'")
        self.num_classes = int(hyps['num_classes']) + 1
        self.anchor_generator = Anchors(ratios=np.array([0.5, 1, 2]), )
        self.num_anchors = self.anchor_generator.num_anchors
        self.init_backbone(backbone)
        self.fpn = FPN(
            in_channels_list=self.fpn_in_channels,
            out_channels=256,
            top_blocks=LastLevelP6P7(self.fpn_in_channels[-1], 256),
            use_asff=False,
        )
        self.cls_head = CLSHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=4,
            num_anchors=self.num_anchors,
            num_classes=self.num_classes,
        )
        self.reg_head = REGHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=4,
            num_anchors=self.num_anchors,
            num_regress=5,  # xywha
        )
        self.loss = IntegratedLoss(func='smooth')
        self.box_coder = BoxCoder()

    def init_backbone(self, backbone):
        """Create the pretrained backbone and record its FPN input channels.

        Sets ``self.backbone`` and ``self.fpn_in_channels`` and removes the
        backbone's ``avgpool`` and ``fc`` attributes, which ``ims_2_features``
        never calls.

        Raises:
            NotImplementedError: If ``backbone`` is not a key of
                ``_BACKBONES``.
        """
        if backbone not in self._BACKBONES:
            raise NotImplementedError
        constructor_name, channels = self._BACKBONES[backbone]
        self.backbone = getattr(models, constructor_name)(pretrained=True)
        # Copy so instances never share (and accidentally mutate) the table.
        self.fpn_in_channels = list(channels)
        del self.backbone.avgpool
        del self.backbone.fc

    def ims_2_features(self, ims):
        """Run the backbone and return the outputs of layer2..layer4."""
        c1 = self.backbone.relu(self.backbone.bn1(self.backbone.conv1(ims)))
        c2 = self.backbone.layer1(self.backbone.maxpool(c1))
        c3 = self.backbone.layer2(c2)
        c4 = self.backbone.layer3(c3)
        c5 = self.backbone.layer4(c4)
        # c_i shape: bs, C, H, W
        return [c3, c4, c5]

    def forward(self, ims, gt_boxes=None, test_conf=None, process=None):
        """Compute the losses (training) or the detections (eval).

        Args:
            ims: Input image batch.
            gt_boxes: Ground-truth boxes; forwarded to the loss in training.
            test_conf: Optional score threshold forwarded to ``decoder``.
            process: Training progress handed to ``calc_mining_param``;
                required in training mode (the 0.1 / 0.3 break points suggest
                a fraction of total training - TODO confirm).

        Returns:
            Training: ``{'loss_cls': ..., 'loss_reg': ...}``.
            Eval: the list returned by ``decoder``.

        Raises:
            ValueError: In training mode when ``process`` is ``None``.
        """
        if self.training:
            # Validate ``process`` before the expensive forward pass.
            bf_weight = self.calc_mining_param(process, 0.3)

        anchors = self.anchor_generator(ims)  # (bs, num_all_anchors, 5)
        features = self.fpn(self.ims_2_features(ims))
        cls_score = torch.cat(
            [self.cls_head(feature) for feature in features], dim=1)
        bbox_pred = torch.cat(
            [self.reg_head(feature) for feature in features], dim=1)

        if not self.training:  # eval() mode
            return self.decoder(ims, anchors, cls_score, bbox_pred,
                                test_conf=test_conf)

        # Decoded predictions are consumed by the loss only, so they are
        # computed in training mode alone and detached from the graph.
        bboxes = self.box_coder.decode(
            anchors, bbox_pred, mode='xywht').detach()
        losses = dict()
        # NOTE(review): this call passes five positional tensors plus
        # ``md_thres`` / ``mining_param``; confirm that the IntegratedLoss
        # in scope accepts this signature.
        losses['loss_cls'], losses['loss_reg'] = self.loss(
            cls_score, bbox_pred, anchors, bboxes, gt_boxes,
            md_thres=0.6,
            mining_param=(bf_weight, 1 - bf_weight, 5),
        )
        return losses

    def decoder(self, ims, anchors, cls_score, bbox_pred, thresh=0.6,
                nms_thresh=0.2, test_conf=None):
        """Turn raw head outputs into score-filtered, NMS-ed detections.

        Only batch element 0 is filtered and returned.

        Args:
            ims: Input images, used to clip the decoded boxes.
            anchors: Anchors the predictions are relative to.
            cls_score: Per-anchor class scores.
            bbox_pred: Per-anchor regression output.
            thresh: Minimum best-class score for an anchor to be kept.
            nms_thresh: Threshold forwarded to ``nms``.
            test_conf: When not ``None``, overrides ``thresh``.

        Returns:
            ``[nms_scores, nms_class, output_boxes]`` where ``output_boxes``
            is each kept box concatenated with its anchor. When nothing
            passes ``thresh`` the placeholder
            ``[zeros(1), zeros(1), zeros(1, 5)]`` is returned instead.
        """
        if test_conf is not None:
            thresh = test_conf
        bboxes = self.box_coder.decode(anchors, bbox_pred, mode='xywht')
        bboxes = clip_boxes(bboxes, ims)
        scores = torch.max(cls_score, dim=2, keepdim=True)[0]
        keep = (scores >= thresh)[0, :, 0]
        if keep.sum() == 0:
            return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]
        scores = scores[:, keep, :]
        anchors = anchors[:, keep, :]
        cls_score = cls_score[:, keep, :]
        bboxes = bboxes[:, keep, :]
        # NMS
        anchors_nms_idx = nms(
            torch.cat([bboxes, scores], dim=2)[0, :, :], nms_thresh)
        nms_scores, nms_class = cls_score[0, anchors_nms_idx, :].max(dim=1)
        output_boxes = torch.cat(
            [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
            dim=1)
        return [nms_scores, nms_class, output_boxes]

    def freeze_bn(self):
        """Put every ``nn.BatchNorm2d`` sub-module into eval mode."""
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def calc_mining_param(self, process, alpha):
        """Return the weight that decays from 1.0 to ``alpha`` with progress.

        The value is ``1.0`` while ``process < 0.1``, ``alpha`` once
        ``process > 0.3`` and a linear blend of the two in between.

        Raises:
            ValueError: If ``process`` is ``None``.
        """
        if process is None:
            raise ValueError(
                "process (training progress) is required in training mode")
        if process < 0.1:
            return 1.0
        if process > 0.3:
            return alpha
        return 5 * (alpha - 1) * process + 1.5 - 0.5 * alpha
class IntegratedLoss(nn.Module):
    """Joint focal classification loss and box-regression loss.

    Anchors are matched to ground truth by rotated IoU once per image; the
    same assignment drives both the focal loss and the regression criterion.

    Args:
        alpha: Focal-loss weight applied to positive targets (``1 - alpha``
            is applied to the others).
        gamma: Focal-loss focusing exponent.
        func: Regression criterion: ``'smooth'`` (``smooth_l1_loss``),
            ``'mse'`` (``F.mse_loss``) or ``'balanced'``
            (``balanced_l1_loss``).

    Raises:
        NotImplementedError: If ``func`` is none of the names above.
    """

    def __init__(self, alpha=0.25, gamma=2.0, func = 'smooth'):
        super(IntegratedLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.box_coder = BoxCoder()
        # The attribute keeps its historical spelling ("criteron") in case
        # code outside this class reads it.
        if func == 'smooth':
            self.criteron = smooth_l1_loss
        elif func == 'mse':
            self.criteron = F.mse_loss
        elif func == 'balanced':
            self.criteron = balanced_l1_loss
        else:
            # Reject unknown names here rather than failing later with a
            # missing ``criteron`` attribute inside ``forward``.
            raise NotImplementedError

    def forward(self, classifications, regressions, anchors, annotations,iou_thres=0.5):
        """Return ``(loss_cls, loss_reg)`` averaged over the batch.

        Args:
            classifications: Per-anchor class probabilities, indexed
                ``[image, anchor, class]``; clamped to ``[1e-4, 1 - 1e-4]``
                before the logarithm is taken.
            regressions: Predicted offsets, indexed ``[image, anchor, :]``.
            anchors: Anchor boxes, indexed ``[image, anchor, :]``.
            annotations: Ground truth, indexed ``[image, box, :]``. Rows whose
                last column equals ``-1`` are ignored; the last column is
                used as the class index and the remaining columns as the box.
            iou_thres: Anchors with best IoU ``>= iou_thres`` are positive,
                those below ``iou_thres - 0.1`` are negative, and anchors in
                between are excluded from the classification loss.

        Returns:
            Two tensors, each the mean of the per-image losses reduced with
            ``keepdim=True``. Images without valid annotations contribute
            zero to both.
        """
        # Follow the inputs' device instead of hard-coding CUDA so the loss
        # also runs on CPU-only hosts (assumes all inputs share one device).
        device = classifications.device
        cls_losses = []
        reg_losses = []

        for j in range(classifications.shape[0]):
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]

            if bbox_annotation.shape[0] == 0:
                cls_losses.append(
                    torch.zeros((), dtype=torch.float32, device=device))
                reg_losses.append(
                    torch.zeros((), dtype=torch.float32, device=device))
                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            # Overlap of the enclosing squares is passed to rbox_overlaps as
            # its "indicator" argument.
            indicator = bbox_overlaps(
                min_area_square(anchors[j, :, :]),
                min_area_square(bbox_annotation[:, :-1]),
            )
            ious = rbox_overlaps(
                anchors[j, :, :].cpu().numpy(),
                bbox_annotation[:, :-1].cpu().numpy(),
                indicator.cpu().numpy(),
                thresh=1e-1,
            )
            if not torch.is_tensor(ious):
                ious = torch.from_numpy(ious).to(device)

            iou_max, iou_argmax = torch.max(ious, dim=1)
            positive_indices = torch.ge(iou_max, iou_thres)
            # Every ground-truth box whose best IoU stays below the threshold
            # still claims its best-overlapping anchor.
            max_gt, argmax_gt = ious.max(0)
            unmatched_gt = max_gt < iou_thres
            if unmatched_gt.any():
                positive_indices[argmax_gt[unmatched_gt]] = True

            num_positive_anchors = positive_indices.sum()
            assigned_annotations = bbox_annotation[iou_argmax, :]

            # cls loss: targets are -1 (ignored), 0 (negative) or 1 (positive)
            cls_targets = torch.full(classification.shape, -1.0, device=device)
            cls_targets[torch.lt(iou_max, iou_thres - 0.1), :] = 0
            cls_targets[positive_indices, :] = 0
            cls_targets[positive_indices,
                        assigned_annotations[positive_indices, -1].long()] = 1

            is_positive_target = torch.eq(cls_targets, 1.)
            alpha_factor = torch.full_like(cls_targets, self.alpha)
            alpha_factor = torch.where(
                is_positive_target, alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(
                is_positive_target, 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)
            bin_cross_entropy = -(
                cls_targets * torch.log(classification + 1e-6)
                + (1.0 - cls_targets) * torch.log(1.0 - classification + 1e-6)
            )
            cls_loss = focal_weight * bin_cross_entropy
            # Ignored anchors (target == -1) contribute nothing.
            cls_loss = torch.where(
                torch.ne(cls_targets, -1.0), cls_loss,
                torch.zeros_like(cls_loss))
            cls_losses.append(
                cls_loss.sum()
                / torch.clamp(num_positive_anchors.float(), min=1.0))

            # reg loss
            if num_positive_anchors > 0:
                all_rois = anchors[j, positive_indices, :]
                gt_boxes = assigned_annotations[positive_indices, :]
                reg_targets = self.box_coder.encode(all_rois, gt_boxes)
                reg_loss = self.criteron(
                    regression[positive_indices, :], reg_targets)
                reg_losses.append(reg_loss)
                if not torch.isfinite(reg_loss).all():
                    # A debugger breakpoint used to live here; warn instead so
                    # non-interactive training jobs neither hang nor require
                    # ipdb to be installed.
                    import warnings
                    warnings.warn(
                        'IntegratedLoss: non-finite regression loss for '
                        'batch element %d' % j,
                        RuntimeWarning,
                    )
            else:
                reg_losses.append(
                    torch.zeros((), dtype=torch.float32, device=device))

        loss_cls = torch.stack(cls_losses).mean(dim=0, keepdim=True)
        loss_reg = torch.stack(reg_losses).mean(dim=0, keepdim=True)
        return loss_cls, loss_reg
class RetinaNet(nn.Module):
    """Single-stage detector: ResNet backbone, FPN and cls / reg heads.

    In training mode ``forward`` returns a dict with ``'loss_cls'`` and
    ``'loss_reg'``; in eval mode it returns ``[scores, classes, boxes]``
    produced by ``decoder``.

    Args:
        backbone: One of the keys of ``_BACKBONES``.
        hyps: Mapping that must contain ``'num_classes'``; the heads are built
            with ``int(hyps['num_classes']) + 1`` classes
            (presumably the extra class is background - TODO confirm).

    Raises:
        ValueError: If ``hyps`` is ``None``.
        NotImplementedError: If ``backbone`` is not supported.
    """

    # backbone name -> (torchvision ``models`` constructor name,
    #                   channel counts of the three feature maps fed to FPN)
    _BACKBONES = {
        'res34': ('resnet34', [128, 256, 512]),
        'res50': ('resnet50', [512, 1024, 2048]),
        'res101': ('resnet101', [512, 1024, 2048]),
        'res152': ('resnet152', [512, 1024, 2048]),
        'resnext50': ('resnext50_32x4d', [512, 1024, 2048]),
    }

    def __init__(self, backbone='res50', hyps=None):
        super(RetinaNet, self).__init__()
        if hyps is None:
            # Fail with a readable message instead of
            # "'NoneType' object is not subscriptable".
            raise ValueError("hyps must be a mapping containing 'num_classes'")
        # num_classes depends on the classification level; per the original
        # author, hrsc_dataset.py defines three levels, so the level used for
        # training has to be changed accordingly.
        self.num_classes = int(hyps['num_classes']) + 1
        self.anchor_generator = Anchors(ratios=np.array([0.5, 1, 2]), )
        self.num_anchors = self.anchor_generator.num_anchors
        self.init_backbone(backbone)
        self.fpn = FPN(
            in_channels_list=self.fpn_in_channels,
            out_channels=256,
            top_blocks=LastLevelP6P7(self.fpn_in_channels[-1], 256),
            use_asff=False,
        )
        self.cls_head = CLSHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=4,
            num_anchors=self.num_anchors,
            num_classes=self.num_classes,
        )
        self.reg_head = REGHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=4,
            num_anchors=self.num_anchors,
            num_regress=5,  # xywha
        )
        self.loss = IntegratedLoss(func='smooth')  # computes the losses
        self.box_coder = BoxCoder()  # encodes / decodes regression values

    def init_backbone(self, backbone):
        """Create the pretrained backbone and record its FPN input channels.

        Sets ``self.backbone`` and ``self.fpn_in_channels`` and removes the
        backbone's ``avgpool`` and ``fc`` attributes, which ``ims_2_features``
        never calls.

        Raises:
            NotImplementedError: If ``backbone`` is not a key of
                ``_BACKBONES``.
        """
        if backbone not in self._BACKBONES:
            raise NotImplementedError
        constructor_name, channels = self._BACKBONES[backbone]
        self.backbone = getattr(models, constructor_name)(pretrained=True)
        # Copy so instances never share (and accidentally mutate) the table.
        self.fpn_in_channels = list(channels)
        del self.backbone.avgpool
        del self.backbone.fc

    def ims_2_features(self, ims):
        """Run the backbone and return the outputs of layer2..layer4."""
        c1 = self.backbone.relu(self.backbone.bn1(self.backbone.conv1(ims)))
        c2 = self.backbone.layer1(self.backbone.maxpool(c1))
        c3 = self.backbone.layer2(c2)
        c4 = self.backbone.layer3(c3)
        c5 = self.backbone.layer4(c4)
        # c_i shape: bs, C, H, W
        return [c3, c4, c5]

    def forward(self, ims, gt_boxes=None, test_conf=None, process=None):
        """Compute the losses (training) or the detections (eval).

        Args:
            ims: Input image batch.
            gt_boxes: Ground-truth boxes; forwarded to the loss in training.
            test_conf: Optional score threshold forwarded to ``decoder``.
            process: Training progress handed to ``calc_mining_param``;
                required in training mode (the 0.1 / 0.3 break points suggest
                a fraction of total training - TODO confirm).

        Returns:
            Training: ``{'loss_cls': ..., 'loss_reg': ...}``.
            Eval: the list returned by ``decoder``.

        Raises:
            ValueError: In training mode when ``process`` is ``None``.
        """
        if self.training:
            # Gradually adjust how much the input IoU influences the matching
            # degree, i.e. the weight follows the training progress.
            # Evaluated first so a missing ``process`` is reported before the
            # expensive forward pass.
            bf_weight = self.calc_mining_param(process, 0.3)

        # shape = (batch_size, num_all_anchors, 5)
        anchors = self.anchor_generator(ims)
        # Feature maps computed by the network feed two heads: one for
        # classification, one for regression.
        features = self.fpn(self.ims_2_features(ims))
        cls_score = torch.cat(
            [self.cls_head(feature) for feature in features], dim=1)
        bbox_pred = torch.cat(
            [self.reg_head(feature) for feature in features], dim=1)

        if not self.training:
            # eval() mode: returns [nms_scores, nms_class, output_boxes]
            return self.decoder(ims, anchors, cls_score, bbox_pred,
                                test_conf=test_conf)

        # Regressed boxes: consumed by the loss only, so they are computed in
        # training mode alone and detached from the graph.
        bboxes = self.box_coder.decode(
            anchors, bbox_pred, mode='xywht').detach()
        # Training: return the classification and regression losses computed
        # by the loss module.
        losses = dict()
        # Loss inputs, in order: classification scores of all boxes,
        # predicted offsets, original anchors, decoded predicted boxes
        # (actual coordinates and angle) and the ground-truth boxes (per the
        # original author the last element should be the class: 1, else -1).
        # NOTE(review): this call passes five positional tensors plus
        # ``md_thres`` / ``mining_param``; confirm that the IntegratedLoss
        # in scope accepts this signature.
        losses['loss_cls'], losses['loss_reg'] = self.loss(
            cls_score, bbox_pred, anchors, bboxes, gt_boxes,
            md_thres=0.6,
            mining_param=(bf_weight, 1 - bf_weight, 5),
        )
        return losses

    def decoder(self, ims, anchors, cls_score, bbox_pred, thresh=0.6,
                nms_thresh=0.2, test_conf=None):
        """Turn raw head outputs into score-filtered, NMS-ed detections.

        Decoding yields the detection boxes (coordinates plus angle), whereas
        encoding computes the offsets that the loss is evaluated on. Only
        batch element 0 is filtered and returned.

        Args:
            ims: Input images, used to clip the decoded boxes.
            anchors: Anchors the predictions are relative to.
            cls_score: Per-anchor class scores.
            bbox_pred: Per-anchor regression output.
            thresh: Minimum best-class score for an anchor to be kept.
            nms_thresh: Threshold forwarded to ``nms``.
            test_conf: When not ``None``, overrides ``thresh``.

        Returns:
            ``[nms_scores, nms_class, output_boxes]`` where ``output_boxes``
            is each kept box concatenated with its anchor. When nothing
            passes ``thresh`` the placeholder
            ``[zeros(1), zeros(1), zeros(1, 5)]`` is returned instead.
        """
        if test_conf is not None:
            thresh = test_conf
        # Actual predicted boxes: coordinates and angle.
        bboxes = self.box_coder.decode(anchors, bbox_pred, mode='xywht')
        bboxes = clip_boxes(bboxes, ims)
        scores = torch.max(cls_score, dim=2, keepdim=True)[0]
        keep = (scores >= thresh)[0, :, 0]
        if keep.sum() == 0:
            return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]
        scores = scores[:, keep, :]
        anchors = anchors[:, keep, :]
        cls_score = cls_score[:, keep, :]
        bboxes = bboxes[:, keep, :]
        # NMS
        anchors_nms_idx = nms(
            torch.cat([bboxes, scores], dim=2)[0, :, :], nms_thresh)
        nms_scores, nms_class = cls_score[0, anchors_nms_idx, :].max(dim=1)
        output_boxes = torch.cat(
            [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
            dim=1)
        return [nms_scores, nms_class, output_boxes]

    def freeze_bn(self):
        """Put every ``nn.BatchNorm2d`` sub-module into eval mode."""
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def calc_mining_param(self, process, alpha):
        """Return the weight that decays from 1.0 to ``alpha`` with progress.

        Gradually adjusts the influence of the input IoU on the matching
        degree: the value is ``1.0`` while ``process < 0.1``, ``alpha`` once
        ``process > 0.3`` and a linear blend of the two in between.

        Raises:
            ValueError: If ``process`` is ``None``.
        """
        if process is None:
            raise ValueError(
                "process (training progress) is required in training mode")
        if process < 0.1:
            return 1.0
        if process > 0.3:
            return alpha
        return 5 * (alpha - 1) * process + 1.5 - 0.5 * alpha
class STELA(nn.Module):
    """Detector with optional anchor-refinement stages before the final heads.

    Each refinement stage regresses offsets for the current anchors and
    decodes them into refined anchors; the classification and regression
    heads then operate relative to the last set of anchors.

    Args:
        backbone: One of the keys of ``_BACKBONES``.
        num_classes: Number of classes predicted by the classification head.
        num_refining: Number of anchor-refinement stages (``0`` disables
            refinement; ``loss_ref`` / ``ref_heads`` are then not created).

    Raises:
        NotImplementedError: If ``backbone`` is not supported.
    """

    # backbone name -> (torchvision ``models`` constructor name,
    #                   channel counts of the three feature maps fed to FPN)
    _BACKBONES = {
        'res34': ('resnet34', [128, 256, 512]),
        'res50': ('resnet50', [512, 1024, 2048]),
        'resnext50': ('resnext50_32x4d', [512, 1024, 2048]),
    }

    def __init__(self, backbone='res50', num_classes=2, num_refining=1):
        super(STELA, self).__init__()
        self.anchor_generator = Anchors()
        self.num_anchors = self.anchor_generator.num_anchors
        self.init_backbone(backbone)
        self.fpn = FPN(
            in_channels_list=self.fpn_in_channels,
            out_channels=256,
            top_blocks=LastLevelP6P7(self.fpn_in_channels[-1], 256),
        )
        self.cls_head = CLSHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=1,
            num_anchors=self.num_anchors,
            num_classes=num_classes,
        )
        self.reg_head = REGHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=1,
            num_anchors=self.num_anchors,
            num_regress=5,
        )
        self.num_refining = num_refining
        if self.num_refining > 0:
            self.ref_heads = nn.ModuleList([
                REGHead(
                    in_channels=256,
                    feat_channels=256,
                    num_stacked=1,
                    num_anchors=self.num_anchors,
                    num_regress=5,
                )
                for _ in range(self.num_refining)
            ])
            self.loss_ref = RegressLoss(func='smooth')
        self.loss_cls = FocalLoss()
        self.loss_reg = RegressLoss(func='smooth')
        self.box_coder = BoxCoder()

    def init_backbone(self, backbone):
        """Create the pretrained backbone and record its FPN input channels.

        Sets ``self.backbone`` and ``self.fpn_in_channels`` and removes the
        backbone's ``avgpool`` and ``fc`` attributes, which ``ims_2_features``
        never calls.

        Raises:
            NotImplementedError: If ``backbone`` is not a key of
                ``_BACKBONES``.
        """
        if backbone not in self._BACKBONES:
            raise NotImplementedError
        constructor_name, channels = self._BACKBONES[backbone]
        self.backbone = getattr(models, constructor_name)(pretrained=True)
        # Copy so instances never share (and accidentally mutate) the table.
        self.fpn_in_channels = list(channels)
        del self.backbone.avgpool
        del self.backbone.fc

    def ims_2_features(self, ims):
        """Run the backbone and return the outputs of layer2..layer4."""
        c1 = self.backbone.relu(self.backbone.bn1(self.backbone.conv1(ims)))
        c2 = self.backbone.layer1(self.backbone.maxpool(c1))
        c3 = self.backbone.layer2(c2)
        c4 = self.backbone.layer3(c3)
        c5 = self.backbone.layer4(c4)
        return [c3, c4, c5]

    def forward(self, ims, gt_boxes=None):
        """Compute the losses (training) or the detections (eval).

        Args:
            ims: Input image batch.
            gt_boxes: Ground-truth boxes; forwarded to the losses in training.

        Returns:
            Training: a dict with ``'loss_cls'`` and ``'loss_reg'``, plus
            ``'loss_ref'`` when ``num_refining > 0``.
            Eval: the list returned by ``decoder``.
        """
        anchors_list, offsets_list = [], []
        anchors_list.append(self.anchor_generator(ims))
        features = self.fpn(self.ims_2_features(ims))

        # anchor refining (the loop is empty when num_refining <= 0)
        for stage in range(self.num_refining):
            bbox_pred = torch.cat(
                [self.ref_heads[stage](feature) for feature in features],
                dim=1)
            # Refined anchors are detached so later stages do not
            # back-propagate through earlier refinement heads.
            refined_anchors = self.box_coder.decode(
                anchors_list[-1], bbox_pred, mode='wht').detach()
            anchors_list.append(refined_anchors)
            offsets_list.append(bbox_pred)

        cls_score = torch.cat(
            [self.cls_head(feature) for feature in features], dim=1)
        bbox_pred = torch.cat(
            [self.reg_head(feature) for feature in features], dim=1)

        if not self.training:
            return self.decoder(ims, anchors_list[-1], cls_score, bbox_pred)

        losses = dict()
        if self.num_refining > 0:
            # The IoU threshold is passed positionally: it is the fourth
            # parameter of RegressLoss.forward, whose keyword is spelled
            # ``iou_thres`` (not ``iou_thresh``) in the definition visible in
            # this file. Stage i uses threshold 0.3 + 0.1 * i.
            ref_losses = [
                self.loss_ref(offsets_list[stage], anchors_list[stage],
                              gt_boxes, 0.3 + stage * 0.1)
                for stage in range(self.num_refining)
            ]
            losses['loss_ref'] = torch.stack(ref_losses).mean(
                dim=0, keepdim=True)
        # NOTE(review): FocalLoss is not visible here, so its keyword is left
        # exactly as the original spelled it - confirm it matches.
        losses['loss_cls'] = self.loss_cls(
            cls_score, anchors_list[-1], gt_boxes, iou_thresh=0.5)
        losses['loss_reg'] = self.loss_reg(
            bbox_pred, anchors_list[-1], gt_boxes, 0.5)
        return losses

    def decoder(self, ims, anchors, cls_score, bbox_pred, thresh=0.3,
                nms_thresh=0.3):
        """Turn raw head outputs into score-filtered, NMS-ed detections.

        Only batch element 0 is filtered and returned.

        Args:
            ims: Input images, used to clip the decoded boxes.
            anchors: Anchors the predictions are relative to.
            cls_score: Per-anchor class scores.
            bbox_pred: Per-anchor regression output.
            thresh: Minimum best-class score for an anchor to be kept.
            nms_thresh: Threshold forwarded to ``nms``.

        Returns:
            ``[nms_scores, nms_class, output_boxes]`` where ``output_boxes``
            is each kept box concatenated with its anchor. When nothing
            passes ``thresh`` the placeholder
            ``[zeros(1), zeros(1), zeros(1, 5)]`` is returned instead.
        """
        bboxes = self.box_coder.decode(anchors, bbox_pred, mode='xywht')
        bboxes = clip_boxes(bboxes, ims)
        scores = torch.max(cls_score, dim=2, keepdim=True)[0]
        keep = (scores >= thresh)[0, :, 0]
        if keep.sum() == 0:
            return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]
        scores = scores[:, keep, :]
        anchors = anchors[:, keep, :]
        cls_score = cls_score[:, keep, :]
        bboxes = bboxes[:, keep, :]
        anchors_nms_idx = nms(
            torch.cat([bboxes, scores], dim=2)[0, :, :], nms_thresh)
        nms_scores, nms_class = cls_score[0, anchors_nms_idx, :].max(dim=1)
        output_boxes = torch.cat(
            [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
            dim=1)
        return [nms_scores, nms_class, output_boxes]

    def freeze_bn(self):
        """Put every ``nn.BatchNorm2d`` sub-module into eval mode."""
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()