def forward(self, fg_imgs, fg_txts, bg_imgs, bg_txts, is_test=False):
    """Encode foreground/background image-text tuples and build triplet terms.

    The four inputs are walked in lockstep with ``zip``. Every element is
    passed through ``self.encode`` and two margin-based terms are formed from
    ``smooth_l1_loss`` distances: one anchored on the foreground image
    encoding and one anchored on the background image encoding.

    Arguments:
        fg_imgs, fg_txts, bg_imgs, bg_txts: iterables consumed together.
        is_test (bool): when true the encodings are returned instead of the
            loss terms.

    Returns:
        ``is_test`` true: list with one ``[fg_img_encode, fg_txt_encode]``
        pair per tuple.
        ``is_test`` false: flat list with, per tuple, the summed foreground
        term followed by the summed background term.
    """

    def _keep_non_negative(value):
        # Multiply by a 0/1 mask so that negative entries contribute zero.
        return value * (value >= 0).float()

    losses = []
    encodings = []
    for fg_img, fg_txt, bg_img, bg_txt in zip(fg_imgs, fg_txts, bg_imgs, bg_txts):
        img_fg = self.encode(fg_img, is_img=True)
        txt_fg = self.encode(fg_txt, is_txt=True)
        img_bg = self.encode(bg_img, is_img=True)
        txt_bg = self.encode(bg_txt, is_txt=True)

        # Foreground image anchor: own text vs. background text.
        fg_matched = smooth_l1_loss(img_fg, txt_fg)
        fg_mismatched = smooth_l1_loss(img_fg, txt_bg)
        fg_term = _keep_non_negative(fg_matched + self.margin - fg_mismatched)
        losses.append(fg_term.sum())

        # Background image anchor: own text vs. foreground text.
        bg_matched = smooth_l1_loss(txt_bg, img_bg)
        bg_mismatched = smooth_l1_loss(txt_fg, img_bg)
        bg_term = _keep_non_negative(bg_matched + self.margin - bg_mismatched)
        losses.append(bg_term.sum())

        encodings.append([img_fg, txt_fg])

    return encodings if is_test else losses
def __call__(self, anchors, objectness, box_regression, box_regression_right,
             targets_left, targets_right):
    """Compute the objectness loss and the left/right box regression losses.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        box_regression_right (list[Tensor])
        targets_left (list[BoxList])
        targets_right (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
        box_right_loss (Tensor)
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, targets_l, targets_r = self.prepare_targets(
        merged_anchors, targets_left, targets_right)

    # The sampler returns per-image masks; turn them into flat index tensors.
    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    all_idx = torch.cat([pos_idx, neg_idx], dim=0)

    # The helper is called once per regression branch; only the second call's
    # flattened objectness is kept.
    _, flat_reg_left = concat_box_prediction_layers(objectness, box_regression)
    flat_objectness, flat_reg_right = concat_box_prediction_layers(
        objectness, box_regression_right)
    flat_objectness = flat_objectness.squeeze()

    flat_labels = torch.cat(labels, dim=0)
    flat_targets_l = torch.cat(targets_l, dim=0)
    flat_targets_r = torch.cat(targets_r, dim=0)

    # Regression uses positives only but is normalised by all sampled anchors.
    box_loss = smooth_l1_loss(
        flat_reg_left[pos_idx],
        flat_targets_l[pos_idx],
        beta=1.0 / 9,
        size_average=False,
    ) / (all_idx.numel())
    box_right_loss = smooth_l1_loss(
        flat_reg_right[pos_idx],
        flat_targets_r[pos_idx],
        beta=1.0 / 9,
        size_average=False,
    ) / (all_idx.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        flat_objectness[all_idx], flat_labels[all_idx])

    return objectness_loss, box_loss, box_right_loss
def __call__(self, anchors, objectness, box_regression, box_regression_right,
             objectness2, box_regression2, box_regression_right2):
    """Compute consistency losses between two sets of head predictions.

    Each loss is a ``smooth_l1_loss`` between the flattened predictions of
    the first head and those of the second head, scaled by
    ``cfg.MODEL.RPN_LR.LOSS_HEAD_CONSISTENCY``.

    Arguments:
        anchors (list[BoxList]): not used by this method.
        objectness, objectness2 (list[Tensor])
        box_regression, box_regression2 (list[Tensor])
        box_regression_right, box_regression_right2 (list[Tensor])

    Returns:
        objectness_loss, box_loss, box_right_loss; the plain integers
        ``0, 0, 0`` when the configured weight equals ``0.0``.
    """
    weight = self.cfg.MODEL.RPN_LR.LOSS_HEAD_CONSISTENCY
    if weight == 0.0:
        return 0, 0, 0

    # First head: flatten left regression, then objectness + right regression.
    _, reg_left_a = concat_box_prediction_layers(objectness, box_regression)
    obj_a, reg_right_a = concat_box_prediction_layers(
        objectness, box_regression_right)

    # Second head, same treatment.
    _, reg_left_b = concat_box_prediction_layers(objectness2, box_regression2)
    obj_b, reg_right_b = concat_box_prediction_layers(
        objectness2, box_regression_right2)

    obj_a = obj_a.squeeze()
    obj_b = obj_b.squeeze()

    box_loss = smooth_l1_loss(
        reg_left_a,
        reg_left_b,
        beta=1.0 / 9,
        size_average=True,
    ) * weight
    box_right_loss = smooth_l1_loss(
        reg_right_a,
        reg_right_b,
        beta=1.0 / 9,
        size_average=True,
    ) * weight
    # The objectness term relies on the defaults of smooth_l1_loss.
    objectness_loss = smooth_l1_loss(obj_a, obj_b) * weight

    return objectness_loss, box_loss, box_right_loss
def __call__(self, anchors, objectness, box_regression, targets):
    """Compute the RPN objectness and box regression losses.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets = self.prepare_targets(merged_anchors, targets)

    # Per-image sampler masks -> flat index tensors over all anchors.
    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    all_idx = torch.cat([pos_idx, neg_idx], dim=0)

    flat_objectness, flat_regression = concat_box_prediction_layers(
        objectness, box_regression)
    flat_objectness = flat_objectness.squeeze()

    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)

    # Summed over positives, normalised by the total number of sampled anchors.
    summed_box_loss = smooth_l1_loss(
        flat_regression[pos_idx],
        flat_targets[pos_idx],
        beta=1.0 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / (all_idx.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        flat_objectness[all_idx], flat_labels[all_idx]
    )

    return objectness_loss, box_loss
def bbreg_ml_loss(box_regression, box_regression_un_log, target,
                  size_average=True, eps=0.0, loss_type="Laplace"):
    """Box regression loss with a predicted log-scale term.

    For ``"Laplace"`` and ``"Gauss"`` the element-wise loss is
    ``err / (exp(box_regression_un_log) + eps) + box_regression_un_log``
    where ``err`` is the absolute (Laplace) or squared (Gauss) difference
    between ``box_regression`` and ``target``. The element-wise values are
    summed over the last dimension and then averaged or summed over the rest.

    For ``"SmoothL1"`` the call is forwarded to ``smooth_l1_loss`` with
    ``beta=1`` and its result is returned directly; ``box_regression_un_log``
    and ``eps`` are ignored in that mode.

    Arguments:
        box_regression (Tensor): predicted regression values.
        box_regression_un_log (Tensor): predicted log-scale values.
        target (Tensor): regression targets.
        size_average (bool): mean over rows when true, sum otherwise.
        eps (float): constant added to the exponentiated scale.
        loss_type (str): ``"Laplace"``, ``"Gauss"`` or ``"SmoothL1"``.

    Returns:
        Tensor: the reduced loss.

    Raises:
        ValueError: if ``loss_type`` is not one of the supported names.
    """
    if loss_type == "SmoothL1":
        return smooth_l1_loss(
            box_regression,
            target,
            size_average=size_average,
            beta=1,
        )

    # Validate before touching the tensors so an unsupported name always
    # surfaces as ValueError, as it did originally.
    if loss_type not in ("Laplace", "Gauss"):
        raise ValueError(
            f"unsupported loss_type {loss_type!r}; "
            "expected 'Laplace', 'Gauss' or 'SmoothL1'"
        )

    residual = box_regression - target
    error = residual.abs() if loss_type == "Laplace" else residual ** 2
    loss_all = error / (box_regression_un_log.exp() + eps) + box_regression_un_log

    per_row = loss_all.sum(-1)
    return per_row.mean() if size_average else per_row.sum()
def __call__(self, class_logits, box_regression, closeup_logits, closeup_labels):
    """Compute the Faster R-CNN losses plus an optional closeup loss.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])
        closeup_logits (list[Tensor] or None)
        closeup_labels (Tensor): repeated once per entry of closeup_logits.

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
        extra_classification_loss (Tensor or None): cross entropy on the
            closeup logits divided by 10, or None without closeup logits.
    """
    logits_all = cat(class_logits, dim=0)
    regression_all = cat(box_regression, dim=0)
    device = logits_all.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")
    proposals = self._proposals

    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    classification_loss = F.cross_entropy(logits_all, labels)

    extra_classification_loss = None
    if closeup_logits is not None:
        # One copy of the labels for every tensor in the closeup list.
        tiled_labels = closeup_labels.repeat(len(closeup_logits))
        stacked_logits = torch.cat(closeup_logits, dim=0)
        extra_classification_loss = F.cross_entropy(
            stacked_logits, tiled_labels) / 10

    # Rows with a positive label, and the regression columns to read for each:
    # a fixed block when class-agnostic, else the block of the row's own label.
    pos_rows = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos_rows]
    if self.cls_agnostic_bbox_reg:
        columns = torch.tensor([4, 5, 6, 7], device=device)
    else:
        columns = 4 * pos_labels[:, None] + torch.tensor([0, 1, 2, 3], device=device)

    summed_box_loss = smooth_l1_loss(
        regression_all[pos_rows[:, None], columns],
        regression_targets[pos_rows],
        size_average=False,
        beta=1,
    )
    # Normalised by the number of all labels, not just positives.
    box_loss = summed_box_loss / labels.numel()

    return classification_loss, box_loss, extra_classification_loss
def cal_box_loss(self, class_logits, box_regression, proposals):
    """Compute classification and box regression losses for the proposals.

    As a side effect the positive row indices and their labels are stored on
    the instance as ``self.sampled_pos_inds_subset`` and ``self.labels_pos``.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])
        proposals: iterable of objects exposing ``get_field`` for
            ``"labels"`` and ``"regression_targets"``.

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    logits_all = cat(class_logits, dim=0)
    regression_all = cat(box_regression, dim=0)
    device = logits_all.device

    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    classification_loss = F.cross_entropy(logits_all, labels)

    # Cache the positive selection on the instance.
    self.sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    self.labels_pos = labels[self.sampled_pos_inds_subset]
    pos_rows = self.sampled_pos_inds_subset

    if self.cls_agnostic_bbox_reg:
        columns = torch.tensor([4, 5, 6, 7], device=device)
    else:
        offsets = torch.tensor([0, 1, 2, 3], device=device)
        columns = 4 * self.labels_pos[:, None] + offsets

    summed_box_loss = smooth_l1_loss(
        regression_all[pos_rows[:, None], columns],
        regression_targets[pos_rows],
        size_average=False,
        beta=1,
    )
    box_loss = summed_box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, anchors, objectness, box_regression, targets, debugs=None):
    """Compute the RPN objectness and box regression losses.

    Unlike the list-based variants, ``anchors`` is indexed directly here and
    the predictions are indexed without any prior flattening.

    Arguments:
        anchors: single anchor container supporting ``batch_size()``,
            indexing and a ``bbox3d`` attribute.
        objectness (Tensor)
        box_regression (Tensor)
        targets (list[BoxList])
        debugs: not used by this method.

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    labels, regression_targets = self.prepare_targets(anchors, targets)

    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    # NOTE: the result is unused; the call is kept so behaviour is unchanged.
    batch_size = anchors.batch_size()

    # Optional visual debugging, controlled by module-level flags.
    if SHOW_POS_NEG_ANCHORS:
        self.show_pos_neg_anchors(anchors, pos_idx, neg_idx, targets)
    if SHOW_PRED_POS_ANCHORS:
        self.show_pos_anchors_pred(box_regression, anchors, objectness,
                                   targets, pos_idx, neg_idx,
                                   regression_targets)

    all_idx = torch.cat([pos_idx, neg_idx], dim=0)

    if CHECK_REGRESSION_TARGET_YAW:
        # Range check over the positive regression targets.
        rpn_target_yaw = regression_targets[pos_idx]
        print(f'max_rpn_target_yaw: {rpn_target_yaw.max()}')
        print(f'min_rpn_target_yaw: {rpn_target_yaw.min()}')
        assert rpn_target_yaw.max() < 1.5
        assert rpn_target_yaw.min() > -1.5

    summed_box_loss = smooth_l1_loss(
        box_regression[pos_idx],
        regression_targets[pos_idx],
        anchors[pos_idx].bbox3d,
        beta=1.0 / 9,
        size_average=False,
        yaw_loss_mode=self.yaw_loss_mode,
    )
    box_loss = summed_box_loss / (all_idx.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[all_idx], labels[all_idx])

    return objectness_loss, box_loss
def __call__(self, class_logits, box_regression):
    """Compute the loss for Faster R-CNN using ``index_select`` gathers.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)

    Raises:
        RuntimeError: if ``self._proposals`` has not been set.
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")
    proposals = self._proposals

    labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0)
    regression_targets = cat(
        [proposal.get_field("regression_targets") for proposal in proposals],
        dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # Rows with a positive label and, for each, the columns of box_regression
    # to read.
    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels.index_select(0, sampled_pos_inds_subset)
    if self.cls_agnostic_bbox_reg:
        map_inds = torch.tensor([4, 5, 6, 7], device=device)
    else:
        map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device)

    # Turn (row, column) pairs into offsets into the flattened regression
    # tensor. Broadcasting yields one row of offsets per positive sample in
    # both modes, so the gathered values are reshaped with the shape of the
    # offsets themselves. The previous code used map_inds.shape[1], which
    # raises IndexError in class-agnostic mode where map_inds is 1-D.
    flat_offsets = (
        sampled_pos_inds_subset[:, None] * box_regression.size(1) + map_inds)
    box_regression_sampled = box_regression.view(-1).index_select(
        0, flat_offsets.view(-1)).view(flat_offsets.shape)
    regression_targets_sampled = regression_targets.index_select(
        0, sampled_pos_inds_subset)

    box_loss = smooth_l1_loss(
        box_regression_sampled,
        regression_targets_sampled,
        size_average=False,
        beta=1,
    )
    # Normalised by the number of all labels, not just positives.
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
def box_loss(self, labels, box_regression, regression_targets, bbox3ds):
    """Compute the box regression loss over positively labelled rows.

    For every row with ``labels > 0`` the seven consecutive regression
    columns starting at ``7 * label`` are compared against that row's
    regression target.

    Arguments:
        labels (Tensor)
        box_regression (Tensor)
        regression_targets (Tensor)
        bbox3ds: indexable by the positive row indices; forwarded to
            ``smooth_l1_loss``.

    Returns:
        box_loss (Tensor): summed loss divided by ``labels.numel()``.
    """
    device = box_regression.device

    pos_rows = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos_rows]

    # Seven regression columns per class.
    offsets = torch.tensor([0, 1, 2, 3, 4, 5, 6], device=device)
    columns = 7 * pos_labels[:, None] + offsets

    pred_pos = box_regression[pos_rows[:, None], columns]
    target_pos = regression_targets[pos_rows]

    if CHECK_REGRESSION_TARGET_YAW:
        # Range check on the last target column.
        roi_target_yaw = target_pos[:, -1]
        print(f'max_roi_target_yaw: {roi_target_yaw.max()}')
        print(f'min_roi_target_yaw: {roi_target_yaw.min()}')
        assert roi_target_yaw.max() < 1.5
        assert roi_target_yaw.min() > -1.5

    summed = smooth_l1_loss(
        pred_pos,
        target_pos,
        bbox3ds[pos_rows],
        size_average=False,
        beta=1 / 5.,
        yaw_loss_mode=self.yaw_loss_mode)

    return summed / labels.numel()
def __call__(self, anchors, objectness, box_regression, targets):
    """Compute objectness and box regression losses without sampling.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor): ``self.box_objectness_loss_func`` output
            divided by (number of positives + number of label tensors).
        box_loss (Tensor): summed smooth L1 over positives divided by the
            number of positives (at least 1).
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets = self.prepare_targets(merged_anchors, targets)
    # Number of per-image label tensors, taken before concatenation.
    num_images = len(labels)

    flat_objectness, flat_regression = concat_box_prediction_layers(
        objectness, box_regression)

    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)
    pos_idx = torch.nonzero(flat_labels > 0).squeeze(1)
    num_pos = pos_idx.numel()

    summed_box_loss = smooth_l1_loss(
        flat_regression[pos_idx],
        flat_targets[pos_idx],
        beta=1.0 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / (max(1, num_pos))

    int_labels = flat_labels.int()
    objectness_loss = self.box_objectness_loss_func(
        flat_objectness, int_labels) / (num_pos + num_images)

    return objectness_loss, box_loss
def rotation_loss(self, rot_logits, rot_regression, targets):
    """Compute rotation classification and regression losses.

    Relies on ``self.labels_pos`` and ``self.sampled_pos_inds_subset``, which
    must already be present on the instance.

    Arguments:
        rot_logits (list[Tensor])
        rot_regression (list[Tensor])
        targets: iterable of objects exposing ``get_field`` for
            ``"rotation_label"`` and ``"rotation_regerssion_target"``.

    Returns:
        rot_classification_loss (Tensor): cross entropy over the positive
            rows, scaled by 0.2.
        rot_regression_loss (Tensor): summed smooth L1 divided by the number
            of elements in the regression target and scaled by 0.1.

    Raises:
        RuntimeError: if the cached positive selection is missing.
    """
    rot_logits = cat(rot_logits, dim=0)
    rot_regression = cat(rot_regression, dim=0)

    rot_label_target = cat(
        [tar.get_field("rotation_label") for tar in targets], dim=0)
    # The field name is spelled exactly as the producer of the targets
    # spells it; it is a runtime key and must not be "corrected" here.
    rot_regression_target = cat(
        [tar.get_field("rotation_regerssion_target") for tar in targets],
        dim=0)

    if (not hasattr(self, "labels_pos")) or (not hasattr(
            self, "sampled_pos_inds_subset")):
        raise RuntimeError("cal_box_loss needs to be called before")

    pos_rows = self.sampled_pos_inds_subset
    map_inds = self.labels_pos[:, None]

    rot_classification_loss = F.cross_entropy(
        rot_logits[pos_rows], rot_label_target[pos_rows]) * 0.2

    # NOTE(review): pos_rows is used without a trailing axis while map_inds
    # has one, so the two index tensors broadcast against each other. The
    # box loss in cal_box_loss indexes rows with `[:, None]` instead.
    # Confirm which pairing is intended; behaviour is left unchanged here.
    rot_regression_loss = smooth_l1_loss(
        rot_regression[pos_rows, map_inds],
        rot_regression_target[pos_rows],
        size_average=False,
        beta=1,
    )
    rot_regression_loss = (
        rot_regression_loss / rot_regression_target.numel() * 0.1)

    return rot_classification_loss, rot_regression_loss
def __call__(self, anchors, objectness, box_regression, targets,
             closeup_objectness):
    """Compute RPN losses, optionally adding closeup objectness logits.

    When ``closeup_objectness`` is given, its flattened logits are appended
    to the sampled objectness logits with an all-ones label before the
    binary cross entropy is evaluated.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])
        closeup_objectness (list[Tensor] or None)

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets, matched_idxs = self.prepare_targets(
        merged_anchors, targets)

    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    all_idx = torch.cat([pos_idx, neg_idx], dim=0)

    flat_objectness, flat_regression = concat_box_prediction_layers(
        objectness, box_regression)
    flat_objectness = flat_objectness.squeeze()

    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)
    # NOTE: concatenated but not used further; kept to preserve behaviour.
    matched_idxs = torch.cat(matched_idxs, dim=0)

    box_loss = smooth_l1_loss(
        flat_regression[pos_idx],
        flat_targets[pos_idx],
        beta=1.0 / 9,
        size_average=False,
    ) / (all_idx.numel())

    sampled_logits = flat_objectness[all_idx]
    sampled_labels = flat_labels[all_idx]

    if closeup_objectness is None:
        objectness_loss = F.binary_cross_entropy_with_logits(
            sampled_logits, sampled_labels)
        return objectness_loss, box_loss

    closeup_flat = torch.cat([c.view(-1) for c in closeup_objectness])
    with torch.no_grad():
        # Every closeup logit is labelled as positive.
        fakelabel = torch.ones(closeup_flat.size(0), device=closeup_flat.device)
    objectness_loss = F.binary_cross_entropy_with_logits(
        torch.cat([sampled_logits, closeup_flat], dim=0),
        torch.cat([sampled_labels, fakelabel], dim=0))

    return objectness_loss, box_loss
def __call__(self, class_logits, box_regression):
    """Compute Faster R-CNN losses re-weighted through ``self.augment_loss``.

    This requires that the subsample method has been called beforehand.
    Both the unreduced classification loss and the box loss are passed to
    ``self.augment_loss`` together with the ``"fitz_categories"`` field of
    the proposals.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    logits_all = cat(class_logits, dim=0)
    regression_all = cat(box_regression, dim=0)
    device = logits_all.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")
    proposals = self._proposals

    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)
    fitz_categories = cat(
        [p.get_field("fitz_categories") for p in proposals], dim=0)

    # Per-sample cross entropy; the reduction is delegated to augment_loss.
    per_sample_ce = F.cross_entropy(logits_all, labels, reduction="none")
    classification_loss = self.augment_loss(
        per_sample_ce, fitz_categories, use_mean=True)

    # Positive rows and the four regression columns of each row's own label.
    pos_rows = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos_rows]
    offsets = torch.tensor([0, 1, 2, 3], device=device)
    columns = 4 * pos_labels[:, None] + offsets

    raw_box_loss = smooth_l1_loss(
        regression_all[pos_rows[:, None], columns],
        regression_targets[pos_rows],
        size_average=None,
        beta=1,
    )
    weighted_box_loss = self.augment_loss(raw_box_loss, fitz_categories[pos_rows])
    box_loss = weighted_box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, anchors, objectness, box_regression, targets):
    """Compute the RPN objectness and box regression losses.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets = self.prepare_targets(merged_anchors, targets)

    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    all_idx = torch.cat([pos_idx, neg_idx], dim=0)

    # Bring every feature level into the layout of the labels, which were
    # generated with all feature levels concatenated.
    objectness_levels = []
    regression_levels = []
    for level_objectness, level_regression in zip(objectness, box_regression):
        N, _, H, W = level_objectness.shape
        objectness_levels.append(
            level_objectness.permute(0, 2, 3, 1).reshape(N, -1))
        regression_levels.append(
            level_regression.view(N, -1, 4, H, W)
            .permute(0, 3, 4, 1, 2)
            .reshape(N, -1, 4))

    # Join the levels along dim 1, then collapse images and anchors together.
    flat_objectness = cat(objectness_levels, dim=1).reshape(-1)
    flat_regression = cat(regression_levels, dim=1).reshape(-1, 4)

    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)

    summed_box_loss = smooth_l1_loss(
        flat_regression[pos_idx],
        flat_targets[pos_idx],
        beta=1.0 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / (all_idx.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        flat_objectness[all_idx], flat_labels[all_idx]
    )

    return objectness_loss, box_loss
def __call__(self, anchors, objectness, box_regression, targets):
    """Compute the RPN objectness and box regression losses.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    per_image_anchors = [cat_boxlist(boxlists) for boxlists in anchors]
    labels, regression_targets = self.prepare_targets(per_image_anchors, targets)

    fg_masks, bg_masks = self.fg_bg_sampler(labels)
    fg_inds = torch.nonzero(torch.cat(fg_masks, dim=0)).squeeze(1)
    bg_inds = torch.nonzero(torch.cat(bg_masks, dim=0)).squeeze(1)
    chosen_inds = torch.cat([fg_inds, bg_inds], dim=0)

    # The labels cover all feature levels concatenated, so each level's
    # predictions are permuted into that layout before being joined.
    flattened_scores = []
    flattened_deltas = []
    for scores, deltas in zip(objectness, box_regression):
        N, _, H, W = scores.shape
        scores = scores.permute(0, 2, 3, 1).reshape(N, -1)
        deltas = deltas.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, 4)
        flattened_scores.append(scores)
        flattened_deltas.append(deltas)

    # Levels are joined along dim 1 before images and anchors are merged.
    all_scores = cat(flattened_scores, dim=1).reshape(-1)
    all_deltas = cat(flattened_deltas, dim=1).reshape(-1, 4)

    all_labels = torch.cat(labels, dim=0)
    all_targets = torch.cat(regression_targets, dim=0)

    # Positives only, normalised by the number of all sampled anchors.
    box_loss = smooth_l1_loss(
        all_deltas[fg_inds],
        all_targets[fg_inds],
        beta=1.0 / 9,
        size_average=False,
    ) / (chosen_inds.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        all_scores[chosen_inds], all_labels[chosen_inds]
    )

    return objectness_loss, box_loss
def __call__(self, class_logits, box_regression):
    """Compute the loss for Faster R-CNN.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    # Original author's shape notes: logits [num_roi, 81],
    # regression [num_roi, 81*4].
    logits_all = cat(class_logits, dim=0)
    regression_all = cat(box_regression, dim=0)
    device = logits_all.device

    # subsample must have been called before this function.
    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")
    proposals = self._proposals

    # Per-RoI labels (0~80 per the original notes). They are said to be set in
    # match_targets_to_proposals, where target[idx] also copies the labels.
    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    # Per-RoI regression targets [t_x, t_y, t_w, t_h].
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0
    )

    # cross_entropy combines log_softmax with the negative log-likelihood loss.
    classification_loss = F.cross_entropy(logits_all, labels)

    # Indices and labels of the positive samples.
    pos_rows = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos_rows]

    if self.cls_agnostic_bbox_reg:
        columns = torch.tensor([4, 5, 6, 7], device=device)
    else:
        # The head predicts one box per class for every RoI; pick the
        # prediction that belongs to the ground-truth class.
        offsets = torch.tensor([0, 1, 2, 3], device=device)
        columns = 4 * pos_labels[:, None] + offsets

    # The regression loss is computed on positive samples only.
    summed_box_loss = smooth_l1_loss(
        regression_all[pos_rows[:, None], columns],
        regression_targets[pos_rows],
        size_average=False,
        beta=1,
    )
    box_loss = summed_box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, anchors, predictions, targets):
    """Compute classification, regression and objectness losses.

    Arguments:
        anchors (list[BoxList])
        predictions: triple ``(box_cls, box_reg, box_obj)`` of per-level
            prediction lists.
        targets (list[BoxList])

    Returns:
        dict with the keys ``'retinanet_cls'``, ``'retinanet_reg'`` and
        ``'retinanet_obj'``. The classification and objectness entries are
        rescaled by ``reg / (cls + obj)``, a factor computed without
        gradient tracking.
    """
    box_cls, box_reg, box_obj = predictions

    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, reg_targets = self.prepare_targets(anchors, targets)

    box_cls, box_reg = concat_box_prediction_layers(box_cls, box_reg)

    labels = torch.cat(labels, dim=0)
    # Positives have label > 0; entries with label >= 0 take part in the
    # objectness loss (negative labels are left out of it).
    pos_mask, use_mask = labels > 0, labels >= 0
    reg_targets = torch.cat(reg_targets, dim=0)

    pos_inds = torch.nonzero(pos_mask).squeeze(1)
    pos_numel = pos_inds.numel()
    # Avoid dividing by zero when there are no positives.
    pos_norm = max(1, pos_numel)

    retinanet_reg_loss = smooth_l1_loss(
        box_reg[pos_inds],
        reg_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
    ) / (max(1, pos_numel * self.regress_norm))

    # One-hot class targets; label k maps to column k - 1.
    # zeros_like already inherits dtype and device from box_cls.
    box_cls_labels = torch.zeros_like(box_cls)
    box_cls_labels[pos_inds, labels[pos_inds].long() - 1] = 1
    retinanet_cls_loss = F.binary_cross_entropy_with_logits(
        box_cls[pos_mask], box_cls_labels[pos_mask],
        reduction="sum") / pos_norm

    box_obj = concat_box_objectness_layers(box_obj)
    retinanet_obj_loss = F.binary_cross_entropy_with_logits(
        box_obj[use_mask].squeeze(),
        pos_mask[use_mask].float(),
        reduction="sum") * self.scale / pos_norm

    # Loss reweighting: the scaling factor is treated as a constant.
    with torch.no_grad():
        norm = retinanet_reg_loss / (retinanet_cls_loss + retinanet_obj_loss)
    retinanet_cls_loss *= norm
    retinanet_obj_loss *= norm

    return {
        'retinanet_cls': retinanet_cls_loss,
        'retinanet_reg': retinanet_reg_loss,
        'retinanet_obj': retinanet_obj_loss
    }
def __call__(self, class_logits, box_regression):
    """Compute Faster R-CNN losses with a guided classification weight.

    This requires that the subsample method has been called beforehand.
    With at least one positive label, the classification loss is multiplied
    in place by ``self.guided_loss_weighter(box_loss, classification_loss)``.
    Without positives, both returned losses are the same zero tensor.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    logits_all = cat(class_logits, dim=0)
    regression_all = cat(box_regression, dim=0)
    device = logits_all.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")
    proposals = self._proposals

    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    pos_inds = torch.nonzero(labels > 0).squeeze(1)
    if pos_inds.numel() == 0:
        # Nothing to regress: hand back a float zero for both losses.
        zero = pos_inds.new_tensor([0]).float()
        return zero, zero

    classification_loss = F.cross_entropy(logits_all, labels)

    # Regression columns per positive row: a fixed block when class-agnostic,
    # otherwise the block that belongs to the row's own label.
    if self.cls_agnostic_bbox_reg:
        columns = torch.tensor([4, 5, 6, 7], device=device)
    else:
        offsets = torch.tensor([0, 1, 2, 3], device=device)
        columns = 4 * labels[pos_inds][:, None] + offsets

    box_loss = smooth_l1_loss(
        regression_all[pos_inds[:, None], columns],
        regression_targets[pos_inds],
        size_average=True,
        beta=1,
    )

    classification_loss *= self.guided_loss_weighter(
        box_loss, classification_loss)

    return classification_loss, box_loss
def get_masked_loss(box_regression, regression_targets, sampled_pos_inds):
    """Return the averaged smooth L1 loss over the selected entries.

    Arguments:
        box_regression: predictions, indexed by ``sampled_pos_inds``.
        regression_targets: targets, indexed by ``sampled_pos_inds``.
        sampled_pos_inds: index applied to both inputs.

    Returns:
        Result of ``smooth_l1_loss`` with ``beta=1/9`` and
        ``size_average=True``.
    """
    selected_predictions = box_regression[sampled_pos_inds]
    selected_targets = regression_targets[sampled_pos_inds]
    return smooth_l1_loss(
        selected_predictions,
        selected_targets,
        beta=1.0 / 9,
        size_average=True,
    )
def __call__(self, class_logits, box_regression):
    """Compute the loss for Faster R-CNN, ignoring labels equal to -1.

    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    logits_all = cat(class_logits, dim=0)
    regression_all = cat(box_regression, dim=0)
    device = logits_all.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")
    proposals = self._proposals

    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0)

    # Entries labelled -1 do not contribute to the classification loss.
    classification_loss = F.cross_entropy(logits_all, labels, ignore_index=-1)

    # Positive rows and the regression columns to read for each of them.
    pos_rows = torch.nonzero(labels > 0).squeeze(1)
    pos_labels = labels[pos_rows]
    if self.cls_agnostic_bbox_reg:
        columns = torch.tensor([4, 5, 6, 7], device=device)
    else:
        columns = 4 * pos_labels[:, None] + torch.tensor([0, 1, 2, 3], device=device)

    summed_box_loss = smooth_l1_loss(
        regression_all[pos_rows[:, None], columns],
        regression_targets[pos_rows],
        size_average=False,
        beta=1,
    )
    box_loss = summed_box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, anchors, objectness, box_regression, targets, embeddings):
    """Compute RPN losses plus a contrastive loss on sampled embeddings.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])
        embeddings (list[Tensor])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
        pair_loss: output of ``ContrastiveLoss`` on the embedding pairs built
            by ``pair_embeddings`` from the sampled anchors.
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets = self.prepare_targets(merged_anchors, targets)

    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    all_idx = torch.cat([pos_idx, neg_idx], dim=0)

    flat_objectness, flat_regression, flat_embeddings = \
        concat_box_prediction_embeddings_layers(
            objectness, box_regression, embeddings)
    flat_objectness = flat_objectness.squeeze()
    flat_embeddings = flat_embeddings.squeeze()

    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)

    box_loss = smooth_l1_loss(
        flat_regression[pos_idx],
        flat_targets[pos_idx],
        beta=1.0 / 9,
        size_average=False,
    ) / (all_idx.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        flat_objectness[all_idx], flat_labels[all_idx])

    # Siamese contrastive loss over pairs drawn from the sampled anchors.
    contrastive = ContrastiveLoss(RPNContrastiveLossComputation.PAIR_MARGIN)
    first, second, pair_targets = pair_embeddings(
        flat_embeddings[all_idx], flat_labels[all_idx])
    pair_loss = contrastive(first, second, pair_targets)

    return objectness_loss, box_loss, pair_loss
def __call__(self, anchors, box_cls, box_regression, targets, search=False):
    """Compute RetinaNet losses and, optionally, a loss scale.

    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])
        search (bool): when true, ``self._count_loss_scale`` is evaluated on
            the per-element loss vectors of the positive anchors.

    Returns:
        retinanet_cls_loss (Tensor)
        retinanet_regression_loss (Tensor)
        loss_scale: result of ``self._count_loss_scale`` or None.
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets, matched_targets_boxes = self.prepare_targets(
        merged_anchors, targets, True)
    # Number of per-image label tensors, taken before concatenation.
    num_images = len(labels)

    flat_cls, flat_regression = concat_box_prediction_layers(
        box_cls, box_regression)

    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)
    pos_inds = torch.nonzero(flat_labels > 0).squeeze(1)

    # Both normalisers are reused for the per-element vectors below.
    reg_norm = max(1, pos_inds.numel() * self.regress_norm)
    cls_norm = pos_inds.numel() + num_images

    retinanet_regression_loss, box_loss_vec = smooth_l1_loss(
        flat_regression[pos_inds],
        flat_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
        return_loss_vec=True)
    flat_labels = flat_labels.int()
    retinanet_regression_loss /= reg_norm

    retinanet_cls_loss, cls_loss_vec = self.box_cls_loss_func(
        flat_cls, flat_labels, return_loss_vec=True)
    retinanet_cls_loss /= cls_norm

    loss_scale = None
    if search:
        loss_scale = self._count_loss_scale(
            matched_targets_boxes,
            pos_inds,
            box_loss_vec / reg_norm,
            cls_loss_vec[pos_inds] / cls_norm)

    return retinanet_cls_loss, retinanet_regression_loss, loss_scale
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Compute the RPN objectness and box-regression losses, forwarding the
    ``areas`` values produced by ``prepare_targets`` to the configured
    objectness loss function.

    Arguments:
        anchors: one collection of anchor BoxLists per image; each
            collection is merged with ``cat_boxlist``
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets, areas = self.prepare_targets(merged_anchors, targets)

    # The sampler returns per-image masks; flatten them into index tensors.
    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness, box_regression = concat_box_prediction_layers(objectness, box_regression)
    objectness = objectness.squeeze()

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    areas = torch.cat(areas, dim=0)

    # Regression uses the positive samples only, but is normalised by the
    # total number of sampled anchors.
    summed_box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / (sampled_inds.numel())

    # self.objectness_loss is a mapping holding the loss callable ('fn')
    # and a flag ('avg') saying whether the result still needs averaging.
    objectness_loss = self.objectness_loss['fn'](
        objectness[sampled_inds],
        labels[sampled_inds],
        areas=areas[sampled_inds],
    )
    if self.objectness_loss['avg']:
        objectness_loss /= sampled_inds.numel()

    return objectness_loss, box_loss
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Compute RetinaNet classification and regression losses separately for
    every image, normalised by the positive count of the whole batch.

    Arguments:
        anchors: one collection of anchor BoxLists per image; each
            collection is merged with ``cat_boxlist``
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (list[Tensor]): one entry per image
        retinanet_regression_loss (list[Tensor]): one entry per image
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets = self.prepare_targets(merged_anchors, targets)
    N = len(labels)

    box_cls, box_regression = concat_box_prediction_layers_2(box_cls, box_regression)
    labels = torch.stack(labels, dim=0)
    regression_targets = torch.stack(regression_targets, dim=0)

    # Number of positive entries across the whole stacked batch; both
    # normalisers below are the same for every image.
    lab_sum = torch.nonzero(labels > 0).size(0)
    regression_norm = max(1, lab_sum * self.regress_norm)
    cls_norm = lab_sum + N

    cls_losses = []
    regression_losses = []
    per_image = zip(
        torch.unbind(labels, dim=0),
        torch.unbind(box_regression, dim=0),
        torch.unbind(regression_targets, dim=0),
        torch.unbind(box_cls, dim=0),
    )
    for image_labels, image_regression, image_targets, image_cls in per_image:
        pos_inds = torch.nonzero(image_labels > 0).squeeze(1)
        summed_regression = smooth_l1_loss(
            image_regression[pos_inds],
            image_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        )
        regression_losses.append(summed_regression / regression_norm)

        cls_losses.append(
            self.box_cls_loss_func(image_cls, image_labels.int()) / cls_norm
        )

    return cls_losses, regression_losses
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Compute the weighted RetinaNet classification and regression losses.

    A warning is logged when the regression targets or the resulting
    losses are NaN/Inf; the losses are still returned unchanged so the
    caller decides how to react.

    Arguments:
        anchors: one collection of anchor BoxLists per image; each
            collection is merged with ``cat_boxlist``
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (Tensor): scaled by
            ``cfg.MODEL.RETINANET.CLS_WEIGHT``
        retinanet_regression_loss (Tensor): scaled by
            ``cfg.MODEL.RETINANET.LOC_WEIGHT``
    """
    import logging

    logger = logging.getLogger(__name__)

    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets = self.prepare_targets(anchors, targets)

    N = len(labels)
    box_cls, box_regression = concat_box_prediction_layers(box_cls, box_regression)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    # Sanity check on the targets: a NaN/Inf sum means at least one target
    # is non-finite (or the sum overflowed).
    targets_sum = regression_targets.sum()
    if torch.isinf(targets_sum) or torch.isnan(targets_sum):
        logger.warning(
            "RetinaNet loss: regression targets are not finite (sum=%s)",
            targets_sum.item(),
        )

    pos_inds = torch.nonzero(labels > 0).squeeze(1)

    retinanet_regression_loss = smooth_l1_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
    ) / (max(1, pos_inds.numel() * self.regress_norm))

    labels = labels.int()

    retinanet_cls_loss = self.box_cls_loss_func(
        box_cls,
        labels,
    ) / (pos_inds.numel() + N)

    # Same sanity check on the two losses before they are weighted.
    regression_sum = retinanet_regression_loss.sum()
    cls_sum = retinanet_cls_loss.sum()
    if (
        torch.isnan(regression_sum)
        or torch.isnan(cls_sum)
        or torch.isinf(regression_sum)
        or torch.isinf(cls_sum)
    ):
        logger.warning(
            "RetinaNet loss is not finite (regression=%s, classification=%s)",
            regression_sum.item(),
            cls_sum.item(),
        )

    return (
        cfg.MODEL.RETINANET.CLS_WEIGHT * retinanet_cls_loss,
        cfg.MODEL.RETINANET.LOC_WEIGHT * retinanet_regression_loss,
    )
def __call__(self, keypoint_offset_pred):
    """
    Compute the smooth-L1 loss between predicted bb8 keypoint offsets and
    the offsets encoded from the positive proposals in ``self._proposals``.

    :param keypoint_offset_pred: predicted keypoint offsets, indexed along
        the first dimension by the concatenated positive indices
    :return: summed smooth-L1 loss divided by
        ``keypoint_offset_pred.shape[0]``; a zero obtained from the empty
        target tensor when there are no targets
    """
    targets_per_image = []
    pos_inds_per_image = []
    for proposals_per_image in self._proposals:
        keypoints = proposals_per_image.get_field("bb8keypoints")
        labels_per_image = proposals_per_image.get_field("labels")
        # Only proposals with a positive label contribute targets.
        pos_inds = torch.nonzero(labels_per_image > 0).squeeze(1)
        pos_keypoints = keypoints[pos_inds]
        pos_proposals = proposals_per_image[pos_inds]

        # Encode the keypoints as offsets relative to the proposal boxes.
        targets_per_image.append(
            bb8keypoint_offset_encode(pos_keypoints.keypoints, pos_proposals.bbox)
        )
        pos_inds_per_image.append(pos_inds)

    bb8_keypoint_offset_targets = cat(targets_per_image, dim=0)
    positive_inds = cat(pos_inds_per_image, dim=0)

    # Nothing to regress: return a zero built from the targets themselves.
    if bb8_keypoint_offset_targets.numel() == 0:
        return bb8_keypoint_offset_targets.sum() * 0

    summed_loss = smooth_l1_loss(
        keypoint_offset_pred[positive_inds],
        bb8_keypoint_offset_targets,
        size_average=False,
        beta=1,
    )
    return summed_loss / keypoint_offset_pred.shape[0]
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Compute the RetinaNet classification and regression losses plus the
    share of the regression loss that comes from small matched boxes.

    Arguments:
        anchors: one collection of anchor BoxLists per image; each
            collection is merged with ``cat_boxlist``
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (Tensor)
        retinanet_regression_loss (Tensor)
        ratio_small (Tensor): un-normalised regression loss of positives
            whose matched box has ``width * height < 1024``, divided by
            the total un-normalised regression loss; zero when there are
            no positive anchors
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets, matched_targets_boxes = self.prepare_targets(
        anchors, targets, True
    )

    N = len(labels)
    box_cls, box_regression = concat_box_prediction_layers(box_cls, box_regression)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    pos_inds = torch.nonzero(labels > 0).squeeze(1)
    num_pos = pos_inds.numel()

    # Area of the box matched to every positive anchor.
    matched_targets_boxes = torch.cat(matched_targets_boxes, dim=0)
    reference_boxes = matched_targets_boxes[pos_inds]
    gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0]
    gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1]
    gt_areas = gt_widths * gt_heights
    small_index = gt_areas < 1024

    total_regression_loss, box_loss_vec = smooth_l1_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
        return_loss_vec=True,
    )

    small_regression_loss = box_loss_vec[small_index].sum()
    if num_pos > 0:
        ratio_small = small_regression_loss / total_regression_loss
    else:
        # Without positives the division would be 0 / 0 (NaN); the sum
        # over the empty selection is already the zero we want to report.
        ratio_small = small_regression_loss

    labels = labels.int()

    # Out-of-place division: the un-normalised total is the divisor of
    # ratio_small, so it must not be modified in place afterwards.
    retinanet_regression_loss = total_regression_loss / (
        max(1, num_pos * self.regress_norm)
    )

    retinanet_cls_loss = self.box_cls_loss_func(
        box_cls,
        labels,
    ) / (num_pos + N)

    return retinanet_cls_loss, retinanet_regression_loss, ratio_small
def loss_obj(self, predictions, labels, regression_targets):
    """
    Compute the classification, box-regression and objectness losses.

    Arguments:
        predictions: triple ``(class_logits, box_regression, obj_logits)``;
            ``obj_logits`` is squeezed in place
        labels (Tensor): entries greater than zero mark positives;
            ``label - 1`` is used as the class column index
        regression_targets (Tensor)

    Returns:
        dict with keys ``'roi_cls'``, ``'roi_reg'`` and ``'roi_obj'``
    """
    class_logits, box_regression, obj_logits = predictions
    device = class_logits.device
    # In-place squeeze: the tensor held by the caller is reshaped as well.
    obj_logits.squeeze_()

    pos_mask = labels > 0
    pos_inds = pos_mask.nonzero().squeeze(1)
    pos_numel = pos_inds.numel()
    labels_pos = labels[pos_inds] - 1

    # One-hot class targets for the positive rows.
    class_logits_labels = torch.zeros_like(class_logits)
    class_logits_labels[pos_inds, labels_pos] = 1

    objectness_loss = F.binary_cross_entropy_with_logits(
        obj_logits, pos_mask.float()
    )
    summed_cls_loss = F.binary_cross_entropy_with_logits(
        class_logits[pos_inds],
        class_logits_labels[pos_inds],
        reduction="sum",
    )
    classification_loss = summed_cls_loss / max(1, pos_numel)

    # Columns of box_regression holding the four box values to regress.
    coord_offsets = torch.tensor([0, 1, 2, 3], device=device)
    if self.cls_agnostic_bbox_reg:
        map_inds = coord_offsets
    else:
        map_inds = 4 * labels_pos[:, None] + coord_offsets

    box_loss = smooth_l1_loss(
        box_regression[pos_inds[:, None], map_inds],
        regression_targets[pos_inds],
        size_average=True,
        beta=1,
    )

    # Loss reweighting; with no positives the positive-only losses are
    # zeroed in place instead.
    if pos_numel <= 0:
        box_loss.zero_()
        classification_loss.zero_()
    else:
        objectness_loss *= self.scale
        classification_loss /= self.scale

    return {
        'roi_cls': classification_loss,
        'roi_reg': box_loss,
        'roi_obj': objectness_loss,
    }
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Compute the RPN objectness and box-regression losses.

    Arguments:
        anchors (list[list[BoxList]])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    # Merge, for every image, the anchors generated on its different FPN
    # levels into a single BoxList.
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    # For every image, obtain the labels and regression targets of all its
    # anchors with respect to the ground-truth boxes.
    labels, regression_targets = self.prepare_targets(merged_anchors, targets)

    # The sampler returns per-image masks; flatten them into index tensors.
    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness, box_regression = concat_box_prediction_layers(objectness, box_regression)
    objectness = objectness.squeeze()

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    # Anchor box loss: computed only on the sampled positive anchors, then
    # normalised by the total number of sampled anchors.
    summed_box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    )
    box_loss = summed_box_loss / (sampled_inds.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds]
    )

    return objectness_loss, box_loss
def __call__(self, class_logits, box_regression):
    """
    Compute the Faster R-CNN classification and box-regression losses.

    The ``subsample`` method must have been called beforehand so that
    ``self._proposals`` exists.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)

    Raises:
        RuntimeError: if ``self._proposals`` has not been set
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)
    device = class_logits.device

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals
    labels = cat([p.get_field("labels") for p in proposals], dim=0)
    regression_targets = cat(
        [p.get_field("regression_targets") for p in proposals], dim=0
    )

    classification_loss = F.cross_entropy(class_logits, labels)

    # Advanced indexing: for every positive proposal pick the four
    # regression columns that belong to its ground-truth label.
    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[sampled_pos_inds_subset]
    coord_offsets = torch.tensor([0, 1, 2, 3], device=device)
    map_inds = 4 * labels_pos[:, None] + coord_offsets

    summed_box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds_subset[:, None], map_inds],
        regression_targets[sampled_pos_inds_subset],
        size_average=False,
        beta=1,
    )
    # Normalised by the number of all labels, not only the positives.
    box_loss = summed_box_loss / labels.numel()

    return classification_loss, box_loss
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Compute the RetinaNet classification and regression losses.

    Arguments:
        anchors: one collection of anchor BoxLists per image; each
            collection is merged with ``cat_boxlist``
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (Tensor)
        retinanet_regression_loss (Tensor)
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets = self.prepare_targets(anchors, targets)

    N = len(labels)
    box_cls, box_regression = concat_box_prediction_layers(box_cls, box_regression)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    pos_inds = torch.nonzero(labels > 0).squeeze(1)
    num_pos = pos_inds.numel()

    # Regression is computed on positives only and normalised by their
    # count scaled with self.regress_norm (at least 1).
    retinanet_regression_loss = smooth_l1_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
    ) / (max(1, num_pos * self.regress_norm))

    # NOTE: a leftover debugging loop that built an unused per-label
    # histogram (one ``.cpu().numpy()`` transfer per anchor) was removed;
    # it did not influence the returned losses.
    labels = labels.int()

    retinanet_cls_loss = self.box_cls_loss_func(
        box_cls,
        labels,
    ) / (num_pos + N)

    return retinanet_cls_loss, retinanet_regression_loss