def losses(self, gt_classes, reg_targets, pred_class_logits, pred_box_reg):
    """Compute the classification and box-regression losses.

    Args:
        gt_classes: per-location class labels; flattened before use.
            Entries that are negative or equal to ``self.num_classes``
            are treated as non-foreground.
        reg_targets: box regression targets; viewed as ``(-1, 4)``.
        pred_class_logits: classification predictions, run through
            ``permute_and_concat`` before use.
        pred_box_reg: box regression predictions, run through
            ``permute_and_concat`` before use.

    Returns:
        dict with keys ``"cls_loss"`` and ``"reg_loss"``.

    Side effects:
        ``self.loss_normalizer`` is updated as a moving average of the
        total number of positives, weighted by
        ``self.loss_normalizer_momentum``.
    """
    pred_class_logits, pred_box_reg = permute_and_concat(
        pred_class_logits, pred_box_reg, self.num_classes)
    # Shapes: (N x R) and (N x R, 4), (N x R) respectively.
    gt_classes = gt_classes.flatten()
    reg_targets = reg_targets.view(-1, 4)

    foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes)
    pos_inds = torch.nonzero(foreground_idxs).squeeze(1)

    num_gpus = get_num_gpus()
    # sync num_pos from all gpus
    total_num_pos = reduce_sum(
        pos_inds.new_tensor([pos_inds.numel()])).item()
    # Floor at 1.0 so the divisions below are safe with zero positives.
    num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

    # One-hot classification target; non-foreground rows stay all-zero.
    gt_classes_target = torch.zeros_like(pred_class_logits)
    gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1

    self.loss_normalizer = (
        self.loss_normalizer_momentum * self.loss_normalizer
        + (1 - self.loss_normalizer_momentum) * total_num_pos)

    # logits loss
    cls_loss = sigmoid_focal_loss_jit(
        pred_class_logits,
        gt_classes_target,
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    ) / num_pos_avg_per_gpu

    if pos_inds.numel() > 0:
        reg_loss = iou_loss(
            pred_box_reg[foreground_idxs],
            reg_targets[foreground_idxs],
            loss_type=self.iou_loss_type,
        ) / num_pos_avg_per_gpu
    else:
        # Sum over an empty selection: a zero-valued loss that still
        # depends on ``pred_box_reg``.
        # No cross-GPU reduction is issued here because the branch above
        # issues none either; an extra one would leave ranks out of step.
        reg_loss = pred_box_reg[foreground_idxs].sum()

    return dict(cls_loss=cls_loss, reg_loss=reg_loss)
def losses(self, labels, reg_targets, box_cls, box_regression, centerness):
    """Compute classification, box-regression and centerness losses.

    Args:
        labels: sequence of per-level label tensors. Entries that are
            negative or equal to ``self.num_classes`` are treated as
            non-foreground.
        reg_targets: sequence of per-level regression targets; each is
            reshaped to ``(-1, 4)``.
        box_cls: sequence of per-level classification predictions; each
            is permuted with ``(0, 2, 3, 1)`` and reshaped to
            ``(-1, num_classes)``.
        box_regression: sequence of per-level box predictions; each is
            permuted with ``(0, 2, 3, 1)`` and reshaped to ``(-1, 4)``.
        centerness: sequence of per-level centerness predictions; each
            is reshaped to ``(-1,)``.

    Returns:
        dict with keys ``"cls_loss"``, ``"reg_loss"`` and
        ``"centerness_loss"``.
    """
    _, num_classes = box_cls[0].shape[:2]

    # Flatten every level, then join all levels along dim 0.
    cls_per_level = []
    reg_per_level = []
    ctr_per_level = []
    label_per_level = []
    target_per_level = []
    for level, level_labels in enumerate(labels):
        cls_per_level.append(
            box_cls[level].permute(0, 2, 3, 1).reshape(-1, num_classes))
        reg_per_level.append(
            box_regression[level].permute(0, 2, 3, 1).reshape(-1, 4))
        label_per_level.append(level_labels.reshape(-1))
        target_per_level.append(reg_targets[level].reshape(-1, 4))
        ctr_per_level.append(centerness[level].reshape(-1))

    all_cls = torch.cat(cls_per_level, dim=0)
    all_reg = torch.cat(reg_per_level, dim=0)
    all_ctr = torch.cat(ctr_per_level, dim=0)
    all_labels = torch.cat(label_per_level, dim=0)
    all_targets = torch.cat(target_per_level, dim=0)

    fg_mask = (all_labels >= 0) & (all_labels != self.num_classes)
    pos_inds = torch.nonzero(fg_mask).squeeze(1)

    # Keep only the foreground rows for the regression/centerness terms.
    pos_reg = all_reg[pos_inds]
    pos_targets = all_targets[pos_inds]
    pos_ctr = all_ctr[pos_inds]

    num_gpus = get_num_gpus()
    # sync num_pos from all gpus
    num_pos_tensor = pos_inds.new_tensor([pos_inds.numel()])
    total_num_pos = reduce_sum(num_pos_tensor).item()
    num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

    # One-hot classification target; non-foreground rows stay all-zero.
    cls_target = torch.zeros_like(all_cls)
    cls_target[fg_mask, all_labels[fg_mask]] = 1

    focal_sum = sigmoid_focal_loss_jit(
        all_cls,
        cls_target,
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    )
    cls_loss = focal_sum / num_pos_avg_per_gpu

    if pos_inds.numel() == 0:
        # No positives here: emit zero-valued losses that still depend on
        # the predictions, and issue a reduction matching the one made in
        # the other branch.
        reg_loss = pos_reg.sum()
        reduce_sum(pos_ctr.new_tensor([0.0]))
        centerness_loss = pos_ctr.sum()
    else:
        ctr_targets = compute_centerness_targets(pos_targets)
        # average sum_centerness_targets from all gpus,
        # which is used to normalize centerness-weighed reg loss
        ctr_sum_avg_per_gpu = (
            reduce_sum(ctr_targets.sum()).item() / float(num_gpus))
        weighted_iou = iou_loss(
            pos_reg, pos_targets, ctr_targets,
            loss_type=self.iou_loss_type)
        reg_loss = weighted_iou / ctr_sum_avg_per_gpu
        ctr_bce = F.binary_cross_entropy_with_logits(
            pos_ctr, ctr_targets, reduction='sum')
        centerness_loss = ctr_bce / num_pos_avg_per_gpu

    return dict(cls_loss=cls_loss,
                reg_loss=reg_loss,
                centerness_loss=centerness_loss)
def losses(self, init_gt_classes, init_reg_targets, refine_gt_classes,
           refine_reg_targets, pred_class_logits, pred_box_reg_init,
           pred_box_reg, pred_center_score, strides, pred_ratio):
    """Compute classification, init/refine regression and centerness losses.

    Two sets of targets are used. The "refine" targets drive the
    classification loss and the smooth-L1 loss on ``pred_box_reg``. The
    "init" targets drive the IoU loss on ``pred_box_reg_init`` and the
    centerness loss.

    Args:
        init_gt_classes: labels for the init stage; flattened. Entries
            that are negative or equal to ``self.num_classes`` are
            treated as non-foreground.
        init_reg_targets: init-stage regression targets; viewed as
            ``(-1, 4)``.
        refine_gt_classes: labels for the refine stage; same foreground
            convention as ``init_gt_classes``.
        refine_reg_targets: refine-stage regression targets; viewed as
            ``(-1, 4)``.
        pred_class_logits, pred_box_reg_init, pred_box_reg,
        pred_center_score, pred_ratio: predictions, run through
            ``permute_and_concat`` before use.
        strides: tensor of per-location strides; repeated
            ``pred_class_logits[0].shape[0]`` times before use.

    Returns:
        dict with keys ``"cls_loss"``, ``"reg_loss_init"``,
        ``"reg_loss"`` and ``"centerness_loss"``.
    """
    strides = strides.repeat(pred_class_logits[0].shape[0])  # [N*X]
    (pred_class_logits, pred_box_reg_init, pred_box_reg,
     pred_center_score, pred_ratio) = permute_and_concat(
        pred_class_logits, pred_box_reg_init, pred_box_reg,
        pred_center_score, pred_ratio, self.num_classes)
    # Shapes: (N x R) and (N x R, 4), (N x R) respectively.
    init_gt_classes = init_gt_classes.flatten()
    init_reg_targets = init_reg_targets.view(-1, 4)

    init_foreground_idxs = (
        (init_gt_classes >= 0) & (init_gt_classes != self.num_classes))
    init_pos_inds = torch.nonzero(init_foreground_idxs).squeeze(1)

    num_gpus = get_num_gpus()
    # sync num_pos from all gpus
    init_total_num_pos = reduce_sum(
        init_pos_inds.new_tensor([init_pos_inds.numel()])).item()
    init_num_pos_avg_per_gpu = max(
        init_total_num_pos / float(num_gpus), 1.0)

    refine_gt_classes = refine_gt_classes.flatten()
    refine_reg_targets = refine_reg_targets.view(-1, 4)

    refine_foreground_idxs = (
        (refine_gt_classes >= 0) & (refine_gt_classes != self.num_classes))
    refine_pos_inds = torch.nonzero(refine_foreground_idxs).squeeze(1)

    # sync num_pos from all gpus
    refine_total_num_pos = reduce_sum(
        refine_pos_inds.new_tensor([refine_pos_inds.numel()])).item()
    refine_num_pos_avg_per_gpu = max(
        refine_total_num_pos / float(num_gpus), 1.0)

    # One-hot classification target built from the refine-stage labels.
    gt_classes_target = torch.zeros_like(pred_class_logits)
    gt_classes_target[
        refine_foreground_idxs, refine_gt_classes[refine_foreground_idxs]
    ] = 1

    # logits loss
    cls_loss = sigmoid_focal_loss_jit(
        pred_class_logits,
        gt_classes_target,
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    ) / refine_num_pos_avg_per_gpu

    # Ratio of (col 0 + col 2) to (col 1 + col 3) of the init targets,
    # folded to the smaller of the ratio and its reciprocal.
    # NOTE(review): presumably the columns are left/top/right/bottom
    # distances, making this a width/height aspect ratio -- confirm.
    init_foreground_targets = init_reg_targets[init_foreground_idxs]
    gt_ratio_1 = (
        (init_foreground_targets[:, 0] + init_foreground_targets[:, 2])
        / (init_foreground_targets[:, 1] + init_foreground_targets[:, 3]))
    gt_ratio_2 = 1 / gt_ratio_1
    gt_ratio = torch.stack((gt_ratio_1, gt_ratio_2), dim=1).min(dim=1)[0]

    gt_center_score = compute_centerness_targets(
        init_foreground_targets, gt_ratio)
    # average sum_centerness_targets from all gpus,
    # which is used to normalize centerness-weighed reg loss.
    # The floor keeps the division finite when no GPU has a positive
    # sample (the reduced sum is then 0).
    sum_centerness_targets_avg_per_gpu = max(
        reduce_sum(gt_center_score.sum()).item() / float(num_gpus), 1e-6)

    reg_loss_init = iou_loss(
        pred_box_reg_init[init_foreground_idxs],
        init_foreground_targets,
        gt_center_score,
        loss_type=self.iou_loss_type,
    ) / sum_centerness_targets_avg_per_gpu

    # Scale refine predictions and targets by 4x their stride before the
    # smooth-L1 comparison.
    coords_norm_refine = strides[refine_foreground_idxs].unsqueeze(-1) * 4
    # refine_num_pos_avg_per_gpu is already floored at 1.0 above.
    reg_loss = smooth_l1_loss(
        pred_box_reg[refine_foreground_idxs] / coords_norm_refine,
        refine_reg_targets[refine_foreground_idxs] / coords_norm_refine,
        0.11,
        reduction="sum",
    ) / refine_num_pos_avg_per_gpu

    # |centerness prediction| raised to the predicted ratio is fed to the
    # BCE-with-logits loss against the init-stage centerness targets.
    centerness_loss = F.binary_cross_entropy_with_logits(
        torch.pow(
            torch.abs(pred_center_score[init_foreground_idxs]),
            pred_ratio[init_foreground_idxs]),
        gt_center_score,
        reduction='sum',
    ) / init_num_pos_avg_per_gpu

    return dict(cls_loss=cls_loss,
                reg_loss_init=reg_loss_init,
                reg_loss=reg_loss,
                centerness_loss=centerness_loss)
def losses(self, locations, class_logits, center_score, box_reg_init,
           box_reg, gt_instances):
    """Compute weighted classification, centerness and localisation losses.

    Targets are obtained from ``self.get_ground_truth(locations,
    gt_instances)``, which yields class labels, localisation targets and
    a set of "top-k" location indices used for the init-stage loss.

    Args:
        locations: forwarded to ``self.get_ground_truth``.
        class_logits, center_score, box_reg_init, box_reg: predictions,
            run through ``permute_and_concat_v2`` before use.
        gt_instances: forwarded to ``self.get_ground_truth``.

    Returns:
        dict with keys ``"loss_cls"``, ``"centerness_loss"``,
        ``"loss_loc_init"`` and ``"loss_loc_refine"``, each already
        multiplied by its configured weight.
    """
    gt_classes, loc_targets, topk_locations = self.get_ground_truth(
        locations, gt_instances)

    class_logits, box_reg_init, box_reg, center_score = \
        permute_and_concat_v2(
            class_logits, box_reg_init, box_reg, center_score,
            self.num_classes)
    # Shapes: (N x R) and (N x R, 4), (N x R) respectively.
    gt_classes = gt_classes.flatten()
    loc_targets = loc_targets.view(-1, 4)

    foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes)
    pos_inds = torch.nonzero(foreground_idxs).squeeze(1)

    num_gpus = get_num_gpus()
    # sync num_pos from all gpus
    total_num_pos = reduce_sum(
        pos_inds.new_tensor([pos_inds.numel()])).item()
    num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

    # One-hot classification target; non-foreground rows stay all-zero.
    gt_classes_target = torch.zeros_like(class_logits)
    gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1

    # logits loss
    cls_loss = sigmoid_focal_loss_jit(
        class_logits,
        gt_classes_target,
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    ) / num_pos_avg_per_gpu

    if pos_inds.numel() > 0:
        if self.slender_centerness:
            gt_center_score = compute_slender_centerness_targets(
                loc_targets[foreground_idxs])
        else:
            gt_center_score = compute_centerness_targets(
                loc_targets[foreground_idxs])
        # average sum_centerness_targets from all gpus,
        # which is used to normalize centerness-weighed reg loss
        sum_centerness_targets_avg_per_gpu = \
            reduce_sum(gt_center_score.sum()).item() / float(num_gpus)

        # The init-stage loss is evaluated on the top-k locations only.
        topk_locations = topk_locations.view(-1)
        topk_gt_center_score = compute_centerness_targets(
            loc_targets[topk_locations])
        sum_topk_centerness_targets_avg_per_gpu = \
            reduce_sum(topk_gt_center_score.sum()).item() / float(num_gpus)

        loss_loc_init = iou_loss(
            box_reg_init[topk_locations],
            loc_targets[topk_locations],
            topk_gt_center_score,
            loss_type=self.iou_loss_type,
        ) / sum_topk_centerness_targets_avg_per_gpu

        loss_loc_refine = iou_loss(
            box_reg[foreground_idxs],
            loc_targets[foreground_idxs],
            gt_center_score,
            loss_type=self.iou_loss_type,
        ) / sum_centerness_targets_avg_per_gpu

        centerness_loss = F.binary_cross_entropy_with_logits(
            center_score[foreground_idxs],
            gt_center_score,
            reduction='sum',
        ) / num_pos_avg_per_gpu
    else:
        # Sums over empty selections: zero-valued losses that still
        # depend on the predictions.
        loss_loc_init = box_reg_init[foreground_idxs].sum()
        loss_loc_refine = box_reg[foreground_idxs].sum()
        # The branch above performs TWO cross-GPU reductions (centerness
        # and top-k centerness). Issue two matching no-op reductions so a
        # GPU without positives stays in step with the others.
        reduce_sum(center_score[foreground_idxs].new_tensor([0.0]))
        reduce_sum(center_score[foreground_idxs].new_tensor([0.0]))
        centerness_loss = center_score[foreground_idxs].sum()

    # NOTE(review): centerness_loss is scaled by loss_cls_weight, as in
    # the original code -- confirm this is the intended weight.
    return dict(
        loss_cls=cls_loss * self.loss_cls_weight,
        centerness_loss=centerness_loss * self.loss_cls_weight,
        loss_loc_init=loss_loc_init * self.loss_loc_init_weight,
        loss_loc_refine=loss_loc_refine * self.loss_loc_refine_weight,
    )