def __call__(self, class_logits, box_regression): """ Computes the loss for Faster R-CNN. This requires that the subsample method has been called beforehand. Arguments: class_logits (list[Tensor]) box_regression (list[Tensor]) Returns: classification_loss (Tensor) box_loss (Tensor) """ loss_dict = {} if not hasattr(self, "_proposals"): raise RuntimeError("subsample needs to be called before") proposals = self._proposals labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0) assert class_logits[0] is not None or box_regression[0] is not None, 'Fast R-CNN should keep 1 branch at least' if class_logits[0] is not None: class_logits = cat(class_logits, dim=0) classification_loss = F.cross_entropy(class_logits, labels) loss_dict["loss_classifier"] = classification_loss if box_regression[0] is not None: box_regression = cat(box_regression, dim=0) device = box_regression.device regression_targets = cat([proposal.get_field("regression_targets") for proposal in proposals], dim=0) # get indices that correspond to the regression targets for # the corresponding ground truth labels, to be used with # advanced indexing sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1) labels_pos = labels[sampled_pos_inds_subset] if self.cls_agnostic_bbox_reg: map_inds = torch.tensor([4, 5, 6, 7], device=device) else: map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device) box_loss = smooth_l1_loss( box_regression[sampled_pos_inds_subset[:, None], map_inds], regression_targets[sampled_pos_inds_subset], size_average=False, beta=cfg.FAST_RCNN.SMOOTH_L1_BETA, ) box_loss = box_loss / labels.numel() loss_dict["loss_box_reg"] = box_loss return loss_dict
def __call__(self, anchors, objectness, box_regression, targets): """ Arguments: anchors (list[BoxList]) objectness (list[Tensor]) box_regression (list[Tensor]) targets (list[BoxList]) Returns: objectness_loss (Tensor) box_loss (Tensor """ anchors = [ cat_boxlist(anchors_per_image) for anchors_per_image in anchors ] labels, regression_targets = self.prepare_targets(anchors, targets) sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1) sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1) sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0) objectness, box_regression = concat_box_prediction_layers( objectness, box_regression) objectness = objectness.squeeze() labels = torch.cat(labels, dim=0) regression_targets = torch.cat(regression_targets, dim=0) box_loss = smooth_l1_loss( box_regression[sampled_pos_inds], regression_targets[sampled_pos_inds], beta=cfg.RPN.SMOOTH_L1_BETA, size_average=False, ) / (sampled_inds.numel()) objectness_loss = F.binary_cross_entropy_with_logits( objectness[sampled_inds], labels[sampled_inds]) return objectness_loss, box_loss