Example #1
    def loss_boxes_var_nll(self, outputs, targets, indices, num_boxes):
        """Compute the losses related to the bounding boxes, the nll probabilistic regression loss and the GIoU loss
           targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
           The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        if 'pred_boxes_cov' not in outputs:
            return self.loss_boxes(outputs, targets, indices, num_boxes)

        assert 'pred_boxes' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs['pred_boxes'][idx]
        src_vars = clamp_log_variance(outputs['pred_boxes_cov'][idx])

        target_boxes = torch.cat(
            [t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)
        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')

        if src_vars.shape[1] == 4:
            loss_nll = 0.5 * torch.exp(-src_vars) * loss_bbox + 0.5 * src_vars
        else:
            forecaster_cholesky = covariance_output_to_cholesky(src_vars)
            if forecaster_cholesky.shape[0] != 0:
                multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
                    src_boxes, scale_tril=forecaster_cholesky)
                loss_nll = - \
                    multivariate_normal_dists.log_prob(target_boxes)
            else:
                loss_nll = loss_bbox

        loss_nll_final = loss_nll.sum() / num_boxes

        # Collect all losses
        losses = dict()
        losses['loss_bbox'] = loss_nll_final
        # Add iou loss
        losses = update_with_iou_loss(losses, src_boxes, target_boxes,
                                      num_boxes)

        return losses
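
The helper functions used above, clamp_log_variance and covariance_output_to_cholesky, are not included in this example. Below is a minimal sketch of what they plausibly do; the clamp bounds and the 10-parameter lower-triangular layout are assumptions made for illustration, not code taken from the repository.

import torch


def clamp_log_variance(pred_log_var, clamp_min=-7.0, clamp_max=7.0):
    # Bound predicted log-variances so exp() stays finite during training.
    # The bounds are an assumed choice.
    return torch.clamp(pred_log_var, min=clamp_min, max=clamp_max)


def covariance_output_to_cholesky(pred_bbox_cov):
    # Build a lower-triangular Cholesky factor from the network output.
    # Assumes the first 4 values are per-coordinate log-variances; a full
    # covariance output additionally carries 6 off-diagonal terms (10 total).
    diag_elems = torch.sqrt(torch.exp(pred_bbox_cov[:, :4]))
    cholesky = torch.diag_embed(diag_elems)
    if pred_bbox_cov.shape[1] > 4:
        tril_indices = torch.tril_indices(row=4, col=4, offset=-1)
        cholesky[:, tril_indices[0], tril_indices[1]] = pred_bbox_cov[:, 4:]
    return cholesky
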
Example #2
    def retinanet_probabilistic_inference(
            self,
            input_im,
            outputs=None,
            ensemble_inference=False,
            outputs_list=None):
        """
        General RetinaNet probabilistic anchor-wise inference. Preliminary inference step for many post-processing
        based inference methods such as standard_nms, output_statistics, and bayes_od.
        Args:
            input_im (list): an input im list generated from dataset handler.
            outputs (list): outputs from model.forward. Will be computed internally if not provided.
            ensemble_inference (bool): True if ensembles are used for inference. If set to true, outputs_list must be externally provided.
            outputs_list (list): List of model() outputs, usually generated from ensembles of models.
        Returns:
            all_predicted_boxes (Tensor): Nx4 predicted box means.
            all_predicted_boxes_covariance (Tensor): Nx4x4 predicted box covariance matrices.
            all_predicted_prob (Tensor): Nx1 scores which represent max of all_pred_prob_vectors. For usage in NMS and mAP computation.
            all_classes_idxs (Tensor): Nx1 Class ids to be used for NMS.
            all_predicted_prob_vectors (Tensor): NxK tensor where K is the number of classes.
        """
        is_epistemic = ((self.mc_dropout_enabled and self.num_mc_dropout_runs > 1)
                        or ensemble_inference) and outputs is None
        if is_epistemic:
            if self.mc_dropout_enabled and self.num_mc_dropout_runs > 1:
                outputs_list = self.model(
                    input_im,
                    return_anchorwise_output=True,
                    num_mc_dropout_runs=self.num_mc_dropout_runs)
                n_fms = len(self.model.in_features)
                outputs_list = [
                    {key: (value[i * n_fms:(i + 1) * n_fms] if value is not None else value)
                     for key, value in outputs_list.items()}
                    for i in range(self.num_mc_dropout_runs)]

            outputs = {'anchors': outputs_list[0]['anchors']}

            # Compute box classification and classification variance means
            box_cls = [output['box_cls'] for output in outputs_list]

            box_cls_mean = box_cls[0]
            for i in range(len(box_cls) - 1):
                box_cls_mean = [box_cls_mean[j] + box_cls[i + 1][j]
                                for j in range(len(box_cls_mean))]
            box_cls_mean = [
                box_cls_f_map /
                len(box_cls) for box_cls_f_map in box_cls_mean]
            outputs.update({'box_cls': box_cls_mean})

            if outputs_list[0]['box_cls_var'] is not None:
                box_cls_var = [output['box_cls_var']
                               for output in outputs_list]
                box_cls_var_mean = box_cls_var[0]
                for i in range(len(box_cls_var) - 1):
                    box_cls_var_mean = [
                        box_cls_var_mean[j] + box_cls_var[i + 1][j]
                        for j in range(len(box_cls_var_mean))]
                box_cls_var_mean = [
                    box_cls_var_f_map /
                    len(box_cls_var) for box_cls_var_f_map in box_cls_var_mean]
            else:
                box_cls_var_mean = None
            outputs.update({'box_cls_var': box_cls_var_mean})

            # Compute box regression epistemic variance and mean, and aleatoric
            # variance mean
            box_delta_list = [output['box_delta']
                              for output in outputs_list]
            box_delta_mean = box_delta_list[0]
            for i in range(len(box_delta_list) - 1):
                box_delta_mean = [
                    box_delta_mean[j] + box_delta_list[i + 1][j]
                    for j in range(len(box_delta_mean))]
            box_delta_mean = [
                box_delta_f_map /
                len(box_delta_list) for box_delta_f_map in box_delta_mean]
            outputs.update({'box_delta': box_delta_mean})

            if outputs_list[0]['box_reg_var'] is not None:
                box_reg_var = [output['box_reg_var']
                               for output in outputs_list]
                box_reg_var_mean = box_reg_var[0]
                for i in range(len(box_reg_var) - 1):
                    box_reg_var_mean = [
                        box_reg_var_mean[j] + box_reg_var[i + 1][j]
                        for j in range(len(box_reg_var_mean))]
                box_reg_var_mean = [
                    box_reg_var_f_map / len(box_reg_var)
                    for box_reg_var_f_map in box_reg_var_mean]
            else:
                box_reg_var_mean = None
            outputs.update({'box_reg_var': box_reg_var_mean})

        elif outputs is None:
            outputs = self.model(input_im, return_anchorwise_output=True)

        all_anchors = []
        all_predicted_deltas = []
        all_predicted_boxes_cholesky = []
        all_predicted_prob = []
        all_classes_idxs = []
        all_predicted_prob_vectors = []
        all_predicted_boxes_epistemic_covar = []

        for i, anchors in enumerate(outputs['anchors']):
            box_cls = outputs['box_cls'][i][0]
            box_delta = outputs['box_delta'][i][0]

            # If classification aleatoric uncertainty available, perform
            # monte-carlo sampling to generate logits.
            if outputs['box_cls_var'] is not None:
                box_cls_var = outputs['box_cls_var'][i][0]
                box_cls_dists = torch.distributions.normal.Normal(
                    box_cls, scale=torch.sqrt(torch.exp(box_cls_var)))
                box_cls = box_cls_dists.rsample(
                    (self.model.cls_var_num_samples,))
                box_cls = torch.mean(box_cls.sigmoid_(), 0)
            else:
                box_cls = box_cls.sigmoid_()

            # Keep top k top scoring indices only.
            num_topk = min(self.model.test_topk_candidates, box_delta.size(0))
            predicted_prob, classes_idxs = torch.max(box_cls, 1)
            predicted_prob, topk_idxs = predicted_prob.topk(num_topk)
            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.model.test_score_thresh
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]
            anchor_idxs = topk_idxs
            classes_idxs = classes_idxs[topk_idxs]

            box_delta = box_delta[anchor_idxs]
            anchors = anchors[anchor_idxs]

            cholesky_decomp = None

            if outputs['box_reg_var'] is not None:
                box_reg_var = outputs['box_reg_var'][i][0][anchor_idxs]
                box_reg_var = clamp_log_variance(box_reg_var)

                # Construct cholesky decomposition using diagonal vars
                cholesky_decomp = covariance_output_to_cholesky(box_reg_var)

            # In case dropout is enabled, we need to compute aleatoric
            # covariance matrix and add it here:
            box_reg_epistemic_covar = None
            if is_epistemic:
                # Compute epistemic box covariance matrix
                box_delta_list_i = [
                    self.model.box2box_transform.apply_deltas(
                        box_delta_i[i][0][anchor_idxs],
                        anchors.tensor) for box_delta_i in box_delta_list]

                _, box_reg_epistemic_covar = inference_utils.compute_mean_covariance_torch(
                    box_delta_list_i)

            all_predicted_deltas.append(box_delta)
            all_predicted_boxes_cholesky.append(cholesky_decomp)
            all_anchors.append(anchors.tensor)
            all_predicted_prob.append(predicted_prob)
            all_predicted_prob_vectors.append(box_cls[anchor_idxs])
            all_classes_idxs.append(classes_idxs)
            all_predicted_boxes_epistemic_covar.append(box_reg_epistemic_covar)

        box_delta = cat(all_predicted_deltas)
        anchors = cat(all_anchors)

        if isinstance(all_predicted_boxes_cholesky[0], torch.Tensor):
            # Generate multivariate samples to be used for Monte-Carlo
            # simulation. The per-box matrices are small (4x4), so a fairly
            # large sample count (1000 below) is affordable while keeping
            # runtime and memory footprint reasonable.
            cholesky_decomp = cat(all_predicted_boxes_cholesky)

            multivariate_normal_samples = torch.distributions.MultivariateNormal(
                box_delta, scale_tril=cholesky_decomp)

            # Define monte-carlo samples
            distributions_samples = multivariate_normal_samples.rsample(
                (1000,))
            distributions_samples = torch.transpose(
                torch.transpose(distributions_samples, 0, 1), 1, 2)
            samples_anchors = torch.repeat_interleave(
                anchors.unsqueeze(2), 1000, dim=2)

            # Transform samples from deltas to boxes
            t_dist_samples = self.sample_box2box_transform.apply_samples_deltas(
                distributions_samples, samples_anchors)

            # Compute samples mean and covariance matrices.
            all_predicted_boxes, all_predicted_boxes_covariance = inference_utils.compute_mean_covariance_torch(
                t_dist_samples)
            if isinstance(
                    all_predicted_boxes_epistemic_covar[0],
                    torch.Tensor):
                epistemic_covar_mats = cat(
                    all_predicted_boxes_epistemic_covar)
                all_predicted_boxes_covariance += epistemic_covar_mats
        else:
            # This handles the case where no aleatoric uncertainty is available
            if is_epistemic:
                all_predicted_boxes_covariance = cat(
                    all_predicted_boxes_epistemic_covar)
            else:
                all_predicted_boxes_covariance = []

            # predict boxes
            all_predicted_boxes = self.model.box2box_transform.apply_deltas(
                box_delta, anchors)

        return all_predicted_boxes, all_predicted_boxes_covariance, cat(
            all_predicted_prob), cat(all_classes_idxs), cat(all_predicted_prob_vectors)
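
inference_utils.compute_mean_covariance_torch is used above but not shown in this listing. A minimal sketch under the assumption that the samples arrive as an N x 4 x S tensor (or as a list of N x 4 tensors that is first stacked along the sample dimension):

import torch


def compute_mean_covariance_torch(samples):
    # samples: N x 4 x S Monte-Carlo samples, or a list of N x 4 tensors.
    if isinstance(samples, (list, tuple)):
        samples = torch.stack(samples, dim=2)
    mean = samples.mean(dim=2)                      # N x 4
    centered = samples - mean.unsqueeze(2)          # N x 4 x S
    covariance = torch.matmul(
        centered, centered.transpose(1, 2)) / (samples.shape[2] - 1)
    return mean, covariance                         # N x 4, N x 4 x 4
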
    def generalized_rcnn_probabilistic_inference(self,
                                                 input_im,
                                                 outputs=None,
                                                 ensemble_inference=False,
                                                 outputs_list=None):
        """
        General probabilistic proposal-wise inference for Generalized R-CNN. Preliminary inference step for many post-processing
        based inference methods such as standard_nms, output_statistics, and bayes_od.
        Args:
            input_im (list): an input im list generated from dataset handler.
            outputs (list): outputs from model.forward(). Will be computed internally if not provided.
            ensemble_inference (bool): True if ensembles are used for inference. If set to true, outputs_list must be externally provided.
            outputs_list (list): List of model() outputs, usually generated from ensembles of models.
        Returns:
            all_predicted_boxes (Tensor): Nx4 predicted box means.
            all_predicted_boxes_covariance (Tensor): Nx4x4 predicted box covariance matrices.
            all_predicted_prob (Tensor): Nx1 scores which represent max of all_pred_prob_vectors. For usage in NMS and mAP computation.
            all_classes_idxs (Tensor): Nx1 Class ids to be used for NMS.
            all_predicted_prob_vectors (Tensor): NxK tensor where K is the number of classes.
        """
        is_epistemic = (
            (self.mc_dropout_enabled and self.num_mc_dropout_runs > 1)
            or ensemble_inference) and outputs is None
        if is_epistemic:
            if self.mc_dropout_enabled and self.num_mc_dropout_runs > 1:
                outputs_list = self.model(
                    input_im,
                    return_anchorwise_output=True,
                    num_mc_dropout_runs=self.num_mc_dropout_runs)

            proposals_list = [outputs['proposals'] for outputs in outputs_list]
            box_delta_list = [outputs['box_delta'] for outputs in outputs_list]
            box_cls_list = [outputs['box_cls'] for outputs in outputs_list]
            box_reg_var_list = [
                outputs['box_reg_var'] for outputs in outputs_list
            ]
            box_cls_var_list = [
                outputs['box_cls_var'] for outputs in outputs_list
            ]
            outputs = dict()

            proposals_all = torch.cat(
                [p.proposal_boxes.tensor for p in proposals_list], dim=0)
            proposals_list[0].proposal_boxes.tensor = proposals_all
            outputs['proposals'] = proposals_list[0]

            box_delta = torch.cat(box_delta_list, 0)
            box_cls = torch.cat(box_cls_list, 0)
            outputs['box_delta'] = box_delta
            outputs['box_cls'] = box_cls

            if box_reg_var_list[0] is not None:
                box_reg_var = torch.cat(box_reg_var_list, 0)
            else:
                box_reg_var = None
            outputs['box_reg_var'] = box_reg_var

            if box_cls_var_list[0] is not None:
                box_cls_var = torch.cat(box_cls_var_list, 0)
            else:
                box_cls_var = None
            outputs['box_cls_var'] = box_cls_var

        elif outputs is None:
            outputs = self.model(input_im, return_anchorwise_output=True)

        proposals = outputs['proposals']
        box_cls = outputs['box_cls']
        box_delta = outputs['box_delta']

        if self.model.cls_var_loss == 'evidential':
            box_dir_alphas = inference_utils.get_dir_alphas(box_cls)
            box_cls = box_dir_alphas / box_dir_alphas.sum(1, keepdim=True)
        else:
            if outputs['box_cls_var'] is not None:
                box_cls_var = outputs['box_cls_var']
                box_cls_dists = torch.distributions.normal.Normal(
                    box_cls, scale=torch.sqrt(torch.exp(box_cls_var)))
                box_cls = box_cls_dists.rsample(
                    (self.model.cls_var_num_samples, ))
                box_cls = torch.nn.functional.softmax(box_cls, dim=-1)
                box_cls = box_cls.mean(0)
            else:
                box_cls = torch.nn.functional.softmax(box_cls, dim=-1)

        # Remove background category
        scores = box_cls[:, :-1]

        num_bbox_reg_classes = box_delta.shape[1] // 4
        box_delta = box_delta.reshape(-1, 4)
        box_delta = box_delta.view(-1, num_bbox_reg_classes, 4)
        filter_mask = scores > self.test_score_thres

        filter_inds = filter_mask.nonzero(as_tuple=False)
        if num_bbox_reg_classes == 1:
            box_delta = box_delta[filter_inds[:, 0], 0]
        else:
            box_delta = box_delta[filter_mask]

        scores = scores[filter_mask]
        proposal_boxes = proposals.proposal_boxes.tensor[filter_inds[:, 0]]

        if outputs['box_reg_var'] is not None:
            box_reg_var = outputs['box_reg_var']
            box_reg_var = box_reg_var.reshape(-1, self.model.bbox_cov_dims)
            box_reg_var = box_reg_var.view(-1, num_bbox_reg_classes,
                                           self.model.bbox_cov_dims)

            if num_bbox_reg_classes == 1:
                box_reg_var = box_reg_var[filter_inds[:, 0], 0]
            else:
                box_reg_var = box_reg_var[filter_mask]

            # Reconstruct cholesky decomposition of box covariance
            # matrix
            diag_vars = clamp_log_variance(box_reg_var)
            cholesky_decomp = covariance_output_to_cholesky(diag_vars)

            # Generate multivariate samples to be used for Monte-Carlo
            # simulation. The per-box matrices are small (4x4), so a fairly
            # large sample count (1000 below) is affordable while keeping
            # runtime and memory footprint reasonable.
            multivariate_normal_samples = torch.distributions.MultivariateNormal(
                box_delta, scale_tril=cholesky_decomp)

            # Define monte-carlo samples
            distributions_samples = multivariate_normal_samples.rsample(
                (1000, ))
            distributions_samples = torch.transpose(
                torch.transpose(distributions_samples, 0, 1), 1, 2)
            samples_proposals = torch.repeat_interleave(
                proposal_boxes.unsqueeze(2), 1000, dim=2)

            # Transform samples from deltas to boxes
            t_dist_samples = self.sample_box2box_transform.apply_samples_deltas(
                distributions_samples, samples_proposals)

            # Compute samples mean and covariance matrices.
            boxes, boxes_covars = inference_utils.compute_mean_covariance_torch(
                t_dist_samples)
        else:
            # predict boxes
            boxes = self.model.roi_heads.box_predictor.box2box_transform.apply_deltas(
                box_delta, proposal_boxes)
            boxes_covars = []

        return boxes, boxes_covars, scores, filter_inds[:, 1], box_cls[
            filter_inds[:, 0]]
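
Both inference functions above follow the same pattern for regression uncertainty: sample deltas from the predicted Gaussian, decode every sample into box space, and take the sample mean and covariance. A self-contained sketch of that pattern is shown below; the delta decoding is reduced to a simple additive offset purely for illustration, whereas the code above uses sample_box2box_transform.apply_samples_deltas.

import torch


def mc_propagate_box_uncertainty(box_delta, cholesky_decomp, anchors, num_samples=1000):
    # box_delta: N x 4 delta means, cholesky_decomp: N x 4 x 4 scale_tril,
    # anchors: N x 4 reference boxes (anchors or proposals).
    dist = torch.distributions.MultivariateNormal(
        box_delta, scale_tril=cholesky_decomp)
    samples = dist.rsample((num_samples,))          # S x N x 4
    samples = samples.permute(1, 2, 0)              # N x 4 x S
    # Stand-in decoding: the real code applies the box2box transform per sample.
    decoded = samples + anchors.unsqueeze(2)
    mean = decoded.mean(dim=2)
    centered = decoded - mean.unsqueeze(2)
    covariance = torch.matmul(
        centered, centered.transpose(1, 2)) / (num_samples - 1)
    return mean, covariance
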
Example #4
    def losses(
            self,
            anchors,
            gt_classes,
            gt_boxes,
            pred_class_logits,
            pred_anchor_deltas,
            pred_class_logits_var=None,
            pred_bbox_cov=None):
        """
        Args:
            For `gt_classes` and `gt_anchors_deltas` parameters, see
                :meth:`RetinaNet.get_ground_truth`.
            Their shapes are (N, R) and (N, R, 4), respectively, where R is
            the total number of anchors across levels, i.e. sum(Hi x Wi x A)
            For `pred_class_logits`, `pred_anchor_deltas`, `pred_class_logits_var` and `pred_bbox_cov`, see
                :meth:`RetinaNetHead.forward`.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a scalar tensor
                storing the loss. Used during training only. The dict keys are:
                "loss_cls" and "loss_box_reg"
        """
        num_images = len(gt_classes)
        gt_labels = torch.stack(gt_classes)  # (N, R)
        anchors = type(anchors[0]).cat(anchors).tensor  # (R, 4)
        gt_anchor_deltas = [
            self.box2box_transform.get_deltas(
                anchors, k) for k in gt_boxes]
        gt_anchor_deltas = torch.stack(gt_anchor_deltas)  # (N, R, 4)

        valid_mask = gt_labels >= 0
        pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes)
        num_pos_anchors = pos_mask.sum().item()
        get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images)
        self.loss_normalizer = self.loss_normalizer_momentum * self.loss_normalizer + \
            (1 - self.loss_normalizer_momentum) * max(num_pos_anchors, 1)

        # classification and regression loss

        # Shapes:
        # (N x R, K) for class_logits and class_logits_var.
        # (N x R, 4), (N x R x 10) for pred_anchor_deltas and pred_class_bbox_cov respectively.

        # Transform per-feature layer lists to a single tensor
        pred_class_logits = cat(pred_class_logits, dim=1)
        pred_anchor_deltas = cat(pred_anchor_deltas, dim=1)

        if pred_class_logits_var is not None:
            pred_class_logits_var = cat(
                pred_class_logits_var, dim=1)

        if pred_bbox_cov is not None:
            pred_bbox_cov = cat(
                pred_bbox_cov, dim=1)

        # No loss for the last (background) class.
        gt_classes_target = torch.nn.functional.one_hot(
            gt_labels[valid_mask], num_classes=self.num_classes + 1
        )[:, :-1].to(pred_class_logits[0].dtype)

        # Classification losses
        if self.compute_cls_var:
            # Compute classification variance according to:
            # "What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?", NIPS 2017
            if self.cls_var_loss == 'loss_attenuation':
                num_samples = self.cls_var_num_samples
                # Compute standard deviation
                pred_class_logits_var = torch.sqrt(torch.exp(
                    pred_class_logits_var[valid_mask]))

                pred_class_logits = pred_class_logits[valid_mask]

                # Produce normal samples using logits as the mean and the standard deviation computed above
                # Scales with GPU memory. 12 GB ---> 3 Samples per anchor for
                # COCO dataset.
                univariate_normal_dists = distributions.normal.Normal(
                    pred_class_logits, scale=pred_class_logits_var)

                pred_class_stochastic_logits = univariate_normal_dists.rsample(
                    (num_samples,))
                pred_class_stochastic_logits = pred_class_stochastic_logits.view(
                    (pred_class_stochastic_logits.shape[1] * num_samples, pred_class_stochastic_logits.shape[2], -1))
                pred_class_stochastic_logits = pred_class_stochastic_logits.squeeze(
                    2)

                # Produce copies of the target classes to match the number of
                # stochastic samples.
                gt_classes_target = torch.unsqueeze(gt_classes_target, 0)
                gt_classes_target = torch.repeat_interleave(
                    gt_classes_target, num_samples, dim=0).view(
                    (gt_classes_target.shape[1] * num_samples, gt_classes_target.shape[2], -1))
                gt_classes_target = gt_classes_target.squeeze(2)

                # Produce copies of the target classes to form the stochastic
                # focal loss.
                loss_cls = sigmoid_focal_loss_jit(
                    pred_class_stochastic_logits,
                    gt_classes_target,
                    alpha=self.focal_loss_alpha,
                    gamma=self.focal_loss_gamma,
                    reduction="sum",
                ) / (num_samples * max(1, self.loss_normalizer))
            else:
                raise ValueError(
                    'Invalid classification loss name {}.'.format(
                        self.cls_var_loss))
        else:
            # Standard loss computation in case one wants to use this code
            # without any probabilistic inference.
            loss_cls = sigmoid_focal_loss_jit(
                pred_class_logits[valid_mask],
                gt_classes_target,
                alpha=self.focal_loss_alpha,
                gamma=self.focal_loss_gamma,
                reduction="sum",
            ) / max(1, self.loss_normalizer)

        # Compute Regression Loss
        pred_anchor_deltas = pred_anchor_deltas[pos_mask]
        gt_anchors_deltas = gt_anchor_deltas[pos_mask]
        if self.compute_bbox_cov:
            # We have to clamp the output variance else probabilistic metrics
            # go to infinity.
            pred_bbox_cov = clamp_log_variance(pred_bbox_cov[pos_mask])
            if self.bbox_cov_loss == 'negative_log_likelihood':
                if self.bbox_cov_type == 'diagonal':
                    # Compute regression variance according to:
                    # "What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?", NIPS 2017
                    # This implementation with smooth_l1_loss outperforms using
                    # torch.distribution.multivariate_normal. Losses might have different numerical values
                    # since we do not include constants in this implementation.
                    loss_box_reg = 0.5 * torch.exp(-pred_bbox_cov) * smooth_l1_loss(
                        pred_anchor_deltas,
                        gt_anchors_deltas,
                        beta=self.smooth_l1_beta)
                    loss_covariance_regularize = 0.5 * pred_bbox_cov
                    loss_box_reg += loss_covariance_regularize

                    # Sum over all elements
                    loss_box_reg = torch.sum(
                        loss_box_reg) / max(1, self.loss_normalizer)
                else:
                    # Multivariate negative log likelihood. Implemented with
                    # pytorch multivariate_normal.log_prob function. Custom implementations fail to finish training
                    # due to NAN loss.

                    # This is the Cholesky decomposition of the covariance matrix. We reconstruct it from 10 estimated
                    # parameters as a lower triangular matrix.
                    forecaster_cholesky = covariance_output_to_cholesky(
                        pred_bbox_cov)

                    # Compute multivariate normal distribution using torch
                    # distribution functions.
                    multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
                        pred_anchor_deltas, scale_tril=forecaster_cholesky)

                    loss_box_reg = - \
                        multivariate_normal_dists.log_prob(gt_anchors_deltas)
                    loss_box_reg = torch.sum(
                        loss_box_reg) / max(1, self.loss_normalizer)

            elif self.bbox_cov_loss == 'second_moment_matching':
                # Compute regression covariance using second moment matching.
                loss_box_reg = smooth_l1_loss(
                    pred_anchor_deltas,
                    gt_anchors_deltas,
                    beta=self.smooth_l1_beta)

                # Compute errors
                errors = (pred_anchor_deltas - gt_anchors_deltas)

                if self.bbox_cov_type == 'diagonal':
                    # Compute second moment matching term.
                    second_moment_matching_term = smooth_l1_loss(
                        torch.exp(pred_bbox_cov), errors ** 2, beta=self.smooth_l1_beta)
                    loss_box_reg += second_moment_matching_term
                    loss_box_reg = torch.sum(
                        loss_box_reg) / max(1, self.loss_normalizer)
                else:
                    # Compute second moment matching term.
                    errors = torch.unsqueeze(errors, 2)
                    gt_error_covar = torch.matmul(
                        errors, torch.transpose(errors, 2, 1))

                    # This is the cholesky decomposition of the covariance matrix. We reconstruct it from 10 estimated
                    # parameters as a lower triangular matrix.
                    forecaster_cholesky = covariance_output_to_cholesky(
                        pred_bbox_cov)

                    predicted_covar = torch.matmul(
                        forecaster_cholesky, torch.transpose(
                            forecaster_cholesky, 2, 1))

                    second_moment_matching_term = smooth_l1_loss(
                        predicted_covar, gt_error_covar, beta=self.smooth_l1_beta, reduction='sum')

                    loss_box_reg = (torch.sum(
                        loss_box_reg) + second_moment_matching_term) / max(1, self.loss_normalizer)

            elif self.bbox_cov_loss == 'energy_loss':
                # Compute regression variance according to energy score loss.
                forecaster_means = pred_anchor_deltas

                # Compute forecaster cholesky. Takes care of diagonal case
                # automatically.
                forecaster_cholesky = covariance_output_to_cholesky(
                    pred_bbox_cov)

                # Define normal distribution samples. To compute energy score,
                # we need i+1 samples.

                # Define per-anchor Distributions
                multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
                    forecaster_means, scale_tril=forecaster_cholesky)

                # Define Monte-Carlo Samples
                distributions_samples = multivariate_normal_dists.rsample(
                    (self.bbox_cov_num_samples + 1,))

                distributions_samples_1 = distributions_samples[
                    0:self.bbox_cov_num_samples, :, :]
                distributions_samples_2 = distributions_samples[
                    1:self.bbox_cov_num_samples + 1, :, :]

                # Compute energy score
                gt_anchors_deltas_samples = torch.repeat_interleave(
                    gt_anchors_deltas.unsqueeze(0), self.bbox_cov_num_samples, dim=0)

                energy_score_first_term = 2.0 * smooth_l1_loss(
                    distributions_samples_1,
                    gt_anchors_deltas_samples,
                    beta=self.smooth_l1_beta,
                    reduction="sum") / self.bbox_cov_num_samples  # First term

                energy_score_second_term = - smooth_l1_loss(
                    distributions_samples_1,
                    distributions_samples_2,
                    beta=self.smooth_l1_beta,
                    reduction="sum") / self.bbox_cov_num_samples   # Second term

                # Final Loss
                loss_box_reg = (
                    energy_score_first_term + energy_score_second_term) / max(1, self.loss_normalizer)

            else:
                raise ValueError(
                    'Invalid regression loss name {}.'.format(
                        self.bbox_cov_loss))

            # Perform loss annealing. Essential for reliably training variance estimates using NLL in RetinaNet.
            # For energy score and second moment matching, this is optional.
            standard_regression_loss = smooth_l1_loss(
                pred_anchor_deltas,
                gt_anchors_deltas,
                beta=self.smooth_l1_beta,
                reduction="sum",
            ) / max(1, self.loss_normalizer)

            probabilistic_loss_weight = get_probabilistic_loss_weight(
                self.current_step, self.annealing_step)
            loss_box_reg = (1.0 - probabilistic_loss_weight) * \
                standard_regression_loss + probabilistic_loss_weight * loss_box_reg
        else:
            # Standard regression loss in case no variance is needed to be
            # estimated.
            loss_box_reg = smooth_l1_loss(
                pred_anchor_deltas,
                gt_anchors_deltas,
                beta=self.smooth_l1_beta,
                reduction="sum",
            ) / max(1, self.loss_normalizer)

        return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}
Example #5
    def loss_boxes_energy(
            self,
            outputs,
            targets,
            indices,
            num_boxes):
        """Compute the losses related to the bounding boxes, the energy distance loss and the GIoU loss
           targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
           The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        if 'pred_boxes_cov' not in outputs:
            return self.loss_boxes(outputs, targets, indices, num_boxes)

        assert 'pred_boxes' in outputs

        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs['pred_boxes'][idx]
        target_boxes = torch.cat([t['boxes'][i]
                                  for t, (_, i) in zip(targets, indices)], dim=0)

        # Begin probabilistic loss computation
        src_vars = clamp_log_variance(outputs['pred_boxes_cov'][idx])
        forecaster_cholesky = covariance_output_to_cholesky(
            src_vars)
        multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
            src_boxes, scale_tril=forecaster_cholesky)

        # Define Monte-Carlo Samples
        distributions_samples = multivariate_normal_dists.rsample(
            (self.bbox_cov_num_samples + 1,))

        distributions_samples_1 = distributions_samples[
            0:self.bbox_cov_num_samples, :, :]
        distributions_samples_2 = distributions_samples[
            1:self.bbox_cov_num_samples + 1, :, :]

        # Compute energy score. Plain L1 distance is used here (rather than
        # smooth L1) to maintain the proper scoring properties.
        loss_covariance_regularize = - F.l1_loss(
            distributions_samples_1,
            distributions_samples_2,
            reduction="sum") / self.bbox_cov_num_samples  # Second term

        gt_proposals_delta_samples = torch.repeat_interleave(
            target_boxes.unsqueeze(0), self.bbox_cov_num_samples, dim=0)

        loss_first_moment_match = 2 * F.l1_loss(
            distributions_samples_1,
            gt_proposals_delta_samples,
            reduction="sum") / self.bbox_cov_num_samples  # First term

        loss_energy = loss_first_moment_match + loss_covariance_regularize

        # Normalize and add losses
        loss_energy_final = loss_energy.sum() / num_boxes

        # Collect all losses
        losses = dict()
        losses['loss_bbox'] = loss_energy_final
        # Add iou loss
        losses = update_with_iou_loss(
            losses, src_boxes, target_boxes, num_boxes)

        return losses

    def losses(self, predictions, proposals, current_step=0):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features
                that were used to compute predictions.
            current_step: current optimizer step. Used for losses with an annealing component.
        """
        global device

        pred_class_logits, pred_proposal_deltas, pred_class_logits_var, pred_proposal_covs = predictions

        if len(proposals):
            box_type = type(proposals[0].proposal_boxes)
            # cat(..., dim=0) concatenates over all images in the batch
            proposals_boxes = box_type.cat(
                [p.proposal_boxes for p in proposals])
            assert (
                not proposals_boxes.tensor.requires_grad), "Proposals should not require gradients!"

            # The following fields should exist only when training.
            if proposals[0].has("gt_boxes"):
                gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
                assert proposals[0].has("gt_classes")
                gt_classes = cat([p.gt_classes for p in proposals], dim=0)
        else:
            proposals_boxes = Boxes(
                torch.zeros(
                    0, 4, device=pred_proposal_deltas.device))

        no_instances = len(proposals) == 0  # no instances found

        # Compute Classification Loss
        if no_instances:
            # TODO 0.0 * pred.sum() is enough since PT1.6
            loss_cls = 0.0 * F.cross_entropy(
                pred_class_logits,
                torch.zeros(
                    0,
                    dtype=torch.long,
                    device=pred_class_logits.device),
                reduction="sum",)
        else:
            if self.compute_cls_var:
                # Compute classification variance according to:
                # "What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?", NIPS 2017
                if self.cls_var_loss == 'loss_attenuation':
                    num_samples = self.cls_var_num_samples

                    # Compute standard deviation
                    pred_class_logits_var = torch.sqrt(
                        torch.exp(pred_class_logits_var))

                    # Produce normal samples using logits as the mean and the standard deviation computed above
                    # Scales with GPU memory. 12 GB ---> 3 Samples per anchor for
                    # COCO dataset.
                    univariate_normal_dists = distributions.normal.Normal(
                        pred_class_logits, scale=pred_class_logits_var)

                    pred_class_stochastic_logits = univariate_normal_dists.rsample(
                        (num_samples,))
                    pred_class_stochastic_logits = pred_class_stochastic_logits.view(
                        (pred_class_stochastic_logits.shape[1] * num_samples, pred_class_stochastic_logits.shape[2], -1))
                    pred_class_logits = pred_class_stochastic_logits.squeeze(
                        2)

                    # Produce copies of the target classes to match the number of
                    # stochastic samples.
                    gt_classes_target = torch.unsqueeze(gt_classes, 0)
                    gt_classes_target = torch.repeat_interleave(
                        gt_classes_target, num_samples, dim=0).view(
                        (gt_classes_target.shape[1] * num_samples, -1))
                    gt_classes_target = gt_classes_target.squeeze(1)

                    loss_cls = F.cross_entropy(
                        pred_class_logits, gt_classes_target, reduction="mean")

            elif self.cls_var_loss == 'evidential':
                # ToDo: Currently does not provide any reasonable mAP Results
                # (15% mAP)

                # Assume dirichlet parameters are output.
                alphas = get_dir_alphas(pred_class_logits)

                # Get sum of all alphas
                dirichlet_s = alphas.sum(1).unsqueeze(1)

                # Generate one hot vectors for ground truth
                one_hot_vectors = torch.nn.functional.one_hot(
                    gt_classes, alphas.shape[1])

                # Compute loss. This loss attempts to put all evidence on the
                # correct location.
                per_instance_loss = (
                    one_hot_vectors * (torch.digamma(dirichlet_s) - torch.digamma(alphas)))

                # Compute KL divergence regularizer loss
                estimated_dirichlet = torch.distributions.dirichlet.Dirichlet(
                    (alphas - 1.0) * (1.0 - one_hot_vectors) + 1.0)
                uniform_dirichlet = torch.distributions.dirichlet.Dirichlet(
                    torch.ones_like(one_hot_vectors).type(torch.FloatTensor).to(device))
                kl_regularization_loss = torch.distributions.kl.kl_divergence(
                    estimated_dirichlet, uniform_dirichlet)

                # Compute final loss
                annealing_multiplier = torch.min(
                    torch.as_tensor(
                        current_step /
                        self.annealing_step).to(device),
                    torch.as_tensor(1.0).to(device))

                per_proposal_loss = per_instance_loss.sum(
                    1) + annealing_multiplier * kl_regularization_loss

                # Compute evidence auxiliary loss
                evidence_maximization_loss = smooth_l1_loss(
                    dirichlet_s,
                    100.0 *
                    torch.ones_like(dirichlet_s).to(device),
                    beta=self.smooth_l1_beta,
                    reduction='mean')

                evidence_maximization_loss *= annealing_multiplier

                # Compute final loss
                foreground_loss = per_proposal_loss[(gt_classes >= 0) & (
                    gt_classes < pred_class_logits.shape[1] - 1)]
                background_loss = per_proposal_loss[gt_classes ==
                                                    pred_class_logits.shape[1] - 1]

                loss_cls = (torch.mean(foreground_loss) + torch.mean(background_loss)
                            ) / 2 + 0.01 * evidence_maximization_loss
            else:
                loss_cls = F.cross_entropy(
                    pred_class_logits, gt_classes, reduction="mean")

        # Compute regression loss:
        if no_instances:
            # TODO 0.0 * pred.sum() is enough since PT1.6
            loss_box_reg = 0.0 * smooth_l1_loss(
                pred_proposal_deltas,
                torch.zeros_like(pred_proposal_deltas),
                0.0,
                reduction="sum",
            )
        else:
            gt_proposal_deltas = self.box2box_transform.get_deltas(
                proposals_boxes.tensor, gt_boxes.tensor
            )
            box_dim = gt_proposal_deltas.size(1)  # 4 or 5
            cls_agnostic_bbox_reg = pred_proposal_deltas.size(1) == box_dim
            device = pred_proposal_deltas.device

            bg_class_ind = pred_class_logits.shape[1] - 1

            # Box delta loss is only computed between the prediction for the gt class k
            # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions
            # for non-gt classes and background.
            # Empty fg_inds produces a valid loss of zero as long as the size_average
            # arg to smooth_l1_loss is False (otherwise it uses torch.mean internally
            # and would produce a nan loss).
            fg_inds = torch.nonzero(
                (gt_classes >= 0) & (gt_classes < bg_class_ind), as_tuple=True
            )[0]
            if cls_agnostic_bbox_reg:
                # pred_proposal_deltas only corresponds to foreground class for
                # agnostic
                gt_class_cols = torch.arange(box_dim, device=device)
            else:
                fg_gt_classes = gt_classes[fg_inds]
                # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
                # where b is the dimension of box representation (4 or 5)
                # Note that compared to Detectron1,
                # we do not perform bounding box regression for background
                # classes.
                gt_class_cols = box_dim * \
                    fg_gt_classes[:, None] + torch.arange(box_dim, device=device)
                gt_covar_class_cols = self.bbox_cov_dims * \
                    fg_gt_classes[:, None] + torch.arange(self.bbox_cov_dims, device=device)

            loss_reg_normalizer = gt_classes.numel()

            pred_proposal_deltas = pred_proposal_deltas[fg_inds[:,
                                                                None], gt_class_cols]
            gt_proposals_delta = gt_proposal_deltas[fg_inds]

            if self.compute_bbox_cov:
                pred_proposal_covs = pred_proposal_covs[fg_inds[:,
                                                                None], gt_covar_class_cols]
                pred_proposal_covs = clamp_log_variance(pred_proposal_covs)

                if self.bbox_cov_loss == 'negative_log_likelihood':
                    if self.bbox_cov_type == 'diagonal':
                        # Get foreground proposals.
                        _proposals_boxes = proposals_boxes.tensor[fg_inds]

                        # Compute regression negative log likelihood loss according to:
                        # "What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?", NIPS 2017
                        loss_box_reg = 0.5 * torch.exp(-pred_proposal_covs) * smooth_l1_loss(
                            pred_proposal_deltas, gt_proposals_delta, beta=self.smooth_l1_beta)
                        loss_covariance_regularize = 0.5 * pred_proposal_covs
                        loss_box_reg += loss_covariance_regularize

                        loss_box_reg = torch.sum(
                            loss_box_reg) / loss_reg_normalizer
                    else:
                        # Multivariate Gaussian Negative Log Likelihood loss using pytorch
                        # distributions.multivariate_normal.log_prob()
                        forecaster_cholesky = covariance_output_to_cholesky(
                            pred_proposal_covs)

                        multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
                            pred_proposal_deltas, scale_tril=forecaster_cholesky)

                        loss_box_reg = - \
                            multivariate_normal_dists.log_prob(gt_proposals_delta)
                        loss_box_reg = torch.sum(
                            loss_box_reg) / loss_reg_normalizer

                elif self.bbox_cov_loss == 'second_moment_matching':
                    # Compute regression covariance using second moment
                    # matching.
                    loss_box_reg = smooth_l1_loss(pred_proposal_deltas,
                                                  gt_proposals_delta,
                                                  self.smooth_l1_beta)
                    errors = (pred_proposal_deltas - gt_proposals_delta)
                    if self.bbox_cov_type == 'diagonal':
                        # Handle diagonal case
                        second_moment_matching_term = smooth_l1_loss(
                            torch.exp(pred_proposal_covs), errors ** 2, beta=self.smooth_l1_beta)
                        loss_box_reg += second_moment_matching_term
                        loss_box_reg = torch.sum(
                            loss_box_reg) / loss_reg_normalizer
                    else:
                        # Handle full covariance case
                        errors = torch.unsqueeze(errors, 2)
                        gt_error_covar = torch.matmul(
                            errors, torch.transpose(errors, 2, 1))

                        # This is the cholesky decomposition of the covariance matrix.
                        # We reconstruct it from 10 estimated parameters as a
                        # lower triangular matrix.
                        forecaster_cholesky = covariance_output_to_cholesky(
                            pred_proposal_covs)

                        predicted_covar = torch.matmul(
                            forecaster_cholesky, torch.transpose(
                                forecaster_cholesky, 2, 1))

                        second_moment_matching_term = smooth_l1_loss(
                            predicted_covar, gt_error_covar, beta=self.smooth_l1_beta, reduction='sum')
                        loss_box_reg = (
                            torch.sum(loss_box_reg) + second_moment_matching_term) / loss_reg_normalizer

                elif self.bbox_cov_loss == 'energy_loss':
                    forecaster_cholesky = covariance_output_to_cholesky(
                        pred_proposal_covs)

                    # Define per-anchor Distributions
                    multivariate_normal_dists = distributions.multivariate_normal.MultivariateNormal(
                        pred_proposal_deltas, scale_tril=forecaster_cholesky)
                    # Define Monte-Carlo Samples
                    distributions_samples = multivariate_normal_dists.rsample(
                        (self.bbox_cov_num_samples + 1,))

                    distributions_samples_1 = distributions_samples[
                        0:self.bbox_cov_num_samples, :, :]
                    distributions_samples_2 = distributions_samples[
                        1:self.bbox_cov_num_samples + 1, :, :]

                    # Compute energy score
                    loss_covariance_regularize = - smooth_l1_loss(
                        distributions_samples_1,
                        distributions_samples_2,
                        beta=self.smooth_l1_beta,
                        reduction="sum") / self.bbox_cov_num_samples   # Second term

                    gt_proposals_delta_samples = torch.repeat_interleave(
                        gt_proposals_delta.unsqueeze(0), self.bbox_cov_num_samples, dim=0)

                    loss_first_moment_match = 2.0 * smooth_l1_loss(
                        distributions_samples_1,
                        gt_proposals_delta_samples,
                        beta=self.smooth_l1_beta,
                        reduction="sum") / self.bbox_cov_num_samples  # First term

                    # Final Loss
                    loss_box_reg = (
                        loss_first_moment_match + loss_covariance_regularize) / loss_reg_normalizer
                else:
                    raise ValueError(
                        'Invalid regression loss name {}.'.format(
                            self.bbox_cov_loss))

                # Perform loss annealing. Not really essential in Generalized-RCNN case, but good practice for more
                # elaborate regression variance losses.
                standard_regression_loss = smooth_l1_loss(pred_proposal_deltas,
                                                          gt_proposals_delta,
                                                          self.smooth_l1_beta,
                                                          reduction="sum",)
                standard_regression_loss = standard_regression_loss / loss_reg_normalizer

                probabilistic_loss_weight = get_probabilistic_loss_weight(
                    current_step, self.annealing_step)

                loss_box_reg = (1.0 - probabilistic_loss_weight) * \
                    standard_regression_loss + probabilistic_loss_weight * loss_box_reg
            else:
                loss_box_reg = smooth_l1_loss(pred_proposal_deltas,
                                              gt_proposals_delta,
                                              self.smooth_l1_beta,
                                              reduction="sum",)
                loss_box_reg = loss_box_reg / loss_reg_normalizer

        return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}