def __call__(self, matched_idxs):
        """
        Arguments:
            matched_idxs: list of tensors containing -1, 0 or positive values.
                Each tensor corresponds to a specific image.
                -1 values are ignored, 0 are considered as negatives and > 0 as
                positives.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Returns two lists of binary masks for each image.
        The first list contains the positive elements that were selected,
        and the second list contains the negative elements.
        """
        pos_idx = []
        neg_idx = []
        for matched_idxs_per_image in matched_idxs:
            positive = jt.nonzero(matched_idxs_per_image >= 1).squeeze(1)
            negative = jt.nonzero(matched_idxs_per_image == 0).squeeze(1)

            num_pos = int(self.batch_size_per_image * self.positive_fraction)
            # protect against not enough positive examples
            num_pos = min(positive.numel(), num_pos)
            num_neg = self.batch_size_per_image - num_pos
            # protect against not enough negative examples
            num_neg = min(negative.numel(), num_neg)

            # randomly select positive and negative examples
            perm1 = jt.randperm(positive.numel())[:num_pos]
            perm2 = jt.randperm(negative.numel())[:num_neg]

            pos_idx_per_image = positive[perm1]
            neg_idx_per_image = negative[perm2]

            # create binary mask from indices
            pos_idx_per_image_mask = jt.zeros_like(
                matched_idxs_per_image).bool()
            neg_idx_per_image_mask = jt.zeros_like(
                matched_idxs_per_image).bool()
            pos_idx_per_image_mask[pos_idx_per_image] = 1
            neg_idx_per_image_mask[neg_idx_per_image] = 1

            pos_idx.append(pos_idx_per_image_mask)
            neg_idx.append(neg_idx_per_image_mask)

        return pos_idx, neg_idx
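# A minimal sketch (not from the source) of the sampling recipe above for one
# image, assuming jittor is available as `jt`; the batch size and positive
# fraction below are illustrative values, not the project's defaults.
def demo_balanced_sampling():
    import jittor as jt
    matched_idxs = jt.array([-1, 0, 2, 0, 1, 0, 0, 3])  # toy per-anchor labels
    batch_size_per_image, positive_fraction = 4, 0.5
    positive = jt.nonzero(matched_idxs >= 1).squeeze(1)
    negative = jt.nonzero(matched_idxs == 0).squeeze(1)
    num_pos = min(positive.numel(), int(batch_size_per_image * positive_fraction))
    num_neg = min(negative.numel(), batch_size_per_image - num_pos)
    # subsample without replacement via a random permutation
    pos_sel = positive[jt.randperm(positive.numel())[:num_pos]]
    neg_sel = negative[jt.randperm(negative.numel())[:num_neg]]
    return pos_sel, neg_sel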
    def __call__(self, proposals, keypoint_logits):
        heatmaps = []
        valid = []
        for proposals_per_image in proposals:
            kp = proposals_per_image.get_field("keypoints")
            heatmaps_per_image, valid_per_image = project_keypoints_to_heatmap(
                kp, proposals_per_image, self.discretization_size)
            heatmaps.append(heatmaps_per_image.reshape(-1))
            valid.append(valid_per_image.reshape(-1))

        keypoint_targets = cat(heatmaps, dim=0)
        valid = cat(valid, dim=0).bool()
        valid = jt.nonzero(valid).squeeze(1)

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately
        if keypoint_targets.numel() == 0 or len(valid) == 0:
            return keypoint_logits.sum() * 0

        N, K, H, W = keypoint_logits.shape
        keypoint_logits = keypoint_logits.reshape(N * K, H * W)

        keypoint_loss = nn.cross_entropy_loss(keypoint_logits[valid],
                                              keypoint_targets[valid])
        return keypoint_loss
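# A minimal shape sketch (an assumption, not from the source): with N rois, K
# keypoints and an HxW heatmap, each target is the flattened bin index of the
# annotated keypoint, so the loss is an (H*W)-way classification per (roi, keypoint).
def demo_keypoint_loss_shapes():
    import jittor as jt
    from jittor import nn
    N, K, H, W = 2, 3, 4, 4
    keypoint_logits = jt.randn(N, K, H, W).reshape(N * K, H * W)
    keypoint_targets = jt.randint(0, H * W, shape=(N * K,))  # toy bin indices
    return nn.cross_entropy_loss(keypoint_logits, keypoint_targets)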
    def select_top_predictions(self, predictions):
        """
        Select only predictions which have a `score` > self.confidence_threshold,
        and return the predictions in descending order of score.

        Arguments:
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores`.

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        if predictions.has_field("mask_scores"):
            scores = predictions.get_field("mask_scores")
        else:
            scores = predictions.get_field("scores")

        if scores.shape[0] == 0:
            return None

        keep = jt.nonzero(scores > self.confidence_threshold).squeeze(1)
        predictions = predictions[keep]
        scores = predictions.get_field("scores")
        idx, _ = jt.argsort(scores, 0, descending=True)
        return predictions[idx]
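# Hypothetical usage (names are illustrative, not from the source): `predictions`
# would be the BoxList produced by the model for one image.
#
#   top = demo_visualizer.select_top_predictions(predictions)
#   if top is not None:
#       print(len(top), top.get_field("scores"))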
Example 4
    def __call__(self, proposals, mask_logits, targets):
        """
        Arguments:
            proposals (list[BoxList])
            mask_logits (Tensor)
            targets (list[BoxList])

        Return:
            mask_loss (Tensor): scalar tensor containing the loss
        """
        labels, mask_targets, mask_ratios = self.prepare_targets(
            proposals, targets)

        labels = cat(labels, dim=0)
        mask_targets = cat(mask_targets, dim=0)

        positive_inds = jt.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[positive_inds]

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately
        if mask_targets.numel() == 0:
            if not self.maskiou_on:
                return mask_logits.sum() * 0
            else:
                selected_index = jt.arange(mask_logits.shape[0])
                selected_mask = mask_logits[selected_index, labels]
                mask_num, mask_h, mask_w = selected_mask.shape
                selected_mask = selected_mask.reshape(mask_num, 1, mask_h,
                                                      mask_w)
                return mask_logits.sum() * 0, selected_mask, labels, None
        if self.maskiou_on:
            mask_ratios = cat(mask_ratios, dim=0)
            value_eps = 1e-10 * jt.ones((mask_targets.shape[0], ))
            mask_ratios = jt.maximum(mask_ratios, value_eps)
            pred_masks = mask_logits[positive_inds, labels_pos]
            pred_masks[:] = pred_masks > 0
            mask_targets_full_area = mask_targets.sum(
                dims=[1, 2]) / mask_ratios
            mask_ovr = pred_masks * mask_targets
            mask_ovr_area = mask_ovr.sum(dims=[1, 2])
            mask_union_area = pred_masks.sum(
                dims=[1, 2]) + mask_targets_full_area - mask_ovr_area
            value_1 = jt.ones((pred_masks.shape[0], ))
            value_0 = jt.zeros((pred_masks.shape[0], ))
            mask_union_area = jt.maximum(mask_union_area, value_1)
            mask_ovr_area = jt.maximum(mask_ovr_area, value_0)
            maskiou_targets = mask_ovr_area / mask_union_area

        binary_cross_entropy_with_logits = nn.BCEWithLogitsLoss()
        mask_loss = binary_cross_entropy_with_logits(
            mask_logits[positive_inds, labels_pos], mask_targets)
        if not self.maskiou_on:
            return mask_loss
        else:
            selected_index = jt.index((mask_logits.shape[0], ), dim=0)
            selected_mask = mask_logits[selected_index, labels]
            mask_num, mask_h, mask_w = selected_mask.shape
            selected_mask = selected_mask.reshape(mask_num, 1, mask_h, mask_w)
            selected_mask = selected_mask.sigmoid()
            return mask_loss, selected_mask, labels, maskiou_targets
Example 5
    def __call__(self, proposals, mask_logits, targets):
        """
        Arguments:
            proposals (list[BoxList])
            mask_logits (Tensor)
            targets (list[BoxList])

        Return:
            mask_loss (Tensor): scalar tensor containing the loss
        """
        labels, mask_targets = self.prepare_targets(proposals, targets)

        labels = cat(labels, dim=0)
        mask_targets = cat(mask_targets, dim=0)

        positive_inds = jt.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[positive_inds]

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately
        if mask_targets.numel() == 0:
            return mask_logits.sum() * 0
        binary_cross_entropy_with_logits = nn.BCEWithLogitsLoss()
        mask_loss = binary_cross_entropy_with_logits(
            mask_logits[positive_inds, labels_pos], mask_targets)
        return mask_loss
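# A minimal sketch (an assumption, not from the source) of the advanced indexing
# used above: with R positive rois and C classes of HxW mask logits, pairing row
# indices with per-roi labels selects one HxW map per roi for the binary loss.
def demo_mask_logit_selection():
    import jittor as jt
    R, C, H, W = 2, 5, 3, 3
    mask_logits = jt.randn(R, C, H, W)
    positive_inds = jt.arange(R)
    labels_pos = jt.array([1, 4])  # toy ground-truth classes of the positives
    selected = mask_logits[positive_inds, labels_pos]
    return selected.shape  # (R, H, W)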
    def subsample(self, proposals, targets):
        """
        This method performs the positive/negative sampling, and return
        the sampled proposals.
        Note: this function keeps a state.

        Arguments:
            proposals (list[BoxList])
            targets (list[BoxList])
        """

        labels, keypoints = self.prepare_targets(proposals, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

        proposals = list(proposals)
        # add the corresponding label and keypoint information to the proposals
        for labels_per_image, keypoints_per_image, proposals_per_image in zip(
                labels, keypoints, proposals):
            proposals_per_image.add_field("labels", labels_per_image)
            proposals_per_image.add_field("keypoints", keypoints_per_image)

        # keep, for each image, only the proposals sampled as positives by the
        # fg_bg_sampler (keypoints are supervised on positive rois only)
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
                zip(sampled_pos_inds, sampled_neg_inds)):
            img_sampled_inds = jt.nonzero(pos_inds_img).squeeze(1)
            proposals_per_image = proposals[img_idx][img_sampled_inds]
            proposals[img_idx] = proposals_per_image

        self._proposals = proposals
        return proposals
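# Hypothetical training-time flow (names are illustrative): the head subsamples
# proposals first, then the same evaluator computes the loss on the logits.
#
#   proposals = loss_evaluator.subsample(proposals, targets)
#   keypoint_loss = loss_evaluator(proposals, keypoint_logits)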
Example 7
    def prepare_targets(self, proposals, targets):
        labels = []
        masks = []
        mask_ratios = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            matched_targets = self.match_targets_to_proposals(
                proposals_per_image, targets_per_image)
            matched_idxs = matched_targets.get_field("matched_idxs")

            labels_per_image = matched_targets.get_field("labels")
            labels_per_image = labels_per_image.int32()

            # this can probably be removed, but is left here for clarity
            # and completeness
            neg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
            labels_per_image[neg_inds] = 0

            # mask scores are only computed on positive samples
            positive_inds = jt.nonzero(labels_per_image > 0).squeeze(1)

            segmentation_masks = matched_targets.get_field("masks")
            segmentation_masks = segmentation_masks[positive_inds]

            positive_proposals = proposals_per_image[positive_inds]

            masks_per_image, mask_ratios_per_image = project_masks_on_boxes(
                segmentation_masks, positive_proposals,
                self.discretization_size, self.maskiou_on)

            labels.append(labels_per_image)
            masks.append(masks_per_image)
            mask_ratios.append(mask_ratios_per_image)

        return labels, masks, mask_ratios
Example 8
    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
        """
        Produce additional matches for predictions that have only low-quality matches.
        Specifically, for each ground-truth find the set of predictions that have
        maximum overlap with it (including ties); for each prediction in that set, if
        it is unmatched, then match it to the ground-truth with which it has the highest
        quality value.
        """
        # For each gt, find the prediction with which it has highest quality
        highest_quality_foreach_gt = match_quality_matrix.max(dim=1)
        # Find highest quality match available, even if it is low, including ties
        gt_pred_pairs_of_highest_quality = jt.nonzero(
            match_quality_matrix == highest_quality_foreach_gt.unsqueeze(1)
        )
        # Example gt_pred_pairs_of_highest_quality:
        #   tensor([[    0, 39796],
        #           [    1, 32055],
        #           [    1, 32070],
        #           [    2, 39190],
        #           [    2, 40255],
        #           [    3, 40390],
        #           [    3, 41455],
        #           [    4, 45470],
        #           [    5, 45325],
        #           [    5, 46390]])
        # Each row is a (gt index, prediction index)
        # Note how gt items 1, 2, 3, and 5 each have two ties

        pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]
        matches[pred_inds_to_update] = all_matches[pred_inds_to_update]
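# A toy walk-through (an assumption, not from the source): even when every IoU of
# gt 0 is below the matching threshold, its best prediction is still recovered.
def demo_low_quality_matches():
    import jittor as jt
    match_quality_matrix = jt.array([[0.10, 0.20, 0.05],
                                     [0.00, 0.60, 0.30]])  # (num_gt, num_pred)
    highest_quality_foreach_gt = match_quality_matrix.max(dim=1)
    pairs = jt.nonzero(
        match_quality_matrix == highest_quality_foreach_gt.unsqueeze(1))
    return pairs  # rows are (gt index, prediction index), ties included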
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            # multiclass nms

            result = boxlist_ml_nms(boxlists[i], self.nms_thresh)

            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = jt.kthvalue(
                    cls_scores,
                    number_of_detections - self.fpn_post_nms_top_n + 1)
                keep = cls_scores >= image_thresh.item()
                keep = jt.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
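# A minimal sketch (an assumption, not from the source) of the kthvalue trick
# above: keeping the top_n highest scores by thresholding at the
# (count - top_n + 1)-th smallest value.
def demo_kthvalue_topn(scores, top_n):
    import jittor as jt
    image_thresh, _ = jt.kthvalue(scores, scores.numel() - top_n + 1)
    return jt.nonzero(scores >= image_thresh.item()).squeeze(1)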
Example 10
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[list[BoxList]])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = jt.nonzero(
            jt.contrib.concat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = jt.nonzero(
            jt.contrib.concat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = jt.contrib.concat([sampled_pos_inds, sampled_neg_inds],
                                         dim=0)

        objectness, box_regression = concat_box_prediction_layers(
            objectness, box_regression)

        objectness = objectness.squeeze(1)

        labels = jt.contrib.concat(labels, dim=0)
        regression_targets = jt.contrib.concat(regression_targets, dim=0)

        box_loss = _smooth_l1_loss(box_regression[sampled_pos_inds],
                                   regression_targets[sampled_pos_inds],
                                   sigma=3.) / (sampled_inds.numel())

        # bce_loss_with_logits = nn.BCEWithLogitsLoss()
        # objectness_loss = bce_loss_with_logits(
        #     objectness[sampled_inds], labels[sampled_inds]
        # )
        objectness_loss = nn.bce_loss(objectness[sampled_inds].sigmoid(),
                                      labels[sampled_inds])

        return objectness_loss, box_loss
Example 11
    def __call__(self, labels, pred_maskiou, gt_maskiou):

        positive_inds = jt.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[positive_inds]
        if labels_pos.numel() == 0:
            return pred_maskiou.sum() * 0
        gt_maskiou = gt_maskiou.detach()
        maskiou_loss = l2_loss(pred_maskiou[positive_inds, labels_pos],
                               gt_maskiou)
        maskiou_loss = self.loss_weight * maskiou_loss

        return maskiou_loss
Example 12
def l2_loss(input, target):
    """
    L2 loss computed only on entries whose target is positive; the squared
    error is averaged over the number of positive entries before summation.
    """
    pos_inds = jt.nonzero(target > 0.0).squeeze(1)
    if pos_inds.shape[0] > 0:
        cond = jt.abs(input[pos_inds] - target[pos_inds])
        loss = 0.5 * cond**2 / pos_inds.shape[0]
    else:
        loss = input * 0.0
    return loss.sum()
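# Toy check (an assumption, not from the source): only entries with target > 0
# contribute, and the squared error is averaged over those positions.
#
#   import jittor as jt
#   l2_loss(jt.array([0.2, 0.9, 0.4]), jt.array([0.0, 1.0, 0.5]))
#   # -> 0.5 * (0.1**2 + 0.1**2) / 2 = 0.005; the first entry is ignored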
    def __call__(self, class_logits, box_regression):
        """
        Computes the loss for Faster R-CNN.
        This requires that the subsample method has been called beforehand.

        Arguments:
            class_logits (list[Tensor])
            box_regression (list[Tensor])

        Returns:
            classification_loss (Tensor)
            box_loss (Tensor)
        """

        class_logits = cat(class_logits, dim=0)
        box_regression = cat(box_regression, dim=0)
        device = class_logits.device

        if not hasattr(self, "_proposals"):
            raise RuntimeError("subsample needs to be called before")

        proposals = self._proposals

        labels = cat([proposal.get_field("labels") for proposal in proposals],
                     dim=0)
        regression_targets = cat([
            proposal.get_field("regression_targets") for proposal in proposals
        ],
                                 dim=0)

        classification_loss = nn.cross_entropy_loss(class_logits, labels)

        # get indices that correspond to the regression targets for
        # the corresponding ground truth labels, to be used with
        # advanced indexing
        sampled_pos_inds_subset = jt.nonzero(labels > 0).squeeze(1)
        labels_pos = labels[sampled_pos_inds_subset]
        if self.cls_agnostic_bbox_reg:
            map_inds = jt.array([4, 5, 6, 7])
        else:
            map_inds = 4 * labels_pos[:, None] + jt.array([0, 1, 2, 3])

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds_subset[:, None], map_inds],
            regression_targets[sampled_pos_inds_subset],
            size_average=False,
            beta=1,
        )
        box_loss = box_loss / labels.numel()

        return classification_loss, box_loss
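# A minimal sketch (an assumption, not from the source) of the class-specific
# regression indexing above: box_regression stores 4 channels per class per roi,
# so 4 * label + [0, 1, 2, 3] addresses the channels of each roi's own class.
def demo_regression_channel_indexing():
    import jittor as jt
    num_rois, num_classes = 3, 5
    box_regression = jt.randn(num_rois, num_classes * 4)
    labels_pos = jt.array([2, 1, 4])
    pos_inds = jt.arange(num_rois)
    map_inds = 4 * labels_pos[:, None] + jt.array([0, 1, 2, 3])
    return box_regression[pos_inds[:, None], map_inds]  # shape (3, 4)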
    def execute(self, x, boxes):
        """
        Arguments:
            x (list[Tensor]): feature maps for each level
            boxes (list[BoxList]): boxes to be used to perform the pooling operation.
        Returns:
            result (Tensor)
        """
        num_levels = len(self.poolers)
        rois = self.convert_to_roi_format(boxes)
        if num_levels == 1:
            return self.poolers[0](x[0], rois)

        levels = self.map_levels(boxes)

        num_rois = rois.shape[0]
        num_channels = x[0].shape[1]
        output_size = self.output_size[0]

        dtype = str(x[0].dtype)
        result = jt.zeros(
            (num_rois, num_channels, output_size, output_size),
            dtype=dtype,
        )
        for level, (per_level_feature,
                    pooler) in enumerate(zip(x, self.poolers.layers.values())):
            idx_in_level = jt.nonzero(levels == level).squeeze(1)
            rois_per_level = rois[idx_in_level]
            result[idx_in_level] = pooler(per_level_feature,
                                          rois_per_level).cast(dtype)
        return result
Example 15
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        N = len(labels)
        box_cls, box_regression = \
                concat_box_prediction_layers(box_cls, box_regression)

        labels = jt.contrib.concat(labels, dim=0)
        regression_targets = jt.contrib.concat(regression_targets, dim=0)
        pos_inds = jt.nonzero(labels > 0).squeeze(1)

        retinanet_regression_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (max(1,
                 pos_inds.numel() * self.regress_norm))

        labels = labels.int()

        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls, labels) / (pos_inds.numel() + N)

        return retinanet_cls_loss, retinanet_regression_loss
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            labels = boxlists[i].get_field("labels")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            result = []
            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().view(-1)

                scores_j = scores[inds]
                boxes_j = boxes[inds, :].view(-1, 4)
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                self.nms_thresh,
                                                score_field="scores")
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field("labels",
                                            jt.full((num_labels, ), j).int32())
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = jt.kthvalue(
                    cls_scores,
                    number_of_detections - self.fpn_post_nms_top_n + 1)
                keep = cls_scores >= image_thresh
                keep = jt.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
Example 17
def run_model(config_file, img_f=None):
    original_image = load(img_f)
    from detectron.config import cfg
    from detectron.modeling.detector import build_detection_model
    from detectron.utils.checkpoint import DetectronCheckpointer
    from detectron.structures.image_list import to_image_list
    from detectron.modeling.roi_heads.mask_head.inference import Masker

    from jittor import transform as T
    from jittor import nn
    import jittor as jt
    from jittor_utils import auto_diff

    jt.flags.use_cuda = 1
    confidence_threshold = 0.0

    cfg.merge_from_file(config_file)
    model = build_detection_model(cfg)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    name = config_file.split('/')[-1].split('.')[0]
    # hook = auto_diff.Hook(name)
    # hook.hook_module(model)
    model.eval()

    class Resize(object):
        def __init__(self, min_size, max_size):
            self.min_size = min_size
            self.max_size = max_size

        # modified from torchvision to add support for max size
        def get_size(self, image_size):
            w, h = image_size
            size = self.min_size
            max_size = self.max_size
            if max_size is not None:
                min_original_size = float(min((w, h)))
                max_original_size = float(max((w, h)))
                if max_original_size / min_original_size * size > max_size:
                    size = int(
                        round(max_size * min_original_size /
                              max_original_size))

            if (w <= h and w == size) or (h <= w and h == size):
                return (h, w)

            if w < h:
                ow = size
                oh = int(size * h / w)
            else:
                oh = size
                ow = int(size * w / h)

            return (oh, ow)

        def __call__(self, image):
            size = self.get_size(image.size)
            image = T.resize(image, size)
            return image

    def build_transform():
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.ImageNormalize(mean=cfg.INPUT.PIXEL_MEAN,
                                               std=cfg.INPUT.PIXEL_STD)
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        transform = T.Compose([
            T.ToPILImage(),
            Resize(min_size, max_size),
            T.ToTensor(),
            to_bgr_transform,
            normalize_transform,
        ])
        return transform

    transforms = build_transform()
    image = transforms(original_image)
    image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
    predictions = model(image_list)

    predictions = predictions[0]
    if predictions.has_field("mask_scores"):
        scores = predictions.get_field("mask_scores")
    else:
        scores = predictions.get_field("scores")

    keep = jt.nonzero(scores > confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    scores = predictions.get_field("scores")
    idx, _ = jt.argsort(scores, 0, descending=True)
    predictions = predictions[idx]

    result_diff(predictions)
    def __call__(self, locations, box_cls, box_regression, centerness, targets):
        """
        Arguments:
            locations (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            centerness (list[Tensor])
            targets (list[BoxList])

        Returns:
            cls_loss (Tensor)
            reg_loss (Tensor)
            centerness_loss (Tensor)
        """
        N = box_cls[0].size(0)
        num_classes = box_cls[0].size(1) // self.dense_points
        labels, reg_targets = self.prepare_targets(locations, targets)

        box_cls_flatten = []
        box_regression_flatten = []
        centerness_flatten = []
        labels_flatten = []
        reg_targets_flatten = []
        for l in range(len(labels)):
            box_cls_flatten.append(box_cls[l].permute(0, 2, 3, 1).reshape(-1, num_classes))
            box_regression_flatten.append(box_regression[l].permute(0, 2, 3, 1).reshape(-1, 4))
            labels_flatten.append(labels[l].reshape(-1))
            reg_targets_flatten.append(reg_targets[l].reshape(-1, 4))
            centerness_flatten.append(centerness[l].permute(0, 2, 3, 1).reshape(-1))

        box_cls_flatten = jt.contrib.concat(box_cls_flatten, dim=0)
        box_regression_flatten = jt.contrib.concat(box_regression_flatten, dim=0)
        centerness_flatten = jt.contrib.concat(centerness_flatten, dim=0)
        labels_flatten = jt.contrib.concat(labels_flatten, dim=0)
        reg_targets_flatten = jt.contrib.concat(reg_targets_flatten, dim=0)
        pos_inds = jt.nonzero(labels_flatten > 0).squeeze(1)
        cls_loss = self.cls_loss_func(
            box_cls_flatten,
            labels_flatten.int()
        ) / (pos_inds.numel() + N)  # add N to avoid dividing by a zero

        box_regression_flatten = box_regression_flatten[pos_inds]
        reg_targets_flatten = reg_targets_flatten[pos_inds]
        centerness_flatten = centerness_flatten[pos_inds]

        if pos_inds.numel() > 0:
            centerness_targets = self.compute_centerness_targets(reg_targets_flatten)
            reg_loss = self.box_reg_loss_func(
                box_regression_flatten,
                reg_targets_flatten,
                centerness_targets,
            )
            centerness_loss = self.centerness_loss_func(
                centerness_flatten,
                centerness_targets
            )
        else:
            reg_loss = box_regression_flatten.sum()
            centerness_loss = centerness_flatten.sum()

        return cls_loss, reg_loss, centerness_loss
Example 19
    def __call__(self, locations, box_cls, box_regression, centerness,
                 proposal_embed, proposal_margin, pixel_embed, targets):
        """
        Arguments:
            locations (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            centerness (list[Tensor])
            targets (list[BoxList])

        Returns:
            cls_loss (Tensor)
            reg_loss (Tensor)
            centerness_loss (Tensor)
            mask_loss (Tensor)
        """
        num_classes = box_cls[0].size(1)
        im_h = box_cls[4].shape[2] * self.fpn_strides[4]
        im_w = box_cls[4].shape[3] * self.fpn_strides[4]
        labels_per_level, reg_targets_per_level, labels, reg_targets, matched_idxes = self.prepare_targets(
            locations, targets, im_w, im_h)

        box_cls_flatten = []
        box_regression_flatten = []
        centerness_flatten = []
        labels_flatten = []
        reg_targets_flatten = []
        for l in range(len(labels_per_level)):
            box_cls_flatten.append(box_cls[l].transpose(0, 2, 3, 1).reshape(
                -1, num_classes))
            box_regression_flatten.append(box_regression[l].transpose(
                0, 2, 3, 1).reshape(-1, 4))
            labels_flatten.append(labels_per_level[l].reshape(-1))
            reg_targets_flatten.append(reg_targets_per_level[l].reshape(-1, 4))
            centerness_flatten.append(centerness[l].reshape(-1))

        box_cls_flatten = jt.contrib.concat(box_cls_flatten, dim=0)
        box_regression_flatten = jt.contrib.concat(box_regression_flatten,
                                                   dim=0)
        centerness_flatten = jt.contrib.concat(centerness_flatten, dim=0)
        labels_flatten = jt.contrib.concat(labels_flatten, dim=0)
        reg_targets_flatten = jt.contrib.concat(reg_targets_flatten, dim=0)

        pos_inds = jt.nonzero(labels_flatten > 0).squeeze(1)

        box_regression_flatten = box_regression_flatten[pos_inds]
        reg_targets_flatten = reg_targets_flatten[pos_inds]
        centerness_flatten = centerness_flatten[pos_inds]

        num_gpus = get_num_gpus()
        # sync num_pos from all gpus
        total_num_pos = reduce_sum(pos_inds.new_tensor([pos_inds.numel()
                                                        ])).item()
        num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

        cls_loss = self.cls_loss_func(
            box_cls_flatten, labels_flatten.int()) / num_pos_avg_per_gpu

        if pos_inds.numel() > 0:
            centerness_targets = self.compute_centerness_targets(
                reg_targets_flatten)

            # average sum_centerness_targets from all gpus,
            # which is used to normalize centerness-weighed reg loss
            sum_centerness_targets_avg_per_gpu = \
                reduce_sum(centerness_targets.sum()).item() / float(num_gpus)
            reg_loss = self.box_reg_loss_func(
                box_regression_flatten, reg_targets_flatten,
                centerness_targets) / sum_centerness_targets_avg_per_gpu
            centerness_loss = self.centerness_loss_func(
                centerness_flatten, centerness_targets) / num_pos_avg_per_gpu
        else:
            reg_loss = box_regression_flatten.sum()
            reduce_sum(centerness_flatten.new_tensor([0.0]))
            centerness_loss = centerness_flatten.sum()

        #################################### Mask Related Losses ######################################
        # get positive proposal labels for each gt instance
        pos_proposal_labels_for_targets = self.get_pos_proposal_indexes(
            locations, box_regression, matched_idxes, targets)

        # get positive samples of embeddings & margins for each gt instance
        proposal_embed_for_targets, valids_for_targets = self.get_proposal_element(
            proposal_embed, pos_proposal_labels_for_targets)
        proposal_margin_for_targets, _ = self.get_proposal_element(
            proposal_margin, pos_proposal_labels_for_targets)

        ######## MEANINGLESS_LOSS #######
        mask_loss = box_cls[0].new_tensor(0.0)
        for i in range(len(proposal_embed)):
            mask_loss += 0 * proposal_embed[i].sum()
            mask_loss += 0 * proposal_margin[i].sum()
        mask_loss += 0 * pixel_embed.sum()
        ############ Mask Losses ##############
        # get target masks in prefer size
        N, _, m_h, m_w = pixel_embed.shape
        o_h = m_h * self.mask_scale_factor
        o_w = m_w * self.mask_scale_factor
        r_h = int(m_h * self.fpn_strides[0])
        r_w = int(m_w * self.fpn_strides[0])
        stride = self.fpn_strides[0] / self.mask_scale_factor
        targets_masks = [
            target_im.get_field('masks').convert('mask').instances.masks.to(
                device=pixel_embed.device) for target_im in targets
        ]
        masks_t = self.prepare_masks(o_h, o_w, r_h, r_w, targets_masks)
        pixel_embed = interpolate(input=pixel_embed,
                                  size=(o_h, o_w),
                                  mode="bilinear",
                                  align_corners=False)

        if self.loss_mask_alpha > 0:
            for im in range(N):
                valid = valids_for_targets[im]
                if valid.sum() == 0:
                    continue
                proposal_embed_im = proposal_embed_for_targets[im][valid]
                proposal_margin_im = proposal_margin_for_targets[im][valid]
                masks_t_im = masks_t[im][valid]
                boxes_t_im = targets[im].bbox[valid] / stride

                masks_prob = self.compute_mask_prob(proposal_embed_im,
                                                    proposal_margin_im,
                                                    pixel_embed[im])
                masks_prob_crop, crop_mask = crop_by_box(
                    masks_prob, boxes_t_im, self.box_padding)
                mask_loss_per_target = self.mask_loss_func(masks_prob_crop,
                                                           masks_t_im,
                                                           mask=crop_mask,
                                                           act=True)

                mask_loss += mask_loss_per_target.mean()

            mask_loss = mask_loss / N * self.loss_mask_alpha

        return cls_loss, reg_loss, centerness_loss, mask_loss
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        inds_nonzeros = [inds_all[:, j].nonzero() for j in range(1, num_classes)]
        jt.sync(inds_nonzeros)

        for j in range(1, num_classes):
            # with nvtx_scope("aa"):
            #     inds = inds_all[:,j].nonzero().squeeze(1)
                
            # with nvtx_scope("bb"):
            #     scores_j = scores[inds, j]
            #     boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            # with nvtx_scope("cc"):
            #     boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            # with nvtx_scope("cc2"):
            #     boxlist_for_class.add_field("scores", scores_j)
            # with nvtx_scope("cc3"):
            #     boxlist_for_class = boxlist_nms(
            #         boxlist_for_class, self.nms
            #     )
            # with nvtx_scope("dd"):
            #     num_labels = len(boxlist_for_class)
            # with nvtx_scope("dd2"):
            #     boxlist_for_class.add_field(
            #         "labels", jt.full((num_labels,), j).int32()
            #     )
            #     result.append(boxlist_for_class)

            # inds = inds_all[:,j].nonzero().squeeze(1)
            inds = inds_nonzeros[j - 1]
            if inds.shape[0] == 0:
                continue
            inds = inds.squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field("labels",
                                        jt.full((num_labels, ), j).int32())
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        if not result.has_field('labels'):
            result.add_field('labels', jt.empty((0,)))
        if not result.has_field('scores'):
            result.add_field('scores', jt.empty((0,)))
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = jt.kthvalue(
                cls_scores, number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh
            keep = jt.nonzero(keep).squeeze(1)
            result = result[keep]
        # # Absolute limit detection imgs
        # if number_of_detections > self.detections_per_img > 0:
        #     cls_scores = result.get_field("scores")
        #     scores, indices = jt.topk(
        #         cls_scores, self.detections_per_img
        #     )
        #     result = result[indices]
        return result
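# Hypothetical usage (names are illustrative): `boxlist` holds the decoded boxes
# and per-class scores for one image, as produced by the box head.
#
#   result = postprocessor.filter_results(boxlist, num_classes=81)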