Example #1
    def get_ground_truth(self, anchors, targets):
        """
        Args:
            anchors (list[list[Boxes]]): a list of N=#image elements. Each is a
                list of #feature level Boxes. Each Boxes contains the anchors of
                this image on the specific feature level.
            targets (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.  Specify `targets` during training only.

        Returns:
            gt_classes (Tensor):
                An integer tensor of shape (N, R) storing ground-truth
                labels for each anchor.
                R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels.
                Anchors with an IoU with some target higher than the foreground threshold
                are assigned their corresponding label in the [0, K-1] range.
                Anchors whose IoU is below the background threshold are assigned
                the label "K". Anchors whose IoU is between the foreground and background
                thresholds are assigned the label "-1", i.e. ignore.
            gt_anchors_deltas (Tensor):
                Shape (N, R, 4).
                The last dimension represents ground-truth box2box transform
                targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box.
                The values in the tensor are meaningful only when the corresponding
                anchor is labeled as foreground.
        """
        gt_classes = []
        gt_anchors_deltas = []
        anchors = [Boxes.cat(anchors_i) for anchors_i in anchors]
        # anchors is now a list[Boxes], one Boxes per image; each wraps an (R, 4) tensor

        for anchors_per_image, targets_per_image in zip(anchors, targets):
            match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes,
                                                anchors_per_image)
            gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix)

            has_gt = len(targets_per_image) > 0
            if has_gt:
                # ground truth box regression
                matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs]
                gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas(
                    anchors_per_image.tensor, matched_gt_boxes.tensor)

                gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs]
                # Anchors with label 0 are treated as background.
                gt_classes_i[anchor_labels == 0] = self.num_classes
                # Anchors with label -1 are ignored.
                gt_classes_i[anchor_labels == -1] = -1
            else:
                gt_classes_i = torch.zeros_like(
                    gt_matched_idxs) + self.num_classes
                gt_anchors_reg_deltas_i = torch.zeros_like(
                    anchors_per_image.tensor)

            gt_classes.append(gt_classes_i)
            gt_anchors_deltas.append(gt_anchors_reg_deltas_i)

        return torch.stack(gt_classes), torch.stack(gt_anchors_deltas)
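
For orientation, here is a minimal, self-contained sketch of the labeling rule the loop above implements, using plain (x1, y1, x2, y2) tensors in place of `Boxes`/`Instances`. The `pairwise_iou_xyxy` helper and the 0.4/0.5 thresholds are illustrative stand-ins for `pairwise_iou` and the configured matcher, not the actual API:

import torch

def pairwise_iou_xyxy(boxes1, boxes2):
    # boxes1: (M, 4), boxes2: (R, 4), both in (x1, y1, x2, y2) format
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])  # (M, R, 2)
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])  # (M, R, 2)
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area1[:, None] + area2[None, :] - inter)

gt_boxes = torch.tensor([[10., 10., 50., 50.]])     # one ground-truth box
gt_classes = torch.tensor([3])                      # its class label
anchors = torch.tensor([[8., 8., 48., 48.],         # overlaps the GT box
                        [100., 100., 140., 140.]])  # no overlap
num_classes = 80

iou = pairwise_iou_xyxy(gt_boxes, anchors)          # (M, R) match quality matrix
matched_vals, matched_idxs = iou.max(dim=0)         # best GT for each anchor
labels = torch.full_like(matched_idxs, -1)          # start as ignore (-1)
labels[matched_vals < 0.4] = 0                      # below bg threshold -> background
labels[matched_vals >= 0.5] = 1                     # above fg threshold -> foreground

gt_classes_i = gt_classes[matched_idxs]             # class of the matched GT box
gt_classes_i[labels == 0] = num_classes             # background gets the label K
gt_classes_i[labels == -1] = -1                     # ignored anchors
print(gt_classes_i)                                 # tensor([ 3, 80])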
Example #2
    def _get_ground_truth(self):
        """
        Returns:
            gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
                total number of anchors in image i (i.e., len(anchors[i])). Label values are
                in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
        """
        gt_objectness_logits = []
        gt_anchor_deltas = []
        # Concatenate anchors from all feature maps into a single Boxes per image
        anchors = [Boxes.cat(anchors_i) for anchors_i in self.anchors]
        for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes,
                                                       anchors, self.gt_boxes):
            """
            image_size_i: (h, w) for the i-th image
            anchors_i: anchors for i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """
            match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i,
                                                                   anchors_i)
            matched_idxs, gt_objectness_logits_i = retry_if_cuda_oom(
                self.anchor_matcher)(match_quality_matrix)
            # Matching is memory-expensive and may result in CPU tensors, but the result is small
            gt_objectness_logits_i = gt_objectness_logits_i.to(
                device=gt_boxes_i.device)
            del match_quality_matrix

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in mydl
                anchors_inside_image = anchors_i.inside_box(
                    image_size_i, self.boundary_threshold)
                gt_objectness_logits_i[~anchors_inside_image] = -1

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since all anchors are labeled as background
                gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
            else:
                # TODO wasted computation for ignored boxes
                matched_gt_boxes = gt_boxes_i[matched_idxs]
                gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                    anchors_i.tensor, matched_gt_boxes.tensor)

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)

        return gt_objectness_logits, gt_anchor_deltas
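
The boundary check above relies on `Boxes.inside_box`. As a sketch of what that check amounts to, assuming (x1, y1, x2, y2) anchors and an (h, w) image size (the standalone helper below is a stand-in for the Boxes method, not the method itself):

import torch

def inside_box(anchors, image_size, boundary_thresh=0):
    # anchors: (R, 4) in (x1, y1, x2, y2); image_size: (h, w)
    h, w = image_size
    return ((anchors[:, 0] >= -boundary_thresh)
            & (anchors[:, 1] >= -boundary_thresh)
            & (anchors[:, 2] < w + boundary_thresh)
            & (anchors[:, 3] < h + boundary_thresh))

anchors = torch.tensor([[0., 0., 30., 30.],       # fully inside the image
                        [-20., -20., 10., 10.]])  # sticks out past the top-left
labels = torch.tensor([1, 1])                     # pretend both matched as positive
labels[~inside_box(anchors, (100, 100))] = -1     # out-of-boundary anchors -> ignore
print(labels)                                     # tensor([ 1, -1])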
Example #3
def point_sample_fine_grained_features(features_list, feature_scales, boxes,
                                       point_coords):
    """
    Get features from feature maps in `features_list` that correspond to specific point coordinates
        inside each bounding box from `boxes`.

    Args:
        features_list (list[Tensor]): A list of feature map tensors to get features from.
        feature_scales (list[float]): A list of scales for tensors in `features_list`.
        boxes (list[Boxes]): A list of I Boxes objects that contain R_1 + ... + R_I = R boxes all
            together.
        point_coords (Tensor): A tensor of shape (R, P, 2) that contains
            [0, 1] x [0, 1] box-normalized coordinates of the P sampled points.

    Returns:
        point_features (Tensor): A tensor of shape (R, C, P) that contains features sampled
            from all feature maps in `features_list` for P sampled points for all R boxes in `boxes`.
        point_coords_wrt_image (Tensor): A tensor of shape (R, P, 2) that contains image-level
            coordinates of P points.
    """
    cat_boxes = Boxes.cat(boxes)
    num_boxes = [len(b) for b in boxes]

    point_coords_wrt_image = get_point_coords_wrt_image(
        cat_boxes.tensor, point_coords)
    split_point_coords_wrt_image = torch.split(point_coords_wrt_image,
                                               num_boxes)

    point_features = []
    for idx_img, point_coords_wrt_image_per_image in enumerate(
            split_point_coords_wrt_image):
        point_features_per_image = []
        for idx_feature, feature_map in enumerate(features_list):
            h, w = feature_map.shape[-2:]
            scale = torch.tensor(
                [w, h],
                device=feature_map.device) / feature_scales[idx_feature]
            point_coords_scaled = point_coords_wrt_image_per_image / scale
            point_features_per_image.append(
                point_sample(
                    feature_map[idx_img].unsqueeze(0),
                    point_coords_scaled.unsqueeze(0),
                    align_corners=False,
                ).squeeze(0).transpose(1, 0))
        point_features.append(cat(point_features_per_image, dim=1))

    return cat(point_features, dim=0), point_coords_wrt_image
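
The heavy lifting inside the loop is done by `point_sample`. Assuming it is the usual PointRend-style wrapper around `torch.nn.functional.grid_sample` (a sketch, since the helper itself is not shown here), it rescales [0, 1]-normalized point coordinates to grid_sample's [-1, 1] convention:

import torch
import torch.nn.functional as F

def point_sample(features, point_coords, **kwargs):
    # features: (N, C, H, W); point_coords: (N, P, 2) in [0, 1] x [0, 1]
    grid = 2.0 * point_coords.unsqueeze(2) - 1.0   # (N, P, 1, 2), in [-1, 1]
    out = F.grid_sample(features, grid, **kwargs)  # (N, C, P, 1)
    return out.squeeze(3)                          # (N, C, P)

feat = torch.arange(16.0).view(1, 1, 4, 4)         # a 4x4 single-channel feature map
pts = torch.tensor([[[0.5, 0.5], [0.25, 0.75]]])   # (x, y) pairs, normalized to [0, 1]
print(point_sample(feat, pts, align_corners=False))  # shape (1, 1, 2)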
Example #4
    def inference(self, pred_logits, pred_deltas, pred_masks, anchors, indexes,
                  images):
        """
        Arguments:
            pred_logits, pred_deltas, pred_masks: Same as the output of
                :meth:`TensorMaskHead.forward`
            anchors, indexes: Same as the input of :meth:`TensorMask.get_ground_truth`
            images (ImageList): the input images

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        assert len(anchors) == len(images)
        results = []

        pred_logits = [
            permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits
        ]
        pred_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_deltas]

        pred_logits = cat(pred_logits, dim=1)
        pred_deltas = cat(pred_deltas, dim=1)

        for img_idx, (anchors_im,
                      indexes_im) in enumerate(zip(anchors, indexes)):
            # Get the size of the current image
            image_size = images.image_sizes[img_idx]

            logits_im = pred_logits[img_idx]
            deltas_im = pred_deltas[img_idx]

            if self.mask_on:
                masks_im = [[mla[img_idx] for mla in ml] for ml in pred_masks]
            else:
                masks_im = [None] * self.num_levels
            results_im = self.inference_single_image(
                logits_im,
                deltas_im,
                masks_im,
                Boxes.cat(anchors_im),
                cat(indexes_im),
                tuple(image_size),
            )
            results.append(results_im)
        return results
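
`permute_to_N_HWA_K` flattens each per-level prediction so the levels can be concatenated along dim=1. A sketch of the usual detectron2-style implementation of this helper, shown here under the assumption that it performs the standard (N, A*K, H, W) to (N, H*W*A, K) reshape:

import torch

def permute_to_N_HWA_K(tensor, K):
    # Transpose/reshape a tensor from (N, A*K, H, W) to (N, H*W*A, K)
    assert tensor.dim() == 4, tensor.shape
    N, _, H, W = tensor.shape
    tensor = tensor.view(N, -1, K, H, W)       # (N, A, K, H, W)
    tensor = tensor.permute(0, 3, 4, 1, 2)     # (N, H, W, A, K)
    return tensor.reshape(N, -1, K)            # (N, H*W*A, K)

x = torch.randn(2, 3 * 80, 32, 32)             # N=2, A=3 anchors, K=80 classes
print(permute_to_N_HWA_K(x, 80).shape)         # torch.Size([2, 3072, 80])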
Example #5
    def get_ground_truth(self, anchors, unit_lengths, indexes, targets):
        """
        Args:
            anchors (list[list[Boxes]]): a list of N=#image elements. Each is a
                list of #feature level Boxes. Each Boxes contains the anchors of
                this image on the specific feature level.
            unit_lengths (list[list[Tensor]]): a list of N=#image elements. Each is a
                list of #feature level Tensor. The tensor contains unit lengths for anchors of
                this image on the specific feature level.
            indexes (list[list[Tensor]]): a list of N=#image elements. Each is a
                list of #feature level Tensor. The tensor contains the 5D index of
                each anchor; the second dimension means (L, N, H, W, A), where L
                is level, N is image, H is height, W is width, and A is anchor.
            targets (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.  Specify `targets` during training only.

        Returns:
            gt_class_info (Tensor, Tensor): A pair of tensors for classification.
                The first one is a tensor of shape (R, #classes) storing one-hot
                ground-truth labels for each anchor. R is the total number of anchors in the batch.
                The second one is a boolean tensor of shape (R,) indicating which
                anchors are valid for loss computation and which are not.
            gt_delta_info (Tensor, Tensor): A pair of tensors for boxes.
                The first one has shape (F, 4), where F is the number of foreground anchors.
                The last dimension represents ground-truth box2box transform
                targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box.
                Only foreground anchors have values in this tensor. Can be `None` if F=0.
                The second one is a boolean tensor of shape (R,) indicating which anchors
                are the foreground ones used for box regression. Can be `None` if F=0.
            gt_mask_info (list[list[Tensor]], list[list[Tensor]]): A pair of lists for masks.
                The first one is a list of P=#feature level elements. Each is a
                list of A=#anchor tensors. Each tensor contains the ground-truth
                masks of the same size and for the same feature level. Entries can be `None`.
                The second one is a list of P=#feature level elements. Each is a
                list of A=#anchor tensors. Each tensor contains the locations of the ground-truth
                masks of the same size and for the same feature level. The second dimension means
                (N, H, W), where N is image, H is height, and W is width. Entries can be `None`.
            num_fg (int): F=#foreground anchors, used later for loss normalization.
        """
        gt_classes = []
        gt_deltas = []
        gt_masks = [[[] for _ in range(self.num_anchors)]
                    for _ in range(self.num_levels)]
        gt_mask_inds = [[[] for _ in range(self.num_anchors)]
                        for _ in range(self.num_levels)]

        anchors = [Boxes.cat(anchors_i) for anchors_i in anchors]
        unit_lengths = [cat(unit_lengths_i) for unit_lengths_i in unit_lengths]
        indexes = [cat(indexes_i) for indexes_i in indexes]

        num_fg = 0
        for i, (anchors_im, unit_lengths_im, indexes_im,
                targets_im) in enumerate(
                    zip(anchors, unit_lengths, indexes, targets)):
            # Initialize all
            gt_classes_i = torch.full_like(unit_lengths_im,
                                           self.num_classes,
                                           dtype=torch.int64,
                                           device=self.device)
            # Ground truth classes
            has_gt = len(targets_im) > 0
            if has_gt:
                # Compute the pairwise matrix
                gt_matched_inds, anchor_labels = _assignment_rule(
                    targets_im.gt_boxes, anchors_im, unit_lengths_im,
                    self.min_anchor_size)
                # Find the foreground instances
                fg_inds = anchor_labels == 1
                fg_anchors = anchors_im[fg_inds]
                num_fg += len(fg_anchors)
                # Find the ground truths for foreground instances
                gt_fg_matched_inds = gt_matched_inds[fg_inds]
                # Assign labels for foreground instances
                gt_classes_i[fg_inds] = targets_im.gt_classes[
                    gt_fg_matched_inds]
                # Anchors with label -1 are ignored, others are left as negative
                gt_classes_i[anchor_labels == -1] = -1

                # Boxes
                # Ground truth box regression, only for foregrounds
                matched_gt_boxes = targets_im[gt_fg_matched_inds].gt_boxes
                # Compute box regression offsets for foregrounds only
                gt_deltas_i = self.box2box_transform.get_deltas(
                    fg_anchors.tensor, matched_gt_boxes.tensor)
                gt_deltas.append(gt_deltas_i)

                # Masks
                if self.mask_on:
                    # Compute masks for each level and each anchor
                    matched_indexes = indexes_im[fg_inds, :]
                    for lvl in range(self.num_levels):
                        ids_lvl = matched_indexes[:, 0] == lvl
                        if torch.any(ids_lvl):
                            cur_level_factor = 2**lvl if self.bipyramid_on else 1
                            for anc in range(self.num_anchors):
                                ids_lvl_anchor = ids_lvl & (
                                    matched_indexes[:, 4] == anc)
                                if torch.any(ids_lvl_anchor):
                                    gt_masks[lvl][anc].append(
                                        targets_im[
                                            gt_fg_matched_inds[ids_lvl_anchor]]
                                        .gt_masks.crop_and_resize(
                                            fg_anchors[ids_lvl_anchor].tensor,
                                            self.mask_sizes[anc] *
                                            cur_level_factor,
                                        ))
                                    # Select (N, H, W) dimensions
                                    gt_mask_inds_lvl_anc = matched_indexes[
                                        ids_lvl_anchor, 1:4]
                                    # Set the image index to the current image
                                    gt_mask_inds_lvl_anc[:, 0] = i
                                    gt_mask_inds[lvl][anc].append(
                                        gt_mask_inds_lvl_anc)
            gt_classes.append(gt_classes_i)

        # Classes and boxes
        gt_classes = cat(gt_classes)
        gt_valid_inds = gt_classes >= 0
        gt_fg_inds = gt_valid_inds & (gt_classes < self.num_classes)
        gt_classes_target = torch.zeros(
            (gt_classes.shape[0], self.num_classes),
            dtype=torch.float32,
            device=self.device)
        gt_classes_target[gt_fg_inds, gt_classes[gt_fg_inds]] = 1
        gt_deltas = cat(gt_deltas) if gt_deltas else None

        # Masks
        gt_masks = [[cat(mla) if mla else None for mla in ml]
                    for ml in gt_masks]
        gt_mask_inds = [[cat(ila) if ila else None for ila in il]
                        for il in gt_mask_inds]
        return (
            (gt_classes_target, gt_valid_inds),
            (gt_deltas, gt_fg_inds),
            (gt_masks, gt_mask_inds),
            num_fg,
        )
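
To make the classification-target construction at the end concrete, here is a minimal sketch of the same one-hot encoding with background and ignore handling, run on toy labels (`num_classes = 5` is illustrative):

import torch

num_classes = 5
gt_classes = torch.tensor([2, 5, -1, 0])   # fg, background (=num_classes), ignore, fg

gt_valid_inds = gt_classes >= 0                          # drop ignored anchors
gt_fg_inds = gt_valid_inds & (gt_classes < num_classes)  # foreground anchors only
target = torch.zeros(len(gt_classes), num_classes)
target[gt_fg_inds, gt_classes[gt_fg_inds]] = 1           # one-hot rows for foregrounds
print(target)          # rows 0 and 3 are one-hot; rows 1 and 2 stay all-zero
print(gt_valid_inds)   # tensor([ True,  True, False,  True])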