Ejemplo n.º 1
    def __evaluate_image_preds_no_gt(
        self, det: Tensor, idx: int, det_label_mask: Tensor, max_det: int, area_range: Tuple[int, int], nb_iou_thrs: int
    ) -> Dict[str, Any]:
        """Build the evaluation record for an image with detections but no ground truth.

        Args:
            det: Detection boxes of the image; filtered with ``det_label_mask``.
            idx: Index used to look up the scores in ``self.detection_scores``.
            det_label_mask: Mask selecting the detections of the evaluated class.
            max_det: Maximum number of detection boxes kept, highest score first.
            area_range: Lower and upper box-area bound; detections whose area
                falls outside of it are flagged as ignored.
            nb_iou_thrs: Number of IoU thresholds, i.e. the number of rows of
                the per-threshold tensors in the result.

        Returns:
            Dict with the keys ``dtMatches``, ``gtMatches``, ``dtScores``,
            ``gtIgnore`` and ``dtIgnore``. Both match tensors are all ``False``
            because there is no ground truth to match against.
        """
        # GTs: none for this image, so the ignore vector is empty.
        nb_gt = 0
        gt_ignore = torch.zeros(nb_gt, dtype=torch.bool, device=self.device)

        # Detections: keep the evaluated class only, best score first.
        det = det[det_label_mask]
        scores = self.detection_scores[idx]
        scores_filtered = scores[det_label_mask]
        scores_sorted, dtind = torch.sort(scores_filtered, descending=True)
        det = det[dtind]
        # Only the boxes are capped at max_det; scores_sorted keeps every
        # score of the class and is returned untruncated.
        if len(det) > max_det:
            det = det[:max_det]
        nb_det = len(det)

        # A detection outside the area range is ignored at every IoU
        # threshold, hence the single row is repeated nb_iou_thrs times.
        det_areas = box_area(det).to(self.device)
        det_ignore_area = (det_areas < area_range[0]) | (det_areas > area_range[1])
        ar = det_ignore_area.reshape((1, nb_det))
        det_ignore = torch.repeat_interleave(ar, nb_iou_thrs, 0)

        return {
            "dtMatches": torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool, device=self.device),
            "gtMatches": torch.zeros((nb_iou_thrs, nb_gt), dtype=torch.bool, device=self.device),
            "dtScores": scores_sorted,
            "gtIgnore": gt_ignore,
            "dtIgnore": det_ignore,
        }
Ejemplo n.º 2
    def __evaluate_image_gt_no_preds(self, gt: Tensor, gt_label_mask: Tensor,
                                     area_range: Tuple[int, int],
                                     nb_iou_thrs: int) -> Dict[str, Any]:
        """Build the evaluation record for an image with ground truth but no detections.

        Args:
            gt: Ground-truth boxes of the image; filtered with ``gt_label_mask``.
            gt_label_mask: Mask selecting the ground-truth boxes of the
                evaluated class.
            area_range: Lower and upper box-area bound; ground-truth boxes
                whose area falls outside of it are flagged as ignored.
            nb_iou_thrs: Number of IoU thresholds, i.e. the number of rows of
                the per-threshold tensors in the result.

        Returns:
            Dict with the keys ``dtMatches``, ``gtMatches``, ``dtScores``,
            ``gtIgnore`` and ``dtIgnore``. Every detection-side tensor has
            zero detections and both match tensors are all ``False``.
        """
        # GTs
        gt = gt[gt_label_mask]
        nb_gt = len(gt)
        areas = box_area(gt)
        ignore_area = (areas < area_range[0]) | (areas > area_range[1])
        # Ascending sort puts the ignored boxes last. The flags are sorted as
        # uint8 and converted back to bool afterwards.
        gt_ignore, _ = torch.sort(ignore_area.to(torch.uint8))
        gt_ignore = gt_ignore.to(torch.bool)

        # Detections: none, so every detection-side tensor has a zero-sized axis.
        nb_det = 0
        det_ignore = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool, device=self.device)

        return {
            "dtMatches": torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool, device=self.device),
            "gtMatches": torch.zeros((nb_iou_thrs, nb_gt), dtype=torch.bool, device=self.device),
            "dtScores": torch.zeros(nb_det, dtype=torch.bool, device=self.device),
            "gtIgnore": gt_ignore,
            "dtIgnore": det_ignore,
        }
Ejemplo n.º 3
 def test_box_area(self):
     """Check ``ops.box_area`` on a regular box and on a zero-size box."""
     # One 100 x 100 box (area 10000) and one box collapsed onto a single point.
     boxes = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=torch.float)
     want = torch.tensor([10000, 0])

     got = ops.box_area(boxes)

     # One area per input box, reported in the dtype of the input boxes.
     assert got.size() == torch.Size([2])
     assert got.dtype == boxes.dtype
     all_equal = torch.all(torch.eq(got, want))
     assert all_equal.item() is True
def stats_dataset(dataset: ObjectDetectionDataSet,
                  rcnn_transform: GeneralizedRCNNTransform = False):
    """
    Iterates over the dataset and returns some stats.
    Can be useful to pick the right anchor box sizes.

    Args:
        dataset: Iterable of samples. Each sample is indexed with the keys
            'x' (image) and 'y' (target); ``y['boxes']`` is handed to
            torchvision's box utilities as 'xyxy' boxes.
        rcnn_transform: Optional transform, called as
            ``rcnn_transform([x], [y])``. Skipped when falsy (the default).

    Returns:
        Dict mapping each statistic name to a tensor: one value per image for
        the ``image_*`` keys and ``boxes_num``, one value per box for
        ``boxes_height``, ``boxes_width`` and ``boxes_area``.
    """
    from torchvision.ops import box_area, box_convert

    stats = {
        'image_height': [],
        'image_width': [],
        'image_mean': [],
        'image_std': [],
        'boxes_height': [],
        'boxes_width': [],
        'boxes_num': [],
        'boxes_area': [],
    }
    for batch in dataset:
        # Batch
        x, y = batch['x'], batch['y']

        # Transform
        if rcnn_transform:
            x, y = rcnn_transform([x], [y])
            x, y = x.tensors, y[0]

        # Image
        # NOTE(review): assumes x is a floating-point image laid out as
        # (..., height, width) - confirm against the dataset.
        stats['image_height'].append(x.shape[-2])
        stats['image_width'].append(x.shape[-1])
        stats['image_mean'].append(float(x.mean()))
        stats['image_std'].append(float(x.std()))

        # Target
        # 'xywh' boxes end in (width, height), in that order, so column 0 of
        # the slice is the width and column 1 the height.
        wh = box_convert(y['boxes'], 'xyxy', 'xywh')[:, -2:]
        stats['boxes_width'].append(wh[:, 0])
        stats['boxes_height'].append(wh[:, 1])
        stats['boxes_num'].append(len(wh))
        stats['boxes_area'].append(box_area(y['boxes']))

    # Per-image scalars become 1-D float tensors; per-box tensors are
    # concatenated across all images.
    stats['image_height'] = torch.tensor(stats['image_height'],
                                         dtype=torch.float)
    stats['image_width'] = torch.tensor(stats['image_width'],
                                        dtype=torch.float)
    stats['image_mean'] = torch.tensor(stats['image_mean'], dtype=torch.float)
    stats['image_std'] = torch.tensor(stats['image_std'], dtype=torch.float)
    stats['boxes_height'] = torch.cat(stats['boxes_height'])
    stats['boxes_width'] = torch.cat(stats['boxes_width'])
    stats['boxes_area'] = torch.cat(stats['boxes_area'])
    stats['boxes_num'] = torch.tensor(stats['boxes_num'], dtype=torch.float)

    return stats
Ejemplo n.º 5
    def _encode_targets(self, cls_labels, bbox_labels, instance_mask_labels):
        """Pick one ground-truth box per point and gather its targets.

        A ground-truth box is a candidate for a point when the point lies
        strictly inside the box and the largest point-to-side distance lies
        within the point's regression range. Among the candidates the box
        with the smallest area wins. Points without any candidate receive
        class target 0.

        ``instance_mask_labels`` is accepted but not used by this method.

        Returns:
            Tuple ``(class_targets, distance_targets)`` with one entry per
            point.
        """
        point_coords = self.points.clone()
        n_points = point_coords.size(0)
        n_gts = cls_labels.size(0)

        # Tile every input to a common [num_points, num_gts, ...] layout.
        ranges = self.regress_ranges.clone().unsqueeze(1).repeat(
            1, n_gts, 1)  # [num_points, num_gts, 2]
        areas = cv_ops.box_area(bbox_labels).unsqueeze(0).repeat(
            n_points, 1)  # [num_points, num_gts]
        tiled_points = point_coords.unsqueeze(1).repeat(1, n_gts, 1)
        tiled_boxes = bbox_labels.unsqueeze(0).repeat(n_points, 1, 1)
        distances = bbox_ops.convert_bbox_to_distance(
            tiled_points, tiled_boxes)  # [num_points, num_gts, 4]

        # Condition 1: the point is inside the box (all distances positive).
        in_box = distances.min(dim=-1).values > 0  # [num_points, num_gts]

        # Condition 2: the largest distance fits the point's regression range.
        longest = distances.max(dim=-1).values  # [num_points, num_gts]
        lower_ok = longest >= ranges[..., 0]
        upper_ok = longest <= ranges[..., 1]
        in_range = lower_ok & upper_ok  # [num_points, num_gts]

        # Rule out non-candidates, then resolve the remaining ambiguity by
        # taking the box with the minimal area for each point.
        areas[~in_box] = tools.INF
        areas[~in_range] = tools.INF
        smallest_area, gt_index = areas.min(dim=1)  # [num_points]

        class_targets = cls_labels[gt_index]
        # An infinite minimum means no box qualified for that point.
        class_targets[smallest_area == tools.INF] = 0

        distance_targets = distances[range(n_points), gt_index, :]

        return class_targets, distance_targets
Ejemplo n.º 6
 def area_check(box, expected, tolerance=1e-4):
     """Assert that ``ops.box_area(box)`` matches ``expected`` within ``tolerance``."""
     actual = ops.box_area(box)
     # Shapes must agree exactly.
     assert actual.size() == expected.size()
     # The largest element-wise deviation must stay strictly below the tolerance.
     worst_error = (actual - expected).abs().max()
     assert (worst_error < tolerance).item()
Ejemplo n.º 7
 def area_check(box, expected, tolerance=1e-4):
     """Assert that ``ops.box_area(box)`` is close to ``expected``.

     The comparison is purely absolute (``rtol`` is zero, ``atol`` is
     ``tolerance``) and does not require matching dtypes.
     """
     computed = ops.box_area(box)
     torch.testing.assert_close(
         computed,
         expected,
         atol=tolerance,
         rtol=0.0,
         check_dtype=False,
     )
Ejemplo n.º 8
# Read and write images: the torchvision.io package
# Load a JPEG and save it again as a PNG.
tensor = io.read_image("../../data/image/1.jpg")
print("tensor shape:", tensor.shape)
io.write_png(tensor, "../../data/image/result.png")

# Load a PNG and save it again as a JPEG. `tensor` now refers to this second
# image, which is the one used by the transforms further down.
tensor = io.read_image("../../data/image/lena.png")
print("tensor shape:", tensor.shape)
io.write_jpeg(tensor, "../../data/image/result.jpg")

# Download the pre-trained AlexNet model: the torchvision.models package
net = models.alexnet(pretrained=True)

# Computer-vision operations: the torchvision.ops package
# Three boxes given by four coordinates each; box_area computes their areas.
boxes = torch.tensor([[1, 1, 101, 101], [3, 5, 13, 15], [2, 4, 22, 44]])
area = ops.box_area(boxes)
print(f"area: {area}")

# NOTE(review): presumably the indices of the boxes that are at least
# min_size on every side - confirm against the torchvision documentation.
index = ops.remove_small_boxes(boxes, min_size=20)
print(f"index: {index}")

# Image transforms: the torchvision.transforms package
# Resize the second image and write the result to disk.
resize = transforms.Resize(size=[256, 128])
img = resize.forward(tensor)
io.write_jpeg(img, "../../data/image/resize.jpg")

# Convert the resized image to grayscale and write it to disk.
grayscale = transforms.Grayscale()
img2 = grayscale.forward(img)
io.write_jpeg(img2, "../../data/image/gray.jpg")

# Random affine transform with degrees=35; it is only constructed here.
affine = transforms.RandomAffine(degrees=35)
Ejemplo n.º 9
    def _evaluate_image(
        self, idx: int, class_id: int, area_range: Tuple[int, int], max_det: int, ious: dict
    ) -> Optional[dict]:
        """Perform evaluation for single class and image.

                Image Id, equivalent to the index of supplied samples.
                Class Id of the supplied ground truth and detection labels.
                List of lower and upper bounding box area threshold.
                Maximum number of evaluated detection bounding boxes.
                IoU results for image and class.
        gt = self.groundtruth_boxes[idx]
        det = self.detection_boxes[idx]
        gt_label_mask = self.groundtruth_labels[idx] == class_id
        det_label_mask = self.detection_labels[idx] == class_id

        # No Gt and No predictions --> ignore image
        if len(gt_label_mask) == 0 and len(det_label_mask) == 0:
            return None

        nb_iou_thrs = len(self.iou_thresholds)

        # Some GT but no predictions
        if len(gt_label_mask) > 0 and len(det_label_mask) == 0:
            return self.__evaluate_image_gt_no_preds(gt, gt_label_mask, area_range, nb_iou_thrs)

        # Some predictions but no GT
        if len(gt_label_mask) == 0 and len(det_label_mask) >= 0:
            return self.__evaluate_image_preds_no_gt(det, idx, det_label_mask, max_det, area_range, nb_iou_thrs)

        gt = gt[gt_label_mask]
        det = det[det_label_mask]
        if gt.numel() == 0 and det.numel() == 0:
            return None

        areas = box_area(gt)
        ignore_area = (areas < area_range[0]) | (areas > area_range[1])

        # sort dt highest score first, sort gt ignore last
        ignore_area_sorted, gtind = torch.sort(ignore_area.to(torch.uint8))
        # Convert to uint8 temporarily and back to bool, because "Sort currently does not support bool dtype on CUDA"
        ignore_area_sorted = ignore_area_sorted.to(torch.bool)
        gt = gt[gtind]
        scores = self.detection_scores[idx]
        scores_filtered = scores[det_label_mask]
        scores_sorted, dtind = torch.sort(scores_filtered, descending=True)
        det = det[dtind]
        if len(det) > max_det:
            det = det[:max_det]
        # load computed ious
        ious = ious[idx, class_id][:, gtind] if len(ious[idx, class_id]) > 0 else ious[idx, class_id]

        nb_iou_thrs = len(self.iou_thresholds)
        nb_gt = len(gt)
        nb_det = len(det)
        gt_matches = torch.zeros((nb_iou_thrs, nb_gt), dtype=torch.bool)
        det_matches = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool)
        gt_ignore = ignore_area_sorted
        det_ignore = torch.zeros((nb_iou_thrs, nb_det), dtype=torch.bool)

        if torch.numel(ious) > 0:
            for idx_iou, t in enumerate(self.iou_thresholds):
                for idx_det, _ in enumerate(det):
                    m = MeanAveragePrecision._find_best_gt_match(t, gt_matches, idx_iou, gt_ignore, ious, idx_det)
                    if m == -1:
                    det_ignore[idx_iou, idx_det] = gt_ignore[m]
                    det_matches[idx_iou, idx_det] = 1
                    gt_matches[idx_iou, m] = 1

        # set unmatched detections outside of area range to ignore
        det_areas = box_area(det)
        det_ignore_area = (det_areas < area_range[0]) | (det_areas > area_range[1])
        ar = det_ignore_area.reshape((1, nb_det))
        det_ignore = torch.logical_or(
            det_ignore, torch.logical_and(det_matches == 0, torch.repeat_interleave(ar, nb_iou_thrs, 0))
        return {
            "dtMatches": det_matches,
            "gtMatches": gt_matches,
            "dtScores": scores_sorted,
            "gtIgnore": gt_ignore,
            "dtIgnore": det_ignore,