Example #1
    def _simple_roialign_with_grad(self, img, box, resolution, device):
        if isinstance(resolution, int):
            resolution = (resolution, resolution)

        op = ROIAlign(resolution, 1.0, 0, aligned=True)
        input = torch.from_numpy(img[None, None, :, :].astype("float32"))

        rois = [0] + list(box)
        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
        input = input.to(device=device)
        rois = rois.to(device=device)
        input.requires_grad = True
        output = op.forward(input, rois)
        return input, output
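
A minimal standalone sketch of how such a helper is driven, assuming detectron2 is installed (the image and box values below are illustrative only):

import numpy as np
import torch
from detectron2.layers import ROIAlign

img = np.arange(100, dtype="float32").reshape(10, 10)  # toy 10x10 image
box = [2.0, 2.0, 6.0, 6.0]                             # xyxy box
op = ROIAlign((4, 4), 1.0, 0, aligned=True)

input = torch.from_numpy(img[None, None, :, :])
input.requires_grad = True
rois = torch.tensor([[0.0] + box])                     # prepend the batch index
output = op(input, rois)
output.sum().backward()                                # gradients flow back to the image
print(input.grad.shape)                                # torch.Size([1, 1, 10, 10])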
Example #2
    def crop_and_resize(self, boxes: torch.Tensor,
                        mask_size: int) -> torch.Tensor:
        """
        Crop each bitmask by the given box, and resize results to (mask_size, mask_size).
        This can be used to prepare training targets for Mask R-CNN.
        It has less reconstruction error than rasterizing polygons; we observe
        no difference in accuracy, but BitMasks requires more memory to store
        all the masks.

        Args:
            boxes (Tensor): Nx4 tensor storing the boxes for each mask
            mask_size (int): the size of the rasterized mask.

        Returns:
            Tensor:
                A bool tensor of shape (N, mask_size, mask_size), where
                N is the number of predicted boxes for this image.
        """
        assert len(boxes) == len(self), "{} != {}".format(
            len(boxes), len(self))
        device = self.tensor.device

        batch_inds = torch.arange(len(boxes),
                                  device=device).to(dtype=boxes.dtype)[:, None]
        rois = torch.cat([batch_inds, boxes], dim=1)  # Nx5

        bit_masks = self.tensor.to(dtype=torch.float32)
        rois = rois.to(device=device)
        output = (ROIAlign((mask_size, mask_size), 1.0, 0,
                           aligned=True).forward(bit_masks[:, None, :, :],
                                                 rois).squeeze(1))
        output = output >= 0.5
        return output
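
For context, a hedged usage sketch: in detectron2 this method lives on structures.BitMasks, so given bitmasks and matching boxes it produces per-box mask targets (the shapes below are illustrative):

import torch
from detectron2.structures import BitMasks

masks = BitMasks(torch.rand(3, 32, 32) > 0.5)         # 3 random 32x32 bitmasks
boxes = torch.tensor([[0.0, 0.0, 16.0, 16.0],
                      [8.0, 8.0, 24.0, 24.0],
                      [4.0, 4.0, 28.0, 28.0]])
targets = masks.crop_and_resize(boxes, mask_size=14)  # bool tensor of shape (3, 14, 14)
print(targets.shape, targets.dtype)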
Example #3
    def _simple_roialign(self, img, box, resolution, aligned=True):
        """
        RoiAlign with scale 1.0 and 0 sampling ratio.
        """
        if isinstance(resolution, int):
            resolution = (resolution, resolution)
        op = ROIAlign(resolution, 1.0, 0, aligned=aligned)
        input = torch.from_numpy(img[None, None, :, :].astype("float32"))

        rois = [0] + list(box)
        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
        output = op.forward(input, rois)
        if torch.cuda.is_available():
            output_cuda = op.forward(input.cuda(), rois.cuda()).cpu()
            self.assertTrue(torch.allclose(output, output_cuda))
        return output[0, 0]
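
The aligned flag is the point of this helper: with aligned=True the box is shifted by half a pixel so sampling points land on pixel centers. A quick illustrative comparison of the two conventions:

import numpy as np
import torch
from detectron2.layers import ROIAlign

img = np.arange(25, dtype="float32").reshape(5, 5)
inp = torch.from_numpy(img[None, None, :, :])
rois = torch.tensor([[0.0, 1.0, 1.0, 3.0, 3.0]])
out_aligned = ROIAlign((2, 2), 1.0, 0, aligned=True)(inp, rois)
out_legacy = ROIAlign((2, 2), 1.0, 0, aligned=False)(inp, rois)
print(out_aligned - out_legacy)  # nonzero: the legacy version is offset by half a pixel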
Example #4
    def test_roi_align_rotated_gradient_cuda(self):
        """
        Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU,
        and compare the result with ROIAlign
        """
        # torch.manual_seed(123)
        dtype = torch.float64
        device = torch.device("cuda")
        pool_h, pool_w = (5, 5)

        roi_align = ROIAlign(output_size=(pool_h, pool_w),
                             spatial_scale=1,
                             sampling_ratio=2).to(device=device)

        roi_align_rotated = ROIAlignRotated(output_size=(pool_h, pool_w),
                                            spatial_scale=1,
                                            sampling_ratio=2).to(device=device)

        x = torch.rand(1,
                       1,
                       10,
                       10,
                       dtype=dtype,
                       device=device,
                       requires_grad=True)
        # x_rotated = x.clone() won't work: it stays in x's autograd graph
        # (grad_fn=CloneBackward), so make a detached leaf copy instead.
        x_rotated = x.detach().clone().requires_grad_(True)

        # roi_rotated format is (batch index, x_center, y_center, width, height, angle)
        rois_rotated = torch.tensor(
            [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]],
            dtype=dtype,
            device=device,
        )

        y_rotated = roi_align_rotated(x_rotated, rois_rotated)
        s_rotated = y_rotated.sum()
        s_rotated.backward()

        # roi format is (batch index, x1, y1, x2, y2)
        rois = torch.tensor(
            [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]],
            dtype=dtype,
            device=device)

        y = roi_align(x, rois)
        s = y.sum()
        s.backward()

        assert torch.allclose(
            x.grad, x_rotated.grad
        ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA"
Example #5
def crop_resize_by_d2_roialign(
    img,
    center,
    scale,
    output_size,
    aligned=True,
    interpolation="bilinear",
    in_format="HWC",
    out_format="HWC",
    dtype="float32",
):
    """
    img: HWC
    output_size: int or (w, h)
    """
    import torch
    from detectron2.layers.roi_align import ROIAlign
    from torchvision.ops import RoIPool

    if isinstance(output_size, int):
        output_size = (output_size, output_size)
    # NOTE: differs from the cv2 convention of (w, h)!
    output_size = (output_size[1], output_size[0])  # to (h, w)
    if interpolation == "bilinear":
        op = ROIAlign(output_size, 1.0, 0, aligned=aligned)
    elif interpolation == "nearest":
        op = RoIPool(output_size, 1.0)
    else:
        raise ValueError(f"Wrong interpolation type: {interpolation}")

    assert in_format in ["HW", "HWC", "CHW"]
    if in_format == "HW":
        img = img[None]
    elif in_format == "HWC":
        img = img.transpose(2, 0, 1)  # CHW

    img_tensor = torch.tensor(img[None].astype("float32"))
    cx, cy = center
    if isinstance(scale, (int, float)):
        scale = (scale, scale)
    bw, bh = scale
    rois = torch.tensor(
        np.array([0, cx - bw / 2, cy - bh / 2, cx + bw / 2, cy + bh / 2],
                 dtype="float32")[None])
    result = op(img_tensor, rois)[0].numpy().astype(dtype)
    if out_format == "HWC":
        result = result.transpose(1, 2, 0)
    return result
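
A hedged usage sketch with a synthetic image (values illustrative only):

import numpy as np

img = np.random.rand(480, 640, 3).astype("float32")  # HWC image
patch = crop_resize_by_d2_roialign(
    img, center=(320, 240), scale=128, output_size=(64, 64))
print(patch.shape)  # (64, 64, 3): a 128x128 region around the center, resized, HWC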
Example #6
    def test_empty_batch(self):
        input = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
        rois = torch.zeros(0, 5, dtype=torch.float32)
        op = ROIAlign((7, 7), 1.0, 0, aligned=True)
        output = op.forward(input, rois)
        self.assertTrue(output.shape == (0, 3, 7, 7))
Example #7

import time

import numpy as np
import torch
from detectron2.layers import ROIAlign

ITERATIONS = 10  # assumed value; the snippet references a module-level constant not shown


def measure_roialign_perf(input_shape, roi_shape, output_size, spatial_scale,
                          sampling_ratio=0, aligned=True):
    """
    Args:
        input: NCHW images
        rois: Bx5 boxes. First column is the index into N. The other 4 columns
            are xyxy.
        output_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of inputs samples to take for each output
            sample. 0 to take samples densely.
        aligned (bool): if False, use the legacy implementation in Detectron.
            If True, align the results more perfectly.
    """
    assert roi_shape[1] == 5, "ERROR: ROI shape expected to be of form (m,5)"
    
    # Preparing Inputs
    n = input_shape[0]
    b = roi_shape[0]
    inputbatch = torch.randn(input_shape, dtype=torch.float, requires_grad=True)
    # creating ROI tensor - shape (b,5)
    # RoI tensor [:, 1:] contains coordinates of bounding boxes - xyxy.
    # (100,1200) range chosen based on COCO max image size.
    bboxes = torch.FloatTensor(roi_shape[0], 4).uniform_(100,1200)
    # First column of RoI tensor maps bounding box to image in batch.
    # Based on my observations, the boxes are ordered by image index in batch,
    # ie all boxes corresponding to first image first, then for the second
    # image, third image and so on.
    boxToNMapping = torch.tensor(
        np.expand_dims(np.array([i * n // b for i in range(b)]), axis=1),
        dtype=torch.float)
    roi = torch.cat((boxToNMapping, bboxes), dim=1)
    roi.requires_grad = True
    #print(inputbatch.shape, roi.shape)

    # Defining Op
    roi_align = ROIAlign(output_size, spatial_scale, sampling_ratio, aligned)
    
    roi_align.cuda()    
    inputbatch = inputbatch.cuda()
    roi = roi.cuda()

    # Forward Pass
    # warmup - 2 iters
    roi_align.forward(inputbatch, roi)
    roi_align.forward(inputbatch, roi)
    
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(ITERATIONS):
        #output = roi_align.forward(inputbatch.cuda(), roi.cuda()) 
        output = roi_align.forward(inputbatch, roi) 
    torch.cuda.synchronize()
    end = time.time()       
    fwd_time = (end - start) * 1000 / ITERATIONS

    # Backward Pass
    # reduce the output to a scalar so backward() can be called without arguments
    output_sum = output.sum()
    # warmup
    output_sum.backward(retain_graph=True)
    output_sum.backward(retain_graph=True)

    torch.cuda.synchronize()
    bwd_start = time.time()
    for _ in range(ITERATIONS):
        output_sum.backward(retain_graph=True)
    torch.cuda.synchronize()
    bwd_end = time.time()       
    bwd_time = (bwd_end - bwd_start) * 1000 / ITERATIONS
    
    return fwd_time, bwd_time
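
A possible invocation (values illustrative; this benchmark requires a CUDA device, and ITERATIONS is the assumed module-level constant defined above):

fwd_ms, bwd_ms = measure_roialign_perf(
    input_shape=(2, 256, 200, 304),  # NCHW feature map batch
    roi_shape=(512, 5),              # 512 boxes, (batch_idx, x1, y1, x2, y2)
    output_size=(7, 7),
    spatial_scale=0.0625,            # 1/16, typical for a stride-16 feature map
    sampling_ratio=0,
    aligned=True,
)
print(f"forward: {fwd_ms:.3f} ms/iter, backward: {bwd_ms:.3f} ms/iter")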
Example #8
# (truncated: this snippet begins mid-statement, inside an elided loop that
# saves per-clip features; the dangling fragment is preserved below)
#                     os.path.join(save_dir, '{}.pth'.format(idx * b_s + i)))

import torch
import torch.nn as nn
# (also assumed by this script: project-local helpers mkdir, resnet, something,
# and the ROIAlign layer used below)


batch_size = 32
box_num = 10
clips_len = 32
dataset = 'something'
cuda = True

path = 'data/{}/feats'.format(dataset)

# make directory for extracted features
mkdir(path)

# set up some layers
roi = ROIAlign((7, 7), 7.0 / 224.0, 0)
avg_pool2d = torch.nn.AdaptiveMaxPool2d((1, 1))  # NOTE: a max pool, despite the variable name
avg_pool3d = nn.AdaptiveAvgPool3d((1, 1, 1))

# set up base network
net = resnet.i3_res50_nl(num_classes=400, pretrained=True)
# net = resnet.i3_res50(num_classes=400, pretrained=True)
if cuda:
    net.cuda()
net = nn.DataParallel(net)
net.eval()

# set up dataloader
testset = something.Something(root='data/{}'.format(dataset),
                              split='val',
                              clip_len=clips_len)