Example #1
    def test_get_bounding_box(self):
        masks = torch.tensor([
            [
                [False, False, False, True],
                [False, False, True, True],
                [False, True, True, False],
                [False, True, True, False],
            ],
            [
                [False, False, False, False],
                [False, False, True, False],
                [False, True, True, False],
                [False, True, True, False],
            ],
            torch.zeros(4, 4),
        ])
        bitmask = BitMasks(masks)
        box_true = torch.tensor([[1, 0, 4, 4], [1, 1, 3, 4], [0, 0, 0, 0]],
                                dtype=torch.float32)
        box = bitmask.get_bounding_boxes()
        self.assertTrue(torch.all(box.tensor == box_true).item())

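        # Round-trip: convert each ground-truth box to a rectangular polygon,
        # rasterize it, and check both mask classes recover the same box.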
        for box in box_true:
            poly = box[[0, 1, 2, 1, 2, 3, 0, 3]].numpy()
            mask = polygons_to_bitmask([poly], 4, 4)
            reconstruct_box = BitMasks(
                mask[None, :, :]).get_bounding_boxes()[0].tensor
            self.assertTrue(torch.all(box == reconstruct_box).item())

            reconstruct_box = PolygonMasks([[poly]
                                            ]).get_bounding_boxes()[0].tensor
            self.assertTrue(torch.all(box == reconstruct_box).item())
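A quick way to see the empty-mask convention in isolation: get_bounding_boxes() returns an all-zero XYXY box for a mask with no foreground pixels, which is what the third row of box_true asserts. A minimal sketch, assuming detectron2 is installed:

import torch
from detectron2.structures import BitMasks

empty = BitMasks(torch.zeros(1, 4, 4, dtype=torch.bool))
print(empty.get_bounding_boxes().tensor)  # tensor([[0., 0., 0., 0.]])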
Example #2
    def __call__(self, dataset_dict):
        # read data
        data = super(TrainMapper, self).__call__(dataset_dict)
        instance = data['instances']

        # decode gt_mask from polygon to bitmask
        h, w = instance.image_size
        instance.gt_masks = BitMasks.from_polygon_masks(
            instance.gt_masks, h, w)

        # augmentations
        img = data['image'].numpy().transpose(1, 2, 0).astype(np.float64)  # np.float was removed in NumPy 1.24
        sk = self.sharpen_kernel * float((torch.randn(1) - 0.5) / 8)
        sk[1, 1] += 1
        img = cv2.filter2D(img, -1, sk)
        point_factor = w * h // 2048
        for i in range(point_factor):
            center = [int(xx) for xx in (torch.rand(2) * torch.tensor([w, h]))]
            color = tuple([int(xx) for xx in torch.randint(0, 255, (4, ))])
            size = int(torch.randint(2, 5, (1, )))
            img = cv2.circle(img,
                             tuple(center),
                             size,
                             color=color,
                             thickness=-1)
        sk[1, 1] -= 1
        data['image'] = self.color_jitter(
            torch.Tensor(img.transpose(2, 0, 1)) / 255) * 255

        return data
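The mapper above relies on a self.sharpen_kernel attribute defined elsewhere in the class. A plausible choice, shown here purely as an illustrative assumption, is a zero-sum Laplacian-style kernel; adding 1 to the center tap (as the mapper does before cv2.filter2D) turns the randomly scaled kernel into "identity + random high-pass", so mean brightness is preserved:

import numpy as np

# Hypothetical kernel; the actual self.sharpen_kernel is not shown in the snippet.
sharpen_kernel = np.array([
    [ 0., -1.,  0.],
    [-1.,  4., -1.],
    [ 0., -1.,  0.],
], dtype=np.float32)  # sums to 0, so scaling it leaves mean intensity unchanged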
Example #3
    def test_getitem(self):
        masks = BitMasks(torch.ones(3, 10, 10))
        self.assertEqual(masks[1].tensor.shape, (1, 10, 10))
        self.assertEqual(masks[1:3].tensor.shape, (2, 10, 10))
        self.assertEqual(
            masks[torch.tensor([True, False, False])].tensor.shape,
            (1, 10, 10))
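Indexing a BitMasks always yields another BitMasks: an integer index keeps a leading dimension of 1, while slices and boolean tensors select a subset of masks. A minimal sketch, assuming detectron2 is installed:

import torch
from detectron2.structures import BitMasks

m = BitMasks(torch.ones(3, 10, 10, dtype=torch.bool))
print(m[0].tensor.shape)                                  # torch.Size([1, 10, 10])
print(m[torch.tensor([True, False, True])].tensor.shape)  # torch.Size([2, 10, 10])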
Example #4
def process_mocap_predictions(
    mocap_predictions, bboxes, image_size=IMAGE_SIZE, masks=None
):
    """
    Rescales camera to follow HMR convention, and then computes the camera w.r.t. the
    image rather than the local bounding box.

    Args:
        mocap_predictions (list).
        bboxes (N x 4): Bounding boxes in xyxy format.
        image_size (int): Max dimension of image.
        masks (N x H x W): Bit mask of people.

    Returns:
        dict {str: torch.cuda.FloatTensor}
            bboxes: Bounding boxes in xyxy format (N x 4).
            cams: Weak perspective camera (N x 3).
            masks: Bitmasks used for computing ordinal depth loss, cropped to image
                space (N x L x L).
            local_cams: Weak perspective camera relative to the bounding boxes (N x 3).
    """
    verts = np.stack([p["pred_vertices_smpl"] for p in mocap_predictions])
    # All faces are the same, so just need one copy.
    faces = np.expand_dims(mocap_predictions[0]["faces"].astype(np.int32), 0)
    max_dim = np.max(bbox_xy_to_wh(bboxes)[:, 2:], axis=1)
    local_cams = []
    for b, pred in zip(max_dim, mocap_predictions):
        local_cam = pred["pred_camera"].copy()
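        # 224 here is presumably the HMR-style input crop resolution
        # (an assumption; the constant is not documented in this snippet).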
        scale_o2n = pred["bbox_scale_ratio"] * b / 224
        local_cam[0] /= scale_o2n
        local_cam[1:] /= local_cam[:1]
        local_cams.append(local_cam)
    local_cams = np.stack(local_cams)
    global_cams = local_to_global_cam(bboxes, local_cams, image_size)
    inds = np.argsort(bboxes[:, 0])  # Sort from left to right to make debugging easy.
    person_parameters = {
        "bboxes": bboxes[inds].astype(np.float32),
        "cams": global_cams[inds].astype(np.float32),
        "faces": faces,
        "local_cams": local_cams[inds].astype(np.float32),
        "verts": verts[inds].astype(np.float32),
    }
    for k, v in person_parameters.items():
        person_parameters[k] = torch.from_numpy(v).cuda()
    if masks is not None:
        full_boxes = torch.tensor([[0, 0, image_size, image_size]] * len(bboxes))
        full_boxes = full_boxes.float().cuda()
        masks = BitMasks(masks).crop_and_resize(boxes=full_boxes, mask_size=image_size)
        person_parameters["masks"] = masks[inds].cuda()
    return person_parameters
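The masks branch uses a small trick: passing the full-image box for every instance makes crop_and_resize act as a plain resize, mapping each H x W bitmask onto a square image_size x image_size grid. A minimal sketch of the same call with toy sizes, assuming detectron2 is installed:

import torch
from detectron2.structures import BitMasks

H = W = 8
masks = BitMasks(torch.rand(2, H, W) > 0.5)
full_boxes = torch.tensor([[0, 0, W, H]] * 2, dtype=torch.float32)
square = masks.crop_and_resize(boxes=full_boxes, mask_size=4)
print(square.shape)  # torch.Size([2, 4, 4]), dtype torch.bool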
Example #5
def wrapper(d, default_m, h, w):
    """
    After default_m, d has keys: file_name, height, width, image, instances, etc.
    """
    d = default_m(d)
    img = d['image'].cpu().numpy().transpose(1, 2, 0)
    img = torch.tensor(img).type(torch.float)
    if 'instances' not in d:
        d['image'] = img
        return d
    #img = cv2.resize(img, (h, w))  # .transpose(2, 0, 1)
    raw_h, raw_w = d['instances'].image_size
    h, w = raw_h, raw_w  # resizing is disabled, so the target size equals the raw size
    masks = BitMasks.from_polygon_masks(d['instances'].gt_masks, raw_h,
                                        raw_w).tensor.type(torch.uint8)
    masks_resized = np.zeros((masks.shape[0], h, w))
    for i in range(masks.shape[0]):
        masks_resized[i] = masks[i].cpu().numpy()
        #masks_resized[i] = cv2.resize(masks[i].cpu().numpy(), (w, h))
    gt_masks = torch.tensor(masks_resized).type(torch.bool)
    """
    num_gt_masks = gt_masks.shape[0]
    ground = np.zeros_like(gt_masks[0], dtype=np.uint8)

    for j in range(num_gt_masks):
        ground[gt_masks[j]] = j + 1
    print(gt_masks.shape, np.unique(ground))
    """
    foreground = gt_masks[0]
    for i in range(1, gt_masks.shape[0]):
        foreground |= gt_masks[i]
    d['image'] = img
    d['labels'] = gt_masks
    d['background'] = ~foreground
    return d
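The foreground/background loop at the end is an OR-reduction over the instance dimension, which torch can express directly. An equivalent sketch, assuming gt_masks is an (N, H, W) bool tensor with N >= 1:

import torch

gt_masks = torch.zeros(3, 8, 8, dtype=torch.bool)
gt_masks[0, 2:4, 2:4] = True
foreground = gt_masks.any(dim=0)  # logical OR across all instance masks
background = ~foreground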
Example #6
    def test_from_empty_polygons(self):
        masks = BitMasks.from_polygon_masks([], 100, 100)
        self.assertEqual(masks.tensor.shape, (0, 100, 100))
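from_polygon_masks also accepts a PolygonMasks instance, and the empty case produces a (0, H, W) tensor that downstream code can treat uniformly. A minimal sketch, assuming detectron2 is installed:

import torch
from detectron2.structures import BitMasks, PolygonMasks

empty = BitMasks.from_polygon_masks(PolygonMasks([]), 100, 100)
assert empty.tensor.shape == (0, 100, 100)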