def test_get_bounding_box(self):
    masks = torch.tensor([
        [
            [False, False, False, True],
            [False, False, True, True],
            [False, True, True, False],
            [False, True, True, False],
        ],
        [
            [False, False, False, False],
            [False, False, True, False],
            [False, True, True, False],
            [False, True, True, False],
        ],
        torch.zeros(4, 4),
    ])
    bitmask = BitMasks(masks)
    box_true = torch.tensor(
        [[1, 0, 4, 4], [1, 1, 3, 4], [0, 0, 0, 0]], dtype=torch.float32
    )
    box = bitmask.get_bounding_boxes()
    self.assertTrue(torch.all(box.tensor == box_true).item())

    for box in box_true:
        # Expand each xyxy box into the four corners of a rectangular polygon
        # and check that both mask representations recover the same box.
        poly = box[[0, 1, 2, 1, 2, 3, 0, 3]].numpy()
        mask = polygons_to_bitmask([poly], 4, 4)
        reconstruct_box = BitMasks(mask[None, :, :]).get_bounding_boxes()[0].tensor
        self.assertTrue(torch.all(box == reconstruct_box).item())

        reconstruct_box = PolygonMasks([[poly]]).get_bounding_boxes()[0].tensor
        self.assertTrue(torch.all(box == reconstruct_box).item())
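
# A minimal reference implementation for sanity-checking the expected boxes
# above (an assumption for illustration, not detectron2's actual
# get_bounding_boxes code): each box is [min_x, min_y, max_x + 1, max_y + 1]
# over the True pixels of a (N, H, W) bool tensor, and all-empty masks map to
# the degenerate box [0, 0, 0, 0], matching box_true.
def reference_bounding_boxes(masks):
    boxes = torch.zeros(masks.shape[0], 4, dtype=torch.float32)
    for i, mask in enumerate(masks):
        ys, xs = torch.where(mask)
        if len(xs) == 0:  # empty mask -> keep the zero box
            continue
        boxes[i] = torch.tensor(
            [int(xs.min()), int(ys.min()), int(xs.max()) + 1, int(ys.max()) + 1],
            dtype=torch.float32,
        )
    return boxes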

def __call__(self, dataset_dict):
    # read data
    data = super(TrainMapper, self).__call__(dataset_dict)
    instance = data['instances']

    # decode gt_mask from polygon to bitmask
    h, w = instance.image_size
    instance.gt_masks = BitMasks.from_polygon_masks(instance.gt_masks, h, w)

    # augmentation 1: sharpen with a randomly scaled kernel
    # (np.float was removed in NumPy >= 1.24; use an explicit dtype)
    img = data['image'].numpy().transpose(1, 2, 0).astype(np.float32)
    sk = self.sharpen_kernel * float((torch.randn(1) - 0.5) / 8)
    sk[1, 1] += 1  # identity bump so the filter sharpens rather than edge-detects
    img = cv2.filter2D(img, -1, sk)

    # augmentation 2: scatter random colored dots, roughly one per 2048 pixels
    point_factor = w * h // 2048
    for i in range(point_factor):
        center = [int(xx) for xx in (torch.rand(2) * torch.tensor([w, h]))]
        color = tuple([int(xx) for xx in torch.randint(0, 255, (4, ))])
        size = int(torch.randint(2, 5, (1, )))
        img = cv2.circle(img, tuple(center), size, color=color, thickness=-1)
    sk[1, 1] -= 1  # undo the identity bump (sk is a local copy, so this is defensive)

    # augmentation 3: color jitter on a [0, 1] CHW tensor, then rescale to [0, 255]
    data['image'] = self.color_jitter(
        torch.Tensor(img.transpose(2, 0, 1)) / 255) * 255
    return data
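
# The mapper above relies on two attributes this snippet does not define.
# A plausible setup (an assumption, not necessarily the author's): a zero-sum
# Laplacian-style kernel whose center weight gets the +1 bump inside __call__,
# plus a torchvision ColorJitter. All values here are illustrative.
def _example_augmentation_setup(self):
    import numpy as np
    import torchvision.transforms as T
    # Zero-sum kernel: after sk[1, 1] += 1 it becomes a sharpening filter.
    self.sharpen_kernel = np.array(
        [[0, -1, 0],
         [-1, 4, -1],
         [0, -1, 0]], dtype=np.float32)
    self.color_jitter = T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2)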

def test_getitem(self):
    masks = BitMasks(torch.ones(3, 10, 10))
    self.assertEqual(masks[1].tensor.shape, (1, 10, 10))
    self.assertEqual(masks[1:3].tensor.shape, (2, 10, 10))
    self.assertEqual(
        masks[torch.tensor([True, False, False])].tensor.shape, (1, 10, 10)
    )

def process_mocap_predictions(mocap_predictions, bboxes, image_size=IMAGE_SIZE, masks=None):
    """
    Rescales camera to follow HMR convention, and then computes the camera w.r.t.
    the image rather than the local bounding box.

    Args:
        mocap_predictions (list).
        bboxes (N x 4): Bounding boxes in xyxy format.
        image_size (int): Max dimension of image.
        masks (N x H x W): Bit mask of people.

    Returns:
        dict {str: torch.cuda.FloatTensor}
            bboxes: Bounding boxes in xyxy format (N x 4).
            cams: Weak perspective camera (N x 3).
            masks: Bitmasks used for computing ordinal depth loss, cropped to
                image space (N x L x L).
            local_cams: Weak perspective camera relative to the bounding boxes
                (N x 3).
    """
    verts = np.stack([p["pred_vertices_smpl"] for p in mocap_predictions])
    # All faces are the same, so just need one copy.
    faces = np.expand_dims(mocap_predictions[0]["faces"].astype(np.int32), 0)
    max_dim = np.max(bbox_xy_to_wh(bboxes)[:, 2:], axis=1)
    local_cams = []
    for b, pred in zip(max_dim, mocap_predictions):
        local_cam = pred["pred_camera"].copy()
        scale_o2n = pred["bbox_scale_ratio"] * b / 224
        local_cam[0] /= scale_o2n
        local_cam[1:] /= local_cam[:1]
        local_cams.append(local_cam)
    local_cams = np.stack(local_cams)
    global_cams = local_to_global_cam(bboxes, local_cams, image_size)
    inds = np.argsort(bboxes[:, 0])  # Sort from left to right to make debugging easier.
    person_parameters = {
        "bboxes": bboxes[inds].astype(np.float32),
        "cams": global_cams[inds].astype(np.float32),
        "faces": faces,
        "local_cams": local_cams[inds].astype(np.float32),
        "verts": verts[inds].astype(np.float32),
    }
    for k, v in person_parameters.items():
        person_parameters[k] = torch.from_numpy(v).cuda()
    if masks is not None:
        full_boxes = torch.tensor([[0, 0, image_size, image_size]] * len(bboxes))
        full_boxes = full_boxes.float().cuda()
        masks = BitMasks(masks).crop_and_resize(boxes=full_boxes, mask_size=image_size)
        person_parameters["masks"] = masks[inds].cuda()
    return person_parameters
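
# `bbox_xy_to_wh` is an external helper; based on its use above (where columns
# 2: are read as width/height), it presumably converts xyxy boxes to xywh.
# A minimal sketch under that assumption:
def bbox_xy_to_wh_sketch(bboxes):
    """Convert (N, 4) boxes from [x1, y1, x2, y2] to [x1, y1, w, h]."""
    bboxes = np.asarray(bboxes, dtype=np.float32).copy()
    bboxes[:, 2:] -= bboxes[:, :2]
    return bboxes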

def wrapper(d, default_m, h, w):
    d = default_m(d)
    # d has keys: file_name, height, width, image, instances, etc.
    img = d['image'].cpu().numpy().transpose(1, 2, 0)
    img = torch.tensor(img).type(torch.float)
    if 'instances' not in d:
        d['image'] = img
        return d

    # Resizing to (h, w) is currently disabled, so the masks keep the raw
    # image size and the h/w arguments are shadowed below.
    # img = cv2.resize(img, (h, w))  # .transpose(2, 0, 1)
    h, w = d['instances'].image_size
    raw_h, raw_w = d['instances'].image_size
    masks = BitMasks.from_polygon_masks(
        d['instances'].gt_masks, raw_h, raw_w
    ).tensor.type(torch.uint8)
    masks_resized = np.zeros((masks.shape[0], h, w))
    for i in range(masks.shape[0]):
        masks_resized[i] = masks[i].cpu().numpy()
        # masks_resized[i] = cv2.resize(masks[i].cpu().numpy(), (w, h))
    gt_masks = torch.tensor(masks_resized).type(torch.bool)

    # Debug helper: collapse the instance masks into a single index map.
    # num_gt_masks = gt_masks.shape[0]
    # ground = np.zeros_like(gt_masks[0], dtype=np.uint8)
    # for j in range(num_gt_masks):
    #     ground[gt_masks[j]] = j + 1
    # print(gt_masks.shape, np.unique(ground))

    # Union of all instance masks; everything else is background.
    foreground = gt_masks[0]
    for i in range(1, gt_masks.shape[0]):
        foreground |= gt_masks[i]

    d['image'] = img
    d['labels'] = gt_masks
    d['background'] = ~foreground
    return d
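
# A hypothetical way to plug `wrapper` into a detectron2 pipeline: bind the
# base mapper and target size with functools.partial so the loader can call
# mapper(dataset_dict) per sample. DatasetMapper and
# build_detection_train_loader are standard detectron2 APIs; the sizes and
# config are placeholders.
from functools import partial
from detectron2.data import DatasetMapper, build_detection_train_loader

def build_wrapped_loader(cfg, h=512, w=512):
    mapper = partial(wrapper, default_m=DatasetMapper(cfg, is_train=True), h=h, w=w)
    return build_detection_train_loader(cfg, mapper=mapper)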

def test_from_empty_polygons(self):
    masks = BitMasks.from_polygon_masks([], 100, 100)
    self.assertEqual(masks.tensor.shape, (0, 100, 100))
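
# For contrast with the empty case above, a usage sketch with a single square
# instance (assuming detectron2's flat [x0, y0, x1, y1, ...] polygon format;
# this test is illustrative, not part of the original suite):
def test_from_one_polygon_sketch(self):
    poly = np.array([10, 10, 60, 10, 60, 60, 10, 60], dtype=np.float64)
    masks = BitMasks.from_polygon_masks([[poly]], 100, 100)
    self.assertEqual(masks.tensor.shape, (1, 100, 100))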