# Store the first element of the vertex / normal batches as numpy arrays.
data_dict['points'] = V[0].cpu().numpy()
data_dict['normals'] = V_normal[0].cpu().numpy()
# Colours are an all-ones float32 array shaped like the points.
data_dict['colors'] = np.ones_like(data_dict['points'], dtype=np.float32)
# Uninitialised storage for one 4x4 matrix per camera.
data_dict['camera_mat'] = torch.empty(opt.num_cameras, 4, 4)

# DVR data no projection step, assumes use SfMcamera
cameras_dict = {}
# The point-cloud dict references the same arrays held by data_dict.
pcl_dict = {key: data_dict[key] for key in ('points', 'normals', 'colors')}

idx = 0
# NOTE(review): the loop body appears to continue beyond this excerpt, and
# the nesting of the assert / lights.to() lines was inferred — confirm.
for c_idx, cams in tqdm(camera_sampler):
    meshes_batch = meshes.extend(batch_size)
    cams = cams.to(device)

    # create tri-color lights and a specular+diffuse shader
    build_lights = (get_tri_color_lights_for_view
                    if opt.tri_color_light else get_light_for_view)
    lights = build_lights(cams,
                          point_lights=opt.point_lights,
                          has_specular=opt.has_specular)
    assert (type(lights) is type(template_lights))
    lights.to(device=device)
def evaluate_for_pix3d(
    predictions,
    dataset,
    metadata,
    filter_iou,
    mesh_models=None,
    iou_thresh=0.5,
    mask_thresh=0.5,
    device=None,
    vis_preds=False,
):
    """
    Compute per-category and mean box / mask / mesh AP for Pix3D predictions.

    Args:
        predictions: iterable of dicts. Each dict holds an "image_id" and,
            optionally, an "instances" object exposing `scores`,
            `pred_boxes`, `pred_classes`, `pred_masks_rle`, `pred_dz`,
            `image_size` and (optionally) `pred_meshes`.
        dataset: COCO-style API object; this function uses `getCatIds`,
            `loadImgs`, `getAnnIds`, `loadAnns`, `loadCats` and
            `dataset["annotations"]`.
        metadata: object providing `thing_dataset_id_to_contiguous_id` and
            `image_root`.
        filter_iou: predictions whose box IoU with the ground-truth box is
            not greater than this value are ignored.
        mesh_models: container indexed by the annotation's "model" value;
            entry `[0]` holds the vertices and `[1]` the faces of the
            ground-truth mesh. Loading from disk is not implemented, so
            `None` raises as soon as a ground-truth mesh is needed.
        iou_thresh: threshold a prediction's mask IoU, box IoU and F1 score
            must exceed to count as a true positive.
        mask_thresh: accepted for interface compatibility; not used here.
        device: torch device for the computation (defaults to CPU).
        vis_preds: if True, predictions are visualized to "/tmp/output".

    Returns:
        dict mapping metric names ("box_ap@..", "mask_ap@..", "mesh_ap@..",
        plus one "<kind>_ap@.. - <category>" entry per category that has
        ground-truth instances) to their values.

    Raises:
        ValueError: if a prediction has no `pred_dz` field.
        NotImplementedError: if `mesh_models` is None.
    """
    from PIL import Image

    if device is None:
        device = torch.device("cpu")

    # Key selecting the F1 score inside the dict returned by compare_meshes.
    # The value found here was "[email protected]", an e-mail-obfuscation
    # artefact rather than a metric name, so it has been restored.
    # NOTE(review): confirm this matches the key format emitted by
    # compare_meshes ("F1@%f" with threshold 0.3).
    F1_TARGET = "F1@0.300000"

    # Images whose masks do not match the image; ignored everywhere.
    mismatched_files = ("img/table/1749.jpg", "img/table/0045.png")

    # The three AP flavours share the same bookkeeping.
    kinds = ("box", "mask", "mesh")

    # classes
    cat_ids = sorted(dataset.getCatIds())
    reverse_id_mapping = {
        v: k
        for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    # Per kind and per category: lists of score tensors and TP/FP label
    # tensors (seeded with an empty tensor so torch.cat always works), and
    # the set of image ids whose ground truth has already been matched.
    apscores = {kind: {} for kind in kinds}
    aplabels = {kind: {} for kind in kinds}
    covered = {kind: set() for kind in kinds}
    npos = {}
    for cat_id in cat_ids:
        for kind in kinds:
            apscores[kind][cat_id] = [
                torch.tensor([], dtype=torch.float32, device=device)
            ]
            aplabels[kind][cat_id] = [
                torch.tensor([], dtype=torch.uint8, device=device)
            ]
        npos[cat_id] = 0.0

    # number of gt positive instances per class
    for gt_ann in dataset.dataset["annotations"]:
        gt_label = gt_ann["category_id"]
        image_file_name = dataset.loadImgs([gt_ann["image_id"]
                                            ])[0]["file_name"]
        if image_file_name in mismatched_files:
            continue
        npos[gt_label] += 1.0

    for prediction in predictions:
        original_id = prediction["image_id"]
        # Look the image record up once instead of once per field.
        image_info = dataset.loadImgs([original_id])[0]
        image_width = image_info["width"]
        image_height = image_info["height"]
        image_size = [image_height, image_width]
        image_file_name = image_info["file_name"]
        if image_file_name in mismatched_files:
            continue

        if "instances" not in prediction:
            continue
        instances = prediction["instances"]

        num_img_preds = len(instances)
        if num_img_preds == 0:
            continue

        # predictions
        scores = instances.scores
        boxes = instances.pred_boxes.to(device)
        labels = instances.pred_classes
        masks_rles = instances.pred_masks_rle
        if hasattr(instances, "pred_meshes"):
            # each predicted mesh is a (verts, faces) pair
            raw_meshes = instances.pred_meshes
            verts = [mesh[0] for mesh in raw_meshes]
            faces = [mesh[1] for mesh in raw_meshes]
            meshes = Meshes(verts=verts, faces=faces).to(device)
        else:
            # no mesh prediction: fall back to one sphere per prediction
            meshes = ico_sphere(4, device)
            meshes = meshes.extend(num_img_preds).to(device)
        if not hasattr(instances, "pred_dz"):
            raise ValueError("Z range of box not predicted")
        heights = boxes.tensor[:, 3] - boxes.tensor[:, 1]
        # NOTE see appendix for derivation of pred dz
        pred_dz = instances.pred_dz[:, 0] * heights.cpu()
        assert instances.image_size[0] == image_height
        assert instances.image_size[1] == image_width

        # ground truth
        # annotations corresponding to original_id (aka coco image_id)
        gt_ann_ids = dataset.getAnnIds(imgIds=[original_id])
        # note that pix3d has one annotation per image
        assert len(gt_ann_ids) == 1
        gt_anns = dataset.loadAnns(gt_ann_ids)[0]
        assert gt_anns["image_id"] == original_id

        # get original ground truth mask, box, label & mesh
        maskfile = os.path.join(metadata.image_root, gt_anns["segmentation"])
        with PathManager.open(maskfile, "rb") as f:
            gt_mask = torch.tensor(
                np.asarray(Image.open(f), dtype=np.float32) / 255.0)
        assert (gt_mask.shape[0] == image_height
                and gt_mask.shape[1] == image_width)

        gt_mask = (gt_mask > 0).to(dtype=torch.uint8)  # binarize mask
        gt_mask_rle = [
            mask_util.encode(np.array(gt_mask[:, :, None], order="F"))[0]
        ]
        gt_box = np.array(gt_anns["bbox"]).reshape(-1, 4)  # xywh from coco
        gt_box = BoxMode.convert(gt_box, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        gt_label = gt_anns["category_id"]
        faux_gt_targets = Boxes(
            torch.tensor(gt_box, dtype=torch.float32, device=device))

        # load gt mesh and extrinsics/intrinsics
        gt_R = torch.tensor(gt_anns["rot_mat"]).to(device)
        gt_t = torch.tensor(gt_anns["trans_mat"]).to(device)
        gt_K = torch.tensor(gt_anns["K"]).to(device)
        if mesh_models is None:
            # loading from disc is not supported
            raise NotImplementedError
        modeltype = gt_anns["model"]
        gt_verts = mesh_models[modeltype][0].clone().to(device)
        gt_faces = mesh_models[modeltype][1].clone().to(device)
        gt_verts = shape_utils.transform_verts(gt_verts, gt_R, gt_t)
        gt_zrange = torch.stack([gt_verts[:, 2].min(), gt_verts[:, 2].max()])
        gt_mesh = Meshes(verts=[gt_verts], faces=[gt_faces])

        # box iou
        boxiou = pairwise_iou(boxes, faux_gt_targets)

        # filter predictions with iou > filter_iou
        valid_pred_ids = boxiou > filter_iou

        # mask iou
        miou = mask_util.iou(masks_rles, gt_mask_rle, [0])

        # It's impossible to predict the center location in Z (=tc)
        # from the image. See appendix for more.
        tc = (gt_zrange[1] + gt_zrange[0]) / 2.0
        # Given a center location (tc) and a focal_length,
        # pred_dz = pred_dz * box_h * tc / focal_length
        # See appendix for more.
        zrange_rows = []
        for i in range(len(meshes)):
            half_extent = tc * pred_dz[i] / 2.0 / gt_K[0]
            zrange_rows.append(
                torch.stack([tc - half_extent, tc + half_extent]))
        zranges = torch.stack(zrange_rows, dim=0)
        gt_Ks = gt_K.view(1, 3).expand(len(meshes), 3)
        meshes = transform_meshes_to_camera_coord_system(
            meshes, boxes.tensor, zranges, gt_Ks, image_size)

        if vis_preds:
            vis_utils.visualize_predictions(
                original_id,
                image_file_name,
                scores,
                labels,
                boxes.tensor,
                masks_rles,
                meshes,
                metadata,
                "/tmp/output",
            )

        shape_metrics = compare_meshes(meshes, gt_mesh, reduce=False)

        # visit predictions in descending score order
        _, idx_sorted = torch.sort(scores, descending=True)

        for rank in range(num_img_preds):
            pred_idx = idx_sorted[rank]
            # remember we only evaluate the preds that have overlap more than
            # iou_filter with the ground truth prediction
            if valid_pred_ids[pred_idx, 0] == 0:
                continue
            # map to dataset category id
            pred_label = reverse_id_mapping[labels[pred_idx].item()]
            pred_score = scores[pred_idx].view(1).to(device)
            pred_quality = {
                "mask": miou[pred_idx].item(),
                "box": boxiou[pred_idx].item(),
                # note that metrics returns f1 in % (=x100)
                "mesh": shape_metrics[F1_TARGET][pred_idx].item() / 100.0,
            }

            for kind in kinds:
                # A prediction is a true positive when its label matches,
                # its quality exceeds the threshold and the image's ground
                # truth has not been claimed by a higher-scoring prediction.
                tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
                if (pred_label == gt_label
                        and pred_quality[kind] > iou_thresh
                        and original_id not in covered[kind]):
                    tpfp[0] = 1
                    covered[kind].add(original_id)
                apscores[kind][pred_label].append(pred_score)
                aplabels[kind][pred_label].append(tpfp)

    # Aggregate: per-category AP, then the mean over categories that have
    # at least one ground-truth instance.
    pix3d_metrics = {}
    ap_sums = {kind: 0.0 for kind in kinds}
    valid = 0.0
    for cat_id in cat_ids:
        cat_name = dataset.loadCats([cat_id])[0]["name"]
        if npos[cat_id] == 0:
            continue
        valid += 1

        for kind in kinds:
            cat_ap = VOCap.compute_ap(
                torch.cat(apscores[kind][cat_id]),
                torch.cat(aplabels[kind][cat_id]),
                npos[cat_id],
            )
            ap_sums[kind] += cat_ap
            pix3d_metrics["%s_ap@%.1f - %s" %
                          (kind, iou_thresh, cat_name)] = cat_ap

    for kind in kinds:
        pix3d_metrics["%s_ap@%.1f" % (kind, iou_thresh)] = ap_sums[kind] / valid

    # print test ground truth
    vis_utils.print_instances_class_histogram(
        [npos[cat_id] for cat_id in cat_ids],  # number of instances
        [dataset.loadCats([cat_id])[0]["name"]
         for cat_id in cat_ids],  # class names
        pix3d_metrics,
    )

    return pix3d_metrics
def test_extend(self):
    """Extending a TexturesUV mesh batch must replicate each texture N times."""
    batch_size = 5
    mesh = TestMeshes.init_mesh(batch_size, 30, 50)
    uv_index_bound = mesh._V
    faces_per_mesh = mesh.num_faces_per_mesh()
    verts_per_mesh = mesh.num_verts_per_mesh()
    faces_uvs_list = [
        torch.randint(size=(count, 3), low=0, high=uv_index_bound)
        for count in faces_per_mesh
    ]
    verts_uvs_list = [torch.rand(count, 2) for count in verts_per_mesh]
    tex_uv = TexturesUV(
        maps=torch.ones((batch_size, 16, 16, 3)),
        faces_uvs=faces_uvs_list,
        verts_uvs=verts_uvs_list,
    )
    tex_mesh = Meshes(
        verts=mesh.verts_list(), faces=mesh.faces_list(), textures=tex_uv
    )

    copies = 2
    new_mesh = tex_mesh.extend(copies)
    self.assertEqual(len(tex_mesh) * copies, len(new_mesh))

    tex_init = tex_mesh.textures
    new_tex = new_mesh.textures
    extended_num_verts = new_mesh.num_verts_per_mesh()

    for src in range(len(tex_mesh)):
        for rep in range(copies):
            # Position of this copy inside the extended batch.
            dst = src * copies + rep
            dst_nv = extended_num_verts[dst]

            # The source textures were built from a verts-uvs list, while
            # the extended textures are built from the padded tensor and
            # do not use verts-per-mesh when converting back to a list
            # (see TexturesUV), hence the explicit slice to dst_nv rows.
            expected_verts_uvs = tex_init.verts_uvs_list()[src]
            actual_verts_uvs = new_tex.verts_uvs_list()[dst][:dst_nv, ...]
            self.assertClose(expected_verts_uvs, actual_verts_uvs)

            self.assertClose(
                tex_init.faces_uvs_list()[src], new_tex.faces_uvs_list()[dst]
            )
            self.assertClose(
                tex_init.maps_padded()[src, ...], new_tex.maps_padded()[dst]
            )
            self.assertClose(
                tex_init._num_faces_per_mesh[src],
                new_tex._num_faces_per_mesh[dst],
            )

    # The extended textures must not share storage with the originals.
    storages = [
        tex_init.faces_uvs_padded(),
        new_tex.faces_uvs_padded(),
        tex_init.verts_uvs_padded(),
        new_tex.verts_uvs_padded(),
        tex_init.maps_padded(),
        new_tex.maps_padded(),
    ]
    self.assertAllSeparate(storages)

    # A negative repeat count is rejected.
    with self.assertRaises(ValueError):
        tex_mesh.extend(N=-1)
def test_extend(self):
    """Extending a textured mesh batch must replicate UV and RGB textures."""
    batch_size = 10
    mesh = TestMeshes.init_mesh(batch_size, 30, 50)
    num_v = mesh._V
    num_f = mesh._F
    copies = 20

    # 1. Texture uvs
    tex_uv = Textures(
        maps=torch.randn((batch_size, 16, 16, 3)),
        faces_uvs=torch.randint(size=(batch_size, num_f, 3), low=0, high=num_v),
        verts_uvs=torch.randn((batch_size, num_v, 2)),
    )
    tex_mesh = Meshes(
        verts=mesh.verts_padded(),
        faces=mesh.faces_padded(),
        textures=tex_uv,
    )
    new_mesh = tex_mesh.extend(copies)
    self.assertEqual(len(tex_mesh) * copies, len(new_mesh))

    tex_init = tex_mesh.textures
    new_tex = new_mesh.textures

    for src in range(len(tex_mesh)):
        for rep in range(copies):
            # Position of this copy inside the extended batch.
            dst = src * copies + rep
            self.assertClose(
                tex_init.faces_uvs_list()[src], new_tex.faces_uvs_list()[dst]
            )
            self.assertClose(
                tex_init.verts_uvs_list()[src], new_tex.verts_uvs_list()[dst]
            )

    # The extended textures must not share storage with the originals.
    uv_storages = [
        tex_init.faces_uvs_padded(),
        new_tex.faces_uvs_padded(),
        tex_init.verts_uvs_padded(),
        new_tex.verts_uvs_padded(),
        tex_init.maps_padded(),
        new_tex.maps_padded(),
    ]
    self.assertAllSeparate(uv_storages)

    # UV-only textures expose no per-vertex RGB data.
    self.assertIsNone(new_tex.verts_rgb_list())
    self.assertIsNone(new_tex.verts_rgb_padded())
    self.assertIsNone(new_tex.verts_rgb_packed())

    # 2. Texture vertex RGB
    tex_rgb = Textures(verts_rgb=torch.randn((batch_size, num_v, 3)))
    tex_mesh_rgb = Meshes(
        verts=mesh.verts_padded(),
        faces=mesh.faces_padded(),
        textures=tex_rgb,
    )
    new_mesh_rgb = tex_mesh_rgb.extend(copies)
    self.assertEqual(len(tex_mesh_rgb) * copies, len(new_mesh_rgb))

    tex_init = tex_mesh_rgb.textures
    new_tex = new_mesh_rgb.textures

    for src in range(len(tex_mesh_rgb)):
        for rep in range(copies):
            dst = src * copies + rep
            self.assertClose(
                tex_init.verts_rgb_list()[src], new_tex.verts_rgb_list()[dst]
            )

    self.assertAllSeparate(
        [tex_init.verts_rgb_padded(), new_tex.verts_rgb_padded()]
    )

    # RGB-only textures expose no UV data.
    self.assertIsNone(new_tex.verts_uvs_padded())
    self.assertIsNone(new_tex.verts_uvs_list())
    self.assertIsNone(new_tex.verts_uvs_packed())
    self.assertIsNone(new_tex.faces_uvs_padded())
    self.assertIsNone(new_tex.faces_uvs_list())
    self.assertIsNone(new_tex.faces_uvs_packed())

    # 3. Error
    with self.assertRaises(ValueError):
        tex_mesh.extend(N=-1)
def _forward_shape(self, features, instances):
    """
    Run the voxel and mesh refinement branches.

    Args:
        features: container of feature maps, indexed by the names in
            `self.in_features`.
        instances (list[Instances]): per-image instances. In training
            their foreground proposals are used; in inference their
            predicted boxes are used.

    Returns:
        In training, a dict of losses ("loss_voxel" when the voxel branch
        is on; "loss_chamfer", "loss_normals" and "loss_edge" when the
        mesh branch is on). In inference, `instances` after the voxel /
        mesh inference routines have been applied to it. When both
        branches are off, an empty dict (training) or `instances`
        unchanged (inference).

    Raises:
        ValueError: if the voxel branch is on but class-agnostic voxel
            prediction is disabled.
    """
    if not (self.voxel_on or self.mesh_on):
        return {} if self.training else instances

    def mesh_from_voxels(logits):
        # Cubify the voxel probabilities into an initial mesh (no gradient).
        if not self.cls_agnostic_voxel:
            raise ValueError("No support for class specific predictions")
        with torch.no_grad():
            occupancy = logits.sigmoid().squeeze(1)  # (N, V, V, V)
            return cubify(occupancy, self.cubify_thresh)

    def mesh_from_sphere(pooled):
        # One ico-sphere per pooled feature row, or an empty mesh batch.
        count = pooled.shape[0]
        if count > 0:
            sphere = ico_sphere(self.ico_sphere_level, pooled.device)
            return sphere.extend(count)
        return Meshes(verts=[], faces=[])

    def zero_loss():
        # Zero-valued loss that still references every mesh-head parameter.
        return sum(p.sum() for p in self.mesh_head.parameters()) * 0.0

    level_features = [features[name] for name in self.in_features]

    if not self.training:
        boxes = [inst.pred_boxes for inst in instances]

        if self.voxel_on:
            voxel_logits = self.voxel_head(
                self.voxel_pooler(level_features, boxes))
            voxel_rcnn_inference(voxel_logits, instances)
            init_mesh = mesh_from_voxels(voxel_logits)

        if self.mesh_on:
            mesh_features = self.mesh_pooler(level_features, boxes)
            if not self.voxel_on:
                init_mesh = mesh_from_sphere(mesh_features)
            pred_meshes = self.mesh_head(mesh_features, init_mesh)
            # only the last refinement stage is reported
            mesh_rcnn_inference(pred_meshes[-1], instances)
        else:
            assert self.voxel_on
            mesh_rcnn_inference(init_mesh, instances)

        return instances

    # Training: the loss is only defined on positive proposals.
    proposals, _ = select_foreground_proposals(instances, self.num_classes)
    boxes = [prop.proposal_boxes for prop in proposals]
    losses = {}

    if self.voxel_on:
        voxel_logits = self.voxel_head(self.voxel_pooler(level_features, boxes))
        loss_voxel, target_voxels = voxel_rcnn_loss(
            voxel_logits, proposals, loss_weight=self.voxel_loss_weight)
        losses["loss_voxel"] = loss_voxel
        if self._vis:
            self._misc["target_voxels"] = target_voxels
        init_mesh = mesh_from_voxels(voxel_logits)

    if self.mesh_on:
        mesh_features = self.mesh_pooler(level_features, boxes)
        if not self.voxel_on:
            init_mesh = mesh_from_sphere(mesh_features)
        pred_meshes = self.mesh_head(mesh_features, init_mesh)

        if pred_meshes[0].isempty():
            # Nothing to supervise: emit zero losses for all three terms.
            loss_chamfer = zero_loss()
            loss_normals = zero_loss()
            loss_edge = zero_loss()
        else:
            loss_weights = {
                "chamfer": self.chamfer_loss_weight,
                "normals": self.normals_loss_weight,
                "edge": self.edge_loss_weight,
            }
            loss_chamfer, loss_normals, loss_edge, target_meshes = mesh_rcnn_loss(
                pred_meshes,
                proposals,
                loss_weights=loss_weights,
                gt_num_samples=self.gt_num_samples,
                pred_num_samples=self.pred_num_samples,
                gt_coord_thresh=self.gt_coord_thresh,
            )
            if self._vis:
                self._misc["init_meshes"] = init_mesh
                self._misc["target_meshes"] = target_meshes

        losses["loss_chamfer"] = loss_chamfer
        losses["loss_normals"] = loss_normals
        losses["loss_edge"] = loss_edge

    return losses