        data_dict['points'] = V[0].cpu().numpy()
        data_dict['normals'] = V_normal[0].cpu().numpy()
        data_dict['colors'] = np.ones_like(data_dict['points'],
                                           dtype=np.float32)
        data_dict['camera_mat'] = torch.empty(opt.num_cameras, 4, 4)

        # DVR data: no projection step; assumes an SfM camera is used
        cameras_dict = {}
        pcl_dict = {}
        pcl_dict['points'] = data_dict['points']
        pcl_dict['normals'] = data_dict['normals']
        pcl_dict['colors'] = data_dict['colors']

        idx = 0
        for c_idx, cams in tqdm(camera_sampler):
            meshes_batch = meshes.extend(batch_size)
            cams = cams.to(device)

            # create tri-color lights and a specular+diffuse shader
            if opt.tri_color_light:
                lights = get_tri_color_lights_for_view(
                    cams,
                    point_lights=opt.point_lights,
                    has_specular=opt.has_specular)
            else:
                lights = get_light_for_view(cams,
                                            point_lights=opt.point_lights,
                                            has_specular=opt.has_specular)

            assert type(lights) is type(template_lights)
            lights = lights.to(device=device)
def evaluate_for_pix3d(
    predictions,
    dataset,
    metadata,
    filter_iou,
    mesh_models=None,
    iou_thresh=0.5,
    mask_thresh=0.5,
    device=None,
    vis_preds=False,
):
    from PIL import Image

    if device is None:
        device = torch.device("cpu")

    F1_TARGET = "[email protected]"

    # dataset category ids, and the mapping from contiguous training ids
    # back to dataset category ids
    cat_ids = sorted(dataset.getCatIds())
    reverse_id_mapping = {
        v: k
        for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    # initialize tensors to record box & mask AP, number of gt positives
    box_apscores, box_aplabels = {}, {}
    mask_apscores, mask_aplabels = {}, {}
    mesh_apscores, mesh_aplabels = {}, {}
    npos = {}
    for cat_id in cat_ids:
        box_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        box_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        mask_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        mask_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        mesh_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        mesh_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        npos[cat_id] = 0.0
    box_covered = []
    mask_covered = []
    mesh_covered = []

    # number of gt positive instances per class
    for gt_ann in dataset.dataset["annotations"]:
        gt_label = gt_ann["category_id"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore
        image_file_name = dataset.loadImgs([gt_ann["image_id"]
                                            ])[0]["file_name"]
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue
        npos[gt_label] += 1.0

    for prediction in predictions:

        original_id = prediction["image_id"]
        image_width = dataset.loadImgs([original_id])[0]["width"]
        image_height = dataset.loadImgs([original_id])[0]["height"]
        image_size = [image_height, image_width]
        image_file_name = dataset.loadImgs([original_id])[0]["file_name"]
        # examples with imgfiles = {img/table/1749.jpg, img/table/0045.png}
        # have a mismatch between images and masks. Thus, ignore
        if image_file_name in ["img/table/1749.jpg", "img/table/0045.png"]:
            continue

        if "instances" not in prediction:
            continue

        num_img_preds = len(prediction["instances"])
        if num_img_preds == 0:
            continue

        # predictions
        scores = prediction["instances"].scores
        boxes = prediction["instances"].pred_boxes.to(device)
        labels = prediction["instances"].pred_classes
        masks_rles = prediction["instances"].pred_masks_rle
        if hasattr(prediction["instances"], "pred_meshes"):
            meshes = prediction["instances"].pred_meshes  # predicted meshes
            verts = [mesh[0] for mesh in meshes]
            faces = [mesh[1] for mesh in meshes]
            meshes = Meshes(verts=verts, faces=faces).to(device)
        else:
            meshes = ico_sphere(4, device)
            meshes = meshes.extend(num_img_preds).to(device)
        if hasattr(prediction["instances"], "pred_dz"):
            pred_dz = prediction["instances"].pred_dz
            heights = boxes.tensor[:, 3] - boxes.tensor[:, 1]
            # NOTE see appendix for derivation of pred dz
            pred_dz = pred_dz[:, 0] * heights.cpu()
        else:
            raise ValueError("Z range of box not predicted")
        assert prediction["instances"].image_size[0] == image_height
        assert prediction["instances"].image_size[1] == image_width

        # ground truth
        # annotations corresponding to original_id (aka coco image_id)
        gt_ann_ids = dataset.getAnnIds(imgIds=[original_id])
        # note that pix3d has one annotation per image
        assert len(gt_ann_ids) == 1
        gt_anns = dataset.loadAnns(gt_ann_ids)[0]
        assert gt_anns["image_id"] == original_id

        # get original ground truth mask, box, label & mesh
        maskfile = os.path.join(metadata.image_root, gt_anns["segmentation"])
        with PathManager.open(maskfile, "rb") as f:
            gt_mask = torch.tensor(
                np.asarray(Image.open(f), dtype=np.float32) / 255.0)
        assert gt_mask.shape[0] == image_height
        assert gt_mask.shape[1] == image_width

        gt_mask = (gt_mask > 0).to(dtype=torch.uint8)  # binarize mask
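        # pycocotools' RLE encoder expects a Fortran-ordered (H, W, 1) uint8 array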
        gt_mask_rle = [
            mask_util.encode(np.array(gt_mask[:, :, None], order="F"))[0]
        ]
        gt_box = np.array(gt_anns["bbox"]).reshape(-1, 4)  # xywh from coco
        gt_box = BoxMode.convert(gt_box, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        gt_label = gt_anns["category_id"]
        faux_gt_targets = Boxes(
            torch.tensor(gt_box, dtype=torch.float32, device=device))

        # load gt mesh and extrinsics/intrinsics
        gt_R = torch.tensor(gt_anns["rot_mat"]).to(device)
        gt_t = torch.tensor(gt_anns["trans_mat"]).to(device)
        gt_K = torch.tensor(gt_anns["K"]).to(device)
        if mesh_models is not None:
            modeltype = gt_anns["model"]
            gt_verts, gt_faces = (
                mesh_models[modeltype][0].clone(),
                mesh_models[modeltype][1].clone(),
            )
            gt_verts = gt_verts.to(device)
            gt_faces = gt_faces.to(device)
        else:
            # load from disc
            raise NotImplementedError
        gt_verts = shape_utils.transform_verts(gt_verts, gt_R, gt_t)
        gt_zrange = torch.stack([gt_verts[:, 2].min(), gt_verts[:, 2].max()])
        gt_mesh = Meshes(verts=[gt_verts], faces=[gt_faces])

        # box iou
        boxiou = pairwise_iou(boxes, faux_gt_targets)

        # filter predictions with iou > filter_iou
        valid_pred_ids = boxiou > filter_iou

        # mask iou
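        # (the trailing [0] is pycocotools' iscrowd flag for the single gt)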
        miou = mask_util.iou(masks_rles, gt_mask_rle, [0])

        # # gt zrange (zrange stores min_z and max_z)
        # # zranges = torch.stack([gt_zrange] * len(meshes), dim=0)

        # predicted zrange (= pred_dz)
        assert hasattr(prediction["instances"], "pred_dz")
        # It's impossible to predict the center location in Z (=tc)
        # from the image. See appendix for more.
        tc = (gt_zrange[1] + gt_zrange[0]) / 2.0
        # Given a center location (tc) and a focal_length,
        # pred_dz = pred_dz * box_h * tc / focal_length
        # See appendix for more.
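        # Concretely, the z-range below spans tc +/- pred_dz * tc / (2 * f),
        # where gt_K[0] is assumed to hold the focal length f.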
        zranges = torch.stack(
            [
                torch.stack([
                    tc - tc * pred_dz[i] / 2.0 / gt_K[0],
                    tc + tc * pred_dz[i] / 2.0 / gt_K[0]
                ]) for i in range(len(meshes))
            ],
            dim=0,
        )

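        # lift the predicted meshes from normalized box space into the gt
        # camera coordinate frame, using the boxes, z-ranges and intrinsics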
        gt_Ks = gt_K.view(1, 3).expand(len(meshes), 3)
        meshes = transform_meshes_to_camera_coord_system(
            meshes, boxes.tensor, zranges, gt_Ks, image_size)

        if vis_preds:
            vis_utils.visualize_predictions(
                original_id,
                image_file_name,
                scores,
                labels,
                boxes.tensor,
                masks_rles,
                meshes,
                metadata,
                "/tmp/output",
            )

        shape_metrics = compare_meshes(meshes, gt_mesh, reduce=False)

        # sort predictions in descending order
        scores_sorted, idx_sorted = torch.sort(scores, descending=True)

        for pred_id in range(num_img_preds):
            # remember we only evaluate preds whose box overlaps the ground
            # truth with IoU greater than filter_iou
            if valid_pred_ids[idx_sorted[pred_id], 0] == 0:
                continue
            # map to dataset category id
            pred_label = reverse_id_mapping[labels[idx_sorted[pred_id]].item()]
            pred_miou = miou[idx_sorted[pred_id]].item()
            pred_biou = boxiou[idx_sorted[pred_id]].item()
            pred_score = scores[idx_sorted[pred_id]].view(1).to(device)
            # note that metrics returns f1 in % (=x100)
            pred_f1 = shape_metrics[F1_TARGET][
                idx_sorted[pred_id]].item() / 100.0

            # mask
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_miou > iou_thresh)
                    and (original_id not in mask_covered)):
                tpfp[0] = 1
                mask_covered.append(original_id)
            mask_apscores[pred_label].append(pred_score)
            mask_aplabels[pred_label].append(tpfp)

            # box
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_biou > iou_thresh)
                    and (original_id not in box_covered)):
                tpfp[0] = 1
                box_covered.append(original_id)
            box_apscores[pred_label].append(pred_score)
            box_aplabels[pred_label].append(tpfp)

            # mesh
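            # (a mesh counts as a true positive when its F1@0.3, scaled to
            # [0, 1] above, exceeds iou_thresh)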
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_f1 > iou_thresh)
                    and (original_id not in mesh_covered)):
                tpfp[0] = 1
                mesh_covered.append(original_id)
            mesh_apscores[pred_label].append(pred_score)
            mesh_aplabels[pred_label].append(tpfp)

    # check things for eval
    # assert npos.sum() == len(dataset.dataset["annotations"])
    # convert to tensors
    pix3d_metrics = {}
    boxap, maskap, meshap = 0.0, 0.0, 0.0
    valid = 0.0
    for cat_id in cat_ids:
        cat_name = dataset.loadCats([cat_id])[0]["name"]
        if npos[cat_id] == 0:
            continue
        valid += 1

        cat_box_ap = VOCap.compute_ap(torch.cat(box_apscores[cat_id]),
                                      torch.cat(box_aplabels[cat_id]),
                                      npos[cat_id])
        boxap += cat_box_ap
        pix3d_metrics["box_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_box_ap

        cat_mask_ap = VOCap.compute_ap(torch.cat(mask_apscores[cat_id]),
                                       torch.cat(mask_aplabels[cat_id]),
                                       npos[cat_id])
        maskap += cat_mask_ap
        pix3d_metrics["mask_ap@%.1f - %s" %
                      (iou_thresh, cat_name)] = cat_mask_ap

        cat_mesh_ap = VOCap.compute_ap(torch.cat(mesh_apscores[cat_id]),
                                       torch.cat(mesh_aplabels[cat_id]),
                                       npos[cat_id])
        meshap += cat_mesh_ap
        pix3d_metrics["mesh_ap@%.1f - %s" %
                      (iou_thresh, cat_name)] = cat_mesh_ap

    pix3d_metrics["box_ap@%.1f" % iou_thresh] = boxap / valid
    pix3d_metrics["mask_ap@%.1f" % iou_thresh] = maskap / valid
    pix3d_metrics["mesh_ap@%.1f" % iou_thresh] = meshap / valid

    # print the test set class histogram together with the metrics
    vis_utils.print_instances_class_histogram(
        [npos[cat_id] for cat_id in cat_ids],  # number of instances
        [dataset.loadCats([cat_id])[0]["name"]
         for cat_id in cat_ids],  # class names
        pix3d_metrics,
    )

    return pix3d_metrics
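
A minimal sketch of the `Meshes.extend` batching used above (e.g. in
`ico_sphere(4, device).extend(num_img_preds)`), assuming a standard
pytorch3d install; the names here are illustrative:

import torch
from pytorch3d.utils import ico_sphere

# extend(N) repeats every mesh in the batch N times, so a batch of one
# icosphere becomes a batch of four identical icospheres
sphere = ico_sphere(level=2)
batch = sphere.extend(4)
assert len(batch) == 4
assert torch.allclose(batch.verts_padded()[0], sphere.verts_padded()[0])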
Example #3
    def test_extend(self):
        B = 5
        mesh = TestMeshes.init_mesh(B, 30, 50)
        V = mesh._V
        num_faces = mesh.num_faces_per_mesh()
        num_verts = mesh.num_verts_per_mesh()
        faces_uvs_list = [torch.randint(size=(f, 3), low=0, high=V) for f in num_faces]
        verts_uvs_list = [torch.rand(v, 2) for v in num_verts]
        tex_uv = TexturesUV(
            maps=torch.ones((B, 16, 16, 3)),
            faces_uvs=faces_uvs_list,
            verts_uvs=verts_uvs_list,
        )
        tex_mesh = Meshes(
            verts=mesh.verts_list(), faces=mesh.faces_list(), textures=tex_uv
        )
        N = 2
        new_mesh = tex_mesh.extend(N)

        self.assertEqual(len(tex_mesh) * N, len(new_mesh))

        tex_init = tex_mesh.textures
        new_tex = new_mesh.textures

        new_tex_num_verts = new_mesh.num_verts_per_mesh()
        for i in range(len(tex_mesh)):
            for n in range(N):
                tex_nv = new_tex_num_verts[i * N + n]
                self.assertClose(
                    # The original textures were initialized using
                    # verts uvs list
                    tex_init.verts_uvs_list()[i],
                    # In the new textures, the verts_uvs are initialized
                    # from padded. The verts per mesh are not used to
                    # convert from padded to list. See TexturesUV for an
                    # explanation.
                    new_tex.verts_uvs_list()[i * N + n][:tex_nv, ...],
                )
                self.assertClose(
                    tex_init.faces_uvs_list()[i], new_tex.faces_uvs_list()[i * N + n]
                )
                self.assertClose(
                    tex_init.maps_padded()[i, ...], new_tex.maps_padded()[i * N + n]
                )
                self.assertClose(
                    tex_init._num_faces_per_mesh[i],
                    new_tex._num_faces_per_mesh[i * N + n],
                )

        self.assertAllSeparate(
            [
                tex_init.faces_uvs_padded(),
                new_tex.faces_uvs_padded(),
                tex_init.verts_uvs_padded(),
                new_tex.verts_uvs_padded(),
                tex_init.maps_padded(),
                new_tex.maps_padded(),
            ]
        )

        with self.assertRaises(ValueError):
            tex_mesh.extend(N=-1)
Example #4
    def test_extend(self):
        B = 10
        mesh = TestMeshes.init_mesh(B, 30, 50)
        V = mesh._V
        F = mesh._F

        # 1. Texture uvs
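        # (Textures is pytorch3d's legacy texture class; recent versions use
        # TexturesUV / TexturesVertex instead, as in the previous example)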
        tex_uv = Textures(
            maps=torch.randn((B, 16, 16, 3)),
            faces_uvs=torch.randint(size=(B, F, 3), low=0, high=V),
            verts_uvs=torch.randn((B, V, 2)),
        )
        tex_mesh = Meshes(verts=mesh.verts_padded(),
                          faces=mesh.faces_padded(),
                          textures=tex_uv)
        N = 20
        new_mesh = tex_mesh.extend(N)

        self.assertEqual(len(tex_mesh) * N, len(new_mesh))

        tex_init = tex_mesh.textures
        new_tex = new_mesh.textures

        for i in range(len(tex_mesh)):
            for n in range(N):
                self.assertClose(tex_init.faces_uvs_list()[i],
                                 new_tex.faces_uvs_list()[i * N + n])
                self.assertClose(tex_init.verts_uvs_list()[i],
                                 new_tex.verts_uvs_list()[i * N + n])
        self.assertAllSeparate([
            tex_init.faces_uvs_padded(),
            new_tex.faces_uvs_padded(),
            tex_init.verts_uvs_padded(),
            new_tex.verts_uvs_padded(),
            tex_init.maps_padded(),
            new_tex.maps_padded(),
        ])

        self.assertIsNone(new_tex.verts_rgb_list())
        self.assertIsNone(new_tex.verts_rgb_padded())
        self.assertIsNone(new_tex.verts_rgb_packed())

        # 2. Texture vertex RGB
        tex_rgb = Textures(verts_rgb=torch.randn((B, V, 3)))
        tex_mesh_rgb = Meshes(verts=mesh.verts_padded(),
                              faces=mesh.faces_padded(),
                              textures=tex_rgb)
        N = 20
        new_mesh_rgb = tex_mesh_rgb.extend(N)

        self.assertEqual(len(tex_mesh_rgb) * N, len(new_mesh_rgb))

        tex_init = tex_mesh_rgb.textures
        new_tex = new_mesh_rgb.textures

        for i in range(len(tex_mesh_rgb)):
            for n in range(N):
                self.assertClose(tex_init.verts_rgb_list()[i],
                                 new_tex.verts_rgb_list()[i * N + n])
        self.assertAllSeparate(
            [tex_init.verts_rgb_padded(),
             new_tex.verts_rgb_padded()])

        self.assertIsNone(new_tex.verts_uvs_padded())
        self.assertIsNone(new_tex.verts_uvs_list())
        self.assertIsNone(new_tex.verts_uvs_packed())
        self.assertIsNone(new_tex.faces_uvs_padded())
        self.assertIsNone(new_tex.faces_uvs_list())
        self.assertIsNone(new_tex.faces_uvs_packed())

        # 3. Error
        with self.assertRaises(ValueError):
            tex_mesh.extend(N=-1)
Example #5
    def _forward_shape(self, features, instances):
        """
        Forward logic for the voxel and mesh refinement branch.

        Args:
            features (list[Tensor]): #level input features for voxel prediction
            instances (list[Instances]): the per-image instances to train/predict meshes.
                In training, they can be the proposals.
                In inference, they can be the predicted boxes.
        Returns:
            In training, a dict of losses.
            In inference, update `instances` with new fields "pred_voxels" & "pred_meshes" and return it.
        """
        if not self.voxel_on and not self.mesh_on:
            return {} if self.training else instances

        features = [features[f] for f in self.in_features]
        if self.training:
            # The loss is only defined on positive proposals.
            proposals, _ = select_foreground_proposals(instances,
                                                       self.num_classes)
            proposal_boxes = [x.proposal_boxes for x in proposals]

            losses = {}
            if self.voxel_on:
                voxel_features = self.voxel_pooler(features, proposal_boxes)
                voxel_logits = self.voxel_head(voxel_features)
                loss_voxel, target_voxels = voxel_rcnn_loss(
                    voxel_logits,
                    proposals,
                    loss_weight=self.voxel_loss_weight)
                losses.update({"loss_voxel": loss_voxel})
                if self._vis:
                    self._misc["target_voxels"] = target_voxels
                if self.cls_agnostic_voxel:
                    with torch.no_grad():
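                        # cubify converts the thresholded voxel occupancy
                        # grid into the initial mesh for the refinement head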
                        vox_in = voxel_logits.sigmoid().squeeze(1)  # (N, V, V, V)
                        init_mesh = cubify(vox_in, self.cubify_thresh)  # 1
                else:
                    raise ValueError(
                        "No support for class specific predictions")

            if self.mesh_on:
                mesh_features = self.mesh_pooler(features, proposal_boxes)
                if not self.voxel_on:
                    if mesh_features.shape[0] > 0:
                        init_mesh = ico_sphere(self.ico_sphere_level,
                                               mesh_features.device)
                        init_mesh = init_mesh.extend(mesh_features.shape[0])
                    else:
                        init_mesh = Meshes(verts=[], faces=[])
                pred_meshes = self.mesh_head(mesh_features, init_mesh)

                # loss weights
                loss_weights = {
                    "chamfer": self.chamfer_loss_weight,
                    "normals": self.normals_loss_weight,
                    "edge": self.edge_loss_weight,
                }

                if not pred_meshes[0].isempty():
                    loss_chamfer, loss_normals, loss_edge, target_meshes = mesh_rcnn_loss(
                        pred_meshes,
                        proposals,
                        loss_weights=loss_weights,
                        gt_num_samples=self.gt_num_samples,
                        pred_num_samples=self.pred_num_samples,
                        gt_coord_thresh=self.gt_coord_thresh,
                    )
                    if self._vis:
                        self._misc["init_meshes"] = init_mesh
                        self._misc["target_meshes"] = target_meshes
                else:
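                    # no non-empty predicted mesh: emit zero-valued losses
                    # that still touch every mesh-head parameter, so
                    # gradients remain defined for all of them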
                    loss_chamfer = sum(
                        k.sum() for k in self.mesh_head.parameters()) * 0.0
                    loss_normals = sum(
                        k.sum() for k in self.mesh_head.parameters()) * 0.0
                    loss_edge = sum(k.sum()
                                    for k in self.mesh_head.parameters()) * 0.0

                losses.update({
                    "loss_chamfer": loss_chamfer,
                    "loss_normals": loss_normals,
                    "loss_edge": loss_edge,
                })

            return losses
        else:
            pred_boxes = [x.pred_boxes for x in instances]

            if self.voxel_on:
                voxel_features = self.voxel_pooler(features, pred_boxes)
                voxel_logits = self.voxel_head(voxel_features)
                voxel_rcnn_inference(voxel_logits, instances)
                if self.cls_agnostic_voxel:
                    with torch.no_grad():
                        vox_in = voxel_logits.sigmoid().squeeze(1)  # (N, V, V, V)
                        init_mesh = cubify(vox_in, self.cubify_thresh)  # 1
                else:
                    raise ValueError(
                        "No support for class specific predictions")

            if self.mesh_on:
                mesh_features = self.mesh_pooler(features, pred_boxes)
                if not self.voxel_on:
                    if mesh_features.shape[0] > 0:
                        init_mesh = ico_sphere(self.ico_sphere_level,
                                               mesh_features.device)
                        init_mesh = init_mesh.extend(mesh_features.shape[0])
                    else:
                        init_mesh = Meshes(verts=[], faces=[])
                pred_meshes = self.mesh_head(mesh_features, init_mesh)
                mesh_rcnn_inference(pred_meshes[-1], instances)
            else:
                assert self.voxel_on
                mesh_rcnn_inference(init_mesh, instances)

            return instances