Example #1
    def get_predictions(self, detector, gt_detections=False):

        predictions = defaultdict(list)

        for data in tqdm(self.dataloader):
            # Move images to the GPU, reorder NHWC -> NCHW and scale to [0, 1].
            images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255

            if gt_detections:
                preds = data['gt_detections']
            else:
                preds = detector.get_detections(
                    images=images,
                    one_instance_per_class=False,
                )

            im_infos = data['im_infos']
            # Attach scene/view ids to each detection via its image index in the batch.
            for k in ('scene_id', 'view_id'):
                preds.infos[k] = preds.infos['batch_im_id'].apply(
                    lambda idx: im_infos[idx][k])

            predictions['detections'].append(preds)

        for k, v in predictions.items():
            predictions[k] = tc.concatenate(predictions[k])
        return predictions
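
This runner, like the other examples below, accumulates one PandasTensorCollection per batch in a defaultdict(list) and merges each list with tc.concatenate at the end. A minimal, self-contained sketch of that merge, assuming tc is the same tensor-collection helper module these snippets import (the import path and toy field names below are illustrative assumptions):

import pandas as pd
import torch
from cosypose.utils import tensor_collection as tc  # assumed import path

def make_batch(scene_id, n):
    infos = pd.DataFrame(dict(scene_id=[scene_id] * n, view_id=list(range(n))))
    poses = torch.eye(4).unsqueeze(0).repeat(n, 1, 1)  # dummy 4x4 poses
    return tc.PandasTensorCollection(infos=infos, poses=poses)

batches = [make_batch(scene_id=0, n=2), make_batch(scene_id=1, n=3)]
merged = tc.concatenate(batches)
print(merged.poses.shape)  # expected: torch.Size([5, 4, 4]); infos has 5 rows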
Example #2
 def collate_fn(self, batch):
     # Keep only the observation from each dataset item, parse it, and merge
     # the whole batch into a single tensor collection.
     obj_data = []
     for data_n in batch:
         _, _, obs = data_n
         obj_data_ = parse_obs_data(obs)
         obj_data.append(obj_data_)
     obj_data = tc.concatenate(obj_data)
     return obj_data
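
This collate_fn replaces the default batching so that a DataLoader yields one merged tensor collection per batch instead of stacked tensors. A sketch of how it would typically be wired up, assuming dataset items are 3-tuples whose last element is the observation (as unpacked above); scene_ds, the batch size and the runner object owning collate_fn are all illustrative names:

from torch.utils.data import DataLoader

# scene_ds is a hypothetical dataset whose items unpack as (_, _, obs);
# runner is the hypothetical object that defines collate_fn above.
dataloader = DataLoader(
    scene_ds,
    batch_size=8,
    num_workers=4,
    collate_fn=runner.collate_fn,  # merge parsed observations into one collection
)
for obj_data in dataloader:
    ...  # obj_data is a single concatenated tensor collection per batch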
Example #3
    def batched_model_predictions(self,
                                  model,
                                  images,
                                  K,
                                  obj_data,
                                  n_iterations=1):
        timer = Timer()
        timer.start()

        # Iterate over the detections in fixed-size chunks: the DataLoader only
        # yields indices, which are then used to slice obj_data, images and K.
        ids = torch.arange(len(obj_data))

        ds = TensorDataset(ids)
        dl = DataLoader(ds, batch_size=self.bsz_objects)

        preds = defaultdict(list)
        for (batch_ids, ) in dl:
            timer.resume()
            obj_inputs = obj_data[batch_ids.numpy()]
            labels = obj_inputs.infos['label'].values
            im_ids = obj_inputs.infos['batch_im_id'].values
            images_ = images[im_ids]
            K_ = K[im_ids]
            TCO_input = obj_inputs.poses
            outputs = model(images=images_,
                            K=K_,
                            TCO=TCO_input,
                            n_iterations=n_iterations,
                            labels=labels)
            timer.pause()
            # Store the predictions of each iteration under its own key.
            for n in range(1, n_iterations + 1):
                iter_outputs = outputs[f'iteration={n}']

                infos = obj_inputs.infos
                batch_preds = tc.PandasTensorCollection(
                    infos,
                    poses=iter_outputs['TCO_output'],
                    poses_input=iter_outputs['TCO_input'],
                    K_crop=iter_outputs['K_crop'],
                    boxes_rend=iter_outputs['boxes_rend'],
                    boxes_crop=iter_outputs['boxes_crop'])
                preds[f'iteration={n}'].append(batch_preds)

        logger.debug(
            f'Pose prediction on {len(obj_data)} detections (n_iterations={n_iterations}): {timer.stop()}'
        )
        preds = dict(preds)
        for k, v in preds.items():
            preds[k] = tc.concatenate(v)
        return preds
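
The index-only TensorDataset/DataLoader pair above is just a way to walk an already-loaded collection in fixed-size chunks: the loader yields batches of indices, and those indices slice obj_data, the images and the intrinsics. A standalone illustration of the idiom, with a plain tensor standing in for the collection:

import torch
from torch.utils.data import TensorDataset, DataLoader

data = torch.randn(10, 3)          # stands in for obj_data
ids = torch.arange(len(data))
dl = DataLoader(TensorDataset(ids), batch_size=4)

for (batch_ids, ) in dl:
    chunk = data[batch_ids]        # slice the full collection with the index batch
    print(batch_ids.tolist(), chunk.shape)
# -> [0, 1, 2, 3] torch.Size([4, 3]), then [4, 5, 6, 7] ..., then [8, 9] ...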
Example #4
 def reproject_scene(self, objects, cameras):
     TCO_data = []
     for o in range(len(objects)):
         for v in range(len(cameras)):
             obj = objects[[o]]
             cam = cameras[[v]]
             infos = dict(
                 scene_id=cam.infos['scene_id'].values,
                 view_id=cam.infos['view_id'].values,
                 score=obj.infos['score'].values + 1.0,
                 view_group=obj.infos['view_group'].values,
                 label=obj.infos['label'].values,
                 batch_im_id=cam.infos['batch_im_id'].values,
                 obj_id=obj.infos['obj_id'].values,
                 from_ba=[True],
             )
             # Object pose in the camera frame: T_CO = T_CW @ T_WO.
             data_ = tc.PandasTensorCollection(
                 infos=pd.DataFrame(infos),
                 poses=invert_T(cam.TWC) @ obj.TWO,
             )
             TCO_data.append(data_)
     return tc.concatenate(TCO_data)
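
The pose stored for each (object, view) pair is the object pose expressed in the camera frame, T_CO = T_CW @ T_WO: the camera-to-world transform is inverted and composed with the object-to-world transform. A small sketch of that composition, with a hypothetical stand-in for invert_T that inverts batched 4x4 rigid transforms:

import torch

def invert_T(T):
    # Hypothetical helper: invert a batch of rigid transforms [[R, t], [0, 1]].
    R, t = T[..., :3, :3], T[..., :3, 3:]
    T_inv = torch.zeros_like(T)
    T_inv[..., :3, :3] = R.transpose(-2, -1)
    T_inv[..., :3, 3:] = -R.transpose(-2, -1) @ t
    T_inv[..., 3, 3] = 1.0
    return T_inv

TWC = torch.eye(4).unsqueeze(0)  # camera pose in the world frame, shape (1, 4, 4)
TWO = torch.eye(4).unsqueeze(0)  # object pose in the world frame, shape (1, 4, 4)
TCO = invert_T(TWC) @ TWO        # object pose in the camera frame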
Example #5
    def get_predictions(self,
                        pose_predictor,
                        mv_predictor,
                        detections=None,
                        n_coarse_iterations=1,
                        n_refiner_iterations=1,
                        sv_score_th=0.0,
                        skip_mv=True,
                        use_detections_TCO=False):

        assert detections is not None
        if detections is not None:
            mask = (detections.infos['score'] >= sv_score_th)
            detections = detections[np.where(mask)[0]]
            detections.infos['det_id'] = np.arange(len(detections))
            det_index = detections.infos.set_index(['scene_id',
                                                    'view_id']).sort_index()

        predictions = defaultdict(list)
        for data in tqdm(self.dataloader):
            images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255
            cameras = data['cameras'].cuda().float()
            gt_detections = data['gt_detections'].cuda().float()

            scene_id = np.unique(gt_detections.infos['scene_id'])
            view_ids = np.unique(gt_detections.infos['view_id'])
            group_id = np.unique(gt_detections.infos['group_id'])
            n_gt_dets = len(gt_detections)
            logger.debug(f"{'-'*80}")
            logger.debug(f'Scene: {scene_id}')
            logger.debug(f'Views: {view_ids}')
            logger.debug(f'Group: {group_id}')
            logger.debug(f'Image has {n_gt_dets} gt detections. (not used)')

            if detections is not None:
                keep_ids, batch_im_ids = [], []
                for group_name, group in cameras.infos.groupby(
                    ['scene_id', 'view_id']):
                    if group_name in det_index.index:
                        other_group = det_index.loc[group_name]
                        keep_ids_ = other_group['det_id']
                        batch_im_id = np.unique(group['batch_im_id']).item()
                        batch_im_ids.append(
                            np.ones(len(keep_ids_)) * batch_im_id)
                        keep_ids.append(keep_ids_)
                if len(keep_ids) > 0:
                    keep_ids = np.concatenate(keep_ids)
                    batch_im_ids = np.concatenate(batch_im_ids)
                detections_ = detections[keep_ids]
                detections_.infos['batch_im_id'] = np.array(
                    batch_im_ids).astype(int)
            else:
                raise ValueError('No detections')
            detections_ = detections_.cuda().float()
            detections_.infos['group_id'] = group_id.item()

            sv_preds, mv_preds = dict(), dict()
            if len(detections_) > 0:
                data_TCO_init = detections_ if use_detections_TCO else None
                detections__ = detections_ if not use_detections_TCO else None
                candidates, sv_preds = pose_predictor.get_predictions(
                    images,
                    cameras.K,
                    detections=detections__,
                    n_coarse_iterations=n_coarse_iterations,
                    data_TCO_init=data_TCO_init,
                    n_refiner_iterations=n_refiner_iterations,
                )
                candidates.register_tensor('initial_bboxes',
                                           detections_.bboxes)

                if not skip_mv:
                    mv_preds = mv_predictor.predict_scene_state(
                        candidates,
                        cameras,
                    )
            logger.debug(f"{'-'*80}")

            for k, v in sv_preds.items():
                predictions[k].append(v.cpu())

            for k, v in mv_preds.items():
                predictions[k].append(v.cpu())

        predictions = dict(predictions)
        for k, v in predictions.items():
            predictions[k] = tc.concatenate(v)
        return predictions
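
Indexing the detections once by (scene_id, view_id) lets the loop fetch, for each camera group, exactly the detection rows belonging to that image with a single .loc lookup. A standalone pandas illustration of that pattern:

import pandas as pd

dets = pd.DataFrame(dict(scene_id=[1, 1, 2], view_id=[10, 10, 20], det_id=[0, 1, 2]))
det_index = dets.set_index(['scene_id', 'view_id']).sort_index()

group_name = (1, 10)                # one (scene_id, view_id) pair from the cameras
if group_name in det_index.index:
    rows = det_index.loc[group_name]
    print(rows['det_id'].values)    # -> [0 1]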
Example #6
    def get_predictions(self,
                        detector,
                        pose_predictor,
                        icp_refiner=None,
                        mv_predictor=None,
                        n_coarse_iterations=1,
                        n_refiner_iterations=1,
                        detection_th=0.0):

        predictions = defaultdict(list)
        use_icp = icp_refiner is not None
        for n, data in enumerate(tqdm(self.dataloader)):
            images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255
            cameras = data['cameras'].cuda().float()
            im_infos = data['im_infos']
            depth = None
            if self.load_depth:
                depth = data['depth'].cuda().float()
            logger.debug(f"{'-'*80}")
            logger.debug(f"Predictions on {data['im_infos']}")

            def get_preds():
                torch.cuda.synchronize()
                start = time.time()
                this_batch_detections = detector.get_detections(
                    images=images, one_instance_per_class=False, detection_th=detection_th,
                    output_masks=use_icp, mask_th=0.9
                )
                for key in ('scene_id', 'view_id', 'group_id'):
                    this_batch_detections.infos[key] = this_batch_detections.infos['batch_im_id'].apply(lambda idx: im_infos[idx][key])

                all_preds = dict()
                if len(this_batch_detections) > 0:
                    final_preds, all_preds = pose_predictor.get_predictions(
                        images, cameras.K, detections=this_batch_detections,
                        n_coarse_iterations=n_coarse_iterations,
                        n_refiner_iterations=n_refiner_iterations,
                    )

                    if len(images) > 1:
                        mv_preds = mv_predictor.predict_scene_state(
                            final_preds, cameras,
                        )
                        all_preds['multiview'] = mv_preds['ba_output+all_cand']
                        final_preds = all_preds['multiview']

                    if use_icp:
                        all_preds['icp'] = icp_refiner.refine_poses(final_preds, this_batch_detections.masks, depth, cameras)

                torch.cuda.synchronize()
                duration = time.time() - start
                n_dets = len(this_batch_detections)

                logger.debug(f'Full predictions: {n_dets} detections + pose estimation in {duration:.3f} s')
                logger.debug(f"{'-'*80}")
                return this_batch_detections, all_preds, duration

            # Run once without measuring timing
            if n == 0:
                get_preds()
            this_batch_detections, all_preds, duration = get_preds()
            duration = duration / len(images)  # Divide by number of views in multi-view

            if use_icp:
                this_batch_detections.delete_tensor('masks')  # Saves memory when saving

            # NOTE: time isn't correct for n iterations < max number of iterations
            for k, v in all_preds.items():
                v.infos = v.infos.loc[:, ['scene_id', 'view_id', 'label', 'score']]
                v.infos['time'] = duration
                predictions[k].append(v.cpu())
            predictions['detections'].append(this_batch_detections.cpu())

        predictions = dict(predictions)
        for k, v in predictions.items():
            predictions[k] = tc.concatenate(v)
        return predictions
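
Two details of the timing above are worth noting: torch.cuda.synchronize is called before and after the work so the measured duration includes asynchronous GPU kernels, and the extra call at n == 0 acts as a warm-up so one-time CUDA/model initialisation does not inflate the first measurement. A minimal sketch of that idiom, assuming a CUDA device is available and with my_gpu_fn as a placeholder for the real detection + pose-estimation work:

import time
import torch

def my_gpu_fn():
    # Placeholder for the real work timed in get_preds above.
    x = torch.randn(2048, 2048, device='cuda')
    return x @ x

def timed(fn):
    torch.cuda.synchronize()   # wait for pending GPU work before timing
    start = time.time()
    out = fn()
    torch.cuda.synchronize()   # make sure fn's GPU work has actually finished
    return out, time.time() - start

_ = timed(my_gpu_fn)           # warm-up run: the first timing is discarded
out, duration = timed(my_gpu_fn)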
Example #7
    def predict_scene_state(self,
                            candidates,
                            cameras,
                            score_th=0.3,
                            use_known_camera_poses=False,
                            ransac_n_iter=2000,
                            ransac_dist_threshold=0.02,
                            ba_n_iter=100):

        predictions = dict()
        cand_inputs = candidates

        assert len(np.unique(candidates.infos['scene_id'])) == 1
        scene_id = np.unique(candidates.infos['scene_id']).item()
        group_id = np.unique(candidates.infos['group_id']).item()
        keep = np.where(candidates.infos['score'] >= score_th)[0]
        candidates = candidates[keep]

        predictions['cand_inputs'] = candidates

        logger.debug(f'Num candidates: {len(candidates)}')
        logger.debug(f'Num views: {len(cameras)}')

        matching_outputs = multiview_candidate_matching(
            candidates=candidates,
            mesh_db=self.mesh_db_ransac,
            n_ransac_iter=ransac_n_iter,
            dist_threshold=ransac_dist_threshold,
            cameras=cameras if use_known_camera_poses else None)

        pairs_TC1C2 = matching_outputs['pairs_TC1C2']
        candidates = matching_outputs['filtered_candidates']

        logger.debug(f'Matched candidates: {len(candidates)}')
        for k, v in matching_outputs.items():
            if 'time' in k:
                logger.debug(f'RANSAC {k}: {v}')

        predictions['cand_matched'] = candidates

        # Assign each candidate to a view group derived from the matched camera pairs.
        group_infos = make_view_groups(pairs_TC1C2)
        candidates = candidates.merge_df(group_infos, on='view_id').cuda()

        pred_objects, pred_cameras, pred_reproj = [], [], []
        pred_reproj_init = []
        for (view_group, candidate_ids
             ) in candidates.infos.groupby('view_group').groups.items():
            candidates_n = candidates[candidate_ids]
            problem = MultiviewRefinement(candidates=candidates_n,
                                          cameras=cameras,
                                          pairs_TC1C2=pairs_TC1C2,
                                          mesh_db=self.mesh_db_ba)
            ba_outputs = problem.solve(
                n_iterations=ba_n_iter,
                optimize_cameras=not use_known_camera_poses,
            )
            pred_objects_, pred_cameras_ = ba_outputs['objects'], ba_outputs[
                'cameras']
            for x in (pred_objects_, pred_cameras_):
                x.infos['view_group'] = view_group
                x.infos['group_id'] = group_id
                x.infos['scene_id'] = scene_id
            pred_reproj.append(
                self.reproject_scene(pred_objects_, pred_cameras_))
            pred_objects.append(pred_objects_)
            pred_cameras.append(pred_cameras_)

            pred_objects_init, pred_cameras_init = ba_outputs[
                'objects_init'], ba_outputs['cameras_init']
            for x in (pred_objects_init, pred_cameras_init):
                x.infos['view_group'] = view_group
                x.infos['group_id'] = group_id
                x.infos['scene_id'] = scene_id
            pred_reproj_init.append(
                self.reproject_scene(pred_objects_init, pred_cameras_init))

            for k, v in ba_outputs.items():
                if 'time' in k:
                    logger.debug(f'BA {k}: {v}')

        predictions['scene/objects'] = tc.concatenate(pred_objects)
        predictions['scene/cameras'] = tc.concatenate(pred_cameras)

        predictions['ba_output'] = tc.concatenate(pred_reproj)
        predictions['ba_input'] = tc.concatenate(pred_reproj_init)

        cand_inputs = tc.PandasTensorCollection(
            infos=cand_inputs.infos,
            poses=cand_inputs.poses,
        )
        predictions['ba_output+all_cand'] = tc.concatenate(
            [predictions['ba_output'], cand_inputs], )
        return predictions
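
The bundle-adjustment loop above iterates over pandas' groupby(...).groups, which maps each view_group value to the index of its rows; those row indices are then used to slice the candidates collection. A tiny standalone illustration of that grouping:

import pandas as pd

infos = pd.DataFrame(dict(view_group=[0, 0, 1], label=['obj_a', 'obj_b', 'obj_a']))
for view_group, row_ids in infos.groupby('view_group').groups.items():
    print(view_group, list(row_ids))
# -> 0 [0, 1]
#    1 [2]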