def get_predictions(self, detector, gt_detections=False):
    predictions = defaultdict(list)
    for data in tqdm(self.dataloader):
        # NCHW float images in [0, 1].
        images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255
        if gt_detections:
            preds = data['gt_detections']
        else:
            preds = detector.get_detections(
                images=images,
                one_instance_per_class=False,
            )
        # Map per-batch image indices back to dataset-level identifiers.
        im_infos = data['im_infos']
        for k in ('scene_id', 'view_id'):
            preds.infos[k] = preds.infos['batch_im_id'].apply(
                lambda idx: im_infos[idx][k])
        predictions['detections'].append(preds)
    for k, v in predictions.items():
        predictions[k] = tc.concatenate(v)
    return predictions
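# Usage sketch (hypothetical names; assumes `runner` is an instance of this
# class and `detector` exposes the `get_detections` interface used above):
#
#     preds = runner.get_predictions(detector)
#     dets = preds['detections']
#     dets.infos.head()  # includes the 'scene_id' / 'view_id' columns added above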
def collate_fn(self, batch):
    obj_data = []
    for _, _, obs in batch:
        obj_data.append(parse_obs_data(obs))
    return tc.concatenate(obj_data)
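# Sketch of how this collate function is typically wired into the runner's
# dataloader (hypothetical; `scene_ds` must yield `(_, _, obs)` triples
# compatible with `parse_obs_data`, and the batch size is illustrative):
#
#     dataloader = DataLoader(scene_ds, batch_size=8,
#                             collate_fn=self.collate_fn)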
def batched_model_predictions(self, model, images, K, obj_data, n_iterations=1):
    timer = Timer()
    timer.start()

    # Split the detections into fixed-size batches of objects.
    ids = torch.arange(len(obj_data))
    ds = TensorDataset(ids)
    dl = DataLoader(ds, batch_size=self.bsz_objects)

    preds = defaultdict(list)
    for (batch_ids, ) in dl:
        timer.resume()
        obj_inputs = obj_data[batch_ids.numpy()]
        labels = obj_inputs.infos['label'].values
        im_ids = obj_inputs.infos['batch_im_id'].values
        images_ = images[im_ids]
        K_ = K[im_ids]
        TCO_input = obj_inputs.poses
        outputs = model(images=images_, K=K_, TCO=TCO_input,
                        n_iterations=n_iterations, labels=labels)
        timer.pause()

        # Keep the outputs of every refinement iteration, not only the last one.
        for n in range(1, n_iterations + 1):
            iter_outputs = outputs[f'iteration={n}']
            infos = obj_inputs.infos
            batch_preds = tc.PandasTensorCollection(
                infos,
                poses=iter_outputs['TCO_output'],
                poses_input=iter_outputs['TCO_input'],
                K_crop=iter_outputs['K_crop'],
                boxes_rend=iter_outputs['boxes_rend'],
                boxes_crop=iter_outputs['boxes_crop'])
            preds[f'iteration={n}'].append(batch_preds)

    logger.debug(
        f'Pose prediction on {len(obj_data)} detections '
        f'(n_iterations={n_iterations}): {timer.stop()}')

    preds = dict(preds)
    for k, v in preds.items():
        preds[k] = tc.concatenate(v)
    return preds
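# The returned dict is keyed by refinement iteration (usage sketch; `model`,
# `images`, `K` and `obj_data` as prepared by the surrounding runner):
#
#     preds = self.batched_model_predictions(model, images, K, obj_data,
#                                            n_iterations=2)
#     preds['iteration=1']   # poses after the first forward pass
#     preds['iteration=2']   # poses after the final refinement step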
def reproject_scene(self, objects, cameras):
    TCO_data = []
    # Express every object of the group in every camera of the group.
    for o in range(len(objects)):
        for v in range(len(cameras)):
            obj = objects[[o]]
            cam = cameras[[v]]
            infos = dict(
                scene_id=cam.infos['scene_id'].values,
                view_id=cam.infos['view_id'].values,
                # Boost scores of bundle-adjusted objects by 1.0.
                score=obj.infos['score'].values + 1.0,
                view_group=obj.infos['view_group'].values,
                label=obj.infos['label'].values,
                batch_im_id=cam.infos['batch_im_id'].values,
                obj_id=obj.infos['obj_id'].values,
                from_ba=[True],
            )
            data_ = tc.PandasTensorCollection(
                infos=pd.DataFrame(infos),
                # Object pose in the camera frame: T_CO = T_WC^{-1} @ T_WO.
                poses=invert_T(cam.TWC) @ obj.TWO,
            )
            TCO_data.append(data_)
    return tc.concatenate(TCO_data)
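# Minimal numeric check of the reprojection math (standalone sketch, not part
# of the pipeline; uses plain 4x4 homogeneous transforms with torch):
#
#     import torch
#     T_WC = torch.eye(4); T_WC[:3, 3] = torch.tensor([0., 0., 1.])
#     T_WO = torch.eye(4); T_WO[:3, 3] = torch.tensor([0., 0., 3.])
#     T_CO = torch.linalg.inv(T_WC) @ T_WO   # object 2 m in front of the camera
#     assert torch.allclose(T_CO[:3, 3], torch.tensor([0., 0., 2.]))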
def get_predictions(self, pose_predictor, mv_predictor,
                    detections=None,
                    n_coarse_iterations=1, n_refiner_iterations=1,
                    sv_score_th=0.0, skip_mv=True,
                    use_detections_TCO=False):

    assert detections is not None
    # Keep only detections above the single-view score threshold and index
    # them by (scene_id, view_id) for fast per-view lookup.
    mask = detections.infos['score'] >= sv_score_th
    detections = detections[np.where(mask)[0]]
    detections.infos['det_id'] = np.arange(len(detections))
    det_index = detections.infos.set_index(['scene_id', 'view_id']).sort_index()

    predictions = defaultdict(list)
    for data in tqdm(self.dataloader):
        images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255
        cameras = data['cameras'].cuda().float()
        gt_detections = data['gt_detections'].cuda().float()

        scene_id = np.unique(gt_detections.infos['scene_id'])
        view_ids = np.unique(gt_detections.infos['view_id'])
        group_id = np.unique(gt_detections.infos['group_id'])
        n_gt_dets = len(gt_detections)
        logger.debug(f"{'-'*80}")
        logger.debug(f'Scene: {scene_id}')
        logger.debug(f'Views: {view_ids}')
        logger.debug(f'Group: {group_id}')
        logger.debug(f'Image has {n_gt_dets} gt detections (not used).')

        # Gather the precomputed detections belonging to the views of this group.
        keep_ids, batch_im_ids = [], []
        for group_name, group in cameras.infos.groupby(['scene_id', 'view_id']):
            if group_name in det_index.index:
                other_group = det_index.loc[group_name]
                keep_ids_ = other_group['det_id']
                batch_im_id = np.unique(group['batch_im_id']).item()
                batch_im_ids.append(np.ones(len(keep_ids_)) * batch_im_id)
                keep_ids.append(keep_ids_)
        if len(keep_ids) > 0:
            keep_ids = np.concatenate(keep_ids)
            batch_im_ids = np.concatenate(batch_im_ids)
            detections_ = detections[keep_ids]
            detections_.infos['batch_im_id'] = batch_im_ids.astype(int)
        else:
            raise ValueError('No detections')
        detections_ = detections_.cuda().float()
        detections_.infos['group_id'] = group_id.item()

        sv_preds, mv_preds = dict(), dict()
        if len(detections_) > 0:
            # Either initialize poses directly from the detections
            # (use_detections_TCO) or run the coarse model from 2D detections.
            data_TCO_init = detections_ if use_detections_TCO else None
            detections__ = detections_ if not use_detections_TCO else None
            candidates, sv_preds = pose_predictor.get_predictions(
                images, cameras.K,
                detections=detections__,
                data_TCO_init=data_TCO_init,
                n_coarse_iterations=n_coarse_iterations,
                n_refiner_iterations=n_refiner_iterations,
            )
            candidates.register_tensor('initial_bboxes', detections_.bboxes)
            if not skip_mv:
                mv_preds = mv_predictor.predict_scene_state(candidates, cameras)
        logger.debug(f"{'-'*80}")

        for k, v in sv_preds.items():
            predictions[k].append(v.cpu())
        for k, v in mv_preds.items():
            predictions[k].append(v.cpu())

    predictions = dict(predictions)
    for k, v in predictions.items():
        predictions[k] = tc.concatenate(v)
    return predictions
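# Usage sketch (hypothetical wiring; `runner`, `pose_predictor`,
# `mv_predictor` and the precomputed `detections` are built elsewhere):
#
#     preds = runner.get_predictions(pose_predictor, mv_predictor,
#                                    detections=detections,
#                                    n_refiner_iterations=4,
#                                    skip_mv=False)
#     preds['ba_output+all_cand']   # multi-view output over all candidates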
def get_predictions(self, detector, pose_predictor, icp_refiner=None,
                    mv_predictor=None,
                    n_coarse_iterations=1, n_refiner_iterations=1,
                    detection_th=0.0):

    predictions = defaultdict(list)
    use_icp = icp_refiner is not None
    for n, data in enumerate(tqdm(self.dataloader)):
        images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255
        cameras = data['cameras'].cuda().float()
        im_infos = data['im_infos']
        depth = None
        if self.load_depth:
            depth = data['depth'].cuda().float()
        logger.debug(f"{'-'*80}")
        logger.debug(f"Predictions on {data['im_infos']}")

        def get_preds():
            torch.cuda.synchronize()
            start = time.time()
            this_batch_detections = detector.get_detections(
                images=images,
                one_instance_per_class=False,
                detection_th=detection_th,
                output_masks=use_icp,
                mask_th=0.9,
            )
            # Map per-batch image indices back to dataset-level identifiers.
            for key in ('scene_id', 'view_id', 'group_id'):
                this_batch_detections.infos[key] = (
                    this_batch_detections.infos['batch_im_id'].apply(
                        lambda idx: im_infos[idx][key]))

            all_preds = dict()
            if len(this_batch_detections) > 0:
                final_preds, all_preds = pose_predictor.get_predictions(
                    images, cameras.K,
                    detections=this_batch_detections,
                    n_coarse_iterations=n_coarse_iterations,
                    n_refiner_iterations=n_refiner_iterations,
                )

                if len(images) > 1:
                    mv_preds = mv_predictor.predict_scene_state(final_preds, cameras)
                    all_preds['multiview'] = mv_preds['ba_output+all_cand']
                    final_preds = all_preds['multiview']

                if use_icp:
                    all_preds['icp'] = icp_refiner.refine_poses(
                        final_preds, this_batch_detections.masks, depth, cameras)

            torch.cuda.synchronize()
            duration = time.time() - start
            n_dets = len(this_batch_detections)
            logger.debug(
                f'Full predictions: {n_dets} detections + pose estimation '
                f'in {duration:.3f} s')
            logger.debug(f"{'-'*80}")
            return this_batch_detections, all_preds, duration

        # Warm-up: run the first batch once without recording its timing.
        if n == 0:
            get_preds()
        this_batch_detections, all_preds, duration = get_preds()
        duration = duration / len(images)  # Per-view time in the multi-view setting.

        if use_icp:
            this_batch_detections.delete_tensor('masks')  # Saves memory when saving.

        # NOTE: the reported time is only correct for the maximum number of iterations.
        for k, v in all_preds.items():
            v.infos = v.infos.loc[:, ['scene_id', 'view_id', 'label', 'score']]
            v.infos['time'] = duration
            predictions[k].append(v.cpu())
        predictions['detections'].append(this_batch_detections.cpu())

    predictions = dict(predictions)
    for k, v in predictions.items():
        predictions[k] = tc.concatenate(v)
    return predictions
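# Usage sketch for the full pipeline (hypothetical wiring; passing
# `icp_refiner=None` skips depth-based refinement):
#
#     preds = runner.get_predictions(detector, pose_predictor,
#                                    mv_predictor=mv_predictor,
#                                    n_refiner_iterations=4,
#                                    detection_th=0.3)
#     preds['detections']   # raw detector outputs
#     # The remaining keys mirror what pose_predictor.get_predictions
#     # returns, plus 'multiview' and 'icp' when those stages ran.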
def predict_scene_state(self, candidates, cameras,
                        score_th=0.3, use_known_camera_poses=False,
                        ransac_n_iter=2000, ransac_dist_threshold=0.02,
                        ba_n_iter=100):

    predictions = dict()
    cand_inputs = candidates

    assert len(np.unique(candidates.infos['scene_id'])) == 1
    scene_id = np.unique(candidates.infos['scene_id']).item()
    group_id = np.unique(candidates.infos['group_id']).item()
    keep = np.where(candidates.infos['score'] >= score_th)[0]
    candidates = candidates[keep]
    predictions['cand_inputs'] = candidates

    logger.debug(f'Num candidates: {len(candidates)}')
    logger.debug(f'Num views: {len(cameras)}')

    # Stage 1: RANSAC matching of single-view candidates across views.
    matching_outputs = multiview_candidate_matching(
        candidates=candidates,
        mesh_db=self.mesh_db_ransac,
        n_ransac_iter=ransac_n_iter,
        dist_threshold=ransac_dist_threshold,
        cameras=cameras if use_known_camera_poses else None)
    pairs_TC1C2 = matching_outputs['pairs_TC1C2']
    candidates = matching_outputs['filtered_candidates']

    logger.debug(f'Matched candidates: {len(candidates)}')
    for k, v in matching_outputs.items():
        if 'time' in k:
            logger.debug(f'RANSAC {k}: {v}')

    predictions['cand_matched'] = candidates

    group_infos = make_view_groups(pairs_TC1C2)
    candidates = candidates.merge_df(group_infos, on='view_id').cuda()

    # Stage 2: object-level bundle adjustment within each view group.
    pred_objects, pred_cameras, pred_reproj = [], [], []
    pred_reproj_init = []
    for view_group, candidate_ids in candidates.infos.groupby('view_group').groups.items():
        candidates_n = candidates[candidate_ids]
        problem = MultiviewRefinement(candidates=candidates_n,
                                      cameras=cameras,
                                      pairs_TC1C2=pairs_TC1C2,
                                      mesh_db=self.mesh_db_ba)
        ba_outputs = problem.solve(
            n_iterations=ba_n_iter,
            optimize_cameras=not use_known_camera_poses,
        )

        pred_objects_, pred_cameras_ = ba_outputs['objects'], ba_outputs['cameras']
        for x in (pred_objects_, pred_cameras_):
            x.infos['view_group'] = view_group
            x.infos['group_id'] = group_id
            x.infos['scene_id'] = scene_id
        pred_reproj.append(self.reproject_scene(pred_objects_, pred_cameras_))
        pred_objects.append(pred_objects_)
        pred_cameras.append(pred_cameras_)

        pred_objects_init = ba_outputs['objects_init']
        pred_cameras_init = ba_outputs['cameras_init']
        for x in (pred_objects_init, pred_cameras_init):
            x.infos['view_group'] = view_group
            x.infos['group_id'] = group_id
            x.infos['scene_id'] = scene_id
        pred_reproj_init.append(self.reproject_scene(pred_objects_init, pred_cameras_init))

        for k, v in ba_outputs.items():
            if 'time' in k:
                logger.debug(f'BA {k}: {v}')

    predictions['scene/objects'] = tc.concatenate(pred_objects)
    predictions['scene/cameras'] = tc.concatenate(pred_cameras)
    predictions['ba_output'] = tc.concatenate(pred_reproj)
    predictions['ba_input'] = tc.concatenate(pred_reproj_init)
    cand_inputs = tc.PandasTensorCollection(
        infos=cand_inputs.infos,
        poses=cand_inputs.poses,
    )
    predictions['ba_output+all_cand'] = tc.concatenate(
        [predictions['ba_output'], cand_inputs])
    return predictions
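# Keys of the returned dict, as populated above (sketch of downstream use):
#
#     out = self.predict_scene_state(candidates, cameras)
#     out['cand_inputs']          # score-filtered input candidates
#     out['cand_matched']         # candidates kept by RANSAC matching
#     out['scene/objects']        # per-group object poses in the world frame
#     out['scene/cameras']        # per-group camera poses
#     out['ba_input']             # reprojections before bundle adjustment
#     out['ba_output']            # reprojections after bundle adjustment
#     out['ba_output+all_cand']   # BA output concatenated with all candidates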