def do_feature_extraction(cfg, model, dataset_name):
    with inference_context(model):
        dump_folder = os.path.join(cfg.OUTPUT_DIR, "features",
                                   dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)
        # data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        extract_grid_feature_on_local(model, dump_folder, 'data/train_images')
def do_feature_extraction(cfg, model, image_dir, image_list):
    with inference_context(model):
        # dump_folder = os.path.join(cfg.OUTPUT_DIR, "features")
        dump_folder = 'gridfeats'
        PathManager.mkdirs(dump_folder)
        # data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        extract_grid_feature_on_dataset(model, image_dir, image_list, dump_folder)
def do_feature_extraction(cfg, model, args):
    dataset_name = args.dataset
    with inference_context(model):
        # edit config file
        cfg.defrost()
        cfg.OUTPUT_DIR = args.output_dir
        cfg.freeze()
        dump_folder = os.path.join(cfg.OUTPUT_DIR, "features",
                                   dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)
        data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        extract_grid_feature_on_dataset(model, data_loader, dump_folder, args)
def get_meta_attention(cls, cfg, model):
    device = next(model.parameters()).device
    base_ids = torch.tensor(cfg.DATASETS.FEWSHOT.BASE_CLASSES_ID).long()
    novel_ids = torch.tensor(cfg.DATASETS.FEWSHOT.NOVEL_CLASSES_ID).long()
    base_ids = base_ids.to(device)
    novel_ids = novel_ids.to(device)
    base_data_loader = cls.build_base_meta_loader(cfg)
    _base_meta_data_loader_iter = iter(base_data_loader)
    base_data = next(_base_meta_data_loader_iter)
    with inference_context(model), torch.no_grad():
        meta_attention = model(None, meta_data=base_data, return_attention=True)
    return meta_attention
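# --- Hedged usage sketch ---
# `get_meta_attention` takes `cls` as its first argument, i.e. it is meant to be
# a classmethod on a trainer that provides `build_base_meta_loader`. A caller
# would look roughly like this (`Trainer` is a hypothetical class name, not
# defined in this file):
#
# meta_attention = Trainer.get_meta_attention(cfg, model)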
def inference(cfg, model, dataset_name, dataset_path):
    with inference_context(model):
        if dataset_name not in dataset_to_folder_mapper:
            dataset_to_folder_mapper[dataset_name] = dataset_name
            set_metadata(dataset_name)
            data_loader = build_detection_test_loader_for_images(cfg, dataset_path)
        else:
            data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        dump_folder = os.path.join(cfg.OUTPUT_DIR, "my_inference",
                                   dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)
        inference_on_dataset(cfg, model, data_loader, dataset_name, dump_folder)
def training_loop(cfg, cp, model, optimizer, scheduler, loaders, device, loss_fn):
    # if comm.is_main_process():
    #     wandb.init(project='MeshRCNN', config=cfg, name='prediction_module')
    Timer.timing = False
    iteration_timer = Timer("Iteration")

    # model.parameters() is surprisingly expensive at 150ms, so cache it
    if hasattr(model, "module"):
        params = list(model.module.parameters())
    else:
        params = list(model.parameters())
    loss_moving_average = cp.data.get("loss_moving_average", None)

    # Zhengyuan modification
    loss_predictor = LossPredictionModule().to(device)
    loss_pred_optim = torch.optim.Adam(loss_predictor.parameters(), lr=1e-5)

    while cp.epoch < cfg.SOLVER.NUM_EPOCHS:
        if comm.is_main_process():
            logger.info("Starting epoch %d / %d" % (cp.epoch + 1, cfg.SOLVER.NUM_EPOCHS))

        # When using a DistributedSampler we need to manually set the epoch so that
        # the data is shuffled differently at each epoch
        for loader in loaders.values():
            if hasattr(loader.sampler, "set_epoch"):
                loader.sampler.set_epoch(cp.epoch)

        # Config settings for renderer
        render_image_size = 256
        blend_params = BlendParams(sigma=1e-4, gamma=1e-4)
        raster_settings = RasterizationSettings(
            image_size=render_image_size,
            blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma,
            faces_per_pixel=50,
        )

        rot_y_90 = torch.tensor([[0, 0, 1, 0],
                                 [0, 1, 0, 0],
                                 [-1, 0, 0, 0],
                                 [0, 0, 0, 1]]).float().to(device)

        for i, batch in enumerate(loaders["train"]):
            if i == 0:
                iteration_timer.start()
            else:
                iteration_timer.tick()

            batch = loaders["train"].postprocess(batch, device)
            if dataset == 'MeshVoxMulti':
                imgs, meshes_gt, points_gt, normals_gt, voxels_gt, id_strs, _, render_RTs, RTs = batch
            else:
                imgs, meshes_gt, points_gt, normals_gt, voxels_gt = batch

            with inference_context(model):
                # NOTE: _imgs contains all of the other images belonging to this model.
                # We have to select the next-best-view from that list of images.
                model_kwargs = {}
                if cfg.MODEL.VOXEL_ON and cp.t < cfg.MODEL.VOXEL_HEAD.VOXEL_ONLY_ITERS:
                    model_kwargs["voxel_only"] = True
                with Timer("Forward"):
                    voxel_scores, meshes_pred = model(imgs, **model_kwargs)

            # Total silhouette loss, to be added to "loss" below
            total_silh_loss = torch.tensor(0.)
            # Voxel-only training for the first few iterations
            if meshes_gt is not None and not model_kwargs.get("voxel_only", False):
                _meshes_pred = meshes_pred[-1].clone()
                _meshes_gt = meshes_gt[-1].clone()

                # Render masks from the predicted mesh for each view.
                # GT probability map to supervise the prediction module.
                B = len(meshes_gt)
                probability_map = 0.01 * torch.ones((B, 24)).to(device)  # batch size x 24
                viewgrid = torch.zeros(
                    (B, 24, render_image_size, render_image_size)).to(device)  # batch size x 24 x H x W

                for b, (cur_gt_mesh, cur_pred_mesh) in enumerate(zip(meshes_gt, _meshes_pred)):
                    # Maybe computationally expensive, but we need to transform back to
                    # world space based on the rendered image viewpoint.
                    RT = RTs[b]
                    # Rotate 90 degrees about the y-axis and invert
                    invRT = torch.inverse(RT.mm(rot_y_90))
                    invRT_no_rot = torch.inverse(RT)  # Just invert
                    cur_pred_mesh._verts_list[0] = project_verts(
                        cur_pred_mesh._verts_list[0], invRT)
                    sid = id_strs[b].split('-')[0]

                    # For some strange reason all classes (except the vehicle class)
                    # require a 90 degree rotation about the y-axis.
                    if sid == '02958343':
                        cur_gt_mesh._verts_list[0] = project_verts(
                            cur_gt_mesh._verts_list[0], invRT_no_rot)
                    else:
                        cur_gt_mesh._verts_list[0] = project_verts(
                            cur_gt_mesh._verts_list[0], invRT)

                    for iid in range(len(render_RTs[b])):
                        R = render_RTs[b][iid][:3, :3].unsqueeze(0)
                        T = render_RTs[b][iid][:3, 3].unsqueeze(0)
                        cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
                        silhouette_renderer = MeshRenderer(
                            rasterizer=MeshRasterizer(
                                cameras=cameras, raster_settings=raster_settings),
                            shader=SoftSilhouetteShader(blend_params=blend_params))

                        ref_image = (silhouette_renderer(
                            meshes_world=cur_gt_mesh, R=R, T=T) > 0).float()
                        image = (silhouette_renderer(
                            meshes_world=cur_pred_mesh, R=R, T=T) > 0).float()

                        # Add image silhouette to viewgrid
                        viewgrid[b, iid] = image[..., -1]

                        '''
                        import matplotlib.pyplot as plt
                        plt.subplot(1, 2, 1)
                        plt.imshow(ref_image[0, :, :, 3].detach().cpu().numpy())
                        plt.subplot(1, 2, 2)
                        plt.imshow(image[0, :, :, 3].detach().cpu().numpy())
                        plt.show()
                        '''

                        # MSE loss between both silhouettes
                        silh_loss = torch.sum(
                            (image[0, :, :, 3] - ref_image[0, :, :, 3]) ** 2)
                        probability_map[b, iid] = silh_loss.detach()
                        total_silh_loss += silh_loss

                probability_map = probability_map / (
                    torch.max(probability_map, dim=1)[0].unsqueeze(1))  # Normalize
                probability_map = torch.nn.functional.softmax(
                    probability_map, dim=1).to(device)  # Softmax across images
                # nbv_idx = torch.argmax(probability_map, dim=1)  # Next-best-view indices
                # nbv_imgs = _imgs[torch.arange(B), nbv_idx]  # Next-best-view images

                # NOTE: Do a second forward pass through the model?
                # This time for multi-view reconstruction.
                # The input should be the first image and the next-best view.
                # voxel_scores, meshes_pred = model(nbv_imgs, **model_kwargs)

                # Zhengyuan: step loss_prediction
                predictor_loss = loss_predictor.train_batch(
                    viewgrid, probability_map, loss_pred_optim)
                if comm.is_main_process():
                    # wandb.log({'prediction module loss': predictor_loss})
                    if cp.t % 50 == 0:
                        print('{} predictor_loss: {}'.format(cp.t, predictor_loss))

                    # Save a checkpoint every 500 iterations
                    if cp.t % 500 == 0:
                        print('Saving loss prediction module at iter {}'.format(cp.t))
                        os.makedirs('./output_prediction_module', exist_ok=True)
                        torch.save(
                            loss_predictor.state_dict(),
                            './output_prediction_module/prediction_module_'
                            + str(cp.t) + '.pth')

            cp.step()
            if cp.t % cfg.SOLVER.CHECKPOINT_PERIOD == 0:
                eval_and_save(model, loaders, optimizer, scheduler, cp)
        cp.step_epoch()
    eval_and_save(model, loaders, optimizer, scheduler, cp)

    if comm.is_main_process():
        logger.info("Evaluating on test set:")
        test_loader = build_data_loader(cfg, dataset, "test", multigpu=False)
        evaluate_test(model, test_loader)
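# --- Hedged sketch: next-best-view scoring as used in training_loop above ---
# A minimal, self-contained illustration of how the (B, 24) silhouette-loss map
# is turned into a per-view probability distribution (max-normalize per sample,
# then softmax across views). Names here are illustrative, not part of the
# training loop above.
import torch

def nbv_probabilities(silh_losses: torch.Tensor) -> torch.Tensor:
    """silh_losses: (B, num_views) detached per-view silhouette losses."""
    # Divide each row by its max so values land in (0, 1] before the softmax.
    normalized = silh_losses / silh_losses.max(dim=1, keepdim=True)[0]
    return torch.nn.functional.softmax(normalized, dim=1)

# Example: views with higher silhouette loss get higher probability, i.e. they
# are the most informative candidates for the next view.
# probs = nbv_probabilities(torch.rand(2, 24)); nbv_idx = probs.argmax(dim=1)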
def inference_on_dataset(model, data_loader, evaluator):
    """
    # NOTE: modified to add time
    Run model on the data_loader and evaluate the metrics with evaluator.
    Also benchmark the inference speed of `model.forward` accurately.
    The model will be used in eval mode.

    Args:
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
            to benchmark, but don't want to do any evaluation.

    Returns:
        The return value of `evaluator.evaluate()`
    """
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            start_compute_time = time.perf_counter()
            outputs = model(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time
            for _o in outputs:
                _o['time'] = cur_compute_time / len(outputs)
            evaluator.process(inputs, outputs)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)),
                    n=5,
                )

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices))
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices))

    results = evaluator.evaluate()
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
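# --- Hedged usage sketch for inference_on_dataset ---
# Assumes a standard detectron2-style setup. `build_detection_test_loader` and
# `COCOEvaluator` are real detectron2 entry points, but the cfg and dataset
# name below are placeholders, not taken from this file.
#
# from detectron2.data import build_detection_test_loader
# from detectron2.evaluation import COCOEvaluator
#
# data_loader = build_detection_test_loader(cfg, "my_val_dataset")
# evaluator = COCOEvaluator("my_val_dataset", output_dir=cfg.OUTPUT_DIR)
# results = inference_on_dataset(model, data_loader, evaluator)
# # Each output dict also carries the per-image 'time' entry added by the loop above.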
def evaluate_test(model, data_loader, vis_preds=False):
    """
    This function evaluates the model on the dataset defined by data_loader.
    The metrics reported are described in Table 2 of our paper.
    """
    # Note that all eval runs on the main process
    assert comm.is_main_process()
    deprocess = imagenet_deprocess(rescale_image=False)
    device = torch.device("cuda:0")
    # evaluation
    class_names = {
        "02828884": "bench",
        "03001627": "chair",
        "03636649": "lamp",
        "03691459": "speaker",
        "04090263": "firearm",
        "04379243": "table",
        "04530566": "watercraft",
        "02691156": "plane",
        "02933112": "cabinet",
        "02958343": "car",
        "03211117": "monitor",
        "04256520": "couch",
        "04401088": "cellphone",
    }

    num_instances = {i: 0 for i in class_names}
    chamfer = {i: 0 for i in class_names}
    normal = {i: 0 for i in class_names}
    f1_01 = {i: 0 for i in class_names}
    f1_03 = {i: 0 for i in class_names}
    f1_05 = {i: 0 for i in class_names}

    num_batch_evaluated = 0
    for batch in data_loader:
        batch = data_loader.postprocess(batch, device)
        imgs, meshes_gt, _, _, _, id_strs, _imgs = batch
        # NOTE: _imgs contains all of the other images belonging to this model.
        # We have to select the next-best-view from that list of images.
        sids = [id_str.split("-")[0] for id_str in id_strs]
        for sid in sids:
            num_instances[sid] += 1

        with inference_context(model):
            voxel_scores, meshes_pred = model(imgs)

        # TODO: Render masks from the predicted mesh for each view
        cur_metrics = compare_meshes(meshes_pred[-1], meshes_gt, reduce=False)
        cur_metrics["verts_per_mesh"] = meshes_pred[-1].num_verts_per_mesh().cpu()
        cur_metrics["faces_per_mesh"] = meshes_pred[-1].num_faces_per_mesh().cpu()

        for i, sid in enumerate(sids):
            chamfer[sid] += cur_metrics["Chamfer-L2"][i].item()
            normal[sid] += cur_metrics["AbsNormalConsistency"][i].item()
            f1_01[sid] += cur_metrics["F1@%f" % 0.1][i].item()
            f1_03[sid] += cur_metrics["F1@%f" % 0.3][i].item()
            f1_05[sid] += cur_metrics["F1@%f" % 0.5][i].item()

            if vis_preds:
                img = image_to_numpy(deprocess(imgs[i]))
                vis_utils.visualize_prediction(
                    id_strs[i], img, meshes_pred[-1][i], "/tmp/output")

        num_batch_evaluated += 1
        logger.info("Evaluated %d / %d batches" %
                    (num_batch_evaluated, len(data_loader)))

    vis_utils.print_instances_class_histogram(
        num_instances,
        class_names,
        {"chamfer": chamfer, "normal": normal,
         "f1_01": f1_01, "f1_03": f1_03, "f1_05": f1_05},
    )
def evaluate_test_p2m(model, data_loader):
    """
    This function evaluates the model on the dataset defined by data_loader.
    The metrics reported are described in Table 1 of our paper, following
    previously reported approaches (like Pixel2Mesh - p2m), where meshes are
    rescaled by a factor of 0.57. See the paper for more details.
    """
    assert comm.is_main_process()
    device = torch.device("cuda:0")
    # evaluation
    class_names = {
        "02828884": "bench",
        "03001627": "chair",
        "03636649": "lamp",
        "03691459": "speaker",
        "04090263": "firearm",
        "04379243": "table",
        "04530566": "watercraft",
        "02691156": "plane",
        "02933112": "cabinet",
        "02958343": "car",
        "03211117": "monitor",
        "04256520": "couch",
        "04401088": "cellphone",
    }

    num_instances = {i: 0 for i in class_names}
    chamfer = {i: 0 for i in class_names}
    normal = {i: 0 for i in class_names}
    f1_1e_4 = {i: 0 for i in class_names}
    f1_2e_4 = {i: 0 for i in class_names}

    num_batch_evaluated = 0
    for batch in data_loader:
        batch = data_loader.postprocess(batch, device)
        imgs, meshes_gt, _, _, _, id_strs = batch
        sids = [id_str.split("-")[0] for id_str in id_strs]
        for sid in sids:
            num_instances[sid] += 1

        with inference_context(model):
            voxel_scores, meshes_pred = model(imgs)

        # NOTE that for the F1 thresholds we take the square root of 1e-4 & 2e-4,
        # because `compare_meshes` returns the Euclidean distance (L2) of two
        # point clouds, whereas Pixel2Mesh computes the squared L2 (L2^2).
        # i.e. (L2^2 < tau) <=> (L2 < sqrt(tau))
        cur_metrics = compare_meshes(
            meshes_pred[-1], meshes_gt, scale=0.57,
            thresholds=[0.01, 0.014142], reduce=False)
        cur_metrics["verts_per_mesh"] = meshes_pred[-1].num_verts_per_mesh().cpu()
        cur_metrics["faces_per_mesh"] = meshes_pred[-1].num_faces_per_mesh().cpu()

        for i, sid in enumerate(sids):
            chamfer[sid] += cur_metrics["Chamfer-L2"][i].item()
            normal[sid] += cur_metrics["AbsNormalConsistency"][i].item()
            f1_1e_4[sid] += cur_metrics["F1@%f" % 0.01][i].item()
            f1_2e_4[sid] += cur_metrics["F1@%f" % 0.014142][i].item()

        num_batch_evaluated += 1
        logger.info("Evaluated %d / %d batches" %
                    (num_batch_evaluated, len(data_loader)))

    vis_utils.print_instances_class_histogram_p2m(
        num_instances,
        class_names,
        {"chamfer": chamfer, "normal": normal,
         "f1_1e_4": f1_1e_4, "f1_2e_4": f1_2e_4},
    )
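# --- Why the thresholds 0.01 and 0.014142 above ---
# A worked check of the sqrt conversion in the NOTE: Pixel2Mesh thresholds tau
# are on squared L2 distances, while `compare_meshes` thresholds are on plain
# L2 distances, so the code passes sqrt(tau):
#   sqrt(1e-4) = 0.01
#   sqrt(2e-4) = 0.01414213...  (rounded to 0.014142 above)
import math
assert math.isclose(math.sqrt(1e-4), 0.01)
assert math.isclose(math.sqrt(2e-4), 0.014142, rel_tol=1e-4)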
def inference_custom(model, data_loader, evaluator):
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            start_compute_time = time.perf_counter()
            outputs = model(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            evaluator.process(inputs, outputs)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)),
                    n=5,
                )

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices))
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices))

    results = evaluator.evaluate()
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
def inference_on_dataset(
    model, data_loader, evaluator, num_classes, topk, num_estimate, min_score
):
    """
    Run model on the data_loader and evaluate the metrics with evaluator.
    Also benchmark the inference speed of `model.forward` accurately.
    The model will be used in eval mode.

    Args:
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
            to benchmark, but don't want to do any evaluation.
        topk (int)
        num_estimate (int): Number of images to estimate initial score threshold.

    Returns:
        The return value of `evaluator.evaluate()`
    """
    num_devices = get_world_size()
    logger.info("Start inference on {} images".format(len(data_loader)))
    if isinstance(topk, int):
        logger.info(f"Collecting top-{topk} images.")
        topk = [topk] * num_classes
    else:
        logger.info(f"Collecting top-k images. Counts:\n{topk}")

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0

    # We keep track of scores from _this_ process (process_scores) and scores from
    # all processes (global_scores). Every iter, each process updates process_scores
    # and its local scores with the new scores from the model.
    # Every few iterations, all processes pass their process_scores to each other and
    # update their own global scores.

    # Map category id to min-heap of top scores from this process.
    process_scores = defaultdict(list)
    # Map category id to min-heap of top scores from all processes.
    global_scores = defaultdict(list)

    init_thresholds = torch.full(
        (num_classes + 1,), fill_value=min_score, dtype=torch.float32
    ).to(model.device)
    init_threshold_path = Path(evaluator._output_dir) / "_thresholds_checkpoint.pth"
    if init_threshold_path.exists():
        logger.info("Loading thresholds from disk.")
        init_thresholds = torch.load(init_threshold_path).to(model.device)
    else:
        init_threshold_path.parent.mkdir(exist_ok=True, parents=True)

    # Trying to get exactly the top-k estimates can result in getting slightly fewer
    # than k estimates. This can happen due to subtle differences in the model's
    # forward pass in the first phase vs. the second phase. For example, in the first
    # phase, when we have low thresholds, D2 will use torchvision.ops.boxes.batched_nms
    # for batch NMS. In phase 2, D2 will use a slightly different, customized
    # implementation, which may occasionally result in fewer boxes.
    # To address this, we set thresholds to be a bit looser, targeting 10% more
    # predictions than requested.
    topk_loose = [int(ceil(k * 1.1)) for k in topk]

    def get_thresholds(scores, min_thresholds):
        thresholds = []
        for i in range(num_classes):
            if topk_loose[i] == 0:
                thresholds.append(float("inf"))
            elif len(scores[i]) < topk_loose[i]:
                thresholds.append(-1)
            else:
                thresholds.append(scores[i][0])
        # Add -1 for background
        thresholds = torch.FloatTensor(thresholds + [-1]).to(model.device)
        # Clamp at minimum thresholds
        return torch.max(thresholds, min_thresholds)

    def update_scores(scores, inputs, outputs):
        updated = set()
        for image, output in zip(inputs, outputs):
            if isinstance(output, dict):
                instances = output["instances"]
            else:
                instances = output
            curr_labels = instances.pred_classes.int().tolist()
            curr_scores = instances.scores.cpu().tolist()
            for label, score in zip(curr_labels, curr_scores):
                # label = label.int().item()
                # scores[label].append((image["image_id"], score.cpu().item()))
                if len(scores[label]) >= topk_loose[label]:
                    if score < scores[label][0]:
                        continue
                    else:
                        heapq.heappushpop(scores[label], score)
                else:
                    heapq.heappush(scores[label], score)
                updated.add(label)

    def gather_scores(process_scores):
        # List of scores per process
        scores_list = comm.all_gather(process_scores)
        gathered = defaultdict(list)
        labels = {x for scores in scores_list for x in scores.keys()}
        for label in labels:
            # Sort in descending order.
            sorted_generator = heapq.merge(
                *[sorted(x[label], reverse=True) for x in scores_list], reverse=True
            )
            top_k = itertools.islice(sorted_generator, topk_loose[label])
            top_k_ascending = list(reversed(list(top_k)))  # Return to ascending order
            heapq.heapify(top_k_ascending)
            gathered[label] = top_k_ascending
        return gathered

    with inference_context(model), torch.no_grad():
        #########
        # Phase 1: Compute initial, low score thresholds without mask branch.
        #########
        # First, get an estimate of score thresholds with the mask branch off.
        # Otherwise, in the initial few images, we will run the mask branch on a bunch
        # of useless proposals, which makes everything slow.
        num_estimate = min(num_estimate, len(data_loader))
        for idx, inputs in enumerate(
            tqdm(
                data_loader,
                desc="Computing score thresholds",
                total=num_estimate,
                disable=comm.get_rank() != 0,
            )
        ):
            if idx > num_estimate:
                break
            # Gather scores from other processes periodically.
            # In early iterations, the thresholds are low, making inference slow and
            # gathering relatively fast, so we gather more often. Later, the thresholds
            # are high enough that inference is fast and gathering is slow, so we stop
            # gathering.
            if (idx < 100 and idx % 10 == 0) or (idx % 500 == 0):
                global_scores = gather_scores(process_scores)

            thresholds = get_thresholds(global_scores, init_thresholds)
            if idx % 1000 == 0:  # Save thresholds for later runs
                torch.save(thresholds, init_threshold_path)
            with per_class_thresholded_inference(model, thresholds, topk):
                with _turn_off_roi_heads(model, ["mask_on", "keypoint_on"]):
                    outputs = model.inference(inputs, do_postprocess=False)
            update_scores(global_scores, inputs, outputs)
            update_scores(process_scores, inputs, outputs)

            if (idx < 100 and idx % 10 == 0) or (idx % 100 == 0):
                logger.info(
                    "Threshold range (%s, %s); # collected: (%s, %s)",
                    thresholds[:-1].min(),
                    thresholds[:-1].max(),
                    min(len(x) for x in global_scores.values()),
                    max(len(x) for x in global_scores.values()),
                )

        del global_scores
        # Necessary to avoid timeout when gathering?
        comm.synchronize()

        # Map class to scores of predictions so far.
        init_scores = gather_scores(process_scores)
        # Minimum thresholds from the estimate stage
        init_thresholds = get_thresholds(init_scores, init_thresholds)
        # Clear scores from estimates; we will start tracking them again.
        scores = defaultdict(list)

        #########
        # Phase 2: Collect top-k predictions, with mask branch enabled.
        #########
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            start_compute_time = time.perf_counter()
            thresholds = get_thresholds(scores, init_thresholds)
            with per_class_thresholded_inference(model, thresholds, topk):
                with limit_mask_branch_proposals(model, max_proposals=300):
                    outputs = model(inputs)
            update_scores(scores, inputs, outputs)

            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            evaluator.process(inputs, outputs)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (
                    time.perf_counter() - start_time
                ) / iters_after_start
                eta = datetime.timedelta(
                    seconds=int(total_seconds_per_img * (total - idx - 1))
                )
                log_every_n_seconds(
                    logging.INFO,
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)
                    ),
                    n=5,
                    name=logger.name,
                )

            # Clear unnecessary predictions every so often.
            if idx < 100 or ((idx + 1) % 10) == 0:
                by_cat = defaultdict(list)
                for pred in evaluator._predictions:
                    for ann in pred["instances"]:
                        by_cat[ann["category_id"]].append(ann)
                topk_preds = []
                for c, anns in by_cat.items():
                    topk_preds.extend(
                        sorted(anns, key=lambda a: a["score"], reverse=True)[: topk[c]]
                    )
                evaluator._predictions = [{"instances": topk_preds}]

    if evaluator._output_dir:
        PathManager.mkdirs(evaluator._output_dir)
        file_path = os.path.join(
            evaluator._output_dir, f"instances_predictions_rank{comm.get_rank()}.pth"
        )
        with PathManager.open(file_path, "wb") as f:
            torch.save(evaluator._predictions, f)

    # Necessary to avoid timeout when gathering?
    comm.synchronize()

    # Limit number of detections per category across workers.
    predictions = comm.gather(evaluator._predictions, dst=0)
    if comm.is_main_process():
        predictions = list(itertools.chain(*predictions))
        by_cat = defaultdict(list)
        for pred in predictions:
            for ann in pred["instances"]:
                by_cat[ann["category_id"]].append(ann)
        logger.info(f"Max per cat: {max(len(v) for v in by_cat.values())}")
        logger.info(f"Min per cat: {min(len(v) for v in by_cat.values())}")
        topk_preds = []
        for c, anns in by_cat.items():
            topk_preds.extend(
                sorted(anns, key=lambda a: a["score"], reverse=True)[: topk[c]]
            )
        evaluator._predictions = [{"instances": topk_preds}]
    else:
        evaluator._predictions = []

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices
        )
    )
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str,
            total_compute_time / (total - num_warmup),
            num_devices,
        )
    )

    results = evaluator.evaluate()
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
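# --- Hedged sketch: the min-heap top-k pattern used by update_scores above ---
# A standalone illustration (names are illustrative, not part of the function
# above): a min-heap of size k keeps the k highest scores seen so far, and
# heap[0] is the current admission threshold for new scores.
import heapq

def push_topk(heap: list, score: float, k: int) -> None:
    """Keep at most the k largest scores in `heap` (a min-heap)."""
    if len(heap) >= k:
        if score >= heap[0]:  # only admit scores beating the current minimum
            heapq.heappushpop(heap, score)
    else:
        heapq.heappush(heap, score)

# Example: pushing [0.9, 0.1, 0.5, 0.7] with k=2 leaves the heap as [0.7, 0.9],
# i.e. the two largest scores, with 0.7 as the next admission threshold.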
def save_result_of_dataset(cfg, model, data_loader, output_dir, dataset_name):
    """
    Run model (in eval mode) on the data_loader and save predictions.

    Args:
        cfg: config
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.

    Returns:
        The return value of `evaluator.evaluate()`
    """
    cpu_device = torch.device("cpu")
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    # NOTE: dataset name should be the same as TRAIN to get the correct meta
    _metadata = MetadataCatalog.get(dataset_name)
    data_ref = ref.__dict__[_metadata.ref_key]
    obj_names = _metadata.objs
    obj_ids = [data_ref.obj2id[obj_name] for obj_name in obj_names]

    result_name = "results.pkl"
    mmcv.mkdir_or_exist(output_dir)
    result_path = osp.join(output_dir, result_name)

    total = len(data_loader)  # inference data loader must have a fixed length
    results = OrderedDict()
    VIS = False
    logging_interval = 50
    num_warmup = min(5, logging_interval - 1, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0

    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            if VIS:
                images_ori = [_input["image"].clone() for _input in inputs]

            start_compute_time = time.perf_counter()
            outputs = model(inputs)  # NOTE: do model inference
            torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time

            # NOTE: process results
            for i in range(len(inputs)):
                _input = inputs[i]
                output = outputs[i]
                cur_results = {}
                instances = output["instances"]
                HAS_MASK = False
                if instances.has("pred_masks"):
                    HAS_MASK = True
                    pred_masks = instances.pred_masks  # (#objs, imH, imW)
                    pred_masks = pred_masks.detach().cpu().numpy()
                    # NOTE: time-consuming step
                    rles = [
                        binary_mask_to_rle(pred_masks[_k])
                        for _k in range(len(pred_masks))
                    ]

                instances = instances.to(cpu_device)
                boxes = instances.pred_boxes.tensor.clone().detach().cpu().numpy()  # xyxy
                scores = instances.scores.tolist()
                labels = instances.pred_classes.detach().cpu().numpy()
                obj_ids = [data_ref.obj2id[obj_names[int(label)]] for label in labels]

                ego_quats = instances.pred_ego_quats.detach().cpu().numpy()
                ego_rots = [quat2mat(ego_quats[k]) for k in range(len(ego_quats))]
                transes = instances.pred_transes.detach().cpu().numpy()

                cur_results = {
                    "time": cur_compute_time / len(inputs),
                    "obj_ids": obj_ids,
                    "scores": scores,
                    "boxes": boxes,  # xyxy
                    "Rs": ego_rots,
                    "ts": transes,  # m
                }
                if HAS_MASK:
                    cur_results["masks"] = rles

                if VIS:
                    import cv2
                    from lib.vis_utils.image import vis_image_mask_bbox_cv2
                    image = (images_ori[i].detach().cpu().numpy().transpose(1, 2, 0)
                             + 0.5).astype("uint8")
                    img_vis = vis_image_mask_bbox_cv2(
                        image, pred_masks, boxes,
                        labels=[obj_names[int(label)] for label in labels])
                    cv2.imshow("img", img_vis.astype("uint8"))
                    cv2.waitKey()

                results[_input["scene_im_id"]] = cur_results

            if (idx + 1) % logging_interval == 0:
                duration = time.perf_counter() - start_time
                seconds_per_img = duration / (idx + 1 - num_warmup)
                eta = datetime.timedelta(
                    seconds=int(seconds_per_img * (total - num_warmup) - duration))
                logger.info(
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)))

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = int(time.perf_counter() - start_time)
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices))
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices))

    mmcv.dump(results, result_path)
    logger.info("Results saved to {}".format(result_path))
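# --- Hedged sketch: binary mask -> RLE, the "time-consuming step" above ---
# `binary_mask_to_rle` is this repo's own helper; an equivalent using the
# standard pycocotools API looks roughly like this (the helper name
# `mask_to_rle` is illustrative):
import numpy as np
import pycocotools.mask as mask_util

def mask_to_rle(binary_mask: np.ndarray) -> dict:
    """Encode a (H, W) bool/uint8 mask as COCO compressed RLE."""
    rle = mask_util.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
    rle["counts"] = rle["counts"].decode("ascii")  # make it JSON-serializable
    return rle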
def gdrn_inference_on_dataset(cfg, model, data_loader, evaluator, amp_test=False):
    """Run model on the data_loader and evaluate the metrics with evaluator.

    Also benchmark the inference speed of `model.forward` accurately.
    The model will be used in eval mode.

    Args:
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
            to benchmark, but don't want to do any evaluation.

    Returns:
        The return value of `evaluator.evaluate()`
    """
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    total_process_time = 0

    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0
                total_process_time = 0

            start_compute_time = time.perf_counter()
            #############################
            # process input
            batch = batch_data(cfg, inputs, phase="test")
            if evaluator.train_objs is not None:
                roi_labels = batch["roi_cls"].cpu().numpy().tolist()
                obj_names = [evaluator.obj_names[_l] for _l in roi_labels]
                if all(_obj not in evaluator.train_objs for _obj in obj_names):
                    continue

            # if cfg.DEBUG:
            #     for i in range(len(batch["roi_cls"])):
            #         vis_roi_im = batch["roi_img"][i].cpu().numpy().transpose(1, 2, 0)[:, :, ::-1]
            #         show_ims = [vis_roi_im]
            #         show_titles = ["roi_im"]
            #
            #         vis_coor2d = batch["roi_coord_2d"][i].cpu().numpy()
            #         show_ims.extend([vis_coor2d[0], vis_coor2d[1]])
            #         show_titles.extend(["coord_2d_x", "coord_2d_y"])
            #         grid_show(show_ims, show_titles, row=1, col=3)

            with autocast(enabled=amp_test):
                out_dict = model(
                    batch["roi_img"],
                    roi_classes=batch["roi_cls"],
                    roi_cams=batch["roi_cam"],
                    roi_whs=batch["roi_wh"],
                    roi_centers=batch["roi_center"],
                    resize_ratios=batch["resize_ratio"],
                    roi_coord_2d=batch.get("roi_coord_2d", None),
                    roi_extents=batch.get("roi_extent", None),
                )
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time

            # NOTE: added
            # TODO: add detection time here
            outputs = [{} for _ in range(len(inputs))]
            for _i in range(len(outputs)):
                outputs[_i]["time"] = cur_compute_time

            start_process_time = time.perf_counter()
            evaluator.process(inputs, outputs, out_dict)  # RANSAC/PnP
            cur_process_time = time.perf_counter() - start_process_time
            total_process_time += cur_process_time

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    f"Inference done {idx + 1}/{total}. "
                    f"{seconds_per_img:.4f} s / img. ETA={str(eta)}",
                    n=5)

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        f"Total inference time: {total_time_str} "
        f"({total_time / (total - num_warmup):.6f} s / img per device, on {num_devices} devices)"
    )
    # pure forward time
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices))
    # post_process time
    total_process_time_str = str(datetime.timedelta(seconds=int(total_process_time)))
    logger.info(
        "Total inference post process time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_process_time_str, total_process_time / (total - num_warmup), num_devices))

    results = evaluator.evaluate()  # results is always None
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
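# --- Hedged usage sketch for gdrn_inference_on_dataset ---
# The evaluator is expected to expose `train_objs` and `obj_names` and to run
# RANSAC/PnP inside `process(inputs, outputs, out_dict)`, as the loop above
# assumes. The builder names below are placeholders for this repo's actual
# entry points, not confirmed APIs:
#
# data_loader = build_gdrn_test_loader(cfg, dataset_name)        # hypothetical
# evaluator = GDRN_Evaluator(cfg, dataset_name, output_dir=...)  # hypothetical
# results = gdrn_inference_on_dataset(
#     cfg, model, data_loader, evaluator, amp_test=cfg.TEST.AMP_TEST)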
import os

import h5py
import torch
import tqdm
from detectron2.structures import Boxes

save_dir = '/home/luoyp/disk1/grid-feats-vqa/feats'
region_before = h5py.File(os.path.join(save_dir, 'region_before_X152.hdf5'), 'w')
# region_after = h5py.File(os.path.join(save_dir, 'region_after.hdf5'), 'w')
# grid7 = h5py.File(os.path.join(save_dir, 'my_grid7.hdf5'), 'w')
# original_grid = h5py.File(os.path.join(save_dir, 'original_grid7.hdf5'), 'w')
thresh = 0.2
max_regions = 100
pooling = torch.nn.AdaptiveAvgPool2d((7, 7))
image_id_collector = []

for dataset_name in ['coco_2014_train', 'coco_2014_val']:
    with inference_context(model):
        dump_folder = os.path.join(cfg.OUTPUT_DIR, "features",
                                   dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)
        data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        for idx, inputs in enumerate(tqdm.tqdm(data_loader)):
            with torch.no_grad():
                image_id = inputs[0]['image_id']
                file_name = '%d.pth' % image_id
                images = model.preprocess_image(inputs)
                features = model.backbone(images.tensor)
                proposals, _ = model.proposal_generator(images, features)
                proposal_boxes = [x.proposal_boxes for x in proposals]
                features = [features[f] for f in model.roi_heads.in_features]
                box_features1 = model.roi_heads.box_pooler(
                    features, [x.proposal_boxes for x in proposals])
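# --- Hedged sketch: pooling a backbone feature map to a 7x7 grid ---
# Illustrates what the `pooling` module defined above is for: collapsing an
# arbitrary H x W feature map to a fixed 7x7 grid of feature vectors. The
# tensor shape below is a made-up example, not taken from the loop above.
feat = torch.randn(1, 2048, 19, 29)          # (N, C, H, W) backbone output
grid = pooling(feat)                         # -> (1, 2048, 7, 7)
grid_flat = grid.flatten(2).transpose(1, 2)  # -> (1, 49, 2048), one row per grid cell
assert grid_flat.shape == (1, 49, 2048)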
def evaluate_test(model, data_loader, vis_preds=False):
    """
    This function evaluates the model on the dataset defined by data_loader.
    The metrics reported are described in Table 2 of our paper.
    """
    # Note that all eval runs on the main process
    assert comm.is_main_process()
    deprocess = imagenet_deprocess(rescale_image=False)
    device = torch.device("cuda:0")
    # evaluation
    class_names = {
        "02828884": "bench",
        "03001627": "chair",
        "03636649": "lamp",
        "03691459": "speaker",
        "04090263": "firearm",
        "04379243": "table",
        "04530566": "watercraft",
        "02691156": "plane",
        "02933112": "cabinet",
        "02958343": "car",
        "03211117": "monitor",
        "04256520": "couch",
        "04401088": "cellphone",
    }

    num_instances = {i: 0 for i in class_names}
    chamfer = {i: 0 for i in class_names}
    normal = {i: 0 for i in class_names}
    f1_01 = {i: 0 for i in class_names}
    f1_03 = {i: 0 for i in class_names}
    f1_05 = {i: 0 for i in class_names}

    num_batch_evaluated = 0
    for batch in data_loader:
        batch = data_loader.postprocess(batch, device)
        sids = [id_str.split("-")[0] for id_str in batch["id_strs"]]
        for sid in sids:
            num_instances[sid] += 1

        with inference_context(model):
            model_kwargs = {}
            module = model.module if hasattr(model, "module") else model
            if isinstance(module, VoxMeshMultiViewHead):
                model_kwargs["intrinsics"] = batch["intrinsics"]
                model_kwargs["extrinsics"] = batch["extrinsics"]
            if isinstance(module, VoxMeshDepthHead):
                model_kwargs["masks"] = batch["masks"]
            model_outputs = model(batch["imgs"], **model_kwargs)
            voxel_scores = model_outputs["voxel_scores"]
            meshes_pred = model_outputs["meshes_pred"]

        cur_metrics = compare_meshes(meshes_pred[-1], batch["meshes"], reduce=False)
        cur_metrics["verts_per_mesh"] = meshes_pred[-1].num_verts_per_mesh().cpu()
        cur_metrics["faces_per_mesh"] = meshes_pred[-1].num_faces_per_mesh().cpu()

        for i, sid in enumerate(sids):
            chamfer[sid] += cur_metrics["Chamfer-L2"][i].item()
            normal[sid] += cur_metrics["AbsNormalConsistency"][i].item()
            f1_01[sid] += cur_metrics["F1@%f" % 0.1][i].item()
            f1_03[sid] += cur_metrics["F1@%f" % 0.3][i].item()
            f1_05[sid] += cur_metrics["F1@%f" % 0.5][i].item()

            if vis_preds:
                img = image_to_numpy(deprocess(batch["imgs"][i]))
                vis_utils.visualize_prediction(
                    batch["id_strs"][i], img, meshes_pred[-1][i], "/tmp/output")

        num_batch_evaluated += 1
        logger.info("Evaluated %d / %d batches" %
                    (num_batch_evaluated, len(data_loader)))

    vis_utils.print_instances_class_histogram(
        num_instances,
        class_names,
        {"chamfer": chamfer, "normal": normal,
         "f1_01": f1_01, "f1_03": f1_03, "f1_05": f1_05},
    )