Code Example #1
def do_feature_extraction(cfg, model, dataset_name):
    with inference_context(model):
        dump_folder = os.path.join(cfg.OUTPUT_DIR, "features",
                                   dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)
        # data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        extract_grid_feature_on_local(model, dump_folder, 'data/train_images')
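
Every example on this page wraps the model in inference_context. For reference, the following is a minimal sketch of what such a context manager does, modeled on detectron2's `inference_context` (the upstream implementation may differ in details): it records the current training flag, puts the model into eval mode, and restores the original mode on exit.

from contextlib import contextmanager

@contextmanager
def inference_context(model):
    """Temporarily switch `model` to eval mode; restore its previous mode on exit."""
    training_mode = model.training
    model.eval()
    yield
    model.train(training_mode)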
Code Example #2
def do_feature_extraction(cfg, model, image_dir, image_list):
    with inference_context(model):
        # dump_folder = os.path.join(cfg.OUTPUT_DIR, "features")
        dump_folder = 'gridfeats'
        PathManager.mkdirs(dump_folder)
        # data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        extract_grid_feature_on_dataset(model, image_dir, image_list,
                                        dump_folder)
Code Example #3
def do_feature_extraction(cfg, model, args):
    dataset_name = args.dataset
    with inference_context(model):
        # edit config file
        cfg.defrost()
        cfg.OUTPUT_DIR = args.output_dir
        cfg.freeze()

        dump_folder = os.path.join(cfg.OUTPUT_DIR, "features",
                                   dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)
        data_loader = build_detection_test_loader_with_attributes(
            cfg, dataset_name)
        extract_grid_feature_on_dataset(model, data_loader, dump_folder, args)
Code Example #4
    def get_meta_attention(cls, cfg, model):
        device = next(model.parameters()).device
        base_ids = torch.tensor(cfg.DATASETS.FEWSHOT.BASE_CLASSES_ID).long()
        novel_ids = torch.tensor(cfg.DATASETS.FEWSHOT.NOVEL_CLASSES_ID).long()
        base_ids = base_ids.to(device)
        novel_ids = novel_ids.to(device)

        base_data_loader = cls.build_base_meta_loader(cfg)
        _base_meta_data_loader_iter = iter(base_data_loader)

        base_data = next(_base_meta_data_loader_iter)
        with inference_context(model), torch.no_grad():
            meta_attention = model(None,
                                   meta_data=base_data,
                                   return_attention=True)
        return meta_attention
Code Example #5
def inference(cfg, model, dataset_name, dataset_path):
    with inference_context(model):
        if dataset_name not in dataset_to_folder_mapper:
            dataset_to_folder_mapper[dataset_name] = dataset_name
            set_metadata(dataset_name)
            data_loader = build_detection_test_loader_for_images(
                cfg, dataset_path)
        else:
            data_loader = build_detection_test_loader_with_attributes(
                cfg, dataset_name)

        dump_folder = os.path.join(cfg.OUTPUT_DIR, "my_inference",
                                   dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)

        inference_on_dataset(cfg, model, data_loader, dataset_name,
                             dump_folder)
Code Example #6
def training_loop(cfg, cp, model, optimizer, scheduler, loaders, device,
                  loss_fn):

    #if comm.is_main_process():
    #    wandb.init(project='MeshRCNN', config=cfg, name='prediction_module')

    Timer.timing = False
    iteration_timer = Timer("Iteration")

    # model.parameters() is surprisingly expensive at 150ms, so cache it
    if hasattr(model, "module"):
        params = list(model.module.parameters())
    else:
        params = list(model.parameters())
    loss_moving_average = cp.data.get("loss_moving_average", None)

    # Zhengyuan modification
    loss_predictor = LossPredictionModule().to(device)
    loss_pred_optim = torch.optim.Adam(loss_predictor.parameters(), lr=1e-5)

    while cp.epoch < cfg.SOLVER.NUM_EPOCHS:
        if comm.is_main_process():
            logger.info("Starting epoch %d / %d" %
                        (cp.epoch + 1, cfg.SOLVER.NUM_EPOCHS))

        # When using a DistributedSampler we need to manually set the epoch so that
        # the data is shuffled differently at each epoch
        for loader in loaders.values():
            if hasattr(loader.sampler, "set_epoch"):
                loader.sampler.set_epoch(cp.epoch)

        # Config settings for renderer
        render_image_size = 256
        blend_params = BlendParams(sigma=1e-4, gamma=1e-4)
        raster_settings = RasterizationSettings(
            image_size=render_image_size,
            blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma,
            faces_per_pixel=50,
        )
        rot_y_90 = torch.tensor([[0, 0, 1, 0], [0, 1, 0, 0], [-1, 0, 0, 0],
                                 [0, 0, 0, 1]]).float().to(device)

        for i, batch in enumerate(loaders["train"]):
            if i == 0:
                iteration_timer.start()
            else:
                iteration_timer.tick()

            batch = loaders["train"].postprocess(batch, device)
            if dataset == 'MeshVoxMulti':
                imgs, meshes_gt, points_gt, normals_gt, voxels_gt, id_strs, _, render_RTs, RTs = batch
            else:
                imgs, meshes_gt, points_gt, normals_gt, voxels_gt = batch

            with inference_context(model):
                # NOTE: _imgs contains all of the other images belonging to this model.
                # We have to select the next-best view from that list of images.

                model_kwargs = {}
                if cfg.MODEL.VOXEL_ON and cp.t < cfg.MODEL.VOXEL_HEAD.VOXEL_ONLY_ITERS:
                    model_kwargs["voxel_only"] = True
                with Timer("Forward"):
                    voxel_scores, meshes_pred = model(imgs, **model_kwargs)

            total_silh_loss = torch.tensor(
                0.)  # Total silhouette loss, to be added to "loss" below
            # Voxel only training for first few iterations
            if meshes_gt is not None and not model_kwargs.get(
                    "voxel_only", False):
                _meshes_pred = meshes_pred[-1].clone()
                _meshes_gt = meshes_gt[-1].clone()

                # Render masks from predicted mesh for each view
                # GT probability map to supervise prediction module
                B = len(meshes_gt)
                probability_map = 0.01 * torch.ones(
                    (B, 24)).to(device)  # batch size x 24
                viewgrid = torch.zeros(
                    (B, 24, render_image_size,
                     render_image_size)).to(device)  # batch size x 24 x H x W
                for b, (cur_gt_mesh, cur_pred_mesh) in enumerate(
                        zip(meshes_gt, _meshes_pred)):
                    # Maybe computationally expensive, but need to transform back to world space based on rendered image viewpoint
                    RT = RTs[b]
                    # Rotate 90 degrees about y-axis and invert
                    invRT = torch.inverse(RT.mm(rot_y_90))
                    invRT_no_rot = torch.inverse(RT)  # Just invert

                    cur_pred_mesh._verts_list[0] = project_verts(
                        cur_pred_mesh._verts_list[0], invRT)
                    sid = id_strs[b].split('-')[0]

                    # For some strange reason all classes (except the vehicle class) require a 90 degree rotation about the y-axis
                    if sid == '02958343':
                        cur_gt_mesh._verts_list[0] = project_verts(
                            cur_gt_mesh._verts_list[0], invRT_no_rot)
                    else:
                        cur_gt_mesh._verts_list[0] = project_verts(
                            cur_gt_mesh._verts_list[0], invRT)

                    for iid in range(len(render_RTs[b])):

                        R = render_RTs[b][iid][:3, :3].unsqueeze(0)
                        T = render_RTs[b][iid][:3, 3].unsqueeze(0)
                        cameras = OpenGLPerspectiveCameras(device=device,
                                                           R=R,
                                                           T=T)
                        silhouette_renderer = MeshRenderer(
                            rasterizer=MeshRasterizer(
                                cameras=cameras,
                                raster_settings=raster_settings),
                            shader=SoftSilhouetteShader(
                                blend_params=blend_params))

                        ref_image = (silhouette_renderer(
                            meshes_world=cur_gt_mesh, R=R, T=T) > 0).float()
                        image = (silhouette_renderer(
                            meshes_world=cur_pred_mesh, R=R, T=T) > 0).float()

                        #Add image silhouette to viewgrid
                        viewgrid[b, iid] = image[..., -1]
                        '''
                        import matplotlib.pyplot as plt
                        plt.subplot(1,2,1)
                        plt.imshow(ref_image[0,:,:,3].detach().cpu().numpy())
                        plt.subplot(1,2,2)
                        plt.imshow(image[0,:,:,3].detach().cpu().numpy())
                        plt.show()
                        '''

                        # MSE Loss between both silhouettes
                        silh_loss = torch.sum(
                            (image[0, :, :, 3] - ref_image[0, :, :, 3])**2)
                        probability_map[b, iid] = silh_loss.detach()

                        total_silh_loss += silh_loss

                probability_map = probability_map / (torch.max(
                    probability_map, dim=1)[0].unsqueeze(1))  # Normalize

                probability_map = torch.nn.functional.softmax(
                    probability_map, dim=1).to(device)  # Softmax across images
                #nbv_idx = torch.argmax(probability_map, dim=1)  # Next-best view indices
                #nbv_imgs = _imgs[torch.arange(B), nbv_idx]  # Next-best view images

                # NOTE: Do a second forward pass through the model? This time for multi-view reconstruction
                # The input should be the first image and the next-best view
                #voxel_scores, meshes_pred = model(nbv_imgs, **model_kwargs)

                # Zhengyuan step loss_prediction
                predictor_loss = loss_predictor.train_batch(
                    viewgrid, probability_map, loss_pred_optim)
                if comm.is_main_process():
                    #wandb.log({'prediction module loss':predictor_loss})

                    if cp.t % 50 == 0:
                        print('{} predictor_loss: {}'.format(
                            cp.t, predictor_loss))

                    # Save the loss prediction module checkpoint every 500 iterations
                    if cp.t % 500 == 0:
                        print(
                            'Saving loss prediction module at iter {}'.format(
                                cp.t))
                        os.makedirs('./output_prediction_module',
                                    exist_ok=True)
                        torch.save(
                            loss_predictor.state_dict(),
                            './output_prediction_module/prediction_module_' +
                            str(cp.t) + '.pth')

            cp.step()

            if cp.t % cfg.SOLVER.CHECKPOINT_PERIOD == 0:
                eval_and_save(model, loaders, optimizer, scheduler, cp)
        cp.step_epoch()
    eval_and_save(model, loaders, optimizer, scheduler, cp)

    if comm.is_main_process():
        logger.info("Evaluating on test set:")
        test_loader = build_data_loader(cfg, dataset, "test", multigpu=False)
        evaluate_test(model, test_loader)
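
The per-view bookkeeping in the loop above boils down to one small step: the silhouette losses are normalized by each sample's maximum and then softmaxed into a probability map over the 24 candidate views, so that higher-loss views receive higher probability. A self-contained sketch of just that step (the function name and shapes are illustrative, not part of the original code):

import torch

def silhouette_losses_to_probability_map(silh_losses):
    # silh_losses: (B, 24) tensor of per-view silhouette MSE losses
    normalized = silh_losses / silh_losses.max(dim=1, keepdim=True)[0]
    return torch.nn.functional.softmax(normalized, dim=1)

losses = torch.rand(2, 24)
prob_map = silhouette_losses_to_probability_map(losses)
assert torch.allclose(prob_map.sum(dim=1), torch.ones(2))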
Code Example #7
def inference_on_dataset(model, data_loader, evaluator):
    """#NOTE: modified to add time
    Run model on the data_loader and evaluate the metrics with evaluator.
    Also benchmark the inference speed of `model.forward` accurately.
    The model will be used in eval mode.
    Args:
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
            to benchmark, but don't want to do any evaluation.
    Returns:
        The return value of `evaluator.evaluate()`
    """
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            start_compute_time = time.perf_counter()
            outputs = model(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time
            for _o in outputs:
                _o['time'] = cur_compute_time / len(outputs)
            evaluator.process(inputs, outputs)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() -
                                         start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img *
                                                     (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)),
                    n=5,
                )

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".
        format(total_time_str, total_time / (total - num_warmup), num_devices))
    total_compute_time_str = str(
        datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)"
        .format(total_compute_time_str,
                total_compute_time / (total - num_warmup), num_devices))

    results = evaluator.evaluate()
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
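
Examples #7 and #10 share the same benchmarking skeleton: discard the first few warm-up iterations, synchronize CUDA before reading the clock, and average compute time over the remaining images. A stripped-down sketch of that pattern in isolation (the `benchmark` helper below is illustrative, not a detectron2 API):

import time
import torch

def benchmark(model, data_loader, num_warmup=5):
    start_time = time.perf_counter()
    total_compute_time = 0.0
    with torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:  # discard warm-up iterations from the averages
                start_time = time.perf_counter()
                total_compute_time = 0.0
            t0 = time.perf_counter()
            model(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()  # wait for async CUDA kernels before timing
            total_compute_time += time.perf_counter() - t0
    measured = max(len(data_loader) - num_warmup, 1)
    wall_per_img = (time.perf_counter() - start_time) / measured
    compute_per_img = total_compute_time / measured
    return wall_per_img, compute_per_img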
Code Example #8
def evaluate_test(model, data_loader, vis_preds=False):
    """
    This function evaluates the model on the dataset defined by data_loader.
    The metrics reported are described in Table 2 of our paper.
    """
    # Note that all eval runs on main process
    assert comm.is_main_process()
    deprocess = imagenet_deprocess(rescale_image=False)
    device = torch.device("cuda:0")
    # evaluation
    class_names = {
        "02828884": "bench",
        "03001627": "chair",
        "03636649": "lamp",
        "03691459": "speaker",
        "04090263": "firearm",
        "04379243": "table",
        "04530566": "watercraft",
        "02691156": "plane",
        "02933112": "cabinet",
        "02958343": "car",
        "03211117": "monitor",
        "04256520": "couch",
        "04401088": "cellphone",
    }

    num_instances = {i: 0 for i in class_names}
    chamfer = {i: 0 for i in class_names}
    normal = {i: 0 for i in class_names}
    f1_01 = {i: 0 for i in class_names}
    f1_03 = {i: 0 for i in class_names}
    f1_05 = {i: 0 for i in class_names}

    num_batch_evaluated = 0
    for batch in data_loader:
        batch = data_loader.postprocess(batch, device)
        imgs, meshes_gt, _, _, _, id_strs, _imgs = batch

        # NOTE: _imgs contains all of the other images belonging to this model.
        # We have to select the next-best view from that list of images.

        sids = [id_str.split("-")[0] for id_str in id_strs]
        for sid in sids:
            num_instances[sid] += 1

        with inference_context(model):
            voxel_scores, meshes_pred = model(imgs)

            #TODO: Render masks from predicted mesh for each view

            cur_metrics = compare_meshes(meshes_pred[-1],
                                         meshes_gt,
                                         reduce=False)
            cur_metrics["verts_per_mesh"] = meshes_pred[-1].num_verts_per_mesh(
            ).cpu()
            cur_metrics["faces_per_mesh"] = meshes_pred[-1].num_faces_per_mesh(
            ).cpu()

            for i, sid in enumerate(sids):
                chamfer[sid] += cur_metrics["Chamfer-L2"][i].item()
                normal[sid] += cur_metrics["AbsNormalConsistency"][i].item()
                f1_01[sid] += cur_metrics["F1@%f" % 0.1][i].item()
                f1_03[sid] += cur_metrics["F1@%f" % 0.3][i].item()
                f1_05[sid] += cur_metrics["F1@%f" % 0.5][i].item()

                if vis_preds:
                    img = image_to_numpy(deprocess(imgs[i]))
                    vis_utils.visualize_prediction(id_strs[i], img,
                                                   meshes_pred[-1][i],
                                                   "/tmp/output")

            num_batch_evaluated += 1
            logger.info("Evaluated %d / %d batches" %
                        (num_batch_evaluated, len(data_loader)))

    vis_utils.print_instances_class_histogram(
        num_instances,
        class_names,
        {
            "chamfer": chamfer,
            "normal": normal,
            "f1_01": f1_01,
            "f1_03": f1_03,
            "f1_05": f1_05
        },
    )
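
Example #8 only accumulates per-class sums; presumably print_instances_class_histogram (or its caller) turns them into per-class means by dividing by the instance counts. A hypothetical helper showing that reduction (not part of the original codebase):

def per_class_means(sums, num_instances):
    return {
        sid: sums[sid] / num_instances[sid] if num_instances[sid] > 0 else float("nan")
        for sid in sums
    }

# e.g. mean_chamfer = per_class_means(chamfer, num_instances)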
Code Example #9
def evaluate_test_p2m(model, data_loader):
    """
    This function evaluates the model on the dataset defined by data_loader.
    The metrics reported are described in Table 1 of our paper, following previously
    reported approaches (such as Pixel2Mesh, p2m), where meshes are rescaled by a
    factor of 0.57. See the paper for more details.
    """
    assert comm.is_main_process()
    device = torch.device("cuda:0")
    # evaluation
    class_names = {
        "02828884": "bench",
        "03001627": "chair",
        "03636649": "lamp",
        "03691459": "speaker",
        "04090263": "firearm",
        "04379243": "table",
        "04530566": "watercraft",
        "02691156": "plane",
        "02933112": "cabinet",
        "02958343": "car",
        "03211117": "monitor",
        "04256520": "couch",
        "04401088": "cellphone",
    }

    num_instances = {i: 0 for i in class_names}
    chamfer = {i: 0 for i in class_names}
    normal = {i: 0 for i in class_names}
    f1_1e_4 = {i: 0 for i in class_names}
    f1_2e_4 = {i: 0 for i in class_names}

    num_batch_evaluated = 0
    for batch in data_loader:
        batch = data_loader.postprocess(batch, device)
        imgs, meshes_gt, _, _, _, id_strs = batch
        sids = [id_str.split("-")[0] for id_str in id_strs]
        for sid in sids:
            num_instances[sid] += 1

        with inference_context(model):
            voxel_scores, meshes_pred = model(imgs)
            # NOTE that for the F1 thresholds we take the square root of 1e-4 & 2e-4
            # as `compare_meshes` returns the euclidean distance (L2) of two pointclouds.
            # In Pixel2Mesh, the squared L2 (L2^2) is computed instead.
            # i.e. (L2^2 < τ) <=> (L2 < sqrt(τ))
            cur_metrics = compare_meshes(meshes_pred[-1],
                                         meshes_gt,
                                         scale=0.57,
                                         thresholds=[0.01, 0.014142],
                                         reduce=False)
            cur_metrics["verts_per_mesh"] = meshes_pred[-1].num_verts_per_mesh(
            ).cpu()
            cur_metrics["faces_per_mesh"] = meshes_pred[-1].num_faces_per_mesh(
            ).cpu()

            for i, sid in enumerate(sids):
                chamfer[sid] += cur_metrics["Chamfer-L2"][i].item()
                normal[sid] += cur_metrics["AbsNormalConsistency"][i].item()
                f1_1e_4[sid] += cur_metrics["F1@%f" % 0.01][i].item()
                f1_2e_4[sid] += cur_metrics["F1@%f" % 0.014142][i].item()

            num_batch_evaluated += 1
            logger.info("Evaluated %d / %d batches" %
                        (num_batch_evaluated, len(data_loader)))

    vis_utils.print_instances_class_histogram_p2m(
        num_instances,
        class_names,
        {
            "chamfer": chamfer,
            "normal": normal,
            "f1_1e_4": f1_1e_4,
            "f1_2e_4": f1_2e_4
        },
    )
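
The threshold comment in Example #9 can be checked with one line of arithmetic: `compare_meshes` thresholds on the L2 distance, so the Pixel2Mesh thresholds on the squared distance (1e-4 and 2e-4) become their square roots, which is where 0.01 and 0.014142 come from:

import math

print(math.sqrt(1e-4))  # 0.01
print(math.sqrt(2e-4))  # 0.014142135623730951, rounded to 0.014142 above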
Code Example #10
def inference_custom(model, data_loader, evaluator):

    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0

    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            start_compute_time = time.perf_counter()
            outputs = model(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            evaluator.process(inputs, outputs)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() -
                                         start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img *
                                                     (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)),
                    n=5,
                )

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".
        format(total_time_str, total_time / (total - num_warmup), num_devices))
    total_compute_time_str = str(
        datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)"
        .format(total_compute_time_str,
                total_compute_time / (total - num_warmup), num_devices))

    results = evaluator.evaluate()
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
Code Example #11
def inference_on_dataset(
    model, data_loader, evaluator, num_classes, topk, num_estimate, min_score
):
    """
    Run model on the data_loader and evaluate the metrics with evaluator.
    Also benchmark the inference speed of `model.forward` accurately.
    The model will be used in eval mode.

    Args:
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.

            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
            to benchmark, but don't want to do any evaluation.
        num_classes (int): number of foreground classes.
        topk (int or list[int]): number of top-scoring predictions to keep per class;
            an int is broadcast to all classes.
        num_estimate (int): number of images used to estimate the initial score thresholds.
        min_score (float): minimum score used to initialize the per-class thresholds.

    Returns:
        The return value of `evaluator.evaluate()`
    """
    num_devices = get_world_size()
    logger.info("Start inference on {} images".format(len(data_loader)))
    if isinstance(topk, int):
        logger.info(f"Collecting top-{topk} images.")
        topk = [topk] * num_classes
    else:
        logger.info(f"Collecting top-k images. Counts:\n{topk}")

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0

    # We keep track of scores from _this_ process (process_scores) and scores from
    # all processes (global_scores). Every iteration, each process updates
    # process_scores and its local copy of global_scores with the new scores from
    # the model. Every few iterations, all processes exchange their process_scores
    # and rebuild their own global_scores.

    # Map category id to min-heap of top scores from this process.
    process_scores = defaultdict(list)
    # Map category id to min-heap of top scores from all processes.
    global_scores = defaultdict(list)
    init_thresholds = torch.full(
        (num_classes + 1,), fill_value=min_score, dtype=torch.float32
    ).to(model.device)
    init_threshold_path = Path(evaluator._output_dir) / "_thresholds_checkpoint.pth"
    if init_threshold_path.exists():
        logger.info("Loading thresholds from disk.")
        init_thresholds = torch.load(init_threshold_path).to(model.device)
    else:
        init_threshold_path.parent.mkdir(exist_ok=True, parents=True)

    # Trying to get exactly the top-k estimates can result in getting slightly fewer
    # than K estimates. This can happen due to subtle differences in the model's forward
    # pass in the first phase vs. the second phase. For example, in the first phase,
    # when we have low thresholds, D2 will use torchvision.ops.boxes.batched_nms for
    # batch NMS. In phase 2, D2 will use a slightly different, customized
    # implementation, which may occasionally result in fewer boxes.
    # To address this, we set thresholds to be a bit looser, targeting 10% more
    # predictions than requested.
    topk_loose = [int(ceil(k * 1.1)) for k in topk]

    def get_thresholds(scores, min_thresholds):
        thresholds = []
        for i in range(num_classes):
            if topk_loose[i] == 0:
                thresholds.append(float("inf"))
            elif len(scores[i]) < topk_loose[i]:
                thresholds.append(-1)
            else:
                thresholds.append(scores[i][0])
        # Add -1 for background
        thresholds = torch.FloatTensor(thresholds + [-1]).to(model.device)
        # Clamp at the minimum thresholds passed in
        return torch.max(thresholds, min_thresholds)

    def update_scores(scores, inputs, outputs):
        updated = set()
        for image, output in zip(inputs, outputs):
            if isinstance(output, dict):
                instances = output["instances"]
            else:
                instances = output
            curr_labels = instances.pred_classes.int().tolist()
            curr_scores = instances.scores.cpu().tolist()
            for label, score in zip(curr_labels, curr_scores):
                # label = label.int().item()
                # scores[label].append((image["image_id"], score.cpu().item()))
                if len(scores[label]) >= topk_loose[label]:
                    if score < scores[label][0]:
                        continue
                    else:
                        heapq.heappushpop(scores[label], score)
                else:
                    heapq.heappush(scores[label], score)
                updated.add(label)

    def gather_scores(process_scores):
        # List of scores per process
        scores_list = comm.all_gather(process_scores)
        gathered = defaultdict(list)
        labels = {x for scores in scores_list for x in scores.keys()}
        for label in labels:
            # Sort in descending order.
            sorted_generator = heapq.merge(
                *[sorted(x[label], reverse=True) for x in scores_list], reverse=True
            )
            top_k = itertools.islice(sorted_generator, topk_loose[label])
            top_k_ascending = list(reversed(list(top_k)))  # Return to ascending order
            heapq.heapify(top_k_ascending)
            gathered[label] = top_k_ascending
        return gathered

    with inference_context(model), torch.no_grad():
        #########
        # Phase 1: Compute initial, low score thresholds without mask branch.
        #########
        # First, get an estimate of score thresholds with the mask branch off.
        # Otherwise, in the initial few images, we will run the mask branch on a bunch
        # of useless proposals which makes everything slow.
        num_estimate = min(num_estimate, len(data_loader))
        for idx, inputs in enumerate(
            tqdm(
                data_loader,
                desc="Computing score thresholds",
                total=num_estimate,
                disable=comm.get_rank() != 0,
            )
        ):
            if idx > num_estimate:
                break
            # Gather scores from other processes periodically.
            # In early iterations, the thresholds are low, making inference slow and
            # gather relatively fast, so we gather more often.
            # Later, the thresholds are high enough that inference is fast and gathering
            # is slow, so we stop gathering.
            if (idx < 100 and idx % 10 == 0) or (idx % 500 == 0):
                global_scores = gather_scores(process_scores)

            thresholds = get_thresholds(global_scores, init_thresholds)
            if idx % 1000 == 0:  # Save thresholds for later runs
                torch.save(thresholds, init_threshold_path)

            with per_class_thresholded_inference(model, thresholds, topk):
                with _turn_off_roi_heads(model, ["mask_on", "keypoint_on"]):
                    outputs = model.inference(inputs, do_postprocess=False)
            update_scores(global_scores, inputs, outputs)
            update_scores(process_scores, inputs, outputs)

            if (idx < 100 and idx % 10 == 0) or (idx % 100 == 0):
                logger.info(
                    "Threshold range (%s, %s); # collected: (%s, %s)",
                    thresholds[:-1].min(),
                    thresholds[:-1].max(),
                    min(len(x) for x in global_scores.values()),
                    max(len(x) for x in global_scores.values()),
                )

        del global_scores
        # Necessary to avoid timeout when gathering?
        comm.synchronize()

        # Map class to scores of predictions so far.
        init_scores = gather_scores(process_scores)
        # Minimum thresholds from the estimate stage
        init_thresholds = get_thresholds(init_scores, init_thresholds)
        # Clear scores from estimates; we will start tracking them again.
        scores = defaultdict(list)

        #########
        # Phase 2: Collect top-k predictions, with mask branch enabled.
        #########
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            start_compute_time = time.perf_counter()
            thresholds = get_thresholds(scores, init_thresholds)
            with per_class_thresholded_inference(model, thresholds, topk):
                with limit_mask_branch_proposals(model, max_proposals=300):
                    outputs = model(inputs)
            update_scores(scores, inputs, outputs)

            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            evaluator.process(inputs, outputs)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (
                    time.perf_counter() - start_time
                ) / iters_after_start
                eta = datetime.timedelta(
                    seconds=int(total_seconds_per_img * (total - idx - 1))
                )
                log_every_n_seconds(
                    logging.INFO,
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)
                    ),
                    n=5,
                    name=logger.name,
                )

            # Clear unnecessary predictions every so often.
            if idx < 100 or ((idx + 1) % 10) == 0:
                by_cat = defaultdict(list)
                for pred in evaluator._predictions:
                    for ann in pred["instances"]:
                        by_cat[ann["category_id"]].append(ann)
                topk_preds = []
                for c, anns in by_cat.items():
                    topk_preds.extend(
                        sorted(anns, key=lambda a: a["score"], reverse=True)[: topk[c]]
                    )
                evaluator._predictions = [{"instances": topk_preds}]

    if evaluator._output_dir:
        PathManager.mkdirs(evaluator._output_dir)
        file_path = os.path.join(
            evaluator._output_dir, f"instances_predictions_rank{comm.get_rank()}.pth"
        )
        with PathManager.open(file_path, "wb") as f:
            torch.save(evaluator._predictions, f)

    # Necessary to avoid timeout when gathering?
    comm.synchronize()
    # Limit number of detections per category across workers.
    predictions = comm.gather(evaluator._predictions, dst=0)
    if comm.is_main_process():
        predictions = list(itertools.chain(*predictions))
        by_cat = defaultdict(list)
        for pred in predictions:
            for ann in pred["instances"]:
                by_cat[ann["category_id"]].append(ann)
        logger.info(f"Max per cat: {max([len(v) for v in by_cat.values()])}")
        logger.info(f"Min per cat: {min([len(v) for v in by_cat.values()])}")
        topk_preds = []
        for c, anns in by_cat.items():
            topk_preds.extend(
                sorted(anns, key=lambda a: a["score"], reverse=True)[: topk[c]]
            )
        evaluator._predictions = [{"instances": topk_preds}]
    else:
        evaluator._predictions = []

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices
        )
    )
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str,
            total_compute_time / (total - num_warmup),
            num_devices,
        )
    )

    results = evaluator.evaluate()
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
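
The score bookkeeping in Example #11 hinges on one idea: per class, keep a min-heap of at most k scores, so the smallest retained score (heap[0]) is the running top-k threshold. A self-contained illustration of that pattern (names and values are illustrative):

import heapq

def push_topk(heap, score, k):
    """Keep at most the k largest scores in a min-heap; return the current threshold."""
    if len(heap) < k:
        heapq.heappush(heap, score)
    elif score > heap[0]:
        heapq.heappushpop(heap, score)
    return heap[0] if len(heap) >= k else -1.0  # -1 means "not enough scores yet"

heap = []
for s in [0.3, 0.9, 0.1, 0.7, 0.85, 0.2]:
    threshold = push_topk(heap, s, k=3)
print(sorted(heap), threshold)  # [0.7, 0.85, 0.9] 0.7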
Code Example #12
File: gdrn_evaluator.py  Project: hz-ants/GDR-Net
def save_result_of_dataset(cfg, model, data_loader, output_dir, dataset_name):
    """
    Run model (in eval mode) on the data_loader and save predictions
    Args:
        cfg: config
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.

            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
    Returns:
        None. Predictions are dumped to ``results.pkl`` under ``output_dir``.
    """
    cpu_device = torch.device("cpu")
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    # NOTE: dataset name should be the same as TRAIN to get the correct meta
    _metadata = MetadataCatalog.get(dataset_name)
    data_ref = ref.__dict__[_metadata.ref_key]
    obj_names = _metadata.objs
    obj_ids = [data_ref.obj2id[obj_name] for obj_name in obj_names]

    result_name = "results.pkl"
    mmcv.mkdir_or_exist(output_dir)
    result_path = osp.join(output_dir, result_name)

    total = len(data_loader)  # inference data loader must have a fixed length
    results = OrderedDict()
    VIS = False

    logging_interval = 50
    num_warmup = min(5, logging_interval - 1, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0
            if VIS:
                images_ori = [_input["image"].clone() for _input in inputs]
            start_compute_time = time.perf_counter()
            outputs = model(inputs)  # NOTE: do model inference
            torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time

            # NOTE: process results
            for i in range(len(inputs)):
                _input = inputs[i]
                output = outputs[i]
                cur_results = {}
                instances = output["instances"]
                HAS_MASK = False
                if instances.has("pred_masks"):
                    HAS_MASK = True
                    pred_masks = instances.pred_masks  # (#objs, imH, imW)
                    pred_masks = pred_masks.detach().cpu().numpy()
                    # NOTE: time-consuming step
                    rles = [
                        binary_mask_to_rle(pred_masks[_k])
                        for _k in range(len(pred_masks))
                    ]

                instances = instances.to(cpu_device)
                boxes = instances.pred_boxes.tensor.clone().detach().cpu(
                ).numpy()  # xyxy

                scores = instances.scores.tolist()
                labels = instances.pred_classes.detach().cpu().numpy()

                obj_ids = [
                    data_ref.obj2id[obj_names[int(label)]] for label in labels
                ]
                ego_quats = instances.pred_ego_quats.detach().cpu().numpy()
                ego_rots = [
                    quat2mat(ego_quats[k]) for k in range(len(ego_quats))
                ]
                transes = instances.pred_transes.detach().cpu().numpy()

                cur_results = {
                    "time": cur_compute_time / len(inputs),
                    "obj_ids": obj_ids,
                    "scores": scores,
                    "boxes": boxes,  # xyxy
                    "Rs": ego_rots,
                    "ts": transes,  # m
                }
                if HAS_MASK:
                    cur_results["masks"] = rles

                if VIS:
                    import cv2
                    from lib.vis_utils.image import vis_image_mask_bbox_cv2

                    image = (images_ori[i].detach().cpu().numpy().transpose(
                        1, 2, 0) + 0.5).astype("uint8")
                    img_vis = vis_image_mask_bbox_cv2(
                        image,
                        pred_masks,
                        boxes,
                        labels=[obj_names[int(label)] for label in labels])
                    cv2.imshow("img", img_vis.astype("uint8"))
                    cv2.waitKey()
                results[_input["scene_im_id"]] = cur_results

            if (idx + 1) % logging_interval == 0:
                duration = time.perf_counter() - start_time
                seconds_per_img = duration / (idx + 1 - num_warmup)
                eta = datetime.timedelta(seconds=int(seconds_per_img *
                                                     (total - num_warmup) -
                                                     duration))
                logger.info(
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)))

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = int(time.perf_counter() - start_time)
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".
        format(total_time_str, total_time / (total - num_warmup), num_devices))
    total_compute_time_str = str(
        datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)"
        .format(total_compute_time_str,
                total_compute_time / (total - num_warmup), num_devices))

    mmcv.dump(results, result_path)
    logger.info("Results saved to {}".format(result_path))
Code Example #13
File: gdrn_evaluator.py  Project: hz-ants/GDR-Net
def gdrn_inference_on_dataset(cfg,
                              model,
                              data_loader,
                              evaluator,
                              amp_test=False):
    """Run model on the data_loader and evaluate the metrics with evaluator.
    Also benchmark the inference speed of `model.forward` accurately. The model
    will be used in eval mode.

    Args:
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.

            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
            to benchmark, but don't want to do any evaluation.

    Returns:
        The return value of `evaluator.evaluate()`
    """
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # create a no-op evaluator
        evaluator = DatasetEvaluators([])
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    total_process_time = 0
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0
                total_process_time = 0

            start_compute_time = time.perf_counter()
            #############################
            # process input
            batch = batch_data(cfg, inputs, phase="test")
            if evaluator.train_objs is not None:
                roi_labels = batch["roi_cls"].cpu().numpy().tolist()
                obj_names = [evaluator.obj_names[_l] for _l in roi_labels]
                if all(_obj not in evaluator.train_objs for _obj in obj_names):
                    continue

            # if cfg.DEBUG:
            #     for i in range(len(batch["roi_cls"])):
            #         vis_roi_im = batch["roi_img"][i].cpu().numpy().transpose(1,2,0)[:, :, ::-1]
            #         show_ims = [vis_roi_im]
            #         show_titles = ["roi_im"]
            #
            #         vis_coor2d = batch["roi_coord_2d"][i].cpu().numpy()
            #         show_ims.extend([vis_coor2d[0], vis_coor2d[1]])
            #         show_titles.extend(["coord_2d_x", "coord_2d_y"])
            #         grid_show(show_ims, show_titles, row=1, col=3)

            with autocast(enabled=amp_test):
                out_dict = model(
                    batch["roi_img"],
                    roi_classes=batch["roi_cls"],
                    roi_cams=batch["roi_cam"],
                    roi_whs=batch["roi_wh"],
                    roi_centers=batch["roi_center"],
                    resize_ratios=batch["resize_ratio"],
                    roi_coord_2d=batch.get("roi_coord_2d", None),
                    roi_extents=batch.get("roi_extent", None),
                )
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time
            # NOTE: added
            # TODO: add detection time here
            outputs = [{} for _ in range(len(inputs))]
            for _i in range(len(outputs)):
                outputs[_i]["time"] = cur_compute_time

            start_process_time = time.perf_counter()
            evaluator.process(inputs, outputs, out_dict)  # RANSAC/PnP
            cur_process_time = time.perf_counter() - start_process_time
            total_process_time += cur_process_time

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() -
                                         start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img *
                                                     (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    f"Inference done {idx+1}/{total}. {seconds_per_img:.4f} s / img. ETA={str(eta)}",
                    n=5)

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        f"Total inference time: {total_time_str} "
        f"({total_time / (total - num_warmup):.6f} s / img per device, on {num_devices} devices)"
    )
    # pure forward time
    total_compute_time_str = str(
        datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)"
        .format(total_compute_time_str,
                total_compute_time / (total - num_warmup), num_devices))
    # post_process time
    total_process_time_str = str(
        datetime.timedelta(seconds=int(total_process_time)))
    logger.info(
        "Total inference post process time: {} ({:.6f} s / img per device, on {} devices)"
        .format(total_process_time_str,
                total_process_time / (total - num_warmup), num_devices))

    results = evaluator.evaluate()  # results is always None
    # An evaluator may return None when not in main process.
    # Replace it by an empty dict instead to make it easier for downstream code to handle
    if results is None:
        results = {}
    return results
Code Example #14
import os

import h5py
import torch
import tqdm
from detectron2.structures import Boxes

# NOTE: cfg, model, PathManager, dataset_to_folder_mapper and
# build_detection_test_loader_with_attributes are assumed to be defined or
# imported earlier in the script.


save_dir = '/home/luoyp/disk1/grid-feats-vqa/feats'
region_before = h5py.File(os.path.join(save_dir,'region_before_X152.hdf5'),'w')
# region_after = h5py.File(os.path.join(save_dir,'region_after.hdf5'),'w')
# grid7 = h5py.File(os.path.join(save_dir,'my_grid7.hdf5'),'w')
# original_grid = h5py.File(os.path.join(save_dir,'original_grid7.hdf5'),'w')

thresh = 0.2
max_regions = 100
pooling = torch.nn.AdaptiveAvgPool2d((7,7))
image_id_collector = []
for dataset_name in ['coco_2014_train','coco_2014_val']:
    with inference_context(model):
        dump_folder = os.path.join(cfg.OUTPUT_DIR, "features", dataset_to_folder_mapper[dataset_name])
        PathManager.mkdirs(dump_folder)
        data_loader = build_detection_test_loader_with_attributes(cfg, dataset_name)
        for idx, inputs in enumerate(tqdm.tqdm(data_loader)):
            with torch.no_grad():
                image_id = inputs[0]['image_id']
                file_name = '%d.pth' % image_id
                images = model.preprocess_image(inputs)
                features = model.backbone(images.tensor)

                proposals, _ = model.proposal_generator(images, features)
                proposal_boxes = [x.proposal_boxes for x in proposals]

                features = [features[f] for f in model.roi_heads.in_features]
                box_features1 = model.roi_heads.box_pooler(features, [x.proposal_boxes for x in proposals])
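
Example #14 is cut off mid-loop. Purely to illustrate the h5py pattern set up at the top of the snippet, here is a hedged sketch of how the pooled box features might then be written to the file opened above; the dataset naming and the choice of tensor to store are assumptions, not the original code, and the lines would sit inside the per-image loop:

                # Hypothetical continuation (illustrative only): pool the box
                # features to a fixed 7x7 grid and store one dataset per image.
                pooled = pooling(box_features1)         # (num_boxes, C, 7, 7)
                feat = pooled.flatten(1).cpu().numpy()  # (num_boxes, C * 49)
                region_before.create_dataset(str(image_id), data=feat)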
Code Example #15
def evaluate_test(model, data_loader, vis_preds=False):
    """
    This function evaluates the model on the dataset defined by data_loader.
    The metrics reported are described in Table 2 of our paper.
    """
    # Note that all eval runs on main process
    assert comm.is_main_process()
    deprocess = imagenet_deprocess(rescale_image=False)
    device = torch.device("cuda:0")
    # evaluation
    class_names = {
        "02828884": "bench",
        "03001627": "chair",
        "03636649": "lamp",
        "03691459": "speaker",
        "04090263": "firearm",
        "04379243": "table",
        "04530566": "watercraft",
        "02691156": "plane",
        "02933112": "cabinet",
        "02958343": "car",
        "03211117": "monitor",
        "04256520": "couch",
        "04401088": "cellphone",
    }

    num_instances = {i: 0 for i in class_names}
    chamfer = {i: 0 for i in class_names}
    normal = {i: 0 for i in class_names}
    f1_01 = {i: 0 for i in class_names}
    f1_03 = {i: 0 for i in class_names}
    f1_05 = {i: 0 for i in class_names}

    num_batch_evaluated = 0
    for batch in data_loader:
        batch = data_loader.postprocess(batch, device)
        sids = [id_str.split("-")[0] for id_str in batch["id_strs"]]
        for sid in sids:
            num_instances[sid] += 1

        with inference_context(model):
            model_kwargs = {}
            module = model.module if hasattr(model, "module") else model
            if isinstance(module, VoxMeshMultiViewHead):
                model_kwargs["intrinsics"] = batch["intrinsics"]
                model_kwargs["extrinsics"] = batch["extrinsics"]
            if isinstance(module, VoxMeshDepthHead):
                model_kwargs["masks"] = batch["masks"]

            model_outputs = model(batch["imgs"], **model_kwargs)
            voxel_scores = model_outputs["voxel_scores"]
            meshes_pred = model_outputs["meshes_pred"]

            cur_metrics = compare_meshes(meshes_pred[-1], batch["meshes"], reduce=False)
            cur_metrics["verts_per_mesh"] = meshes_pred[-1].num_verts_per_mesh().cpu()
            cur_metrics["faces_per_mesh"] = meshes_pred[-1].num_faces_per_mesh().cpu()

            for i, sid in enumerate(sids):
                chamfer[sid] += cur_metrics["Chamfer-L2"][i].item()
                normal[sid] += cur_metrics["AbsNormalConsistency"][i].item()
                f1_01[sid] += cur_metrics["F1@%f" % 0.1][i].item()
                f1_03[sid] += cur_metrics["F1@%f" % 0.3][i].item()
                f1_05[sid] += cur_metrics["F1@%f" % 0.5][i].item()

                if vis_preds:
                    img = image_to_numpy(deprocess(batch["imgs"][i]))
                    vis_utils.visualize_prediction(
                        batch["id_strs"][i], img, meshes_pred[-1][i], "/tmp/output"
                    )

            num_batch_evaluated += 1
            logger.info("Evaluated %d / %d batches" % (num_batch_evaluated, len(data_loader)))

    vis_utils.print_instances_class_histogram(
        num_instances,
        class_names,
        {"chamfer": chamfer, "normal": normal, "f1_01": f1_01, "f1_03": f1_03, "f1_05": f1_05},
    )