def compute_on_dataset(model, data_loader, device, timer=None, show=False):
    """Run inference over ``data_loader`` and collect per-sample detections.

    Args:
        model: detector; invoked with ``return_loss=False``.
        data_loader: torch DataLoader; ``len(data_loader.dataset)`` sizes the
            progress bar.
        device: device input batches are moved to before inference.
        timer: unused; kept for interface compatibility.
        show: when True, outputs are NOT rescaled (``rescale=not show``).

    Returns:
        dict mapping each output's ``metadata["token"]`` to its detection
        dict, with every tensor field moved to CPU.
    """
    model.eval()
    cpu_device = torch.device("cpu")

    # NOTE: the original bound results_dict to a list and immediately rebound
    # it to a dict; the dead list assignment has been removed.
    results_dict = {}
    prog_bar = torchie.ProgressBar(len(data_loader.dataset))
    for batch in data_loader:
        example = example_to_device(batch, device=device)
        with torch.no_grad():
            outputs = model(example, return_loss=False, rescale=not show)
            for output in outputs:
                token = output["metadata"]["token"]
                # Everything except metadata is a tensor; park it on CPU so
                # GPU memory is released as results accumulate.
                for k, v in output.items():
                    if k != "metadata":
                        output[k] = v.to(cpu_device)
                results_dict[token] = output
                prog_bar.update()

    return results_dict
Example #2
0
def compute_on_dataset(model, data_loader, device, timer=None, show=False):
    '''
        Get predictions by model inference.
            - output: ['box3d_lidar', 'scores', 'label_preds', 'metadata'];
            - detections: type: dict, length: 3769, keys: image_ids, detections[image_id] = output;

        ``timer`` is unused; ``show`` disables rescaling (``rescale=not show``).
        Returns a dict mapping each output's metadata token to its detection
        dict with all tensor fields moved to CPU.
    '''
    model.eval()
    cpu_device = torch.device("cpu")

    results_dict = {}

    for i, batch in enumerate(data_loader):
        # The progress bar is created lazily on the second batch, so the
        # first batch is excluded from the displayed total.
        # NOTE(review): the bar is sized len(dataset) - 1 but per-sample
        # updates only start at batch index 1, so the count only lines up
        # when the batch size is 1 — confirm against the caller's loader.
        if i == 1:
            prog_bar = torchie.ProgressBar(len(data_loader.dataset) - 1)
        example = example_to_device(batch, device=device)
        with torch.no_grad():
            outputs = model(example, return_loss=False,
                            rescale=not show)  # list_length=batch_size: 8
            for output in outputs:  # output.keys(): ['box3d_lidar', 'scores', 'label_preds', 'metadata']
                token = output["metadata"][
                    "token"]  # token should be the image_id
                # Move every tensor field (all keys except metadata) to CPU
                # so GPU memory is released as results accumulate.
                for k, v in output.items():
                    if k not in [
                            "metadata",
                    ]:
                        output[k] = v.to(cpu_device)
                results_dict.update({
                    token: output,
                })
                if i >= 1:
                    prog_bar.update()

    return results_dict
Example #3
0
    def val(self, data_loader, **kwargs):
        """Run one validation epoch across all ranks and log evaluation results.

        Each rank processes its shard of ``data_loader`` and moves its
        detections to CPU; detections are then all-gathered and only rank 0
        merges them and runs ``dataset.evaluation``.  Extra ``kwargs`` are
        forwarded to ``self.batch_processor``.  Non-zero ranks return early
        (``None``) after the gather.
        """
        self.model.eval()
        self.mode = "val"
        self.data_loader = data_loader
        self.call_hook("before_val_epoch")

        self.logger.info(f"work dir: {self.work_dir}")

        # Only rank 0 draws a progress bar, sized for the whole dataset.
        if self.rank == 0:
            prog_bar = torchie.ProgressBar(len(data_loader.dataset))

        detections = {}
        cpu_device = torch.device("cpu")

        for i, data_batch in enumerate(data_loader):
            self._inner_iter = i
            self.call_hook("before_val_iter")
            with torch.no_grad():
                outputs = self.batch_processor(self.model,
                                               data_batch,
                                               train_mode=False,
                                               **kwargs)
            for output in outputs:
                token = output["metadata"]["token"]
                # Move every tensor field (all keys except metadata) to CPU
                # so GPU memory is freed as detections accumulate.
                for k, v in output.items():
                    if k not in [
                            "metadata",
                    ]:
                        output[k] = v.to(cpu_device)
                detections.update({
                    token: output,
                })
                if self.rank == 0:
                    # Advance once per rank per output so the bar tracks
                    # global progress — assumes every rank produces the same
                    # number of outputs per step; TODO confirm.
                    for _ in range(self.world_size):
                        prog_bar.update()

        synchronize()

        all_predictions = all_gather(detections)

        # Non-zero ranks are done once their detections are gathered.
        if self.rank != 0:
            return

        predictions = {}
        for p in all_predictions:
            predictions.update(p)

        # torch.save(predictions, "final_predictions_debug.pkl")
        # TODO fix evaluation module
        result_dict, _ = self.data_loader.dataset.evaluation(
            predictions, output_dir=self.work_dir)

        self.logger.info("\n")
        for k, v in result_dict["results"].items():
            self.logger.info(f"Evaluation {k}: {v}")

        self.call_hook("after_val_epoch")
Example #4
0
    def after_train_epoch(self, trainer):
        """Evaluate the model on ``self.dataset`` every ``self.interval``
        epochs, gathering per-rank detections and printing results on rank 0.

        Samples are sharded across ranks with a stride of
        ``trainer.world_size``; detections are keyed by metadata token.
        """
        if not self.every_n_epochs(trainer, self.interval):
            return
        trainer.model.eval()
        detections = {}
        # Bug fix: cpu_device was referenced below but never defined
        # (NameError in the original).
        cpu_device = torch.device("cpu")
        if trainer.rank == 0:
            prog_bar = torchie.ProgressBar(len(self.dataset))
        # Strided sharding: each rank evaluates every world_size-th sample.
        for idx in range(trainer.rank, len(self.dataset), trainer.world_size):
            data = self.dataset[idx]
            data_gpu = scatter(collate_kitti([data], samples_per_gpu=1),
                               [torch.cuda.current_device()])[0]

            # compute output
            with torch.no_grad():
                output = trainer.model(data_gpu, return_loss=False)

                token = output["metadata"]["token"]
                # Everything except metadata is a tensor; move it to CPU.
                for k, v in output.items():
                    if k != "metadata":
                        output[k] = v.to(cpu_device)
                detections[token] = output

            # NOTE: the original also did ``detections[idx] = result`` with an
            # undefined ``result`` (NameError) and overwrote the token keying;
            # that line and the unused ``results`` list are removed.

            if trainer.rank == 0:
                # Advance once per rank so the bar tracks global progress.
                for _ in range(trainer.world_size):
                    prog_bar.update()

        all_predictions = all_gather(detections)

        if trainer.rank != 0:
            return

        predictions = {}
        for p in all_predictions:
            predictions.update(p)

        result_dict, _ = self.dataset.evaluation(predictions, None)

        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")
Example #5
0
    def after_train_epoch(self, trainer):
        """Run evaluation over ``self.dataset`` every ``self.interval``
        epochs and print the resulting metrics."""
        if not self.every_n_epochs(trainer, self.interval):
            return

        # Build a fresh, non-distributed loader over the evaluation dataset.
        loader = datasets.loader.build_dataloader(self.dataset,
                                                  batch_size=8,
                                                  workers_per_gpu=4,
                                                  dist=False,
                                                  shuffle=False)

        trainer.model.eval()
        collected = {}

        bar = torchie.ProgressBar(len(loader.dataset))

        cpu = torch.device("cpu")
        for data_batch in loader:
            with torch.no_grad():
                batch_outputs = trainer.batch_processor(
                    trainer.model,
                    data_batch,
                    train_mode=False,
                    local_rank=trainer.rank,
                )
            for det in batch_outputs:
                # Keep metadata where it is; move all tensor fields to CPU.
                for key in det:
                    if key != "metadata":
                        det[key] = det[key].to(cpu)
                collected[det["metadata"]["token"]] = det
                bar.update()

        result_dict, _ = self.dataset.evaluation(collected, None)

        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")
Example #6
0
def main():
    """Distributed test entry point: load a detector checkpoint, run it over
    the val/test split, gather per-rank detections, save and evaluate them.

    Also reports average per-frame inference time measured over the middle
    third of the dataset (warm-up and tail excluded).
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = torch.cuda.device_count() > 1

    if distributed:
        if args.launcher == "pytorch":
            torch.cuda.set_device(args.local_rank)
            torch.distributed.init_process_group(backend="nccl", init_method="env://")
            cfg.local_rank = args.local_rank
        elif args.launcher == "slurm":
            # Derive the rendezvous environment from SLURM variables.
            proc_id = int(os.environ["SLURM_PROCID"])
            ntasks = int(os.environ["SLURM_NTASKS"])
            node_list = os.environ["SLURM_NODELIST"]
            num_gpus = torch.cuda.device_count()
            cfg.gpus = num_gpus
            torch.cuda.set_device(proc_id % num_gpus)
            addr = subprocess.getoutput(
                f"scontrol show hostname {node_list} | head -n1")
            # specify master port
            port = None
            if port is not None:
                os.environ["MASTER_PORT"] = str(port)
            elif "MASTER_PORT" in os.environ:
                pass  # use MASTER_PORT in the environment variable
            else:
                # 29500 is torch.distributed default port
                os.environ["MASTER_PORT"] = "29501"
            # use MASTER_ADDR in the environment variable if it already exists
            if "MASTER_ADDR" not in os.environ:
                os.environ["MASTER_ADDR"] = addr
            os.environ["WORLD_SIZE"] = str(ntasks)
            os.environ["LOCAL_RANK"] = str(proc_id % num_gpus)
            os.environ["RANK"] = str(proc_id)

            dist.init_process_group(backend="nccl")
            cfg.local_rank = int(os.environ["LOCAL_RANK"])

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        # batch size 1 gives a true per-frame latency when speed-testing
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()

    model.eval()

    prog_bar = None
    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    # Time only the middle third of the dataset.  (The original first bound
    # ``start = time.time()`` and immediately overwrote it — that dead
    # assignment has been removed.)
    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            outputs = batch_processor(
                model, data_batch, train_mode=False, local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            # Everything except metadata is a tensor; park it on CPU.
            for k, v in output.items():
                if k != "metadata":
                    output[k] = v.to(cpu_device)
            detections[token] = output
            if args.local_rank == 0 and prog_bar is not None:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    # Guard the timing report: on short runs the window is never reached and
    # end == start would divide by zero.
    if end > start and time_end > time_start:
        print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)

    result_dict, _ = dataset.evaluation(copy.deepcopy(predictions), output_dir=args.work_dir, testset=args.testset)

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"
Example #7
0
def main():
    """Distributed test entry point: load a detector checkpoint, run it over
    the validation split, gather per-rank detections, and evaluate on rank 0.

    When ``--txt_result`` is set, predictions are additionally dumped in
    KITTI label format and scored with ``kitti_evaluate``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    # Launchers export WORLD_SIZE; more than one process means distributed.
    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(
        f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()

    model.eval()

    logger.info(f"work dir: {args.work_dir}")

    # Only rank 0 draws a progress bar; initialize to None so the update
    # below can be guarded without a NameError.
    prog_bar = None
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    for i, data_batch in enumerate(data_loader):
        with torch.no_grad():
            outputs = batch_processor(
                model,
                data_batch,
                train_mode=False,
                local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            # Everything except metadata is a tensor; park it on CPU.
            for k, v in output.items():
                if k != "metadata":
                    output[k] = v.to(cpu_device)
            detections[token] = output
            if args.local_rank == 0 and prog_bar is not None:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    result_dict, _ = dataset.evaluation(predictions, output_dir=args.work_dir)

    for k, v in result_dict["results"].items():
        print(f"Evaluation {k}: {v}")

    if args.txt_result:
        res_dir = os.path.join(os.getcwd(), "predictions")
        # Bug fix: the directory was never created, so the open() below
        # raised FileNotFoundError on a fresh run.
        os.makedirs(res_dir, exist_ok=True)
        for k, dt in predictions.items():
            with open(
                    os.path.join(res_dir,
                                 "%06d.txt" % int(dt["metadata"]["token"])),
                    "w") as fout:
                lines = kitti.annos_to_kitti_label(dt)
                for line in lines:
                    fout.write(line + "\n")

        ap_result_str, ap_dict = kitti_evaluate(
            "/data/Datasets/KITTI/Kitti/object/training/label_2",
            res_dir,
            label_split_file="/data/Datasets/KITTI/Kitti/ImageSets/val.txt",
            current_class=0,
        )

        print(ap_result_str)
def main():
    """Test entry point: run inference over the chosen split, time the middle
    third of the dataset, save predictions to disk, and evaluate (rank 0).

    Evaluation deliberately round-trips through the saved pickle so it scores
    exactly what was persisted.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    # Launchers export WORLD_SIZE; more than one process means distributed.
    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(
        f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        # batch size 1 gives a true per-frame latency when speed-testing
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()

    model.eval()

    logger.info(f"work dir: {args.work_dir}")
    prog_bar = None
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    # Time only the middle third of the dataset.  (A dead
    # ``start = time.time()`` binding that was immediately overwritten has
    # been removed.)
    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            outputs = batch_processor(
                model,
                data_batch,
                train_mode=False,
                local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            # Everything except metadata is a tensor; park it on CPU.
            for k, v in output.items():
                if k != "metadata":
                    output[k] = v.to(cpu_device)
            detections[token] = output
            if args.local_rank == 0 and prog_bar is not None:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    # Guard the timing report: on short runs the window is never reached and
    # end == start would divide by zero.
    if end > start and time_end > time_start:
        print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)
    # Round-trip through the saved pickle so evaluation consumes exactly what
    # was persisted to disk.
    with open(os.path.join(args.work_dir, 'prediction.pkl'), 'rb') as f:
        predictions = pickle.load(f)

    result_dict, _ = dataset.evaluation(copy.deepcopy(predictions),
                                        output_dir=args.work_dir,
                                        testset=args.testset)

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"
def main():
    """Experimental ONNX-export path: load a head-less detector, build one
    trimmed input sample, export the model to ONNX, and stop.

    The detection/evaluation code kept after the ``break`` in the batch loop
    is retained from the original testing script but is never reached for
    the in-loop part; the post-loop evaluation runs with an empty
    ``detections`` dict.
    """

    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)

    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    # Launchers export WORLD_SIZE; more than one process means distributed.
    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(
        f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    torch.cuda.empty_cache()

    # Build the detector from the head-less config variant — presumably to
    # keep the exported ONNX graph free of post-processing; confirm against
    # the config definitions.
    model = build_detector(cfg.nohead_model,
                           train_cfg=None,
                           test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    # model = fuse_bn_recursively(model)
    model = model.cuda()

    model.eval()
    mode = "val"

    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    start = time.time()

    # Indices delimiting the middle third of the dataset for timing; unused
    # in practice because the loop breaks after the first batch.
    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    device = torch.device(args.local_rank)

    # Number of voxels kept in the trimmed export sample — presumably kept
    # tiny to make the traced ONNX graph small; TODO confirm.
    POINTS_NUM = 2

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            sample = example_to_device(data_batch, device=device)
            # NOTE(review): this inner loop reuses ``i``, shadowing the outer
            # batch index — harmless only because of the ``break`` below.
            for i in range(len(sample["metadata"])):
                sample["metadata"][i]['image_prefix'] = None
            # metadata is cleared then deleted entirely, along with the raw
            # points, so only tensor inputs reach the ONNX tracer.
            del sample["metadata"]
            del sample["points"]
            #del sample["shape"]
            sample["shape"] = torch.tensor(sample["shape"])
            # Truncate the voxel tensors to the first POINTS_NUM entries.
            sample["voxels"] = sample["voxels"][0:POINTS_NUM, :, :]
            sample["num_points"] = sample["num_points"][0:POINTS_NUM]
            sample["coordinates"] = sample["coordinates"][0:POINTS_NUM, :]

            outputs = model(sample, return_loss=False)
            #outputs = batch_processor(
            #    model, data_batch, train_mode=False, local_rank=args.local_rank,
            #)

        # Debug dump of the exact tensors handed to the exporter.
        for k, t in sample.items():
            print("====", k)
            print(t.shape)

        print("============== start =============")

        # Register the spconv custom op so ONNX tracing can symbolically
        # represent sparse-conv index-pair computation (opset 11).
        register_custom_op_symbolic("spconv::get_indice_pairs_3d",
                                    symbolic_get_indice_pairs_3d, 11)

        torch.onnx.export(
            model,  # model being run
            sample,  # model input (or a tuple for multiple inputs)
            "/workspace/data/center_point.onnx",  # where to save the model (can be a file or file-like object)
            export_params=
            True,  # store the trained parameter weights inside the model file
            opset_version=11,  # the ONNX version to export the model to
            do_constant_folding=True
        )  # whether to execute constant folding for optimization

        print("============== finish =============")

        # Only the first batch is exported; everything below in this loop is
        # unreachable dead code retained from the original test script.
        break

        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in [
                        "metadata",
                ]:
                    output[k] = v.to(cpu_device)
            detections.update({
                token: output,
            })
            if args.local_rank == 0:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    # NOTE(review): time_start/time_end are never set before the break, so
    # this prints 0.0 per frame (and would divide by zero if end == start).
    print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)

    result_dict, _ = dataset.evaluation(copy.deepcopy(predictions),
                                        output_dir=args.work_dir,
                                        testset=args.testset)

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"