Ejemplo n.º 1
0
def main():
    # Usage: python visualize.py <config_path> <checkpoint_path>
    parser = argparse.ArgumentParser()
    parser.add_argument("config_path",
                        type=str,
                        help="Path to the configuration file")
    parser.add_argument("checkpoint_path",
                        type=str,
                        help="Path to the model checkpoint file")
    args = parser.parse_args()

    cfg = torchie.Config.fromfile(args.config_path)
    cfg.data.val.test_mode = True

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        batch_size=1,
        workers_per_gpu=1,
        dist=False,
        shuffle=False,
    )

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(model,
                                 args.checkpoint_path,
                                 map_location="cpu")
    model = MegDataParallel(model, device_ids=[0])

    device = torch.device("cuda")
    visualize(model, data_loader, device)
Ejemplo n.º 2
0
def main():
    # config
    cfg = Config.fromfile(CONFIG_FILE)
    # model loading
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    checkpoint = load_checkpoint(model, CHECK_POINT, map_location="cpu")
    model = model.cuda()
    model.eval()
    # data loader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False,
    )
    # infer
    detections = []
    for i, data_batch in enumerate(data_loader):
        print("step:", i)
        with torch.no_grad():
            outputs = batch_processor(
                model,
                data_batch,
                train_mode=False,
                local_rank=0,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in [
                        "metadata",
                ]:
                    output[k] = v.to(cpu_device)
            detections.update({
                token: output,
            })
    all_predictions = all_gather(detections)
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   logger=None):
    if logger is None:
        logger = get_root_logger(cfg.log_level)

    # start training
    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(ds,
                         cfg.data.samples_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=distributed) for ds in dataset
    ]

    total_steps = cfg.total_epochs * len(data_loaders[0])
    # print(f"total_steps: {total_steps}")

    if cfg.lr_config.type == "one_cycle":
        # build trainer
        optimizer = build_one_cycle_optimizer(model, cfg.optimizer)
        lr_scheduler = _create_learning_rate_scheduler(optimizer,
                                                       cfg.lr_config,
                                                       total_steps)
        cfg.lr_config = None
    else:
        optimizer = build_optimizer(model, cfg.optimizer)
        lr_scheduler = None

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()

    logger.info(f"model structure: {model}")

    trainer = Trainer(model, batch_processor, optimizer, lr_scheduler,
                      cfg.work_dir, cfg.log_level)

    if distributed:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    trainer.register_training_hooks(cfg.lr_config, optimizer_config,
                                    cfg.checkpoint_config, cfg.log_config)

    if distributed:
        trainer.register_hook(DistSamplerSeedHook())

    # # register eval hooks
    # if validate:
    #     val_dataset_cfg = cfg.data.val
    #     eval_cfg = cfg.get('evaluation', {})
    #     dataset_type = DATASETS.get(val_dataset_cfg.type)
    #     trainer.register_hook(
    #         KittiEvalmAPHookV2(val_dataset_cfg, **eval_cfg))

    if cfg.resume_from:
        trainer.resume(cfg.resume_from)
    elif cfg.load_from:
        trainer.load_checkpoint(cfg.load_from)

    trainer.run(data_loaders,
                cfg.workflow,
                cfg.total_epochs,
                local_rank=cfg.local_rank)
Ejemplo n.º 4
0
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   logger=None):
    # build logger
    if logger is None:
        logger = get_root_logger(cfg.log_level)

    # build dataloaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    '''
    batch_size = cfg.data.samples_per_gpu
    num_workers = cfg.data.workers_per_gpu
    data_loaders = [DataLoader(ds, batch_size=batch_size, sampler=None, shuffle=True, num_workers=num_workers, collate_fn=collate_kitti, pin_memory=False,) for ds in dataset]  # TODO change pin_memory
    '''
    if cfg.my_paras.get("enable_ssl", False):
        data_loaders = [
            build_dataloader(dataset[0],
                             4,
                             cfg.data.workers_per_gpu,
                             dist=distributed)
        ]
        data_loaders.append(
            build_dataloader(dataset[1],
                             4,
                             cfg.data.workers_per_gpu,
                             dist=distributed))
        data_loaders.append(
            build_dataloader(dataset[2],
                             4,
                             cfg.data.workers_per_gpu,
                             dist=distributed))
    else:
        data_loaders = [
            build_dataloader(ds,
                             cfg.data.samples_per_gpu,
                             cfg.data.workers_per_gpu,
                             dist=distributed) for ds in dataset
        ]

    # build optimizer and lr_scheduler
    total_steps = cfg.total_epochs * len(data_loaders[0])
    if cfg.lr_config.type in ["one_cycle", "multi_phase"]:
        optimizer = build_one_cycle_optimizer(model, cfg.optimizer)
        lr_scheduler = _create_learning_rate_scheduler(
            optimizer, cfg.lr_config,
            total_steps)  # todo: will not register lr_hook in trainer
        cfg.lr_config = None
    else:  # todo: we can add our own optimizer here
        optimizer = build_optimizer(model, cfg.optimizer)
        lr_scheduler = None

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()
    logger.info(f"model structure: {model}")

    model_ema = copy.deepcopy(model)
    for param in model_ema.parameters():
        param.detach_()

    # build trainer
    trainer = Trainer(model, model_ema, batch_processor, optimizer,
                      lr_scheduler, cfg.work_dir, cfg.log_level)

    if distributed:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    trainer.register_training_hooks(cfg.lr_config, optimizer_config,
                                    cfg.checkpoint_config, cfg.log_config)

    if distributed:
        trainer.register_hook(DistSamplerSeedHook())

    # training setting
    if cfg.resume_from:
        trainer.resume(cfg.resume_from)
    elif cfg.load_from and cfg.my_paras.enable_ssl:
        trainer.load_checkpoint_from_scratch(cfg.load_from)

    trainer.run(data_loaders,
                cfg.workflow,
                cfg.total_epochs,
                local_rank=cfg.local_rank)
Ejemplo n.º 5
0
def main():
    args = parse_args()

    assert args.out or args.show or args.json_out, (
        "Please specify at least one operation (save or show the results) "
        'with the argument "--out" or "--show" or "--json_out"'
    )

    if args.out is not None and not args.out.endswith((".pkl", ".pickle")):
        raise ValueError("The output file must be a pkl file.")

    if args.json_out is not None and args.json_out.endswith(".json"):
        args.json_out = args.json_out[:-5]

    cfg = torchie.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get("cudnn_benchmark", False):
        torch.backends.cudnn.benchmark = True

    # cfg.model.pretrained = None
    cfg.data.test.test_mode = True
#     cfg.data.val.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == "none":
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
#     dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")
    # old versions did not save class info in checkpoints, this walkaround is
    # for backward compatibility
    if "CLASSES" in checkpoint["meta"]:
        model.CLASSES = checkpoint["meta"]["CLASSES"]
    else:
        model.CLASSES = dataset.CLASSES

    model = MegDataParallel(model, device_ids=[0])
    result_dict, detections = test(
        data_loader, model, save_dir=None, distributed=distributed
    )

    for k, v in result_dict["results"].items():
        print(f"Evaluation {k}: {v}")

    rank, _ = get_dist_info()
    if args.out and rank == 0:
        print("\nwriting results to {}".format(args.out))
        torchie.dump(detections, args.out)

    if args.txt_result:
        res_dir = os.path.join(os.getcwd(), "predictions")
        for dt in detections:
            with open(
                os.path.join(res_dir, "%06d.txt" % int(dt["metadata"]["token"])), "w"
            ) as fout:
                lines = kitti.annos_to_kitti_label(dt)
                for line in lines:
                    fout.write(line + "\n")

        ap_result_str, ap_dict = kitti_evaluate(
            "/data/Datasets/KITTI/Kitti/object/training/label_2",
            res_dir,
            label_split_file="/data/Datasets/KITTI/Kitti/ImageSets/val.txt",
            current_class=0,
        )

        print(ap_result_str)
Ejemplo n.º 6
0
def main():

    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)

    args = parse_args()

    cfg = Config.fromfile(args.config)

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = torch.cuda.device_count() > 1

    if distributed:
        if args.launcher == "pytorch":
            torch.cuda.set_device(args.local_rank)
            torch.distributed.init_process_group(backend="nccl", init_method="env://")
            cfg.local_rank = args.local_rank
        elif args.launcher == "slurm":
            proc_id = int(os.environ["SLURM_PROCID"])
            ntasks = int(os.environ["SLURM_NTASKS"])
            node_list = os.environ["SLURM_NODELIST"]
            num_gpus = torch.cuda.device_count()
            cfg.gpus = num_gpus
            torch.cuda.set_device(proc_id % num_gpus)
            addr = subprocess.getoutput(
                f"scontrol show hostname {node_list} | head -n1")
            # specify master port
            port = None
            if port is not None:
                os.environ["MASTER_PORT"] = str(port)
            elif "MASTER_PORT" in os.environ:
                pass  # use MASTER_PORT in the environment variable
            else:
                # 29500 is torch.distributed default port
                os.environ["MASTER_PORT"] = "29501"
            # use MASTER_ADDR in the environment variable if it already exists
            if "MASTER_ADDR" not in os.environ:
                os.environ["MASTER_ADDR"] = addr
            os.environ["WORLD_SIZE"] = str(ntasks)
            os.environ["LOCAL_RANK"] = str(proc_id % num_gpus)
            os.environ["RANK"] = str(proc_id)

            dist.init_process_group(backend="nccl")
            cfg.local_rank = int(os.environ["LOCAL_RANK"])

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        # model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        # model = fuse_bn_recursively(model)
        model = model.cuda()

    model.eval()
    mode = "val"

    prog_bar = None
    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    start = time.time()

    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 /3)

    time_start = 0
    time_end = 0

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            outputs = batch_processor(
                model, data_batch, train_mode=False, local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in [
                    "metadata",
                ]:
                    output[k] = v.to(cpu_device)
            detections.update(
                {token: output,}
            )
            if args.local_rank == 0:
                if prog_bar is not None:
                    prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    print("\n Total time per frame: ", (time_end -  time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)

    result_dict, _ = dataset.evaluation(copy.deepcopy(predictions), output_dir=args.work_dir, testset=args.testset)

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"
Ejemplo n.º 7
0
def main():

    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)

    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(
        f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()

    model.eval()
    mode = "val"

    logger.info(f"work dir: {args.work_dir}")

    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    for i, data_batch in enumerate(data_loader):
        with torch.no_grad():
            outputs = batch_processor(
                model,
                data_batch,
                train_mode=False,
                local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in [
                        "metadata",
                ]:
                    output[k] = v.to(cpu_device)
            detections.update({
                token: output,
            })
            if args.local_rank == 0:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    result_dict, _ = dataset.evaluation(predictions, output_dir=args.work_dir)

    for k, v in result_dict["results"].items():
        print(f"Evaluation {k}: {v}")

    if args.txt_result:
        res_dir = os.path.join(os.getcwd(), "predictions")
        for k, dt in predictions.items():
            with open(
                    os.path.join(res_dir,
                                 "%06d.txt" % int(dt["metadata"]["token"])),
                    "w") as fout:
                lines = kitti.annos_to_kitti_label(dt)
                for line in lines:
                    fout.write(line + "\n")

        ap_result_str, ap_dict = kitti_evaluate(
            "/data/Datasets/KITTI/Kitti/object/training/label_2",
            res_dir,
            label_split_file="/data/Datasets/KITTI/Kitti/ImageSets/val.txt",
            current_class=0,
        )

        print(ap_result_str)
Ejemplo n.º 8
0
def main():

    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)

    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(
        f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        # model = fuse_bn_recursively(model)
        model = model.cuda()

    model.eval()
    mode = "val"

    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    start = time.time()

    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            outputs = batch_processor(
                model,
                data_batch,
                train_mode=False,
                local_rank=args.local_rank,
            )
        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in [
                        "metadata",
                ]:
                    output[k] = v.to(cpu_device)
            detections.update({
                token: output,
            })
            if args.local_rank == 0:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)
    with open(os.path.join(args.work_dir, 'prediction.pkl'), 'rb') as f:
        predictions = pickle.load(f)

    result_dict, _ = dataset.evaluation(copy.deepcopy(predictions),
                                        output_dir=args.work_dir,
                                        testset=args.testset)

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"
Ejemplo n.º 9
0
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   logger=None):
    # build logger
    if logger is None:
        logger = get_root_logger(cfg.log_level)

    # build dataloaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    '''
    batch_size = cfg.data.samples_per_gpu
    num_workers = cfg.data.workers_per_gpu
    data_loaders = [DataLoader(ds, batch_size=batch_size, sampler=None, shuffle=True, num_workers=num_workers, collate_fn=collate_kitti, pin_memory=False,) for ds in dataset]  # TODO change pin_memory
    '''
    data_loaders = [
        build_dataloader(ds,
                         cfg.data.samples_per_gpu,
                         cfg.data.workers_per_gpu,
                         dist=distributed) for ds in dataset
    ]

    # build optimizer and lr_scheduler
    total_steps = cfg.total_epochs * len(data_loaders[0])
    if cfg.lr_config.type == "one_cycle":
        optimizer = build_one_cycle_optimizer(model, cfg.optimizer)
        lr_scheduler = _create_learning_rate_scheduler(
            optimizer, cfg.lr_config,
            total_steps)  # todo: will not register lr_hook in trainer
        cfg.lr_config = None
    else:  # todo: we can add our own optimizer here
        optimizer = build_optimizer(model, cfg.optimizer)
        lr_scheduler = None

    # put model on gpus
    if distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(
            model.cuda(cfg.local_rank),
            device_ids=[cfg.local_rank],
            output_device=cfg.local_rank,
            # broadcast_buffers=False,
            find_unused_parameters=True,
        )
    else:
        model = model.cuda()
    logger.info(f"model structure: {model}")

    # build trainer
    trainer = Trainer(model, batch_processor, optimizer, lr_scheduler,
                      cfg.work_dir, cfg.log_level)

    if distributed:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    #import ipdb; ipdb.set_trace()
    trainer.register_training_hooks(cfg.lr_config, optimizer_config,
                                    cfg.checkpoint_config, cfg.log_config)

    if distributed:
        trainer.register_hook(DistSamplerSeedHook())

    # # register eval hooks
    # if validate:
    #     val_dataset_cfg = cfg.data.val
    #     eval_cfg = cfg.get('evaluation', {})
    #     dataset_type = DATASETS.get(val_dataset_cfg.type)
    #     trainer.register_hook(KittiEvalmAPHookV2(val_dataset_cfg, **eval_cfg))

    # training setting
    if cfg.resume_from:
        trainer.resume(cfg.resume_from)
    elif cfg.load_from:
        trainer.load_checkpoint(cfg.load_from)

    trainer.run(data_loaders,
                cfg.workflow,
                cfg.total_epochs,
                local_rank=cfg.local_rank)
Ejemplo n.º 10
0
def main():

    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(0)

    args = parse_args()

    cfg = Config.fromfile(args.config)
    cfg.local_rank = args.local_rank

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    distributed = False
    if "WORLD_SIZE" in os.environ:
        distributed = int(os.environ["WORLD_SIZE"]) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

        cfg.gpus = torch.distributed.get_world_size()
    else:
        cfg.gpus = args.gpus

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info("Distributed testing: {}".format(distributed))
    logger.info(
        f"torch.backends.cudnn.benchmark: {torch.backends.cudnn.benchmark}")

    torch.cuda.empty_cache()

    model = build_detector(cfg.nohead_model,
                           train_cfg=None,
                           test_cfg=cfg.test_cfg)

    if args.testset:
        print("Use Test Set")
        dataset = build_dataset(cfg.data.test)
    else:
        print("Use Val Set")
        dataset = build_dataset(cfg.data.val)

    data_loader = build_dataloader(
        dataset,
        batch_size=cfg.data.samples_per_gpu if not args.speed_test else 1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    checkpoint = load_checkpoint(model, args.checkpoint, map_location="cpu")

    # put model on gpus
    # model = fuse_bn_recursively(model)
    model = model.cuda()

    model.eval()
    mode = "val"

    logger.info(f"work dir: {args.work_dir}")
    if cfg.local_rank == 0:
        prog_bar = torchie.ProgressBar(len(data_loader.dataset) // cfg.gpus)

    detections = {}
    cpu_device = torch.device("cpu")

    start = time.time()

    start = int(len(dataset) / 3)
    end = int(len(dataset) * 2 / 3)

    time_start = 0
    time_end = 0

    device = torch.device(args.local_rank)

    POINTS_NUM = 2

    for i, data_batch in enumerate(data_loader):
        if i == start:
            torch.cuda.synchronize()
            time_start = time.time()

        if i == end:
            torch.cuda.synchronize()
            time_end = time.time()

        with torch.no_grad():
            sample = example_to_device(data_batch, device=device)
            for i in range(len(sample["metadata"])):
                sample["metadata"][i]['image_prefix'] = None
            del sample["metadata"]
            del sample["points"]
            #del sample["shape"]
            sample["shape"] = torch.tensor(sample["shape"])
            sample["voxels"] = sample["voxels"][0:POINTS_NUM, :, :]
            sample["num_points"] = sample["num_points"][0:POINTS_NUM]
            sample["coordinates"] = sample["coordinates"][0:POINTS_NUM, :]

            outputs = model(sample, return_loss=False)
            #outputs = batch_processor(
            #    model, data_batch, train_mode=False, local_rank=args.local_rank,
            #)

        for k, t in sample.items():
            print("====", k)
            print(t.shape)

        print("============== start =============")

        register_custom_op_symbolic("spconv::get_indice_pairs_3d",
                                    symbolic_get_indice_pairs_3d, 11)

        torch.onnx.export(
            model,  # model being run
            sample,  # model input (or a tuple for multiple inputs)
            "/workspace/data/center_point.onnx",  # where to save the model (can be a file or file-like object)
            export_params=
            True,  # store the trained parameter weights inside the model file
            opset_version=11,  # the ONNX version to export the model to
            do_constant_folding=True
        )  # whether to execute constant folding for optimization

        print("============== finish =============")

        break

        for output in outputs:
            token = output["metadata"]["token"]
            for k, v in output.items():
                if k not in [
                        "metadata",
                ]:
                    output[k] = v.to(cpu_device)
            detections.update({
                token: output,
            })
            if args.local_rank == 0:
                prog_bar.update()

    synchronize()

    all_predictions = all_gather(detections)

    print("\n Total time per frame: ", (time_end - time_start) / (end - start))

    if args.local_rank != 0:
        return

    predictions = {}
    for p in all_predictions:
        predictions.update(p)

    if not os.path.exists(args.work_dir):
        os.makedirs(args.work_dir)

    save_pred(predictions, args.work_dir)

    result_dict, _ = dataset.evaluation(copy.deepcopy(predictions),
                                        output_dir=args.work_dir,
                                        testset=args.testset)

    if result_dict is not None:
        for k, v in result_dict["results"].items():
            print(f"Evaluation {k}: {v}")

    if args.txt_result:
        assert False, "No longer support kitti"