Example #1
def main(config, model_path: str, output_path: str, input_shape=(320, 320)):
    logger = Logger(local_rank=-1,
                    save_dir=config.save_dir,
                    use_tensorboard=False)

    # Create model and load weights
    model = build_model(config.model)
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    load_model_weight(model, checkpoint, logger)

    # Convert backbone weights for RepVGG models
    if config.model.arch.backbone.name == "RepVGG":
        deploy_config = config.model
        deploy_config.arch.backbone.update({"deploy": True})
        deploy_model = build_model(deploy_config)
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert

        model = repvgg_det_model_convert(model, deploy_model)

    # TorchScript: tracing the model with dummy inputs
    with torch.no_grad():
        dummy_input = torch.zeros(1, 3, input_shape[0],
                                  input_shape[1])  # Batch size = 1
        model.eval().cpu()
        model_traced = torch.jit.trace(model,
                                       example_inputs=dummy_input).eval()
        model_traced.save(output_path)
        print("Finished export to TorchScript")
Example #2
def main(config, model_path, output_path, input_shape=(320, 320)):
    logger = Logger(-1, config.save_dir, False)
    model = build_model(config.model)
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    load_model_weight(model, checkpoint, logger)
    if config.model.arch.backbone.name == 'RepVGG':
        deploy_config = config.model
        deploy_config.arch.backbone.update({'deploy': True})
        deploy_model = build_model(deploy_config)
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert
        model = repvgg_det_model_convert(model, deploy_model)
    dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1])
    # Mark the batch dimension of every input and output as dynamic
    dynamic_axes = {name: {0: "batch_size"}
                    for name in ["input", "output1", "output2", "output3",
                                 "output4", "output5", "output6"]}

    input_names = ['input']
    output_names = [
        'output1', 'output2', 'output3', 'output4', 'output5', 'output6'
    ]
    torch.onnx.export(model,
                      dummy_input,
                      output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=12,
                      input_names=input_names,
                      output_names=output_names,
                      dynamic_axes=dynamic_axes)

    # Simplify the exported graph with onnx-simplifier
    import onnx
    from onnxsim import simplify
    onnx_model = onnx.load(output_path)
    model_simp, check = simplify(onnx_model)
    assert check, "Simplified ONNX model could not be validated"
    onnx.save(model_simp, output_path)
    print('Finished exporting ONNX')
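
Since the export above declares a dynamic batch axis, a quick hedged check with onnxruntime (assuming it is installed) is to feed a batch size other than one; the file path is illustrative:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("nanodet.onnx")  # path is an assumption
batch = np.random.randn(4, 3, 320, 320).astype(np.float32)
outputs = sess.run(None, {"input": batch})  # None fetches all six outputs
print([o.shape for o in outputs])  # every leading dimension should be 4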
Example #3
def main(config, model_path, output_path, input_shape=(320, 320)):
    logger = Logger(-1, config.save_dir, False)
    model = build_model(config.model)
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(model, checkpoint, logger)
    if config.model.arch.backbone.name == 'RepVGG':
        deploy_config = config.model
        deploy_config.arch.backbone.update({'deploy': True})
        deploy_model = build_model(deploy_config)
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert
        model = repvgg_det_model_convert(model, deploy_model)
    dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1])
    torch.onnx.export(model, dummy_input, output_path, verbose=True, keep_initializers_as_inputs=True, opset_version=11)
    print('Finished exporting ONNX')
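
A lightweight structural sanity check after an export like the one above (hedged: onnx's checker validates the graph itself, not numerical accuracy; the path is illustrative):

import onnx

onnx_model = onnx.load("nanodet.onnx")  # path is an assumption
onnx.checker.check_model(onnx_model)  # raises ValidationError if the graph is malformed
print(onnx.helper.printable_graph(onnx_model.graph))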
Example #4
    def __init__(self, cfg, model_path, logger, device='cuda:0'):
        self.cfg = cfg
        self.device = device
        model = build_model(cfg.model)
        ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
        load_model_weight(model, ckpt, logger)
        if cfg.model.arch.backbone.name == 'RepVGG':
            deploy_config = cfg.model
            deploy_config.arch.backbone.update({'deploy': True})
            deploy_model = build_model(deploy_config)
            from nanodet.model.backbone.repvgg import repvgg_det_model_convert
            model = repvgg_det_model_convert(model, deploy_model)
        self.model = model.to(device).eval()
        self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
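
A hedged usage sketch for an __init__ like the one above; the enclosing class name (Predictor) and the file paths are assumptions, since only the method body is shown:

from nanodet.util import Logger, cfg, load_config

load_config(cfg, "config/nanodet-m.yml")  # illustrative config path
logger = Logger(-1, cfg.save_dir, False)
predictor = Predictor(cfg, "model_best.ckpt", logger, device="cuda:0")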
Example #5
def main(config, model_path, output_path, input_shape=(320, 320)):
    logger = Logger(-1, config.save_dir, False)
    model = build_model(config.model)
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(model, checkpoint, logger)
    dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1])
    torch.onnx.export(model, dummy_input, output_path, verbose=True, keep_initializers_as_inputs=True, opset_version=11)
    print('Finished exporting ONNX')
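
Because this export passes no input_names or output_names, ONNX assigns auto-generated graph names; a hedged way to discover them before wiring up inference (the path is illustrative):

import onnx

m = onnx.load("nanodet.onnx")  # path is an assumption
print([i.name for i in m.graph.input])   # auto-generated input name(s)
print([o.name for o in m.graph.output])  # auto-generated output name(s)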
Example #6
    def __init__(self, cfg, model_path, logger, device='cuda:0'):
        self.cfg = cfg
        self.device = device
        model = build_model(cfg.model)
        ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
        load_model_weight(model, ckpt, logger)
        self.model = model.to(device).eval()
        self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
Example #7
def main(config, input_shape=(320, 320)):
    model = build_model(config.model)
    try:
        import mobile_cv.lut.lib.pt.flops_utils as flops_utils
    except ImportError:
        print("mobile-cv is not installed. Skip flops calculation.")
        return
    first_batch = torch.rand((1, 3, input_shape[0], input_shape[1]))
    input_args = (first_batch, )
    flops_utils.print_model_flops(model, input_args)
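
If mobile-cv is unavailable, a dependency-free fallback is to count parameters directly (a rough sketch: this reports the parameter count only, not FLOPs):

model = build_model(config.model)
n_params = sum(p.numel() for p in model.parameters())
print(f"Params: {n_params / 1e6:.2f} M")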
Example #8
def main(args):
    warnings.warn(
        'Warning! Old testing code is deprecated and will be deleted '
        'in the next version. Please use tools/test.py')
    load_config(cfg, args.config)
    local_rank = -1
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    cfg.defrost()
    timestr = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    cfg.save_dir = os.path.join(cfg.save_dir, timestr)
    cfg.freeze()
    mkdir(local_rank, cfg.save_dir)
    logger = Logger(local_rank, cfg.save_dir)

    logger.log('Creating model...')
    model = build_model(cfg.model)

    logger.log('Setting up data...')
    val_dataset = build_dataset(cfg.data.val, args.task)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=cfg.device.batchsize_per_gpu,
        shuffle=False,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=collate_function,
        drop_last=True)
    trainer = build_trainer(local_rank, cfg, model, logger)
    cfg.schedule.update({'load_model': args.model})
    trainer.load_model(cfg)
    evaluator = build_evaluator(cfg, val_dataset)
    logger.log('Starting testing...')
    with torch.no_grad():
        results, val_loss_dict = trainer.run_epoch(0,
                                                   val_dataloader,
                                                   mode=args.task)
    if args.task == 'test':
        res_json = evaluator.results2json(results)
        json_path = os.path.join(cfg.save_dir,
                                 'results{}.json'.format(timestr))
        with open(json_path, 'w') as f:
            json.dump(res_json, f)
    elif args.task == 'val':
        eval_results = evaluator.evaluate(results,
                                          cfg.save_dir,
                                          rank=local_rank)
        if args.save_result:
            txt_path = os.path.join(cfg.save_dir,
                                    "eval_results{}.txt".format(timestr))
            with open(txt_path, "a") as f:
                for k, v in eval_results.items():
                    f.write("{}: {}\n".format(k, v))
Example #9
File: train.py  Project: AlbertiPot/nanodet
def main(args):
    warnings.warn('Warning! Old training code is deprecated and will be deleted '
                  'in the next version. Please use tools/train.py')
    load_config(cfg, args.config)
    local_rank = int(args.local_rank)
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    mkdir(local_rank, cfg.save_dir)  # mkdir is wrapped with @rank_filter, so only the main process creates save_dir
    logger = Logger(local_rank, cfg.save_dir)
    if args.seed is not None:
        logger.log('Set random seed to {}'.format(args.seed))
        init_seeds(args.seed)

    logger.log('Creating model...')
    model = build_model(cfg.model)

    logger.log('Setting up data...')
    train_dataset = build_dataset(cfg.data.train, 'train')
    val_dataset = build_dataset(cfg.data.val, 'test')

    if len(cfg.device.gpu_ids) > 1:
        print('rank = ', local_rank)
        num_gpus = torch.cuda.device_count()
        torch.cuda.set_device(local_rank % num_gpus)
        dist.init_process_group(backend='nccl')
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.device.batchsize_per_gpu,
                                                       num_workers=cfg.device.workers_per_gpu, pin_memory=True,
                                                       collate_fn=collate_function, sampler=train_sampler,
                                                       drop_last=True)
    else:
        train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.device.batchsize_per_gpu,
                                                       shuffle=True, num_workers=cfg.device.workers_per_gpu,
                                                       pin_memory=True, collate_fn=collate_function, drop_last=True)

    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=cfg.device.batchsize_per_gpu,
                                                 shuffle=False, num_workers=cfg.device.workers_per_gpu,
                                                 pin_memory=True, collate_fn=collate_function, drop_last=True)

    trainer = build_trainer(local_rank, cfg, model, logger)

    if 'load_model' in cfg.schedule:
        trainer.load_model(cfg)
    if 'resume' in cfg.schedule:
        trainer.resume(cfg)

    evaluator = build_evaluator(cfg, val_dataset)

    logger.log('Starting training...')
    trainer.run(train_dataloader, val_dataloader, evaluator)
Example #10
def main(config, model_path, output_path, input_shape=(320, 320)):
    logger = Logger(-1, config.save_dir, False)
    model = build_model(config.model)
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(model, checkpoint, logger)
    if config.model.arch.backbone.name == "RepVGG":
        deploy_config = config.model
        deploy_config.arch.backbone.update({"deploy": True})
        deploy_model = build_model(deploy_config)
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert

        model = repvgg_det_model_convert(model, deploy_model)
    dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1])

    torch.onnx.export(
        model,
        dummy_input,
        output_path,
        verbose=True,
        keep_initializers_as_inputs=True,
        opset_version=11,
        input_names=["data"],
        output_names=["output"],
    )
    logger.log("finished exporting onnx ")

    logger.log("start simplifying onnx ")
    input_data = {"data": dummy_input.detach().cpu().numpy()}
    model_sim, flag = onnxsim.simplify(output_path, input_data=input_data)
    if flag:
        onnx.save(model_sim, output_path)
        logger.log("simplify onnx successfully")
    else:
        logger.log("simplify onnx failed")
Example #11
    def __init__(self, model_path, cfg_path, *args, **kwargs):
        import torch
        from nanodet.model.arch import build_model
        from nanodet.util import Logger, cfg, load_config, load_model_weight

        super(NanoDetTorch, self).__init__(*args, **kwargs)
        print('Using PyTorch as inference backend')
        print(f'Using weight: {model_path}')

        # load model
        self.model_path = model_path
        self.cfg_path = cfg_path
        load_config(cfg, cfg_path)
        self.logger = Logger(-1, cfg.save_dir, False)
        self.model = build_model(cfg.model)
        checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
        load_model_weight(self.model, checkpoint, self.logger)
Example #12
    def export_ONNX_model(self,
                          config,
                          model_path,
                          output_path,
                          logger,
                          input_shape=(320, 320)):
        model = build_model(config.model)
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        load_model_weight(model, checkpoint, logger)
        dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1])
        torch.onnx.export(model,
                          dummy_input,
                          output_path,
                          verbose=True,
                          keep_initializers_as_inputs=True,
                          opset_version=11)
Example #13
    def __init__(self, cfg, model_path, logger, device='cuda:0'):
        self.cfg = cfg
        self.device = device
        model = build_model(cfg.model)
        self.model = model.to(device).eval()
        self.pipeline = Pipeline(cfg.data.val.pipeline,
                                 cfg.data.val.keep_ratio)

        self.trt_model = model_path
        # Compare the major version numerically; indexing the first character
        # of the version string breaks for TensorRT >= 10.
        self.inference_fn = (common.do_inference
                             if int(trt.__version__.split('.')[0]) < 7
                             else common.do_inference_v2)
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self.engine = self._load_engine()
        try:
            self.context = self.engine.create_execution_context()
            self.inputs, self.outputs, self.bindings, self.stream = \
                common.allocate_buffers(self.engine)
        except Exception as e:
            raise RuntimeError('fail to allocate CUDA resources') from e
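
The _load_engine helper is not shown; a minimal sketch under the standard TensorRT deserialization pattern (an assumption: the file passed as model_path is a serialized engine):

    def _load_engine(self):
        # Deserialize a prebuilt serialized engine into an ICudaEngine.
        with open(self.trt_model, 'rb') as f, \
                trt.Runtime(self.trt_logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())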
Example #14
    def __init__(self, cfg, model_path, logger, device='cuda:0'):
        """
        :param cfg:
        :param model_path:
        :param logger:
        :param device:
        """
        self.cfg = cfg
        self.device = device

        model = build_model(cfg.model)

        ckpt = torch.load(model_path,
                          map_location=lambda storage, loc: storage)
        load_model_weight(model, ckpt, logger)
        print('INFO: {:s} loaded.'.format(model_path))
        self.model = model.to(device).eval()

        self.pipeline = Pipeline(cfg.data.val.pipeline,
                                 cfg.data.val.keep_ratio)
Example #15
def test_config_files():
    root_path = join(dirname(__file__), "../..")
    cfg_folder = join(root_path, "config")
    if not exists(cfg_folder):
        raise FileNotFoundError("Cannot find config folder.")

    cfg_paths = collect_files(cfg_folder, [".yml", ".yaml"])
    for cfg_path in cfg_paths:
        print(f"Start testing {cfg_path}")
        config = copy.deepcopy(cfg)

        # test load cfg
        load_config(config, cfg_path)
        assert "save_dir" in config
        assert "model" in config
        assert "data" in config
        assert "device" in config
        assert "schedule" in config
        assert "log" in config

        # test build model
        model = build_model(config.model)
        assert config.model.arch.name == model.__class__.__name__
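
collect_files here comes from the project's test utilities; if you need a stand-in, a hedged sketch that walks a folder and gathers files by extension (an assumption, not the project's actual implementation):

import os

def collect_files(folder, suffixes):
    # Recursively gather files whose extension is in `suffixes`.
    paths = []
    for root, _, files in os.walk(folder):
        paths.extend(os.path.join(root, f) for f in files
                     if os.path.splitext(f)[1] in suffixes)
    return paths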
Example #16
def main(config,
         model_path,
         output_path,
         input_shape=(320, 320),
         batch_size=1):
    logger = Logger(-1, config.save_dir, False)
    model = build_model(config.model)
    checkpoint = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
    load_model_weight(model, checkpoint, logger)
    dummy_input = torch.randn(batch_size, 3, input_shape[0], input_shape[1])
    torch.onnx.export(model,
                      dummy_input,
                      output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=12)
    onnx_model = onnx.load(output_path)  # load onnx model
    model_simp, check = simplify(onnx_model)
    assert check, "Simplified ONNX model could not be validated"
    onnx.save(model_simp, output_path)
    print('Finished exporting ONNX')
Example #17
def main(config, input_shape=(3, 320, 320)):
    model = build_model(config.model)
    flops, params = get_model_complexity_info(model, input_shape)
    split_line = '=' * 30
    # get_model_complexity_info returns formatted strings by default,
    # so print them as-is (multiplying a string by 2 just repeats it).
    print(f'{split_line}\nInput shape: {input_shape}\n'
          f'Flops: {flops}\nParams: {params}\n{split_line}')
Example #18
def test_flops():
    load_config(cfg, "./config/legacy_v0.x_configs/nanodet-m.yml")

    model = build_model(cfg.model)
    input_shape = (3, 320, 320)
    get_model_complexity_info(model, input_shape)
Example #19
    def startNanodetTrain(self):
        # Load the config file
        load_config(cfg, self.nanoTrainConfig['cfg'])
        # Determine this host's role in distributed training
        local_rank = int(self.nanoTrainConfig["local_rank"])
        # torch.backends.cudnn.enabled = True
        # torch.backends.cudnn.benchmark = True
        mkdir(local_rank, self.nanoTrainConfig["save_dir"])
        logger = Logger(local_rank, self.nanoTrainConfig["save_dir"])
        if self.nanoTrainConfig.keys().__contains__("seed"):
            logger.log('Set random seed to {}'.format(
                self.nanoTrainConfig['seed']))
            self.init_seeds(self.nanoTrainConfig['seed'])

        #1.创建模型
        model = build_model(cfg.model)
        model = model.cpu()

        # 2. Load the data
        logger.log('Setting up data...')
        train_dataset = build_dataset(cfg.data.train, 'train',
                                      self.nanoTrainConfig)
        val_dataset = build_dataset(cfg.data.val, 'test', self.nanoTrainConfig)

        if len(cfg.device.gpu_ids) > 1:
            print('rank = ', local_rank)
            num_gpus = torch.cuda.device_count()
            torch.cuda.set_device(local_rank % num_gpus)
            dist.init_process_group(backend='nccl')
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
            train_dataloader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=cfg.device.batchsize_per_gpu,
                num_workers=cfg.device.workers_per_gpu,
                pin_memory=True,
                collate_fn=collate_function,
                sampler=train_sampler,
                drop_last=True)
        else:
            print("加载数据...")
            train_dataloader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=cfg.device.batchsize_per_gpu,
                shuffle=True,
                num_workers=cfg.device.workers_per_gpu,
                pin_memory=True,
                collate_fn=collate_function,
                drop_last=True)

        val_dataloader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
            collate_fn=collate_function,
            drop_last=True)

        trainer = build_trainer(local_rank, cfg, model, logger)

        if 'load_model' in cfg.schedule:
            trainer.load_model(cfg)
        if 'resume' in cfg.schedule:
            trainer.resume(cfg)

        evaluator = build_evaluator(cfg, val_dataset)

        logger.log('Starting training...')
        trainer.run(train_dataloader, val_dataloader, evaluator,
                    self.nanoTrainConfig)
Example #20
def run(args):
    """
    :param args:
    :return:
    """
    load_config(cfg, args.config)

    local_rank = int(args.local_rank)  # rank of this process (set by the distributed launcher; -1 for single-process runs)
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    mkdir(local_rank, cfg.save_dir)
    logger = Logger(local_rank, cfg.save_dir)

    if args.seed is not None:
        logger.log('Set random seed to {}'.format(args.seed))
        init_seeds(args.seed)

    logger.log('Creating model...')
    model = build_model(cfg.model)

    logger.log('Setting up data...')
    train_dataset = build_dataset(cfg.data.train, 'train')
    val_dataset = build_dataset(cfg.data.val, 'test')

    if len(cfg.device.gpu_ids) > 1:  # more than one GPU (distributed training)
        print('rank = ', local_rank)
        num_gpus = torch.cuda.device_count()
        torch.cuda.set_device(local_rank % num_gpus)
        dist.init_process_group(backend='nccl')
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)

        # In debug mode, num_workers=0 keeps data loading in the main process.
        num_workers = 0 if args.is_debug else cfg.device.workers_per_gpu
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=cfg.device.batchsize_per_gpu,
                                                        num_workers=num_workers,
                                                        pin_memory=True,
                                                        collate_fn=collate_function,
                                                        sampler=train_sampler,
                                                        drop_last=True)
    else:
        num_workers = 0 if args.is_debug else cfg.device.workers_per_gpu
        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=cfg.device.batchsize_per_gpu,
                                                        shuffle=True,
                                                        num_workers=num_workers,
                                                        pin_memory=True,
                                                        collate_fn=collate_function,
                                                        drop_last=True)

    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=0 if args.is_debug else 1,
                                                  pin_memory=True,
                                                  collate_fn=collate_function,
                                                  drop_last=True)

    # -----
    trainer = build_trainer(local_rank, cfg, model, logger)

    if 'load_model' in cfg.schedule:
        trainer.load_model(cfg)
    if 'resume' in cfg.schedule:
        trainer.resume(cfg)

    # ----- Build an evaluator
    evaluator = build_evaluator(cfg, val_dataset)

    logger.log('Starting training...')
    trainer.run(train_data_loader, val_data_loader, evaluator)
Example #21
import torch
from nanodet.model.arch import build_model
from nanodet.util import cfg, load_config

from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
"""
NanoDet model can be installed from https://github.com/RangiLyu/nanodet.git
"""

cfg_path = r"nanodet/config/nanodet-RepVGG-A0_416.yml"
load_config(cfg, cfg_path)

model = build_model(cfg.model).cpu()
dummy_input = torch.rand(8, 3, 416, 416)

op_names = []
# These three conv layers are followed by reshape-like operations that cannot
# be replaced, so we skip them; such layers can also be found with the
# `not_safe_to_prune` function.
excludes = ['head.gfl_cls.0', 'head.gfl_cls.1', 'head.gfl_cls.2']
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d) and name not in excludes:
        op_names.append(name)

cfg_list = [{'op_types': ['Conv2d'], 'sparsity': 0.5, 'op_names': op_names}]
pruner = L1FilterPruner(model, cfg_list)
pruner.compress()
pruner.export_model('./model', './mask')
# Call pruner._unwrap_model() first if you want to run the speedup pass on the same model.
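
ModelSpeedup is imported above but never invoked; a minimal sketch of the follow-up step under the same (pre-3.0) NNI API, reusing the './mask' file written by export_model:

pruner._unwrap_model()  # restore the original modules before running speedup
m_speedup = ModelSpeedup(model, dummy_input, './mask')
m_speedup.speedup_model()  # physically removes the pruned channels from the graph
print(model)  # inspect the slimmed architecture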