Code Example #1
File: test_adaptor_onnxrt.py  Project: intel/lpot
    def test_adaptor(self):
        for fake_yaml in ["static.yaml", "dynamic.yaml"]:
            quantizer = Quantization(fake_yaml)
            quantizer.calib_dataloader = self.cv_dataloader
            quantizer.eval_dataloader = self.cv_dataloader
            quantizer.model = common.Model(self.rn50_model)
            q_model = quantizer()
            eval_func(q_model)
        for fake_yaml in ["non_MSE.yaml"]:
            quantizer = Quantization(fake_yaml)
            quantizer.calib_dataloader = self.cv_dataloader
            quantizer.eval_dataloader = self.cv_dataloader
            quantizer.model = common.Model(self.mb_v2_model)
            q_model = quantizer()
            eval_func(q_model)

        for fake_yaml in ["static.yaml"]:
            quantizer = Quantization(fake_yaml)
            quantizer.calib_dataloader = self.ir3_dataloader
            quantizer.eval_dataloader = self.ir3_dataloader
            quantizer.model = common.Model(self.ir3_model)
            q_model = quantizer()

        for mode in ["performance", "accuracy"]:
            fake_yaml = "benchmark.yaml"
            evaluator = Benchmark(fake_yaml)
            evaluator.b_dataloader = self.cv_dataloader
            evaluator.model = common.Model(self.rn50_model)
            evaluator(mode)
Code Example #2
File: test_adaptor_pytorch.py  Project: intel/lpot
 def test_quantization_saved(self):
     for fake_yaml in [
             'dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml'
     ]:
         if fake_yaml == 'dynamic_yaml.yaml':
             model = torchvision.models.resnet18()
         else:
             model = copy.deepcopy(self.model)
         if fake_yaml == 'ptq_yaml.yaml':
             model.eval().fuse_model()
         quantizer = Quantization(fake_yaml)
         dataset = quantizer.dataset('dummy', (100, 3, 256, 256),
                                     label=True)
         quantizer.model = common.Model(model)
         if fake_yaml == 'qat_yaml.yaml':
             quantizer.q_func = q_func
         else:
             quantizer.calib_dataloader = common.DataLoader(dataset)
         quantizer.eval_dataloader = common.DataLoader(dataset)
         q_model = quantizer()
         q_model.save('./saved')
         # Load configure and weights by lpot.utils
         saved_model = load("./saved", model)
         eval_func(saved_model)
         shutil.rmtree('./saved', ignore_errors=True)
     from lpot.experimental import Benchmark
     evaluator = Benchmark('ptq_yaml.yaml')
     # Load configure and weights by lpot.model
     evaluator.model = common.Model(model)
     evaluator.b_dataloader = common.DataLoader(dataset)
     evaluator()
     evaluator.model = common.Model(model)
     evaluator()
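
Note: the q_func assigned for the qat_yaml.yaml case above is a user-supplied training function defined elsewhere in the test module. A minimal sketch of such a quantization-aware-training hook, assuming a classification model and random data (names, shapes, and loop lengths here are illustrative, not the actual test helper):

import torch
import torch.nn as nn

def q_func(model):
    # Hypothetical QAT training hook: a few iterations on random data, just
    # enough for LPOT to exercise the quantization-aware-training pass.
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
    model.train()
    for _ in range(3):
        images = torch.rand(8, 3, 256, 256)
        labels = torch.randint(0, 1000, (8,))
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
    return model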
Code Example #3
    def test_quantization_saved(self):
        from lpot.utils.pytorch import load

        for fake_yaml in [
                'dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml'
        ]:
            if fake_yaml == 'dynamic_yaml.yaml':
                model = torchvision.models.resnet18()
            else:
                model = copy.deepcopy(self.model)
            if fake_yaml == 'ptq_yaml.yaml':
                model.eval().fuse_model()
            quantizer = Quantization(fake_yaml)
            dataset = quantizer.dataset('dummy', (100, 3, 256, 256),
                                        label=True)
            quantizer.model = common.Model(model)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            if fake_yaml == 'qat_yaml.yaml':
                quantizer.q_func = q_func
            q_model = quantizer()
            q_model.save('./saved')
            # Load configure and weights by lpot.utils
            saved_model = load("./saved", model)
            eval_func(saved_model)
        from lpot.experimental import Benchmark
        evaluator = Benchmark('ptq_yaml.yaml')
        # Load configure and weights by lpot.model
        evaluator.model = common.Model(model)
        evaluator.b_dataloader = common.DataLoader(dataset)
        results = evaluator()
        evaluator.model = common.Model(model)
        fp32_results = evaluator()
        self.assertTrue(
            (fp32_results['accuracy'][0] - results['accuracy'][0]) < 0.01)
Code Example #4
File: main.py  Project: vuiseng9/lpot
    def run(self):
        """ This is lpot function include tuning and benchmark option """

        if self.args.tune:
            from lpot.experimental import Quantization, common
            quantizer = Quantization(self.args.config)
            quantizer.model = common.Model(self.args.input_graph)
            q_model = quantizer()
            q_model.save(self.args.output_graph)

        if self.args.benchmark:
            from lpot.experimental import Benchmark, common
            evaluator = Benchmark(self.args.config)
            evaluator.model = common.Model(self.args.input_graph)
            evaluator(self.args.mode)
Code Example #5
def main_worker(args):
    global best_acc1

    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.topology))
        model = models.__dict__[args.topology](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.topology))
        model = models.__dict__[args.topology]()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.prune:
        from lpot.experimental import Pruning, common
        prune = Pruning(args.config)

        prune.model = common.Model(model)
        model = prune()
        model.save(args.output_model)
        return
Code Example #6
File: test_adaptor_onnxrt.py  Project: jeffmaxey/lpot
 def test_quantizate(self):
     from lpot.experimental import Quantization, common
     for fake_yaml in ["static_yaml.yaml", "dynamic_yaml.yaml"]:
         quantizer = Quantization(fake_yaml)
         quantizer.calib_dataloader = self.cv_dataloader
         quantizer.eval_dataloader = self.cv_dataloader
         quantizer.model = common.Model(self.rn50_model)
         q_model = quantizer()
         eval_func(q_model)
     for fake_yaml in ["non_MSE_yaml.yaml"]:
         quantizer = Quantization(fake_yaml)
         quantizer.calib_dataloader = self.cv_dataloader
         quantizer.eval_dataloader = self.cv_dataloader
         quantizer.model = common.Model(self.mb_v2_model)
         q_model = quantizer()
         eval_func(q_model)
Code Example #7
def main():

    from lpot.experimental import Quantization, common
    quantizer = Quantization('./conf.yaml')

    # Do quantization
    quantizer.model = common.Model('./inception_v1.ckpt')
    quantized_model = quantizer()
Code Example #8
File: test.py  Project: peiwenhuang27/lpot
def main():

    quantizer = Quantization('./conf.yaml')
    dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
    quantizer.model = common.Model(
        './model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/'
    )
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantized_model = quantizer()
Code Example #9
def main():
    class CalibrationDL():
        def __init__(self):
            path = os.path.abspath(
                os.path.expanduser('./brats_cal_images_list.txt'))
            with open(path, 'r') as f:
                self.preprocess_files = [line.rstrip() for line in f]

            self.loaded_files = {}
            self.batch_size = 1

        def __getitem__(self, sample_id):
            file_name = self.preprocess_files[sample_id]
            print("Loading file {:}".format(file_name))
            with open(
                    os.path.join('build/calib_preprocess/',
                                 "{:}.pkl".format(file_name)), "rb") as f:
                self.loaded_files[sample_id] = pickle.load(f)[0]
            # Note: the calibration phase does not use labels, so we return 0 as a label-free placeholder.
            return self.loaded_files[sample_id], 0

        def __len__(self):
            self.count = len(self.preprocess_files)
            return self.count

    args = get_args()
    assert args.backend == "pytorch"
    model_path = os.path.join(args.model_dir, "plans.pkl")
    assert os.path.isfile(
        model_path), "Cannot find the model file {:}!".format(model_path)
    trainer, params = load_model_and_checkpoint_files(
        args.model_dir,
        folds=1,
        fp16=False,
        checkpoint_name='model_final_checkpoint')
    trainer.load_checkpoint_ram(params[0], False)
    model = trainer.network

    if args.tune:
        quantizer = Quantization('conf.yaml')
        quantizer.model = common.Model(model)
        quantizer.eval_func = eval_func
        quantizer.calib_dataloader = common.DataLoader(CalibrationDL())
        q_model = quantizer()
        q_model.save('./lpot_workspace')
        exit(0)

    if args.benchmark:
        model.eval()
        if args.int8:
            from lpot.utils.pytorch import load
            new_model = load(
                os.path.abspath(os.path.expanduser('./lpot_workspace')), model)
        else:
            new_model = model
        eval_func(new_model)
Code Example #10
File: test.py  Project: jeffmaxey/lpot
def main():

    from lpot.experimental import Quantization, common
    quantizer = Quantization('./conf.yaml')
    quantizer.model = common.Model("./mobilenet_v1_1.0_224_frozen.pb")
    quantized_model = quantizer()

    # Optional, run benchmark
    from lpot.experimental import Benchmark
    evaluator = Benchmark('./conf.yaml')
    evaluator.model = common.Model(quantized_model.graph_def)
    results = evaluator()
    batch_size = 1
    for mode, result in results.items():
        acc, batch_size, result_list = result
        latency = np.array(result_list).mean() / batch_size

        print('Accuracy is {:.3f}'.format(acc))
        print('Latency: {:.3f} ms'.format(latency * 1000))
Code Example #11
File: test_pruning.py  Project: intel/lpot
    def test_pruning_external(self):
        from lpot.experimental import common
        from lpot import Pruning
        prune = Pruning('fake.yaml')
        datasets = DATASETS('pytorch')
        dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224),
                                          low=0.,
                                          high=1.,
                                          label=True)
        dummy_dataloader = PyTorchDataLoader(dummy_dataset)

        def training_func_for_lpot(model):
            epochs = 16
            iters = 30
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
            for nepoch in range(epochs):
                model.train()
                cnt = 0
                prune.on_epoch_begin(nepoch)
                for image, target in dummy_dataloader:
                    prune.on_batch_begin(cnt)
                    print('.', end='')
                    cnt += 1
                    output = model(image)
                    loss = criterion(output, target)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    prune.on_batch_end()
                    if cnt >= iters:
                        break
                prune.on_epoch_end()

        prune.model = common.Model(self.model)
        prune.pruning_func = training_func_for_lpot
        prune.eval_dataloader = dummy_dataloader
        prune.train_dataloader = dummy_dataloader
        _ = prune(common.Model(self.model), \
                  train_dataloader=dummy_dataloader, \
                  pruning_func=training_func_for_lpot, \
                  eval_dataloader=dummy_dataloader)
Code Example #12
def main():
    arg_parser = ArgumentParser(description='Parse args')
    arg_parser.add_argument('--benchmark',
                            action='store_true',
                            help='run benchmark')
    arg_parser.add_argument('--tune', action='store_true', help='run tuning')
    args = arg_parser.parse_args()

    if args.tune:
        from lpot.experimental import Quantization, common
        quantizer = Quantization('./conf.yaml')
        quantizer.model = common.Model("./mobilenet_v1_1.0_224_frozen.pb")
        quantized_model = quantizer()
        quantized_model.save('./int8.pb')

    if args.benchmark:
        from lpot.experimental import Benchmark, common
        evaluator = Benchmark('./conf.yaml')
        evaluator.model = common.Model('int8.pb')
        evaluator(mode='accuracy')
Code Example #13
File: test_adaptor_pytorch.py  Project: intel/lpot
 def test_tuning_ipex(self):
     from lpot.experimental import Quantization
     model = torchvision.models.resnet18()
     quantizer = Quantization('ipex_yaml.yaml')
     dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
     quantizer.model = common.Model(model)
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantizer.eval_dataloader = common.DataLoader(dataset)
     lpot_model = quantizer()
     lpot_model.save("./saved")
     try:
         script_model = torch.jit.script(model.to(ipex.DEVICE))
     except:
         script_model = torch.jit.trace(
             model.to(ipex.DEVICE),
             torch.randn(10, 3, 224, 224).to(ipex.DEVICE))
     from lpot.experimental import Benchmark
     evaluator = Benchmark('ipex_yaml.yaml')
     evaluator.model = common.Model(script_model)
     evaluator.b_dataloader = common.DataLoader(dataset)
     results = evaluator()
Code Example #14
def quantize(model, q_data, e_data):
    from lpot.experimental import Quantization, common
    from lpot.experimental.common import DataLoader

    quantizer = Quantization('fake_yaml.yaml')

    q_dataloader = DataLoader(dataset=list(zip(q_data[0], q_data[1])))
    e_dataloader = DataLoader(dataset=list(zip(e_data[0], e_data[1])))
    quantizer.model = common.Model(model)
    quantizer.calib_dataloader = q_dataloader
    quantizer.eval_dataloader = e_dataloader
    quantized_model = quantizer()
    return quantized_model
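
A hypothetical call of the quantize() helper above, assuming model is a framework model object that LPOT supports and that q_data / e_data are (features, labels) pairs of equal length:

import numpy as np

# Illustrative only: zip(q_data[0], q_data[1]) pairs each sample with its
# label, so any (features, labels) arrays of matching length fit the helper.
features = np.random.rand(32, 3, 224, 224).astype(np.float32)
labels = np.random.randint(0, 1000, size=(32,))
q_model = quantize(model, (features, labels), (features, labels))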
Code Example #15
File: infer_detections.py  Project: intel/lpot
def main(_):
    if FLAGS.benchmark:
        run_benchmark()
    else:
        FLAGS.batch_size = 1
        from lpot.experimental import Quantization, common
        quantizer = Quantization(FLAGS.config)
        quantizer.model = common.Model(FLAGS.input_graph)
        kwargs = {'conf_threshold': FLAGS.conf_threshold,
                  'iou_threshold': FLAGS.iou_threshold}
        quantizer.postprocess = common.Postprocess(NMS, 'NMS', **kwargs)
        q_model = quantizer()
        q_model.save(FLAGS.output_graph)
Code Example #16
File: test_adaptor_pytorch.py  Project: intel/lpot
    def test_fx_dynamic_quant(self):
        # Model Definition
        class LSTMModel(nn.Module):
            """Container module with an encoder, a recurrent module, and a decoder."""
            def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
                super(LSTMModel, self).__init__()
                self.drop = nn.Dropout(dropout)
                self.encoder = nn.Embedding(ntoken, ninp)
                self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
                self.decoder = nn.Linear(nhid, ntoken)
                self.init_weights()
                self.nhid = nhid
                self.nlayers = nlayers

            def init_weights(self):
                initrange = 0.1
                self.encoder.weight.data.uniform_(-initrange, initrange)
                self.decoder.bias.data.zero_()
                self.decoder.weight.data.uniform_(-initrange, initrange)

            def forward(self, input, hidden):
                emb = self.drop(self.encoder(input))
                output, hidden = self.rnn(emb, hidden)
                output = self.drop(output)
                decoded = self.decoder(output)
                return decoded, hidden

        version = get_torch_version()
        if version >= '1.8':
            model = LSTMModel(
                ntoken=10,
                ninp=512,
                nhid=256,
                nlayers=5,
            )

            # run fx_quant in lpot and save the quantized GraphModule
            model.eval()
            quantizer = Quantization('fx_dynamic_yaml.yaml')
            quantizer.model = common.Model(model, **{'a': 1})
            q_model = quantizer()
            q_model.save('./saved_dynamic_fx')

            # Load configure and weights by lpot.utils
            model_fx = load("./saved_dynamic_fx", model, **{'a': 1})
            if version >= '1.8':
                self.assertTrue(
                    isinstance(model_fx, torch.fx.graph_module.GraphModule))
            else:
                self.assertTrue(
                    isinstance(model_fx, torch._fx.graph_module.GraphModule))
Code Example #17
File: main.py  Project: jeffmaxey/lpot
    def run(self):
        """ This is lpot function include tuning and benchmark option """

        if self.args.tune:
            from lpot.experimental import Quantization, common
            quantizer = Quantization(self.args.config)
            quantizer.model = common.Model(self.args.input_graph)
            q_model = quantizer()
            q_model.save(self.args.output_graph)

        if self.args.benchmark:
            from lpot.experimental import Benchmark, common
            evaluator = Benchmark(self.args.config)
            evaluator.model = common.Model(self.args.input_graph)
            results = evaluator()
            for mode, result in results.items():
                acc, batch_size, result_list = result
                latency = np.array(result_list).mean() / batch_size

                print('\n{} mode benchmark result:'.format(mode))
                print('Accuracy is {:.3f}'.format(acc))
                print('Batch size = {}'.format(batch_size))
                print('Latency: {:.3f} ms'.format(latency * 1000))
                print('Throughput: {:.3f} images/sec'.format(1. / latency))
Code Example #18
File: test_adaptor_pytorch.py  Project: intel/lpot
 def test_tensorboard(self):
     model = copy.deepcopy(self.lpot_model)
     model.model.eval().fuse_model()
     quantizer = Quantization('dump_yaml.yaml')
     dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
     quantizer.model = common.Model(model.model)
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantizer.eval_func = eval_func
     quantizer()
     self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))
     quantizer.eval_dataloader = common.DataLoader(dataset)
     quantizer.eval_func = None
     quantizer()
     self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))
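
Note: the eval_func assigned above is defined elsewhere in the test module; LPOT only expects a callable that takes a model and returns a single accuracy-style metric. A minimal sketch under that assumption (random data with shapes matching the dummy dataset above, illustrative only):

import torch

def eval_func(model):
    # Hypothetical evaluation function: returns a scalar "accuracy" that the
    # LPOT tuning loop can compare against the FP32 baseline.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for _ in range(2):
            images = torch.rand(8, 3, 256, 256)
            labels = torch.randint(0, 1000, (8,))
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total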
Code Example #19
File: test_adaptor_pytorch.py  Project: intel/lpot
 def test_tensor_dump_and_set(self):
     model = copy.deepcopy(self.lpot_model)
     model.model.eval().fuse_model()
     quantizer = Quantization('ptq_yaml.yaml')
     dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
     dataloader = common.DataLoader(dataset)
     dataloader = common._generate_common_dataloader(dataloader, 'pytorch')
     quantizer.eval_dataloader = dataloader
     quantizer.calib_dataloader = dataloader
     quantizer.model = common.Model(model.model)
     q_model = quantizer()
     quantizer.strategy.adaptor.inspect_tensor(
         model,
         dataloader,
         op_list=['conv1.0', 'layer1.0.conv1.0'],
         iteration_list=[1, 2],
         inspect_type='all',
         save_to_disk=True)
     load_array = lambda *a, **k: np.load(*a, allow_pickle=True, **k)
     a = load_array('saved/dump_tensor/activation_iter1.npz')
     w = load_array('saved/dump_tensor/weight.npz')
     version = get_torch_version()
     if version >= '1.8':
         self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] ==
                         a['conv1.0'].item()['conv1.0.output0'].shape[1])
     else:
         self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] ==
                         a['conv1.0'].item()['conv1.1.output0'].shape[1])
     data = np.random.random(
         w['conv1.0'].item()['conv1.0.weight'].shape).astype(np.float32)
     quantizer.strategy.adaptor.set_tensor(q_model,
                                           {'conv1.0.weight': data})
     changed_tensor = q_model.get_weight('conv1.weight')
     scales = changed_tensor.q_per_channel_scales()
     changed_tensor_fp32 = torch.dequantize(changed_tensor)
     self.assertTrue(
         np.allclose(data,
                     changed_tensor_fp32.numpy(),
                     atol=2 / np.min(scales.numpy())))
     quantizer.strategy.adaptor.inspect_tensor(
         q_model,
         dataloader,
         op_list=['conv1.0', 'layer1.0.conv1.0'],
         iteration_list=[1, 2],
         inspect_type='all',
         save_to_disk=False)
Code Example #20
def benchmark_model(
    input_graph: str,
    config: str,
    benchmark_mode: str,
    framework: str,
) -> None:
    """Execute benchmark."""
    from lpot.experimental import Benchmark, common

    if framework == "onnxrt":
        import onnx

        input_graph = onnx.load(input_graph)

    evaluator = Benchmark(config)
    evaluator.model = common.Model(input_graph)
    evaluator(benchmark_mode)
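
A hypothetical invocation of benchmark_model() above; the graph path, config file, and mode are placeholders:

# Illustrative only: the paths and config are placeholders, not files shipped here.
benchmark_model(
    input_graph="./mobilenet_v1_1.0_224_frozen.pb",
    config="./conf.yaml",
    benchmark_mode="performance",
    framework="tensorflow",
)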
Code Example #21
File: test_adaptor_pytorch.py  Project: intel/lpot
    def test_fx_quant(self):
        version = get_torch_version()
        if version >= '1.8':
            model_origin = torchvision.models.resnet18()

            # run fx_quant in lpot and save the quantized GraphModule
            quantizer = Quantization('fx_ptq_yaml.yaml')
            dataset = quantizer.dataset('dummy', (10, 3, 224, 224), label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_func = eval_func
            quantizer.model = common.Model(model_origin, **{'a': 1})
            q_model = quantizer()
            q_model.save('./saved_static_fx')

            # Load configure and weights by lpot.utils
            model_fx = load("./saved_static_fx", model_origin, **{'a': 1})
            self.assertTrue(
                isinstance(model_fx, torch.fx.graph_module.GraphModule))
Code Example #22
File: tune_model.py  Project: peiwenhuang27/lpot
def tune_model(
    input_graph: str,
    output_graph: str,
    config: str,
    framework: str,
) -> None:
    """Execute tuning."""
    from lpot.experimental import Quantization, common

    if framework == "onnxrt":
        import onnx

        input_graph = onnx.load(input_graph)

    quantizer = Quantization(config)
    quantizer.model = common.Model(input_graph)
    quantized_model = quantizer()
    quantized_model.save(output_graph)
Code Example #23
    def test_quantization_saved(self):
        from lpot.utils.pytorch import load

        for fake_yaml in ['dynamic_yaml.yaml', 'ptq_yaml.yaml']:
            if fake_yaml == 'dynamic_yaml.yaml':
                model = torchvision.models.quantization.resnet18()
            else:
                model = copy.deepcopy(self.model)
            if fake_yaml == 'ptq_yaml.yaml':
                model.eval().fuse_model()
            quantizer = Quantization(fake_yaml)
            dataset = quantizer.dataset('dummy', (100, 3, 256, 256),
                                        label=True)
            quantizer.model = common.Model(model)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            q_model = quantizer()
        self.assertTrue(bool(q_model))
Code Example #24
File: main.py  Project: vuiseng9/lpot
def main(_):
    graph = load_graph(FLAGS.input_graph)
    if FLAGS.mode == 'tune':
        from lpot.experimental import Quantization, common
        quantizer = Quantization(FLAGS.config)
        ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file)
        quantizer.calib_dataloader = common.DataLoader(ds, collate_fn=collate_fn, \
                                                 batch_size=FLAGS.batch_size)
        quantizer.model = common.Model(graph)
        quantizer.eval_func = eval_func
        q_model = quantizer()
        try:
            q_model.save(FLAGS.output_model)
        except Exception as e:
            print("Failed to save model due to {}".format(str(e)))
    elif FLAGS.mode == 'benchmark':
        eval_func(graph, FLAGS.iters)
    elif FLAGS.mode == 'accuracy':
        eval_func(graph, -1)
Code Example #25
def benchmark_model(
    input_graph: str,
    config: str,
    benchmark_mode: str,
    framework: str,
    datatype: str = "",
) -> List[Dict[str, Any]]:
    """Execute benchmark."""
    from lpot.experimental import Benchmark, common

    benchmark_results = []

    if framework == "onnxrt":
        import onnx

        input_graph = onnx.load(input_graph)

    evaluator = Benchmark(config)
    evaluator.model = common.Model(input_graph)
    results = evaluator()
    for mode, result in results.items():
        if benchmark_mode == mode:
            log.info(f"Mode: {mode}")
            acc, batch_size, result_list = result
            latency = (sum(result_list) / len(result_list)) / batch_size
            log.info(f"Batch size: {batch_size}")
            if mode == "accuracy":
                log.info(f"Accuracy: {acc:.3f}")
            elif mode == "performance":
                log.info(f"Latency: {latency * 1000:.3f} ms")
                log.info(f"Throughput: {1. / latency:.3f} images/sec")

            benchmark_results.append(
                {
                    "precision": datatype,
                    "mode": mode,
                    "batch_size": batch_size,
                    "accuracy": acc,
                    "latency": latency * 1000,
                    "throughput": 1.0 / latency,
                }, )
    return benchmark_results
Code Example #26
File: test_adaptor_onnxrt.py  Project: intel/lpot
 def test_set_tensor(self):
     quantizer = Quantization("static.yaml")
     quantizer.calib_dataloader = self.cv_dataloader
     quantizer.eval_dataloader = self.cv_dataloader
     quantizer.model = common.Model(self.mb_v2_model)
     q_model = quantizer()
     framework_specific_info = {
         "device": "cpu",
         "approach": "post_training_static_quant",
         "random_seed": 1234,
         "q_dataloader": None,
         "backend": "qlinearops",
         "workspace_path": './lpot_workspace/{}/{}/'.format('onnxrt', 'imagenet')
     }
     framework = "onnxrt_qlinearops"
     adaptor = FRAMEWORKS[framework](framework_specific_info)
     q_config = {
         'fused Conv_0': {
             'weight': {
                 'granularity': 'per_channel',
                 'dtype': onnx_proto.TensorProto.INT8
             }
         }
     }
     adaptor.q_config = q_config
     adaptor.set_tensor(q_model, {
         'ConvBnFusion_W_features.0.0.weight': np.random.random([32, 3, 3, 3])
     })
     adaptor.set_tensor(q_model, {
         'ConvBnFusion_BN_B_features.0.1.bias': np.random.random([32])
     })
Code Example #27
File: distiller_bert.py  Project: vuiseng9/lpot
def train(args, train_dataset, model, tokenizer):
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)
    def train_func(model):
        return take_train_steps(args, model, tokenizer, train_dataloader, prune)
    
    def eval_func(model):
        return take_eval_steps(args, model, tokenizer, prune)

    if args.prune:
        from lpot.experimental import Pruning, common
        prune = Pruning(args.config)
        prune.model = common.Model(model)
        prune.train_dataloader = train_dataloader
        prune.pruning_func = train_func
        prune.eval_dataloader = train_dataloader
        prune.eval_func = eval_func
        model = prune()
        torch.save(model, args.output_model)
Code Example #28
File: test_pruning.py  Project: vuiseng9/lpot
    def test_pruning(self):
        from lpot.experimental import Pruning, common
        prune = Pruning('fake.yaml')

        dummy_dataset = PyTorchDummyDataset([tuple([100, 3, 256, 256])])
        dummy_dataloader = PyTorchDataLoader(dummy_dataset)

        def training_func_for_lpot(model):
            epochs = 16
            iters = 30
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
            for nepoch in range(epochs):
                model.train()
                cnt = 0
                prune.on_epoch_begin(nepoch)
                for image, target in dummy_dataloader:
                    prune.on_batch_begin(cnt)
                    print('.', end='')
                    cnt += 1
                    output = model(image)
                    loss = criterion(output, target)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    prune.on_batch_end()
                    if cnt >= iters:
                        break
                prune.on_epoch_end()

        dummy_dataset = PyTorchDummyDataset(tuple([100, 3, 256, 256]),
                                            label=True)
        dummy_dataloader = PyTorchDataLoader(dummy_dataset)
        prune.model = common.Model(self.model)
        prune.pruning_func = training_func_for_lpot
        prune.eval_dataloader = dummy_dataloader
        prune.train_dataloader = dummy_dataloader
        _ = prune()
Code Example #29
    def test_model_conversion(self):
        from lpot.experimental import ModelConversion, common
        conversion = ModelConversion()
        conversion.source = 'qat'
        conversion.destination = 'default'
        conversion.model = common.Model(self._qat_temp_path)
        q_model = conversion()
        q_model.save(self._quantized_temp_path)

        graph = tf.compat.v1.Graph()
        with graph.as_default():
            with tf.compat.v1.Session() as sess:
                meta_graph = tf.compat.v1.saved_model.loader.load(
                    sess, [tf.compat.v1.saved_model.tag_constants.SERVING],
                    self._quantized_temp_path)
                print(meta_graph.graph_def.node)
                for i in meta_graph.graph_def.node:
                    if 'MatMul' in i.op:
                        self.assertTrue('QuantizedMatMul' in i.op)
                    if 'MaxPool' in i.op:
                        self.assertTrue('QuantizedMaxPool' in i.op)
                    if 'Conv2D' in i.op:
                        self.assertTrue('QuantizedConv2D' in i.op)
Code Example #30
File: bert_base.py  Project: jeffmaxey/lpot
                                      dummy_dataloader,
                                      benchmark=True)
            latency = np.array(results).mean() / args.eval_batch_size

            print('Latency: {:.3f} ms'.format(latency * 1000))
            print('Throughput: {:.3f} items/sec'.format(args.eval_batch_size *
                                                        1. / latency))
        print('--------------------------------------------------------------')

    if args.tune:
        from onnxruntime.transformers import optimizer
        from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions
        opt_options = BertOptimizationOptions('bert')
        opt_options.enable_embed_layer_norm = False

        model_optimizer = optimizer.optimize_model(
            args.model_path,
            'bert',
            num_heads=12,
            hidden_size=768,
            optimization_options=opt_options)
        model = model_optimizer.model

        from lpot.experimental import Quantization, common
        quantize = Quantization(args.config)
        quantize.model = common.Model(model)
        quantize.calib_dataloader = eval_dataloader
        quantize.eval_func = eval_func
        q_model = quantize()
        q_model.save(args.output_model)