def test_adaptor(self):
    """Quantize several ONNX models under different configs, then benchmark one."""
    # Static and dynamic quantization of ResNet-50, evaluated after tuning.
    for cfg in ("static.yaml", "dynamic.yaml"):
        quantizer = Quantization(cfg)
        quantizer.calib_dataloader = self.cv_dataloader
        quantizer.eval_dataloader = self.cv_dataloader
        quantizer.model = common.Model(self.rn50_model)
        eval_func(quantizer())

    # MobileNet-V2 tuned with a non-MSE metric config.
    for cfg in ("non_MSE.yaml",):
        quantizer = Quantization(cfg)
        quantizer.calib_dataloader = self.cv_dataloader
        quantizer.eval_dataloader = self.cv_dataloader
        quantizer.model = common.Model(self.mb_v2_model)
        eval_func(quantizer())

    # Inception-ResNet-v3 static quantization; result is not evaluated here.
    for cfg in ("static.yaml",):
        quantizer = Quantization(cfg)
        quantizer.calib_dataloader = self.ir3_dataloader
        quantizer.eval_dataloader = self.ir3_dataloader
        quantizer.model = common.Model(self.ir3_model)
        quantizer()

    # Benchmark ResNet-50 in both supported modes.
    for mode in ("performance", "accuracy"):
        evaluator = Benchmark("benchmark.yaml")
        evaluator.b_dataloader = self.cv_dataloader
        evaluator.model = common.Model(self.rn50_model)
        evaluator(mode)
def test_quantization_saved(self):
    """Quantize under several configs, save/reload each result, then benchmark."""
    for cfg in ('dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml'):
        # Dynamic quantization uses a fresh resnet18; other configs reuse the fixture.
        if cfg == 'dynamic_yaml.yaml':
            model = torchvision.models.resnet18()
        else:
            model = copy.deepcopy(self.model)
        if cfg == 'ptq_yaml.yaml':
            model.eval().fuse_model()
        quantizer = Quantization(cfg)
        dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
        quantizer.model = common.Model(model)
        if cfg == 'qat_yaml.yaml':
            # QAT drives its own training function instead of calib/eval loaders.
            quantizer.q_func = q_func
        else:
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_dataloader = common.DataLoader(dataset)
        q_model = quantizer()
        q_model.save('./saved')
        # Load configure and weights by lpot.utils
        saved_model = load("./saved", model)
        eval_func(saved_model)
        shutil.rmtree('./saved', ignore_errors=True)

    from lpot.experimental import Benchmark
    evaluator = Benchmark('ptq_yaml.yaml')
    # Load configure and weights by lpot.model
    evaluator.model = common.Model(model)
    evaluator.b_dataloader = common.DataLoader(dataset)
    evaluator()
    evaluator.model = common.Model(model)
    evaluator()
def test_quantization_saved(self):
    """Quantize, reload from disk, then compare FP32 vs quantized accuracy."""
    from lpot.utils.pytorch import load

    for cfg in ('dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml'):
        if cfg == 'dynamic_yaml.yaml':
            model = torchvision.models.resnet18()
        else:
            model = copy.deepcopy(self.model)
        if cfg == 'ptq_yaml.yaml':
            model.eval().fuse_model()
        quantizer = Quantization(cfg)
        dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
        quantizer.model = common.Model(model)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        if cfg == 'qat_yaml.yaml':
            quantizer.q_func = q_func
        q_model = quantizer()
        q_model.save('./saved')
        # Load configure and weights by lpot.utils
        saved_model = load("./saved", model)
        eval_func(saved_model)

    from lpot.experimental import Benchmark
    evaluator = Benchmark('ptq_yaml.yaml')
    # Load configure and weights by lpot.model
    evaluator.model = common.Model(model)
    evaluator.b_dataloader = common.DataLoader(dataset)
    results = evaluator()
    evaluator.model = common.Model(model)
    fp32_results = evaluator()
    # Quantized accuracy must stay within 1% of the FP32 baseline.
    self.assertTrue(
        (fp32_results['accuracy'][0] - results['accuracy'][0]) < 0.01)
def run(self):
    """ This is lpot function include tuning and benchmark option """
    if self.args.tune:
        from lpot.experimental import Quantization, common

        quantizer = Quantization(self.args.config)
        quantizer.model = common.Model(self.args.input_graph)
        # Tune, then persist the resulting int8 graph.
        quantizer().save(self.args.output_graph)

    if self.args.benchmark:
        from lpot.experimental import Benchmark, common

        evaluator = Benchmark(self.args.config)
        evaluator.model = common.Model(self.args.input_graph)
        evaluator(self.args.mode)
def main_worker(args):
    """Build the model, optionally resume from a checkpoint, then run pruning."""
    global best_acc1

    arch = models.__dict__[args.topology]
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.topology))
        model = arch(pretrained=True)
    else:
        print("=> creating model '{}'".format(args.topology))
        model = arch()

    # optionally resume from a checkpoint
    if args.resume:
        if not os.path.isfile(args.resume):
            print("=> no checkpoint found at '{}'".format(args.resume))
        else:
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

    if args.prune:
        from lpot.experimental import Pruning, common

        prune = Pruning(args.config)
        prune.model = common.Model(model)
        model = prune()
        model.save(args.output_model)
        return
def test_quantizate(self):
    """Quantize ResNet-50 (static & dynamic) and MobileNet-V2 (non-MSE metric)."""
    from lpot.experimental import Quantization, common

    # (configs, model) pairs, run in the same order as the original test.
    runs = (
        (("static_yaml.yaml", "dynamic_yaml.yaml"), self.rn50_model),
        (("non_MSE_yaml.yaml",), self.mb_v2_model),
    )
    for configs, model in runs:
        for cfg in configs:
            quantizer = Quantization(cfg)
            quantizer.calib_dataloader = self.cv_dataloader
            quantizer.eval_dataloader = self.cv_dataloader
            quantizer.model = common.Model(model)
            eval_func(quantizer())
def main():
    """Quantize the Inception-v1 checkpoint using the local YAML config."""
    from lpot.experimental import Quantization, common

    quantizer = Quantization('./conf.yaml')
    # Do quantization
    quantizer.model = common.Model('./inception_v1.ckpt')
    quantized_model = quantizer()
def main():
    """Quantize the RFCN-ResNet101 saved model with a dummy calibration set."""
    quantizer = Quantization('./conf.yaml')
    # Random data is sufficient to drive calibration for this example.
    dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
    quantizer.model = common.Model(
        './model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/'
    )
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantized_model = quantizer()
def main():
    """Tune or benchmark the 3D-UNet (BraTS) model with LPOT.

    Restores an nnU-Net trainer checkpoint, then either quantizes it
    (``--tune``) or evaluates it (``--benchmark``), per CLI flags.
    """

    class CalibrationDL():
        # Minimal dataloader over pre-processed BraTS calibration samples.
        def __init__(self):
            path = os.path.abspath(
                os.path.expanduser('./brats_cal_images_list.txt'))
            with open(path, 'r') as f:
                self.preprocess_files = [line.rstrip() for line in f]
            self.loaded_files = {}  # cache: sample_id -> deserialized sample
            self.batch_size = 1

        def __getitem__(self, sample_id):
            # Lazily load (and cache) the pickled, pre-processed sample.
            file_name = self.preprocess_files[sample_id]
            print("Loading file {:}".format(file_name))
            with open(
                    os.path.join('build/calib_preprocess/',
                                 "{:}.pkl".format(file_name)), "rb") as f:
                self.loaded_files[sample_id] = pickle.load(f)[0]
            # note that calibration phase does not care label, here we return 0 for label free case.
            return self.loaded_files[sample_id], 0

        def __len__(self):
            self.count = len(self.preprocess_files)
            return self.count

    args = get_args()
    assert args.backend == "pytorch"
    model_path = os.path.join(args.model_dir, "plans.pkl")
    assert os.path.isfile(
        model_path), "Cannot find the model file {:}!".format(model_path)
    # Restore the nnU-Net trainer and its final checkpoint weights.
    trainer, params = load_model_and_checkpoint_files(
        args.model_dir,
        folds=1,
        fp16=False,
        checkpoint_name='model_final_checkpoint')
    trainer.load_checkpoint_ram(params[0], False)
    model = trainer.network

    if args.tune:
        quantizer = Quantization('conf.yaml')
        quantizer.model = common.Model(model)
        quantizer.eval_func = eval_func
        quantizer.calib_dataloader = common.DataLoader(CalibrationDL())
        q_model = quantizer()
        q_model.save('./lpot_workspace')
        exit(0)

    if args.benchmark:
        model.eval()
        if args.int8:
            from lpot.utils.pytorch import load
            # Reload the previously tuned INT8 model from the workspace.
            new_model = load(
                os.path.abspath(os.path.expanduser('./lpot_workspace')),
                model)
        else:
            new_model = model
        eval_func(new_model)
def main():
    """Quantize MobileNet-V1 and report accuracy/latency from a benchmark run."""
    from lpot.experimental import Quantization, common

    quantizer = Quantization('./conf.yaml')
    quantizer.model = common.Model("./mobilenet_v1_1.0_224_frozen.pb")
    quantized_model = quantizer()

    # Optional, run benchmark
    from lpot.experimental import Benchmark

    evaluator = Benchmark('./conf.yaml')
    evaluator.model = common.Model(quantized_model.graph_def)
    results = evaluator()
    batch_size = 1
    for mode, (acc, batch_size, result_list) in results.items():
        # Per-sample latency: mean over recorded iteration times.
        latency = np.array(result_list).mean() / batch_size
        print('Accuracy is {:.3f}'.format(acc))
        print('Latency: {:.3f} ms'.format(latency * 1000))
def test_pruning_external(self):
    """Drive Pruning through the non-experimental API with a dummy dataloader."""
    from lpot.experimental import common
    from lpot import Pruning

    prune = Pruning('fake.yaml')
    datasets = DATASETS('pytorch')
    dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224),
                                      low=0.,
                                      high=1.,
                                      label=True)
    dummy_dataloader = PyTorchDataLoader(dummy_dataset)

    def training_func_for_lpot(model):
        # Short SGD loop that fires every pruning callback per epoch/batch.
        epochs, iters = 16, 30
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
        for nepoch in range(epochs):
            model.train()
            prune.on_epoch_begin(nepoch)
            cnt = 0
            for image, target in dummy_dataloader:
                prune.on_batch_begin(cnt)
                print('.', end='')
                cnt += 1
                loss = criterion(model(image), target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                prune.on_batch_end()
                if cnt >= iters:
                    break
            prune.on_epoch_end()

    prune.model = common.Model(self.model)
    prune.pruning_func = training_func_for_lpot
    prune.eval_dataloader = dummy_dataloader
    prune.train_dataloader = dummy_dataloader
    # Call-style invocation (legacy API) mirroring the attribute assignments.
    _ = prune(common.Model(self.model),
              train_dataloader=dummy_dataloader,
              pruning_func=training_func_for_lpot,
              eval_dataloader=dummy_dataloader)
def main():
    """Parse CLI flags and run tuning and/or benchmarking accordingly."""
    arg_parser = ArgumentParser(description='Parse args')
    arg_parser.add_argument('--benchmark',
                            action='store_true',
                            help='run benchmark')
    arg_parser.add_argument('--tune', action='store_true', help='run tuning')
    args = arg_parser.parse_args()

    if args.tune:
        from lpot.experimental import Quantization, common

        quantizer = Quantization('./conf.yaml')
        quantizer.model = common.Model("./mobilenet_v1_1.0_224_frozen.pb")
        quantizer().save('./int8.pb')

    if args.benchmark:
        from lpot.experimental import Benchmark, common

        evaluator = Benchmark('./conf.yaml')
        evaluator.model = common.Model('int8.pb')
        evaluator(mode='accuracy')
def test_tuning_ipex(self):
    """Tune resnet18 through the IPEX backend, then benchmark a scripted copy.

    Fix: the original used a bare ``except:``, which would also swallow
    ``KeyboardInterrupt``/``SystemExit``; narrowed to ``except Exception``.
    """
    from lpot.experimental import Quantization

    model = torchvision.models.resnet18()
    quantizer = Quantization('ipex_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
    quantizer.model = common.Model(model)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    lpot_model = quantizer()
    lpot_model.save("./saved")

    # Prefer scripting; fall back to tracing for models script() cannot handle.
    try:
        script_model = torch.jit.script(model.to(ipex.DEVICE))
    except Exception:
        script_model = torch.jit.trace(
            model.to(ipex.DEVICE),
            torch.randn(10, 3, 224, 224).to(ipex.DEVICE))

    from lpot.experimental import Benchmark

    evaluator = Benchmark('ipex_yaml.yaml')
    evaluator.model = common.Model(script_model)
    evaluator.b_dataloader = common.DataLoader(dataset)
    results = evaluator()
def quantize(model, q_data, e_data):
    """Quantize ``model`` using paired (features, labels) calib/eval data."""
    from lpot.experimental import Quantization, common
    from lpot.experimental.common import DataLoader

    quantizer = Quantization('fake_yaml.yaml')
    # Zip features with labels to form (sample, label) pairs for each loader.
    calib_loader = DataLoader(dataset=list(zip(q_data[0], q_data[1])))
    eval_loader = DataLoader(dataset=list(zip(e_data[0], e_data[1])))
    quantizer.model = common.Model(model)
    quantizer.calib_dataloader = calib_loader
    quantizer.eval_dataloader = eval_loader
    return quantizer()
def main(_):
    """Either run the benchmark or quantize the detection graph with NMS post-op."""
    if FLAGS.benchmark:
        run_benchmark()
        return

    FLAGS.batch_size = 1
    from lpot.experimental import Quantization, common

    quantizer = Quantization(FLAGS.config)
    quantizer.model = common.Model(FLAGS.input_graph)
    # Detection thresholds are forwarded into the registered NMS postprocess.
    kwargs = {
        'conf_threshold': FLAGS.conf_threshold,
        'iou_threshold': FLAGS.iou_threshold,
    }
    quantizer.postprocess = common.Postprocess(NMS, 'NMS', **kwargs)
    q_model = quantizer()
    q_model.save(FLAGS.output_graph)
def test_fx_dynamic_quant(self):
    """Dynamically quantize an LSTM language model via the FX backend,
    save it, reload it, and verify the reloaded object is a GraphModule."""

    # Model Definition
    class LSTMModel(nn.Module):
        """Container module with an encoder, a recurrent module, and a decoder."""

        def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
            super(LSTMModel, self).__init__()
            self.drop = nn.Dropout(dropout)
            self.encoder = nn.Embedding(ntoken, ninp)
            self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
            self.decoder = nn.Linear(nhid, ntoken)
            self.init_weights()
            self.nhid = nhid
            self.nlayers = nlayers

        def init_weights(self):
            # Uniform init for embedding/decoder weights, zeroed decoder bias.
            initrange = 0.1
            self.encoder.weight.data.uniform_(-initrange, initrange)
            self.decoder.bias.data.zero_()
            self.decoder.weight.data.uniform_(-initrange, initrange)

        def forward(self, input, hidden):
            emb = self.drop(self.encoder(input))
            output, hidden = self.rnn(emb, hidden)
            output = self.drop(output)
            decoded = self.decoder(output)
            return decoded, hidden

    version = get_torch_version()
    # NOTE(review): if this is a plain string comparison, '1.10' < '1.8'
    # lexicographically — verify get_torch_version() returns a normalized,
    # correctly comparable value.
    if version >= '1.8':
        model = LSTMModel(
            ntoken=10,
            ninp=512,
            nhid=256,
            nlayers=5,
        )
        # run fx_quant in lpot and save the quantized GraphModule
        model.eval()
        quantizer = Quantization('fx_dynamic_yaml.yaml')
        quantizer.model = common.Model(model, **{'a': 1})
        q_model = quantizer()
        q_model.save('./saved_dynamic_fx')
        # Load configure and weights by lpot.utils
        model_fx = load("./saved_dynamic_fx", model, **{'a': 1})
        # torch >= 1.8 exposes fx publicly; older builds used torch._fx.
        if version >= '1.8':
            self.assertTrue(
                isinstance(model_fx, torch.fx.graph_module.GraphModule))
        else:
            self.assertTrue(
                isinstance(model_fx, torch._fx.graph_module.GraphModule))
def run(self):
    """ This is lpot function include tuning and benchmark option """
    if self.args.tune:
        from lpot.experimental import Quantization, common

        quantizer = Quantization(self.args.config)
        quantizer.model = common.Model(self.args.input_graph)
        quantizer().save(self.args.output_graph)

    if self.args.benchmark:
        from lpot.experimental import Benchmark, common

        evaluator = Benchmark(self.args.config)
        evaluator.model = common.Model(self.args.input_graph)
        # Report accuracy/latency/throughput for each benchmark mode.
        for mode, (acc, batch_size, result_list) in evaluator().items():
            latency = np.array(result_list).mean() / batch_size
            print('\n{} mode benchmark result:'.format(mode))
            print('Accuracy is {:.3f}'.format(acc))
            print('Batch size = {}'.format(batch_size))
            print('Latency: {:.3f} ms'.format(latency * 1000))
            print('Throughput: {:.3f} images/sec'.format(1. / latency))
def test_tensorboard(self):
    """A TensorBoard baseline dump should appear for both evaluation styles."""
    model = copy.deepcopy(self.lpot_model)
    model.model.eval().fuse_model()
    quantizer = Quantization('dump_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
    quantizer.model = common.Model(model.model)
    quantizer.calib_dataloader = common.DataLoader(dataset)

    # First pass: user-supplied eval function.
    quantizer.eval_func = eval_func
    quantizer()
    self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))

    # Second pass: built-in evaluation driven by a dataloader.
    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer.eval_func = None
    quantizer()
    self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))
def test_tensor_dump_and_set(self):
    """Inspect activation/weight tensors, overwrite a conv weight via
    set_tensor, verify the round-trip, then re-inspect in memory."""
    model = copy.deepcopy(self.lpot_model)
    model.model.eval().fuse_model()
    quantizer = Quantization('ptq_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
    dataloader = common.DataLoader(dataset)
    dataloader = common._generate_common_dataloader(dataloader, 'pytorch')
    quantizer.eval_dataloader = dataloader
    quantizer.calib_dataloader = dataloader
    quantizer.model = common.Model(model.model)
    q_model = quantizer()
    # Dump tensors of the FP32 model to disk for two ops over two iterations.
    quantizer.strategy.adaptor.inspect_tensor(
        model,
        dataloader,
        op_list=['conv1.0', 'layer1.0.conv1.0'],
        iteration_list=[1, 2],
        inspect_type='all',
        save_to_disk=True)
    # allow_pickle=True is required: the dumps store object (dict) arrays.
    load_array = lambda *a, **k: np.load(*a, allow_pickle=True, **k)
    a = load_array('saved/dump_tensor/activation_iter1.npz')
    w = load_array('saved/dump_tensor/weight.npz')
    version = get_torch_version()
    # Output-channel count of the conv weight must equal the activation's
    # channel axis; the dumped op index differs across torch versions.
    if version >= '1.8':
        self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] ==
                        a['conv1.0'].item()['conv1.0.output0'].shape[1])
    else:
        self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] ==
                        a['conv1.0'].item()['conv1.1.output0'].shape[1])
    # Overwrite the quantized conv weight with random data...
    data = np.random.random(
        w['conv1.0'].item()['conv1.0.weight'].shape).astype(np.float32)
    quantizer.strategy.adaptor.set_tensor(q_model, {'conv1.0.weight': data})
    changed_tensor = q_model.get_weight('conv1.weight')
    scales = changed_tensor.q_per_channel_scales()
    changed_tensor_fp32 = torch.dequantize(changed_tensor)
    # ...and check the dequantized values match within quantization error.
    self.assertTrue(
        np.allclose(data,
                    changed_tensor_fp32.numpy(),
                    atol=2 / np.min(scales.numpy())))
    # Inspect the quantized model in memory (no disk dump this time).
    quantizer.strategy.adaptor.inspect_tensor(
        q_model,
        dataloader,
        op_list=['conv1.0', 'layer1.0.conv1.0'],
        iteration_list=[1, 2],
        inspect_type='all',
        save_to_disk=False)
def benchmark_model(
    input_graph: str,
    config: str,
    benchmark_mode: str,
    framework: str,
) -> None:
    """Execute benchmark."""
    from lpot.experimental import Benchmark, common

    # ONNX Runtime expects an in-memory model object rather than a path.
    if framework == "onnxrt":
        import onnx

        input_graph = onnx.load(input_graph)

    evaluator = Benchmark(config)
    evaluator.model = common.Model(input_graph)
    evaluator(benchmark_mode)
def test_fx_quant(self):
    """Static FX quantization: tune resnet18, save, reload, and type-check."""
    version = get_torch_version()
    if version < '1.8':
        # FX graph mode requires torch >= 1.8; nothing to test otherwise.
        return

    model_origin = torchvision.models.resnet18()
    # run fx_quant in lpot and save the quantized GraphModule
    quantizer = Quantization('fx_ptq_yaml.yaml')
    dataset = quantizer.dataset('dummy', (10, 3, 224, 224), label=True)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_func = eval_func
    quantizer.model = common.Model(model_origin, **{'a': 1})
    q_model = quantizer()
    q_model.save('./saved_static_fx')
    # Load configure and weights by lpot.utils
    model_fx = load("./saved_static_fx", model_origin, **{'a': 1})
    self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule))
def tune_model(
    input_graph: str,
    output_graph: str,
    config: str,
    framework: str,
) -> None:
    """Execute tuning."""
    from lpot.experimental import Quantization, common

    # ONNX Runtime expects an in-memory model object rather than a path.
    if framework == "onnxrt":
        import onnx

        input_graph = onnx.load(input_graph)

    quantizer = Quantization(config)
    quantizer.model = common.Model(input_graph)
    quantizer().save(output_graph)
def test_quantization_saved(self):
    """Dynamic and PTQ quantization should both yield a truthy quantized model."""
    # NOTE(review): `load` is imported but unused in this test variant.
    from lpot.utils.pytorch import load

    for cfg in ('dynamic_yaml.yaml', 'ptq_yaml.yaml'):
        if cfg == 'dynamic_yaml.yaml':
            model = torchvision.models.quantization.resnet18()
        else:
            # PTQ path: copy the fixture model and fuse it for static quant.
            model = copy.deepcopy(self.model)
            model.eval().fuse_model()
        quantizer = Quantization(cfg)
        dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
        quantizer.model = common.Model(model)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        q_model = quantizer()
        self.assertTrue(bool(q_model))
def main(_):
    """Tune, benchmark, or accuracy-check the translation graph per FLAGS.mode."""
    graph = load_graph(FLAGS.input_graph)
    if FLAGS.mode == 'tune':
        from lpot.experimental import Quantization, common

        quantizer = Quantization(FLAGS.config)
        ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file,
                     FLAGS.vocab_file)
        quantizer.calib_dataloader = common.DataLoader(
            ds, collate_fn=collate_fn, batch_size=FLAGS.batch_size)
        quantizer.model = common.Model(graph)
        quantizer.eval_func = eval_func
        q_model = quantizer()
        # Saving is best-effort; tuning results are still valid if it fails.
        try:
            q_model.save(FLAGS.output_model)
        except Exception as e:
            print("Failed to save model due to {}".format(str(e)))
    elif FLAGS.mode == 'benchmark':
        eval_func(graph, FLAGS.iters)
    elif FLAGS.mode == 'accuracy':
        eval_func(graph, -1)
def benchmark_model(
    input_graph: str,
    config: str,
    benchmark_mode: str,
    framework: str,
    datatype: str = "",
) -> List[Dict[str, Any]]:
    """Execute benchmark."""
    from lpot.experimental import Benchmark, common

    benchmark_results = []

    # ONNX Runtime expects an in-memory model object rather than a path.
    if framework == "onnxrt":
        import onnx

        input_graph = onnx.load(input_graph)

    evaluator = Benchmark(config)
    evaluator.model = common.Model(input_graph)
    results = evaluator()
    for mode, (acc, batch_size, result_list) in results.items():
        if benchmark_mode != mode:
            continue
        log.info(f"Mode: {mode}")
        # Per-sample latency: mean iteration time divided by the batch size.
        latency = (sum(result_list) / len(result_list)) / batch_size
        log.info(f"Batch size: {batch_size}")
        if mode == "accuracy":
            log.info(f"Accuracy: {acc:.3f}")
        elif mode == "performance":
            log.info(f"Latency: {latency * 1000:.3f} ms")
            log.info(f"Throughput: {1. / latency:.3f} images/sec")
        benchmark_results.append(
            {
                "precision": datatype,
                "mode": mode,
                "batch_size": batch_size,
                "accuracy": acc,
                "latency": latency * 1000,
                "throughput": 1.0 / latency,
            },
        )
    return benchmark_results
def test_set_tensor(self):
    """set_tensor should accept replacement arrays for fused Conv weight/bias."""
    quantizer = Quantization("static.yaml")
    quantizer.calib_dataloader = self.cv_dataloader
    quantizer.eval_dataloader = self.cv_dataloader
    quantizer.model = common.Model(self.mb_v2_model)
    q_model = quantizer()

    framework_specific_info = {
        "device": "cpu",
        "approach": "post_training_static_quant",
        "random_seed": 1234,
        "q_dataloader": None,
        "backend": "qlinearops",
        "workspace_path": './lpot_workspace/{}/{}/'.format(
            'onnxrt', 'imagenet'),
    }
    adaptor = FRAMEWORKS["onnxrt_qlinearops"](framework_specific_info)
    # Per-channel INT8 weight config for the fused first conv node.
    adaptor.q_config = {
        'fused Conv_0': {
            'weight': {
                'granularity': 'per_channel',
                'dtype': onnx_proto.TensorProto.INT8,
            }
        }
    }
    adaptor.set_tensor(
        q_model,
        {'ConvBnFusion_W_features.0.0.weight':
         np.random.random([32, 3, 3, 3])})
    adaptor.set_tensor(
        q_model,
        {'ConvBnFusion_BN_B_features.0.1.bias': np.random.random([32])})
def train(args, train_dataset, model, tokenizer):
    """Build the training dataloader and, when requested, run LPOT pruning."""
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    if args.local_rank == -1:
        train_sampler = RandomSampler(train_dataset)
    else:
        train_sampler = DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)

    # Both closures capture `prune`, which is assigned below before either runs.
    def train_func(model):
        return take_train_steps(args, model, tokenizer, train_dataloader,
                                prune)

    def eval_func(model):
        return take_eval_steps(args, model, tokenizer, prune)

    if args.prune:
        from lpot.experimental import Pruning, common

        prune = Pruning(args.config)
        prune.model = common.Model(model)
        prune.train_dataloader = train_dataloader
        prune.pruning_func = train_func
        prune.eval_dataloader = train_dataloader
        prune.eval_func = eval_func
        model = prune()
        torch.save(model, args.output_model)
def test_pruning(self):
    """End-to-end pruning driven entirely by attributes on the Pruning object."""
    from lpot.experimental import Pruning, common

    prune = Pruning('fake.yaml')
    dummy_dataset = PyTorchDummyDataset([tuple([100, 3, 256, 256])])
    dummy_dataloader = PyTorchDataLoader(dummy_dataset)

    def training_func_for_lpot(model):
        # Minimal SGD loop that exercises every pruning callback.
        epochs, iters = 16, 30
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
        for nepoch in range(epochs):
            model.train()
            prune.on_epoch_begin(nepoch)
            cnt = 0
            for image, target in dummy_dataloader:
                prune.on_batch_begin(cnt)
                print('.', end='')
                cnt += 1
                loss = criterion(model(image), target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                prune.on_batch_end()
                if cnt >= iters:
                    break
            prune.on_epoch_end()

    # Rebind the dataloader with a labeled dataset; the closure above picks
    # this up via late binding, exactly as in the original test.
    dummy_dataset = PyTorchDummyDataset(tuple([100, 3, 256, 256]), label=True)
    dummy_dataloader = PyTorchDataLoader(dummy_dataset)
    prune.model = common.Model(self.model)
    prune.pruning_func = training_func_for_lpot
    prune.eval_dataloader = dummy_dataloader
    prune.train_dataloader = dummy_dataloader
    _ = prune()
def test_model_conversion(self):
    """QAT->default conversion should emit quantized MatMul/MaxPool/Conv2D ops."""
    from lpot.experimental import ModelConversion, common

    conversion = ModelConversion()
    conversion.source = 'qat'
    conversion.destination = 'default'
    conversion.model = common.Model(self._qat_temp_path)
    q_model = conversion()
    q_model.save(self._quantized_temp_path)

    # Reload the converted SavedModel and inspect its graph nodes.
    graph = tf.compat.v1.Graph()
    with graph.as_default():
        with tf.compat.v1.Session() as sess:
            meta_graph = tf.compat.v1.saved_model.loader.load(
                sess, [tf.compat.v1.saved_model.tag_constants.SERVING],
                self._quantized_temp_path)
            print(meta_graph.graph_def.node)
            for node in meta_graph.graph_def.node:
                if 'MatMul' in node.op:
                    self.assertTrue('QuantizedMatMul' in node.op)
                if 'MaxPool' in node.op:
                    self.assertTrue('QuantizedMaxPool' in node.op)
                if 'Conv2D' in node.op:
                    self.assertTrue('QuantizedConv2D' in node.op)
dummy_dataloader, benchmark=True) latency = np.array(results).mean() / args.eval_batch_size print('Latency: {:.3f} ms'.format(latency * 1000)) print('Throughput: {:.3f} items/sec'.format(args.eval_batch_size * 1. / latency)) print('--------------------------------------------------------------') if args.tune: from onnxruntime.transformers import optimizer from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions opt_options = BertOptimizationOptions('bert') opt_options.enable_embed_layer_norm = False model_optimizer = optimizer.optimize_model( args.model_path, 'bert', num_heads=12, hidden_size=768, optimization_options=opt_options) model = model_optimizer.model from lpot.experimental import Quantization, common quantize = Quantization(args.config) quantize.model = common.Model(model) quantize.calib_dataloader = eval_dataloader quantize.eval_func = eval_func q_model = quantize() q_model.save(args.output_model)