def test_autodump(self):
    from lpot import Quantization
    quantizer = Quantization('fake_yaml3.yaml')
    dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True)
    dataloader = quantizer.dataloader(dataset)
    quantizer.model = self.constant_graph
    output_graph = quantizer(self.constant_graph,
                             q_dataloader=dataloader,
                             eval_dataloader=dataloader)
def main():
    from lpot.experimental import Quantization, common

    # Do quantization
    quantizer = Quantization('./conf.yaml')
    quantizer.model = common.Model('./inception_v1.ckpt')
    quantized_model = quantizer()
def main():
    from lpot.experimental import Quantization, common

    quantizer = Quantization('./conf.yaml')
    dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
    quantizer.model = common.Model(
        './model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/'
    )
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantized_model = quantizer()
def main():
    class CalibrationDL():
        def __init__(self):
            path = os.path.abspath(
                os.path.expanduser('./brats_cal_images_list.txt'))
            with open(path, 'r') as f:
                self.preprocess_files = [line.rstrip() for line in f]
            self.loaded_files = {}
            self.batch_size = 1

        def __getitem__(self, sample_id):
            file_name = self.preprocess_files[sample_id]
            print("Loading file {:}".format(file_name))
            with open(
                    os.path.join('build/calib_preprocess/',
                                 "{:}.pkl".format(file_name)), "rb") as f:
                self.loaded_files[sample_id] = pickle.load(f)[0]
            # The calibration phase does not use labels, so return 0 as a
            # dummy label for the label-free case.
            return self.loaded_files[sample_id], 0

        def __len__(self):
            self.count = len(self.preprocess_files)
            return self.count

    args = get_args()
    assert args.backend == "pytorch"
    model_path = os.path.join(args.model_dir, "plans.pkl")
    assert os.path.isfile(
        model_path), "Cannot find the model file {:}!".format(model_path)
    trainer, params = load_model_and_checkpoint_files(
        args.model_dir,
        folds=1,
        fp16=False,
        checkpoint_name='model_final_checkpoint')
    trainer.load_checkpoint_ram(params[0], False)
    model = trainer.network

    if args.tune:
        quantizer = Quantization('conf.yaml')
        quantizer.model = common.Model(model)
        quantizer.eval_func = eval_func
        quantizer.calib_dataloader = common.DataLoader(CalibrationDL())
        q_model = quantizer()
        q_model.save('./lpot_workspace')
        exit(0)

    if args.benchmark:
        model.eval()
        if args.int8:
            from lpot.utils.pytorch import load
            new_model = load(
                os.path.abspath(os.path.expanduser('./lpot_workspace')),
                model)
        else:
            new_model = model
        eval_func(new_model)
def main(_): arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument("--input-graph", help='Specify the slim model', dest='input_graph') arg_parser.add_argument("--output-graph", help='Specify tune result model save dir', dest='output_graph') arg_parser.add_argument("--config", default=None, help="tuning config") arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use lpot to tune.') args = arg_parser.parse_args() factory = TFSlimNetsFactory() # user specific model can register to slim net factory input_shape = [None, 299, 299, 3] factory.register('inception_v4', inception_v4, input_shape, inception_v4_arg_scope) if args.tune: from lpot.experimental import Quantization quantizer = Quantization(args.config) quantizer.model = args.input_graph q_model = quantizer() q_model.save(args.output_graph) if args.benchmark: from lpot.experimental import Benchmark evaluator = Benchmark(args.config) evaluator.model = args.input_graph results = evaluator() for mode, result in results.items(): acc, batch_size, result_list = result latency = np.array(result_list).mean() / batch_size print('\n{} mode benchmark result:'.format(mode)) print('Accuracy is {:.3f}'.format(acc)) print('Batch size = {}'.format(batch_size)) print('Latency: {:.3f} ms'.format(latency * 1000)) print('Throughput: {:.3f} images/sec'.format(1. / latency))
def quantize(model, q_data, e_data):
    from lpot.experimental import Quantization, common
    from lpot.experimental.common import DataLoader

    quantizer = Quantization('fake_yaml.yaml')

    q_dataloader = DataLoader(dataset=list(zip(q_data[0], q_data[1])))
    e_dataloader = DataLoader(dataset=list(zip(e_data[0], e_data[1])))
    quantizer.model = common.Model(model)
    quantizer.calib_dataloader = q_dataloader
    quantizer.eval_dataloader = e_dataloader
    quantized_model = quantizer()
    return quantized_model
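A minimal usage sketch for the quantize() helper above. The model path, shapes, and random NumPy arrays are illustrative assumptions, not part of the original example; q_data and e_data are (inputs, labels) pairs of equal length, which the helper zips into calibration and evaluation datasets.

# Hypothetical caller for quantize(); assumes a 'fake_yaml.yaml' config file
# exists alongside, since the helper hard-codes that path.
import numpy as np

x = np.random.randn(32, 224, 224, 3).astype(np.float32)  # dummy inputs
y = np.random.randint(0, 1000, size=32)                   # dummy labels
q_model = quantize('./frozen_model.pb',  # any form common.Model accepts
                   q_data=(x, y),
                   e_data=(x, y))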
def main(_):
    if FLAGS.benchmark:
        run_benchmark()
    else:
        FLAGS.batch_size = 1
        from lpot.experimental import Quantization, common
        quantizer = Quantization(FLAGS.config)
        quantizer.model = common.Model(FLAGS.input_graph)
        kwargs = {'conf_threshold': FLAGS.conf_threshold,
                  'iou_threshold': FLAGS.iou_threshold}
        quantizer.postprocess = common.Postprocess(NMS, 'NMS', **kwargs)
        q_model = quantizer()
        q_model.save(FLAGS.output_graph)
def run(self):
    if self.args.tune:
        from lpot.experimental import Quantization
        quantizer = Quantization(self.args.config)
        quantizer.model = self.args.input_graph
        q_model = quantizer()
        q_model.save(self.args.output_model)

    if self.args.benchmark:
        from lpot.experimental import Benchmark
        evaluator = Benchmark(self.args.config)
        evaluator.model = self.args.input_graph
        evaluator(self.args.mode)
def test_fx_dynamic_quant(self):
    # Model definition
    class LSTMModel(nn.Module):
        """Container module with an encoder, a recurrent module, and a decoder."""

        def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
            super(LSTMModel, self).__init__()
            self.drop = nn.Dropout(dropout)
            self.encoder = nn.Embedding(ntoken, ninp)
            self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
            self.decoder = nn.Linear(nhid, ntoken)
            self.init_weights()
            self.nhid = nhid
            self.nlayers = nlayers

        def init_weights(self):
            initrange = 0.1
            self.encoder.weight.data.uniform_(-initrange, initrange)
            self.decoder.bias.data.zero_()
            self.decoder.weight.data.uniform_(-initrange, initrange)

        def forward(self, input, hidden):
            emb = self.drop(self.encoder(input))
            output, hidden = self.rnn(emb, hidden)
            output = self.drop(output)
            decoded = self.decoder(output)
            return decoded, hidden

    version = get_torch_version()
    if version >= '1.8':
        model = LSTMModel(
            ntoken=10,
            ninp=512,
            nhid=256,
            nlayers=5,
        )

        # Run fx_quant in lpot and save the quantized GraphModule.
        model.eval()
        quantizer = Quantization('fx_dynamic_yaml.yaml')
        quantizer.model = common.Model(model, **{'a': 1})
        q_model = quantizer()
        q_model.save('./saved_dynamic_fx')

        # Load configuration and weights via lpot.utils.
        model_fx = load("./saved_dynamic_fx", model, **{'a': 1})
        if version >= '1.8':
            self.assertTrue(
                isinstance(model_fx, torch.fx.graph_module.GraphModule))
        else:
            self.assertTrue(
                isinstance(model_fx, torch._fx.graph_module.GraphModule))
def test_bf16_rnn(self):
    os.environ['FORCE_BF16'] = '1'

    inp = tf.keras.layers.Input(shape=(None, 4))
    lstm_1 = tf.keras.layers.LSTM(units=10, return_sequences=True)(inp)
    dropout_1 = tf.keras.layers.Dropout(0.2)(lstm_1)
    lstm_2 = tf.keras.layers.LSTM(units=10, return_sequences=False)(dropout_1)
    dropout_2 = tf.keras.layers.Dropout(0.2)(lstm_2)
    out = tf.keras.layers.Dense(1)(dropout_2)
    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(loss="mse", optimizer=tf.keras.optimizers.RMSprop())

    # input_names = [t.name.split(":")[0] for t in model.inputs]
    output_names = [t.name.split(":")[0] for t in model.outputs]

    q_data = np.random.randn(64, 10, 4)
    label = np.random.randn(64, 1)
    model.predict(q_data)

    sess = tf.keras.backend.get_session()
    graph = sess.graph

    from tensorflow.python.framework import graph_util
    graph_def = graph_util.convert_variables_to_constants(
        sess,
        graph.as_graph_def(),
        output_names,
    )
    quant_data = (q_data, label)
    evl_data = (q_data, label)

    from lpot.experimental import Quantization, common
    quantizer = Quantization('fake_bf16_rnn.yaml')
    quantizer.calib_dataloader = common.DataLoader(
        dataset=list(zip(quant_data[0], quant_data[1])))
    quantizer.eval_dataloader = common.DataLoader(
        dataset=list(zip(evl_data[0], evl_data[1])))
    quantizer.model = graph_def
    quantized_model = quantizer()

    convert_to_bf16_flag = False
    for i in quantized_model.graph_def.node:
        if i.name == 'lstm/while/MatMul_3' and \
                i.attr['T'].type == dtypes.bfloat16.as_datatype_enum:
            convert_to_bf16_flag = True
    self.assertEqual(convert_to_bf16_flag, True)
def run(self): """ This is lpot function include tuning and benchmark option """ if self.args.tune: from lpot.experimental import Quantization, common quantizer = Quantization(self.args.config) quantizer.model = common.Model(self.args.input_graph) q_model = quantizer() q_model.save(self.args.output_graph) if self.args.benchmark: from lpot.experimental import Benchmark, common evaluator = Benchmark(self.args.config) evaluator.model = common.Model(self.args.input_graph) evaluator(self.args.mode)
def main(_):
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    if FLAGS.benchmark:
        from lpot.experimental import Benchmark
        evaluator = Benchmark(FLAGS.config)
        evaluator.model = FLAGS.input_model
        evaluator(FLAGS.mode)
    elif FLAGS.tune:
        from lpot.experimental import Quantization
        quantizer = Quantization(FLAGS.config)
        quantizer.model = FLAGS.input_model
        q_model = quantizer()
        q_model.save(FLAGS.output_model)
def main(_): arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument("--input-graph", help='Specify the slim model', dest='input_graph') arg_parser.add_argument("--output-graph", help='Specify tune result model save dir', dest='output_graph') arg_parser.add_argument("--config", default=None, help="tuning config") arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use lpot to tune.') args = arg_parser.parse_args() factory = TFSlimNetsFactory() # user specific model can register to slim net factory input_shape = [None, 299, 299, 3] factory.register('inception_v4', inception_v4, input_shape, inception_v4_arg_scope) if args.tune: from lpot.experimental import Quantization quantizer = Quantization(args.config) quantizer.model = args.input_graph q_model = quantizer() q_model.save(args.output_graph) if args.benchmark: from lpot.experimental import Benchmark evaluator = Benchmark(args.config) evaluator.model = args.input_graph evaluator(args.mode)
def test_quantizate(self):
    from lpot.experimental import Quantization, common

    for fake_yaml in ["static_yaml.yaml", "dynamic_yaml.yaml"]:
        quantizer = Quantization(fake_yaml)
        quantizer.calib_dataloader = self.cv_dataloader
        quantizer.eval_dataloader = self.cv_dataloader
        quantizer.model = common.Model(self.rn50_model)
        q_model = quantizer()
        eval_func(q_model)

    for fake_yaml in ["non_MSE_yaml.yaml"]:
        quantizer = Quantization(fake_yaml)
        quantizer.calib_dataloader = self.cv_dataloader
        quantizer.eval_dataloader = self.cv_dataloader
        quantizer.model = common.Model(self.mb_v2_model)
        q_model = quantizer()
        eval_func(q_model)
def auto_tune(self):
    """Run lpot tuning to generate a quantized pb.

    Returns:
        graph: the quantized pb.
    """
    from lpot.experimental import Quantization

    infer_graph = load_graph(self.args.input_graph)
    quantizer = Quantization(self.args.config)
    if self.args.calib_data:
        quantizer.model = infer_graph
        quantizer.calib_dataloader = Dataloader(self.args.calib_data,
                                                self.args.batch_size)
        quantizer.eval_func = self.eval_inference
        q_model = quantizer()
        return q_model
    else:
        print("Please provide a calibration dataset!")
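For reference, a sketch of the eval_func contract that auto_tune() relies on: lpot calls the supplied callable with the candidate model and expects a single accuracy-style scalar back (higher is better). The dataloader loop and top-1 metric below are illustrative assumptions, not the original eval_inference implementation.

def make_eval_func(dataloader):
    # Returns a callable matching lpot's eval_func signature:
    # it receives the model and returns one scalar score.
    def eval_inference(model):
        correct, total = 0, 0
        for inputs, labels in dataloader:
            outputs = model(inputs)  # run the candidate model
            correct += int((outputs.argmax(-1) == labels).sum())
            total += len(labels)
        return correct / total  # accuracy fed back to the tuner
    return eval_inference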
def test_quantization_saved(self):
    for fake_yaml in [
            'dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml'
    ]:
        if fake_yaml == 'dynamic_yaml.yaml':
            model = torchvision.models.resnet18()
        else:
            model = copy.deepcopy(self.model)
        if fake_yaml == 'ptq_yaml.yaml':
            model.eval().fuse_model()
        quantizer = Quantization(fake_yaml)
        dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
        quantizer.model = common.Model(model)
        if fake_yaml == 'qat_yaml.yaml':
            quantizer.q_func = q_func
        else:
            quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        q_model = quantizer()
        q_model.save('./saved')
        # Load the configuration and weights via lpot.utils.
        saved_model = load("./saved", model)
        eval_func(saved_model)
        shutil.rmtree('./saved', ignore_errors=True)

    from lpot.experimental import Benchmark
    evaluator = Benchmark('ptq_yaml.yaml')
    # Load the configuration and weights via lpot.model.
    evaluator.model = common.Model(model)
    evaluator.b_dataloader = common.DataLoader(dataset)
    evaluator()
    evaluator.model = common.Model(model)
    evaluator()
def test_quantization_saved(self):
    from lpot.utils.pytorch import load

    for fake_yaml in [
            'dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml'
    ]:
        if fake_yaml == 'dynamic_yaml.yaml':
            model = torchvision.models.resnet18()
        else:
            model = copy.deepcopy(self.model)
        if fake_yaml == 'ptq_yaml.yaml':
            model.eval().fuse_model()
        quantizer = Quantization(fake_yaml)
        dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
        quantizer.model = common.Model(model)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        if fake_yaml == 'qat_yaml.yaml':
            quantizer.q_func = q_func
        q_model = quantizer()
        q_model.save('./saved')
        # Load the configuration and weights via lpot.utils.
        saved_model = load("./saved", model)
        eval_func(saved_model)

    from lpot.experimental import Benchmark
    evaluator = Benchmark('ptq_yaml.yaml')
    # Load the configuration and weights via lpot.model.
    evaluator.model = common.Model(model)
    evaluator.b_dataloader = common.DataLoader(dataset)
    results = evaluator()
    evaluator.model = common.Model(model)
    fp32_results = evaluator()
    self.assertTrue(
        (fp32_results['accuracy'][0] - results['accuracy'][0]) < 0.01)
def tune_model(
    input_graph: str,
    output_graph: str,
    config: str,
    framework: str,
) -> None:
    """Execute tuning."""
    from lpot.experimental import Quantization, common

    if framework == "onnxrt":
        import onnx
        input_graph = onnx.load(input_graph)

    quantizer = Quantization(config)
    quantizer.model = common.Model(input_graph)
    quantized_model = quantizer()
    quantized_model.save(output_graph)
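A hypothetical invocation of tune_model() above; the file names are placeholders. For the "onnxrt" framework the function loads the graph with onnx.load() before wrapping it, while other frameworks pass the path straight to common.Model.

tune_model(
    input_graph="./model.onnx",   # placeholder path
    output_graph="./model_int8",  # where the tuned model is saved
    config="./conf.yaml",         # lpot tuning configuration
    framework="onnxrt",
)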
def main():
    from lpot.experimental import Quantization, common

    quantizer = Quantization('./conf.yaml')
    quantizer.model = common.Model("./mobilenet_v1_1.0_224_frozen.pb")
    quantized_model = quantizer()

    # Optional: run benchmark
    from lpot.experimental import Benchmark
    evaluator = Benchmark('./conf.yaml')
    evaluator.model = common.Model(quantized_model.graph_def)
    results = evaluator()
    batch_size = 1
    for mode, result in results.items():
        acc, batch_size, result_list = result
        latency = np.array(result_list).mean() / batch_size
        print('Accuracy is {:.3f}'.format(acc))
        print('Latency: {:.3f} ms'.format(latency * 1000))
def test_tensor_dump(self):
    model = copy.deepcopy(self.lpot_model)
    model.model.eval().fuse_model()
    quantizer = Quantization('ptq_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
    dataloader = common.DataLoader(dataset)
    dataloader = common._generate_common_dataloader(dataloader, 'pytorch')
    self.adaptor.inspect_tensor(model,
                                dataloader,
                                op_list=['conv1', 'layer1.0.conv1'],
                                iteration_list=[1, 2],
                                weights=True,
                                save_to_disk=True)
    load_array = lambda *a, **k: np.load(*a, allow_pickle=True, **k)
    a = load_array('dump_tensor/activation_iter1.npz')
    w = load_array('dump_tensor/weight.npz')
    self.assertTrue(w['conv1'].item()['conv1.0.weight'].shape[0] ==
                    a['conv1'].item()['conv1.output0'].shape[1])
    shutil.rmtree('./dump_tensor', ignore_errors=True)
def test_performance(self):
    from lpot.data import DATASETS
    dataset = DATASETS('tensorflow')['dummy']((100, 256, 256, 1), label=True)

    from lpot.experimental import Quantization, common
    from lpot.utils.utility import get_size

    quantizer = Quantization('fake_yaml.yaml')
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer.model = self.constant_graph
    q_model = quantizer()

    from lpot.experimental import Benchmark, common
    benchmarker = Benchmark('fake_yaml.yaml')
    benchmarker.b_dataloader = common.DataLoader(dataset)
    benchmarker.model = self.constant_graph_1
    benchmarker()
def main(_):
    graph = load_graph(FLAGS.input_graph)
    if FLAGS.mode == 'tune':
        from lpot.experimental import Quantization, common
        quantizer = Quantization(FLAGS.config)
        ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file,
                     FLAGS.vocab_file)
        quantizer.calib_dataloader = common.DataLoader(
            ds, collate_fn=collate_fn, batch_size=FLAGS.batch_size)
        quantizer.model = common.Model(graph)
        quantizer.eval_func = eval_func
        q_model = quantizer()
        try:
            q_model.save(FLAGS.output_model)
        except Exception as e:
            print("Failed to save model due to {}".format(str(e)))
    elif FLAGS.mode == 'benchmark':
        eval_func(graph, FLAGS.iters)
    elif FLAGS.mode == 'accuracy':
        eval_func(graph, -1)
def test_set_tensor(self):
    quantizer = Quantization("static.yaml")
    quantizer.calib_dataloader = self.cv_dataloader
    quantizer.eval_dataloader = self.cv_dataloader
    quantizer.model = common.Model(self.mb_v2_model)
    q_model = quantizer()

    framework_specific_info = {
        "device": "cpu",
        "approach": "post_training_static_quant",
        "random_seed": 1234,
        "q_dataloader": None,
        "backend": "qlinearops",
        "workspace_path": './lpot_workspace/{}/{}/'.format(
            'onnxrt', 'imagenet')
    }
    framework = "onnxrt_qlinearops"
    adaptor = FRAMEWORKS[framework](framework_specific_info)
    q_config = {
        'fused Conv_0': {
            'weight': {
                'granularity': 'per_channel',
                'dtype': onnx_proto.TensorProto.INT8
            }
        }
    }
    adaptor.q_config = q_config
    adaptor.set_tensor(q_model, {
        'ConvBnFusion_W_features.0.0.weight':
            np.random.random([32, 3, 3, 3])
    })
    adaptor.set_tensor(q_model, {
        'ConvBnFusion_BN_B_features.0.1.bias': np.random.random([32])
    })
def main():
    arg_parser = ArgumentParser(description='Parse args')
    arg_parser.add_argument('--benchmark',
                            action='store_true',
                            help='run benchmark')
    arg_parser.add_argument('--tune',
                            action='store_true',
                            help='run tuning')
    args = arg_parser.parse_args()

    if args.tune:
        from lpot.experimental import Quantization, common
        quantizer = Quantization('./conf.yaml')
        quantizer.model = common.Model("./mobilenet_v1_1.0_224_frozen.pb")
        quantized_model = quantizer()
        quantized_model.save('./int8.pb')

    if args.benchmark:
        from lpot.experimental import Benchmark, common
        evaluator = Benchmark('./conf.yaml')
        evaluator.model = common.Model('int8.pb')
        evaluator(mode='accuracy')
def test_matmul_with_nan(self):
    g = tf.Graph()
    with g.as_default():
        x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
        nan_array = np.empty((2, 2), dtype=np.float32)
        nan_array[:] = np.NaN
        x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
        z = tf.matmul(x, nan_array, name='no_quant_matmul')
        z = tf.identity(z, name='op_to_store')
        found_quantized_matmul = True

        with tf.Session() as sess:
            sess.run(z, feed_dict={x: x_data})
            float_graph_def = sess.graph.as_graph_def()

            from lpot.experimental import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset,
                                                           batch_size=2)
            quantizer.eval_dataloader = common.DataLoader(dataset,
                                                          batch_size=2)
            quantizer.model = float_graph_def
            output_graph = quantizer()

            for i in output_graph.graph_def.node:
                if i.op == 'MatMul':
                    found_quantized_matmul = False
                    break
            self.assertEqual(found_quantized_matmul, False)
def test_fold_pad_conv(self):
    x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
    x_pad = tf.pad(x, paddings, "CONSTANT")
    conv_weights = tf.compat.v1.get_variable(
        "weight", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1],
                        padding="VALID")
    normed = tf.compat.v1.layers.batch_normalization(conv)
    relu = tf.nn.relu(normed, name='op_to_store')
    out_name = relu.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        output_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot.experimental import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16),
                                    label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = output_graph_def
        output_graph = quantizer()

        found_pad = False
        if tf.__version__ >= "2.0.0":
            for i in output_graph.graph_def.node:
                if i.op == 'Pad':
                    found_pad = True
                    break
            self.assertEqual(found_pad, True)
def test_conv_fusion_with_max_pooling(self):
    x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    relu = tf.nn.relu(x)
    pooling = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1],
                             padding="SAME")
    conv_weights = tf.compat.v1.get_variable(
        "weight2", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv = tf.nn.conv2d(pooling, conv_weights, strides=[1, 2, 2, 1],
                        padding="VALID")
    biasadd = tf.compat.v1.layers.batch_normalization(conv,
                                                      name='op_to_store')
    out_name = biasadd.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        output_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot.experimental import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16),
                                    label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = output_graph_def
        output_graph = quantizer()

        quantized_pool_data_type = None
        quantized_conv_data_type = None
        for i in output_graph.graph_def.node:
            if i.op.find("QuantizedMaxPool") != -1:
                quantized_pool_data_type = i.attr['T'].type
            if i.op.find("QuantizedConv2D") != -1:
                quantized_conv_data_type = i.attr['Tinput'].type

        self.assertNotEqual(quantized_pool_data_type, None)
        self.assertEqual(quantized_pool_data_type, quantized_conv_data_type)
def test_disable_matmul_fusion(self):
    g = tf.Graph()
    with g.as_default():
        x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
        # np.float32 instead of the removed np.float alias
        y_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
        x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
        y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
        z = tf.matmul(x, y, name='no_quant_matmul')
        z = tf.nn.relu6(z, name='op_to_store')
        found_quantized_matmul = False

        with tf.Session() as sess:
            sess.run(z, feed_dict={x: x_data, y: y_data})
            float_graph_def = sess.graph.as_graph_def()

            from lpot.experimental import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset,
                                                           batch_size=2)
            quantizer.eval_dataloader = common.DataLoader(dataset,
                                                          batch_size=2)
            quantizer.model = float_graph_def
            output_graph = quantizer()

            for i in output_graph.graph_def.node:
                if i.op == 'QuantizedMatMulWithBiasAndDequantize' and \
                        i.name == 'op_to_store':
                    found_quantized_matmul = True
                    break
            self.assertEqual(found_quantized_matmul, False)
def test_first_matmul_biasadd_relu_fusion(self):
    x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
    # np.float32 instead of the removed np.float alias
    y_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
    x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
    y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
    z = tf.matmul(x, y)
    z = tf.nn.bias_add(z, [1, 2])
    z = tf.nn.relu(z, name='op_to_store')

    with tf.Session() as sess:
        sess.run(z, feed_dict={x: x_data, y: y_data})
        float_graph_def = sess.graph.as_graph_def()

        from lpot.experimental import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.model = float_graph_def
        output_graph = quantizer()

        found_quantized_matmul = False
        for i in output_graph.graph_def.node:
            if i.op == 'QuantizeV2' and \
                    i.name == 'MatMul_eightbit_quantize_x' and \
                    i.attr["T"].type == dtypes.quint8:
                found_quantized_matmul = True
                break
        self.assertEqual(found_quantized_matmul, True)
def test_loss_calculation(self):
    from lpot.strategy.tpe import TpeTuneStrategy
    from lpot.experimental import Quantization, common

    quantizer = Quantization('fake_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer.model = self.constant_graph

    testObject = TpeTuneStrategy(quantizer.model, quantizer.conf,
                                 quantizer.calib_dataloader)
    testObject._calculate_loss_function_scaling_components(
        0.01, 2, testObject.loss_function_config)
    # Check that the latency difference between min and max corresponds to
    # 10 points of the loss function.
    tmp_val = testObject.calculate_loss(0.01, 2,
                                        testObject.loss_function_config)
    tmp_val2 = testObject.calculate_loss(0.01, 1,
                                         testObject.loss_function_config)
    self.assertTrue(True if int(tmp_val2 - tmp_val) == 10 else False)
    # Check that a 1% accuracy difference corresponds to 10 points of the
    # loss function.
    tmp_val = testObject.calculate_loss(0.02, 2,
                                        testObject.loss_function_config)
    tmp_val2 = testObject.calculate_loss(0.03, 2,
                                         testObject.loss_function_config)
    self.assertTrue(True if int(tmp_val2 - tmp_val) == 10 else False)