def test_disable_matmul_fusion(self):
    """Check that MatMul followed by Relu6 is not fused into a quantized MatMul.

    Builds an ``x @ y -> relu6`` graph, runs the lpot ``Quantization`` flow
    configured by ``fake_yaml.yaml`` and asserts that the result contains no
    ``QuantizedMatMulWithBiasAndDequantize`` node named ``op_to_store``.
    """
    g = tf.Graph()
    with g.as_default():
        x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
        # ``np.float`` was only an alias of the builtin float (float64); it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use the real dtype.
        y_data = np.array([[1, 2], [3, 4]], dtype=np.float64)
        x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
        y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
        z = tf.matmul(x, y, name='no_quant_matmul')
        z = tf.nn.relu6(z, name='op_to_store')

        with tf.Session() as sess:
            sess.run(z, feed_dict={x: x_data, y: y_data})
            float_graph_def = sess.graph.as_graph_def()

            from lpot import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
            quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
            quantizer.model = float_graph_def
            output_graph = quantizer()

            found_quantized_matmul = any(
                node.op == 'QuantizedMatMulWithBiasAndDequantize'
                and node.name == 'op_to_store'
                for node in output_graph.graph_def.node)
            self.assertFalse(found_quantized_matmul)
def test_first_matmul_biasadd_relu_fusion(self):
    """Check that the input of the first MatMul is quantized to quint8.

    Builds ``matmul -> bias_add -> relu``, quantizes it with the lpot flow
    configured by ``fake_yaml.yaml`` and asserts that a ``QuantizeV2`` node
    named ``MatMul_eightbit_quantize_x`` with ``T == quint8`` is present.
    """
    x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
    # ``np.float`` was only an alias of the builtin float (float64); it was
    # deprecated in NumPy 1.20 and removed in 1.24, so use the real dtype.
    y_data = np.array([[1, 2], [3, 4]], dtype=np.float64)
    x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
    y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
    z = tf.matmul(x, y)
    z = tf.nn.bias_add(z, [1, 2])
    z = tf.nn.relu(z, name='op_to_store')

    with tf.Session() as sess:
        sess.run(z, feed_dict={x: x_data, y: y_data})
        float_graph_def = sess.graph.as_graph_def()

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.model = float_graph_def
        output_graph = quantizer()

        found_quantized_matmul = any(
            node.op == 'QuantizeV2'
            and node.name == 'MatMul_eightbit_quantize_x'
            and node.attr["T"].type == dtypes.quint8
            for node in output_graph.graph_def.node)
        self.assertTrue(found_quantized_matmul)
def test_loss_calculation(self):
    """Check the scaling of the TPE strategy loss function.

    After ``_calculate_loss_function_scaling_components(0.01, 2, ...)`` the
    test expects two pairs of ``calculate_loss`` results to differ by 10
    points each (after truncation to int).
    """
    from lpot.strategy.tpe import TpeTuneStrategy
    from lpot import Quantization, common

    quantizer = Quantization('fake_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer.model = self.constant_graph

    strategy = TpeTuneStrategy(quantizer.model, quantizer.conf,
                               quantizer.calib_dataloader)
    strategy._calculate_loss_function_scaling_components(
        0.01, 2, strategy.loss_function_config)

    # check if latency difference between min and max corresponds to 10 points of loss function
    first = strategy.calculate_loss(0.01, 2, strategy.loss_function_config)
    second = strategy.calculate_loss(0.01, 1, strategy.loss_function_config)
    # assertEqual reports the actual difference on failure, unlike assertTrue(bool).
    self.assertEqual(int(second - first), 10)

    # check if 1% of acc difference corresponds to 10 points of loss function
    first = strategy.calculate_loss(0.02, 2, strategy.loss_function_config)
    second = strategy.calculate_loss(0.03, 2, strategy.loss_function_config)
    self.assertEqual(int(second - first), 10)
def test_tuning_ipex(self):
    """Quantize resnet18 through the ``pytorch_ipex`` model wrapper, then benchmark it.

    The quantized model is saved to ``./saved``, reloaded through
    ``MODELS['pytorch_ipex']`` with that workspace path, converted to
    TorchScript (scripting first, tracing as fallback) and benchmarked.
    """
    from lpot import Quantization
    model = torchvision.models.resnet18()
    model = MODELS['pytorch_ipex'](model)
    quantizer = Quantization('ipex_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
    quantizer.model = common.Model(model)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    lpot_model = quantizer()
    lpot_model.save("./saved")

    new_model = MODELS['pytorch_ipex'](model.model,
                                       {"workspace_path": "./saved"})
    new_model.model.to(ipex.DEVICE)
    try:
        script_model = torch.jit.script(new_model.model)
    except Exception:
        # Fall back to tracing when scripting fails. A bare ``except:`` would
        # also swallow KeyboardInterrupt/SystemExit, so catch Exception only.
        script_model = torch.jit.trace(
            new_model.model,
            torch.randn(10, 3, 224, 224).to(ipex.DEVICE))

    from lpot import Benchmark
    evaluator = Benchmark('ipex_yaml.yaml')
    evaluator.model = common.Model(script_model)
    evaluator.b_dataloader = common.DataLoader(dataset)
    evaluator()
def test_quantization_saved(self):
    """Quantize with the QAT and PTQ configs, save and reload, then benchmark.

    For each config the quantized model is saved to ``./saved``, reloaded with
    ``lpot.utils.pytorch.load`` and passed to ``eval_func``. Afterwards the
    model is benchmarked twice with ``ptq_yaml.yaml`` and the two accuracies
    are compared.
    """
    from lpot.utils.pytorch import load

    model = copy.deepcopy(self.model)
    for config_file in ['qat_yaml.yaml', 'ptq_yaml.yaml']:
        is_qat = config_file == 'qat_yaml.yaml'
        if config_file == 'ptq_yaml.yaml':
            model.eval().fuse_model()
        quantizer = Quantization(config_file)
        dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
        quantizer.model = common.Model(model)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        if is_qat:
            quantizer.q_func = q_func
        quantized = quantizer()
        quantized.save('./saved')
        # Load configure and weights by lpot.utils
        reloaded = load("./saved", model)
        eval_func(reloaded)

    from lpot import Benchmark
    evaluator = Benchmark('ptq_yaml.yaml')
    # Load configure and weights by lpot.model
    evaluator.model = common.Model(model)
    evaluator.b_dataloader = common.DataLoader(dataset)
    results = evaluator()
    # NOTE(review): both benchmark runs wrap the same ``model`` object — confirm
    # that this is the intended fp32 baseline.
    evaluator.model = common.Model(model)
    fp32_results = evaluator()
    accuracy_drop = fp32_results['accuracy'][0] - results['accuracy'][0]
    self.assertTrue(accuracy_drop < 0.01)
def test_dump_tensor_to_disk(self):
    """Check that quantization writes min/max print markers to the calibration log.

    Runs the lpot flow configured by ``fake_yaml.yaml`` on ``self.constant_graph``
    and asserts that ``self.calibration_log_path`` exists, has more than one
    line, and contains both ``__print__;__min`` and ``__print__;__max``.
    """
    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()

    from lpot import Quantization, common
    quantizer = Quantization('fake_yaml.yaml')
    dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer.model = self.constant_graph
    quantizer()

    # Assert existence before opening: once open() has succeeded the check
    # can never fail, and a missing log would surface as FileNotFoundError
    # instead of a test failure.
    self.assertTrue(os.path.exists(self.calibration_log_path))
    with open(self.calibration_log_path) as f:
        data = f.readlines()

    found_min_str = any('__print__;__min' in line for line in data)
    found_max_str = any('__print__;__max' in line for line in data)

    self.assertGreater(len(data), 1)
    self.assertTrue(found_min_str)
    self.assertTrue(found_max_str)
def test_ru_mse_max_trials(self):
    """Run the flow configured by ``fake_yaml2.yaml`` on ``self.constant_graph``.

    The test has no assertions; it passes when quantization completes without
    raising.
    """
    from lpot import Quantization, common

    tuner = Quantization('fake_yaml2.yaml')
    dummy_data = tuner.dataset('dummy', (100, 3, 3, 1), label=True)
    tuner.calib_dataloader = common.DataLoader(dummy_data)
    tuner.eval_dataloader = common.DataLoader(dummy_data)
    tuner.model = self.constant_graph
    tuner()
def test_autodump(self):
    """Run the flow configured by ``fake_yaml3.yaml`` on ``self.constant_graph``.

    The test has no assertions; it passes when quantization completes without
    raising.
    """
    from lpot import Quantization, common

    tuner = Quantization('fake_yaml3.yaml')
    dummy_data = tuner.dataset('dummy', shape=(100, 3, 3, 1), label=True)
    tuner.eval_dataloader = common.DataLoader(dummy_data)
    tuner.calib_dataloader = common.DataLoader(dummy_data)
    tuner.model = self.constant_graph
    tuner()
def test_conv_fusion_with_last_matmul(self):
    """Quantize a conv/bn/relu/pool graph ending in two MatMuls and look for QuantizeV2.

    The graph is frozen with ``convert_variables_to_constants`` and quantized
    with the flow configured by ``fake_yaml.yaml``.
    """
    inputs = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    first_relu = tf.nn.relu(inputs)
    kernel = tf.compat.v1.get_variable(
        "weight", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_out = tf.nn.conv2d(first_relu, kernel, strides=[1, 2, 2, 1], padding="VALID")
    bn_out = tf.compat.v1.layers.batch_normalization(conv_out)
    second_relu = tf.nn.relu(bn_out)
    pooled = tf.nn.max_pool(second_relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME")
    flat = tf.reshape(pooled, [-1, 3136])

    first_weights = tf.constant(np.random.random([3136, 1]),
                                dtype=tf.float32, shape=[3136, 1])
    first_matmul = tf.matmul(flat, first_weights)
    second_weights = tf.constant(np.random.random([1, 1]),
                                 dtype=tf.float32, shape=[1, 1])
    second_matmul = tf.matmul(first_matmul, second_weights)
    stored = tf.nn.relu6(second_matmul, name='op_to_store')
    out_name = stored.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        frozen_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = frozen_graph_def
        output_graph = quantizer()

        # NOTE(review): the counting loop this replaces stopped at the first
        # match, so this has always been a presence check, not "exactly one".
        has_quantize_v2 = any(
            node.op == 'QuantizeV2' for node in output_graph.graph_def.node)
        self.assertTrue(has_quantize_v2)
def test_tensorflow_graph_meta_pass(self):
    """Quantize two conv blocks joined by squeeze/reshape and count (de)quantize ops.

    Asserts that the quantized graph contains exactly one ``QuantizeV2`` node
    and exactly one ``Dequantize`` node.
    """
    inputs = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    first_relu = tf.nn.relu(inputs)
    kernel_a = tf.compat.v1.get_variable(
        "weight", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_a = tf.nn.conv2d(first_relu, kernel_a, strides=[1, 2, 2, 1], padding="VALID")
    bn_a = tf.compat.v1.layers.batch_normalization(conv_a)
    relu_a = tf.nn.relu(bn_a)
    squeezed = tf.squeeze(relu_a, [0])
    restored = tf.reshape(squeezed, [1, 27, 27, 16])
    kernel_b = tf.compat.v1.get_variable(
        "weight2", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_b = tf.nn.conv2d(restored, kernel_b, strides=[1, 2, 2, 1], padding="VALID")
    bn_b = tf.compat.v1.layers.batch_normalization(conv_b)
    stored = tf.nn.relu6(bn_b, name='op_to_store')
    out_name = stored.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        frozen_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.model = frozen_graph_def
        output_graph = quantizer()

        op_types = [node.op for node in output_graph.graph_def.node]
        self.assertEqual(op_types.count('QuantizeV2'), 1)
        self.assertEqual(op_types.count('Dequantize'), 1)
def test_run_bayesian_max_trials(self):
    """Run the flow configured by ``fake_yaml2.yaml`` on ``self.test_graph``.

    The test has no assertions; it passes when quantization completes without
    raising.
    """
    from lpot import Quantization, common

    tuner = Quantization('fake_yaml2.yaml')
    dummy_data = tuner.dataset('dummy', shape=(1, 224, 224, 3), label=True)
    tuner.eval_dataloader = common.DataLoader(dummy_data)
    tuner.calib_dataloader = common.DataLoader(dummy_data)
    tuner.model = self.test_graph
    tuner()
def test_conv_biasadd_addv2_relu_fusion(self):
    """Check that two conv+bn branches joined by AddV2 and Relu get fused.

    Asserts that the quantized graph contains a
    ``QuantizedConv2DWithBiasSignedSumAndReluAndRequantize`` node.
    """
    inputs = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    first_relu = tf.nn.relu(inputs)
    pad_spec = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
    padded = tf.pad(first_relu, pad_spec, "CONSTANT")
    kernel_a = tf.compat.v1.get_variable(
        "weight", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_a = tf.nn.conv2d(padded, kernel_a, strides=[1, 2, 2, 1], padding="VALID")
    bn_a = tf.compat.v1.layers.batch_normalization(conv_a)
    kernel_b = tf.compat.v1.get_variable(
        "weight2", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_b = tf.nn.conv2d(first_relu, kernel_b, strides=[1, 2, 2, 1], padding="SAME")
    bn_b = tf.compat.v1.layers.batch_normalization(conv_b)
    summed = tf.raw_ops.AddV2(x=bn_a, y=bn_b, name='addv2')
    summed_relu = tf.nn.relu(summed)
    stored = tf.nn.relu6(summed_relu, name='op_to_store')
    out_name = stored.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        frozen_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = frozen_graph_def
        output_graph = quantizer()

        fused_op = 'QuantizedConv2DWithBiasSignedSumAndReluAndRequantize'
        found_conv_fusion = any(
            node.op == fused_op for node in output_graph.graph_def.node)
        self.assertTrue(found_conv_fusion)
def test_run_basic_one_trial(self):
    """Run the flow configured by ``fake_yaml.yaml`` and check its eval output directory.

    After quantizing ``self.constant_graph`` the test expects ``./runs/eval``
    to contain more than two entries.
    """
    from lpot import Quantization, common

    quantizer = Quantization('fake_yaml.yaml')
    dataset = quantizer.dataset('dummy', (1, 224, 224, 3), label=True)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer.model = self.constant_graph
    quantizer()

    # assertGreater reports the actual entry count on failure, which
    # ``assertTrue(True if ... else False)`` did not.
    self.assertGreater(len(os.listdir("./runs/eval")), 2)
def test_disable_scale_propagation(self):
    """Check that the two fused convolutions keep distinct frozen max inputs.

    Quantizes conv/bn/relu -> avg_pool -> conv/bias/relu with
    ``fake_yaml_disable_scale_propagation.yaml``, collects the last input of
    every ``QuantizedConv2DWithBiasAndReluAndRequantize`` node and expects two
    distinct names.
    """
    inputs = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input")
    kernel = tf.compat.v1.get_variable(
        "weight", [2, 2, 1, 1],
        initializer=tf.compat.v1.random_normal_initializer())
    bias = tf.compat.v1.get_variable(
        "bias", [1],
        initializer=tf.compat.v1.random_normal_initializer())

    first_relu = tf.nn.relu(inputs)
    # Both convolutions reuse the same weights and request the same op name.
    conv_a = tf.nn.conv2d(first_relu, kernel, strides=[1, 2, 2, 1],
                          padding="SAME", name='last')
    bn_a = tf.compat.v1.layers.batch_normalization(conv_a)
    relu_a = tf.nn.relu(bn_a)
    pooled = tf.nn.avg_pool(relu_a, ksize=1, strides=[1, 2, 2, 1], padding="SAME")
    conv_b = tf.nn.conv2d(pooled, kernel, strides=[1, 2, 2, 1],
                          padding="SAME", name='last')
    biased = tf.nn.bias_add(conv_b, bias)
    relu_b = tf.nn.relu(biased)
    final_node = tf.nn.relu(relu_b, name='op_to_store')
    out_name = final_node.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        frozen_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml_disable_scale_propagation.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.model = frozen_graph_def
        output_graph = quantizer()

        last_inputs = {
            node.input[-1]
            for node in output_graph.graph_def.node
            if node.op == 'QuantizedConv2DWithBiasAndReluAndRequantize'
        }
        self.assertEqual(2, len(last_inputs))
def test_bf16_rnn(self):
    """Check that a MatMul inside the first LSTM is converted to bfloat16.

    Builds a two-layer Keras LSTM model, freezes it, runs the lpot flow
    configured by ``fake_bf16_rnn.yaml`` with ``FORCE_BF16=1`` and asserts that
    node ``lstm/while/MatMul_3`` has ``T == bfloat16``.
    """
    # NOTE(review): the environment variable is left set after the test.
    os.environ['FORCE_BF16'] = '1'

    layers = tf.keras.layers
    inp = layers.Input(shape=(None, 4))
    hidden = layers.LSTM(units=10, return_sequences=True)(inp)
    hidden = layers.Dropout(0.2)(hidden)
    hidden = layers.LSTM(units=10, return_sequences=False)(hidden)
    hidden = layers.Dropout(0.2)(hidden)
    out = layers.Dense(1)(hidden)
    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(loss="mse", optimizer=tf.keras.optimizers.RMSprop())

    output_names = [tensor.name.split(":")[0] for tensor in model.outputs]

    samples = np.random.randn(64, 10, 4)
    targets = np.random.randn(64, 1)
    model.predict(samples)

    sess = tf.keras.backend.get_session()
    from tensorflow.python.framework import graph_util
    graph_def = graph_util.convert_variables_to_constants(
        sess,
        sess.graph.as_graph_def(),
        output_names,
    )

    from lpot import Quantization, common
    quantizer = Quantization('fake_bf16_rnn.yaml')
    # Calibration and evaluation use the same (sample, target) pairs.
    quantizer.calib_dataloader = common.DataLoader(
        dataset=list(zip(samples, targets)))
    quantizer.eval_dataloader = common.DataLoader(
        dataset=list(zip(samples, targets)))
    quantizer.model = graph_def
    quantized_model = quantizer()

    converted_to_bf16 = any(
        node.name == 'lstm/while/MatMul_3'
        and node.attr['T'].type == dtypes.bfloat16.as_datatype_enum
        for node in quantized_model.graph_def.node)
    self.assertTrue(converted_to_bf16)
def test_fold_pad_conv2(self):
    """Check that a ``Pad`` node remains when the input feeds two pad+conv branches.

    Quantizes the two-branch graph with the flow configured by
    ``fake_yaml.yaml``. The ``Pad`` check only runs on TensorFlow 2.x and later.
    """
    x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
    x_pad = tf.pad(x, paddings, "CONSTANT")
    conv_weights = tf.compat.v1.get_variable(
        "weight", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID")
    normed = tf.compat.v1.layers.batch_normalization(conv)
    relu = tf.nn.relu(normed)

    paddings2 = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
    x_pad2 = tf.pad(x, paddings2, "CONSTANT")
    conv_weights2 = tf.compat.v1.get_variable(
        "weight2", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv2 = tf.nn.conv2d(x_pad2, conv_weights2, strides=[1, 2, 2, 1], padding="VALID")
    normed2 = tf.compat.v1.layers.batch_normalization(conv2)
    relu2 = tf.nn.relu(normed2)
    add = tf.math.add(relu, relu2, name='op_to_store')
    out_name = add.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        output_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = output_graph_def
        output_graph = quantizer()

        # Compare the major version numerically: a plain string comparison is
        # lexicographic, so e.g. "10.0.0" >= "2.0.0" would be False.
        tf_major = int(tf.__version__.split('.')[0])
        if tf_major >= 2:
            found_pad = any(
                node.op == 'Pad' for node in output_graph.graph_def.node)
            self.assertTrue(found_pad)
def test_no_input_output_config(self):
    """Quantize ``self.input_graph`` with ``fake_yaml.yaml`` and expect a non-empty graph."""
    analyzer = GraphAnalyzer()
    analyzer.graph = self.input_graph
    analyzer.parse_graph()
    float_graph_def = analyzer.dump_graph()

    from lpot import Quantization, common
    tuner = Quantization('fake_yaml.yaml')
    dummy_data = tuner.dataset('dummy', shape=(20, 224, 224, 3), label=True)
    tuner.calib_dataloader = common.DataLoader(dummy_data, batch_size=2)
    tuner.eval_dataloader = common.DataLoader(dummy_data, batch_size=2)
    tuner.model = float_graph_def
    quantized = tuner()

    node_count = len(quantized.graph_def.node)
    self.assertGreater(node_count, 0)
def test_tensor_dump(self):
    """Check that quantization with ``dump_yaml.yaml`` creates ``runs/eval/baseline_acc0.0``.

    The flow is run twice: first with ``eval_func`` only, then again after an
    ``eval_dataloader`` has also been set. The path must exist after each run.
    """
    model = copy.deepcopy(self.lpot_model)
    model.model.eval().fuse_model()

    quantizer = Quantization('dump_yaml.yaml')
    dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
    quantizer.model = common.Model(model.model)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.eval_func = eval_func
    quantizer()
    # os.path.exists already returns a bool; no ternary wrapper is needed.
    self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))

    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer()
    self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))
def test_invalid_input_output_config(self):
    """Quantize with ``fake_yaml_2.yaml`` and check the detected input/output names.

    The resulting model must not report ``['x']`` as its inputs nor
    ``['op_to_store']`` as its outputs.
    """
    analyzer = GraphAnalyzer()
    analyzer.graph = self.input_graph
    analyzer.parse_graph()
    float_graph_def = analyzer.dump_graph()

    from lpot import Quantization, common
    tuner = Quantization('fake_yaml_2.yaml')
    dummy_data = tuner.dataset('dummy', shape=(20, 224, 224, 3), label=True)
    tuner.calib_dataloader = common.DataLoader(dummy_data, batch_size=2)
    tuner.eval_dataloader = common.DataLoader(dummy_data, batch_size=2)
    tuner.model = float_graph_def
    quantized = tuner()

    # will detect the right inputs/outputs
    self.assertNotEqual(quantized.input_node_names, ['x'])
    self.assertNotEqual(quantized.output_node_names, ['op_to_store'])
def test_bf16_fallback(self):
    """Quantize ``self.test_graph`` with ``FORCE_BF16=1`` and expect at least one ``Cast`` node."""
    # NOTE(review): the environment variable is left set after the test.
    os.environ['FORCE_BF16'] = '1'

    from lpot import Quantization, common
    tuner = Quantization('fake_yaml.yaml')
    dummy_data = tuner.dataset('dummy', shape=(1, 224, 224, 3), label=True)
    tuner.eval_dataloader = common.DataLoader(dummy_data)
    tuner.calib_dataloader = common.DataLoader(dummy_data)
    tuner.model = self.test_graph
    quantized = tuner()

    cast_total = sum(1 for node in quantized.graph_def.node if node.op == 'Cast')
    self.assertGreaterEqual(cast_total, 1)
def test_enable_first_quantization(self):
    """Check that no fp32 ``Conv2D`` node remains after quantization.

    Uses a relu -> pad -> conv -> bn -> relu -> relu6 graph and the flow
    configured by ``fake_yaml_enable_first_quantization.yaml``.
    """
    inputs = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    first_relu = tf.nn.relu(inputs)
    pad_spec = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
    padded = tf.pad(first_relu, pad_spec, "CONSTANT")
    kernel = tf.compat.v1.get_variable(
        "weight", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_out = tf.nn.conv2d(padded, kernel, strides=[1, 2, 2, 1], padding="VALID")
    bn_out = tf.compat.v1.layers.batch_normalization(conv_out)
    second_relu = tf.nn.relu(bn_out)
    stored = tf.nn.relu6(second_relu, name='op_to_store')
    out_name = stored.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        frozen_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml_enable_first_quantization.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.model = frozen_graph_def
        output_graph = quantizer()

        found_fp32_conv = any(
            node.op == 'Conv2D' for node in output_graph.graph_def.node)
        self.assertFalse(found_fp32_conv)
def main():
    """Quantize the RFCN-ResNet101 model directory with ``./conf.yaml`` and dummy calibration data."""
    import lpot

    model_dir = './model/public/rfcn-resnet101-coco-tf/model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/'

    tuner = lpot.Quantization('./conf.yaml')
    dummy_data = tuner.dataset('dummy', shape=(100, 100, 100, 3), label=True)
    # NOTE(review): ``common`` is not imported inside this function; presumably
    # it is provided at module level — confirm.
    tuner.model = common.Model(model_dir)
    tuner.calib_dataloader = common.DataLoader(dummy_data)
    tuner()
def test_autosave(self):
    """Compare the quantized graph sizes of ``constant_graph`` and ``constant_graph_1``.

    The quantizer is invoked twice on ``self.constant_graph`` (the first result
    is discarded), then once on ``self.constant_graph_1``. The latter's session
    graph must be larger according to ``get_size``.
    """
    from lpot import Quantization, common
    from lpot.utils.utility import get_size

    tuner = Quantization('fake_yaml.yaml')
    dummy_data = tuner.dataset('dummy', (100, 256, 256, 1), label=True)
    tuner.calib_dataloader = common.DataLoader(dummy_data)
    tuner.eval_dataloader = common.DataLoader(dummy_data)
    tuner.model = self.constant_graph
    tuner()
    first_model = tuner()

    tuner.model = self.constant_graph_1
    second_model = tuner()

    size_delta = get_size(second_model.sess.graph) - get_size(first_model.sess.graph)
    self.assertTrue(size_delta > 0)
def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self):
    """Check that at most one MatMul is fused to ``QuantizedMatMulWithBiasAndDequantize``.

    Builds ``matmul -> bias_add -> bias_add -> matmul -> bias_add -> softmax``
    and quantizes it with the flow configured by ``fake_yaml.yaml``. On
    TensorFlow older than 2.2 quantization is skipped and the test passes.
    """
    g = tf.Graph()
    with g.as_default():
        x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
        # ``np.float`` was only an alias of the builtin float (float64); it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use the real dtype.
        y_data = np.array([[1, 2], [3, 4]], dtype=np.float64)
        x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
        y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
        z = tf.matmul(x, y)
        biasadd = tf.nn.bias_add(z, [1, 2])
        biasadd1 = tf.nn.bias_add(biasadd, [1, 1])

        y1 = tf.constant(x_data, dtype=tf.float32, shape=[2, 2])
        matmul1 = tf.matmul(biasadd1, y1)
        biasadd2 = tf.nn.bias_add(matmul1, [1, 1])

        z = tf.nn.softmax(biasadd2, name='op_to_store')
        found_quantized_matmul = False

        # Compare (major, minor) numerically: the string comparison
        # ``VERSION < "2.2.0"`` is lexicographic and is True for e.g. "2.10.0".
        tf_version = tuple(
            int(part) for part in tf.version.VERSION.split('.')[:2])
        if tf_version >= (2, 2):
            with tf.Session() as sess:
                sess.run(z, feed_dict={x: x_data, y: y_data})
                float_graph_def = sess.graph.as_graph_def()

                from lpot import Quantization, common
                quantizer = Quantization('fake_yaml.yaml')
                dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
                quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
                quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
                quantizer.model = float_graph_def
                output_graph = quantizer()

                # NOTE(review): nodes are read from ``output_graph.model`` here,
                # whereas sibling tests use ``output_graph.graph_def`` — confirm.
                count = sum(
                    1 for node in output_graph.model.node
                    if node.op == 'QuantizedMatMulWithBiasAndDequantize')
                found_quantized_matmul = count > 1

        self.assertFalse(found_quantized_matmul)
def test_register_metric_postprocess(self):
    """Register custom postprocess/metric objects on Benchmark and Quantization.

    A single image, resized and mean-subtracted, is benchmarked twice with a
    ``TensorflowTopK`` metric. Both runs are expected to report accuracy 0.0.
    """
    import PIL.Image
    from lpot import Benchmark, Quantization, common
    from lpot.data.transforms.imagenet_transform import LabelShift
    from lpot.metric.metric import TensorflowTopK

    raw_image = np.array(PIL.Image.open(self.image_path))
    resized = np.resize(raw_image, (224, 224, 3))
    mean = [123.68, 116.78, 103.94]
    resized = resized - mean
    images = np.expand_dims(resized, axis=0)
    labels = [768]

    # First run: benchmark with both a postprocess and a metric registered.
    evaluator = Benchmark('fake_yaml.yaml')
    evaluator.postprocess = common.Postprocess(LabelShift, 'label_benchmark',
                                               label_shift=1)
    evaluator.metric = common.Metric(TensorflowTopK, 'topk_benchmark')
    evaluator.b_dataloader = common.DataLoader(dataset=list(zip(images, labels)))
    evaluator.model = self.pb_path
    result = evaluator()
    acc, _batch_size, _result_list = result['accuracy']
    self.assertEqual(acc, 0.0)

    # Registration on a Quantization object; the quantizer itself is not run.
    quantizer = Quantization('fake_yaml.yaml')
    quantizer.postprocess = common.Postprocess(LabelShift, 'label_quantize',
                                               label_shift=1)
    quantizer.metric = common.Metric(TensorflowTopK, 'topk_quantize')

    # Second run: a fresh Benchmark with only a metric registered.
    evaluator = Benchmark('fake_yaml.yaml')
    evaluator.metric = common.Metric(TensorflowTopK, 'topk_second')
    evaluator.b_dataloader = common.DataLoader(dataset=list(zip(images, labels)))
    evaluator.model = self.pb_path
    result = evaluator()
    acc, _batch_size, _result_list = result['accuracy']
    self.assertEqual(acc, 0.0)
def test_tensorflow_concat_quantization(self):
    """Check that the inputs of a quantized concat share one frozen min/max.

    After quantizing the graph read from ``self.pb_path``, the target
    quantized-concat node must exist. The second-to-last and last inputs of
    each of its first four inputs must resolve to a single min value and a
    single max value.
    """
    frozen_graph_def = read_graph(self.pb_path)

    from lpot import Quantization, common
    quantizer = Quantization('fake_yaml.yaml')
    dataset = quantizer.dataset('dummy', shape=(100, 299, 299, 3), label=True)
    quantizer.eval_dataloader = common.DataLoader(dataset)
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantizer.model = frozen_graph_def
    output_graph = quantizer()

    target_concat_node_name = 'v0/cg/incept_v3_a0/concat_eightbit_quantized_concatv2'
    from lpot.adaptor.tf_utils.graph_rewriter.graph_util import GraphAnalyzer
    analyzer = GraphAnalyzer()
    analyzer.graph = output_graph.graph_def
    graph_info = analyzer.parse_graph()

    self.assertTrue(target_concat_node_name in graph_info)

    def first_float(node_name):
        # First float stored in the 'value' attribute of the named node.
        return graph_info[node_name].node.attr['value'].tensor.float_val[0]

    min_out, max_out = [], []
    concat_inputs = graph_info[target_concat_node_name].node.input[:4]
    for producer_name in concat_inputs:
        producer_inputs = graph_info[producer_name].node.input
        min_name = producer_inputs[-2]
        max_name = producer_inputs[-1]
        min_out.append(first_float(min_name))
        max_out.append(first_float(max_name))

    self.assertEqual(len(set(min_out)), 1)
    self.assertEqual(len(set(max_out)), 1)
def test_quantizate(self):
    """Quantize and evaluate the models under three configurations.

    ``static_yaml.yaml`` and ``dynamic_yaml.yaml`` run on ``self.rn50_model``;
    ``non_MSE_yaml.yaml`` runs on ``self.mb_v2_model``. Each quantized model is
    passed to ``eval_func``.
    """
    from lpot import Quantization, common

    cases = [
        ("static_yaml.yaml", self.rn50_model),
        ("dynamic_yaml.yaml", self.rn50_model),
        ("non_MSE_yaml.yaml", self.mb_v2_model),
    ]
    for config_file, fp32_model in cases:
        tuner = Quantization(config_file)
        dummy_data = tuner.dataset("dummy", (100, 3, 224, 224),
                                   low=0., high=1., label=True)
        tuner.calib_dataloader = common.DataLoader(dummy_data)
        tuner.eval_dataloader = common.DataLoader(dummy_data)
        tuner.model = common.Model(fp32_model)
        quantized = tuner()
        eval_func(quantized)
def main(_):
    """Dispatch on ``FLAGS.mode``: tune, benchmark or accuracy.

    * ``tune`` quantizes the loaded graph and tries to save it to
      ``FLAGS.output_model``; a save failure is printed, not raised.
    * ``benchmark`` calls ``eval_func(graph, FLAGS.iters)``.
    * ``accuracy`` calls ``eval_func(graph, -1)``.

    Any other mode does nothing after loading the graph.
    """
    graph = load_graph(FLAGS.input_graph)
    mode = FLAGS.mode

    if mode == 'benchmark':
        eval_func(graph, FLAGS.iters)
        return
    if mode == 'accuracy':
        eval_func(graph, -1)
        return
    if mode != 'tune':
        return

    from lpot import Quantization, common
    quantizer = Quantization(FLAGS.config)
    calib_data = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file)
    quantizer.calib_dataloader = common.DataLoader(
        calib_data, collate_fn=collate_fn, batch_size=FLAGS.batch_size)
    quantizer.model = common.Model(graph)
    quantizer.eval_func = eval_func
    q_model = quantizer()
    try:
        q_model.save(FLAGS.output_model)
    except Exception as e:
        # Best effort: report the failure and carry on.
        print("Failed to save model due to {}".format(str(e)))
def test_conv_fusion_with_last_conv(self):
    """Quantize a three-convolution graph and look for a ``QuantizeV2`` node.

    The graph is frozen with ``convert_variables_to_constants`` and quantized
    with the flow configured by ``fake_yaml.yaml``.
    """
    inputs = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
    first_relu = tf.nn.relu(inputs)
    kernel_a = tf.compat.v1.get_variable(
        "weight", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_a = tf.nn.conv2d(first_relu, kernel_a, strides=[1, 2, 2, 1], padding="VALID")
    bn_a = tf.compat.v1.layers.batch_normalization(conv_a)
    relu_a = tf.nn.relu(bn_a)
    pooled = tf.nn.max_pool(relu_a, ksize=1, strides=[1, 2, 2, 1], padding="SAME")

    kernel_b = tf.compat.v1.get_variable(
        "weight2", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    conv_b = tf.nn.conv2d(pooled, kernel_b, strides=[1, 2, 2, 1], padding="VALID")
    kernel_c = tf.compat.v1.get_variable(
        "weight3", [3, 3, 16, 16],
        initializer=tf.compat.v1.random_normal_initializer())
    relu_b = tf.nn.relu(conv_b)
    conv_c = tf.nn.conv2d(relu_b, kernel_c, strides=[1, 2, 2, 1], padding="VALID")
    relu_c = tf.nn.relu(conv_c)
    stored = tf.nn.relu6(relu_c, name='op_to_store')
    out_name = stored.name.split(':')[0]

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        frozen_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=[out_name])

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = frozen_graph_def
        output_graph = quantizer()

        # NOTE(review): the counting loop this replaces stopped at the first
        # match, so this has always been a presence check, not "exactly one".
        has_quantize_v2 = any(
            node.op == 'QuantizeV2' for node in output_graph.graph_def.node)
        self.assertTrue(has_quantize_v2)
def _build_squad_arg_parser():
    """Create the command-line parser for the SQuAD train/eval/quantize script."""
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: " +
                        ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument(
        "--model_name_or_path", default=None, type=str, required=True,
        help="Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(ALL_MODELS))
    parser.add_argument(
        "--output_dir", default=None, type=str, required=True,
        help="The output directory where the model checkpoints and predictions will be written.")

    # Other parameters
    parser.add_argument(
        "--data_dir", default=None, type=str,
        help="The input data dir. Should contain the .json files for the task. If not specified, will run with tensorflow_datasets.")
    parser.add_argument(
        "--config_name", default="", type=str,
        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument(
        "--tokenizer_name", default="", type=str,
        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument(
        "--cache_dir", default="", type=str,
        help="Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument(
        '--version_2_with_negative', action='store_true',
        help='If true, the SQuAD examples contain some that do not have an answer.')
    parser.add_argument(
        '--null_score_diff_threshold', type=float, default=0.0,
        help="If null_score - best_non_null is greater than the threshold predict null.")
    parser.add_argument(
        "--max_seq_length", default=384, type=int,
        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument(
        "--doc_stride", default=128, type=int,
        help="When splitting up a long document into chunks, how much stride to take between chunks.")
    parser.add_argument(
        "--max_query_length", default=64, type=int,
        help="The maximum number of tokens for the question. Questions longer than this will "
        "be truncated to this length.")
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--evaluate_during_training", action='store_true',
        help="Rul evaluation during training at each logging step.")
    parser.add_argument(
        "--do_lower_case", action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument(
        '--gradient_accumulation_steps', type=int, default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--max_steps", default=-1, type=int,
        help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument(
        "--n_best_size", default=20, type=int,
        help="The total number of n-best predictions to generate in the nbest_predictions.json output file.")
    parser.add_argument(
        "--max_answer_length", default=30, type=int,
        help="The maximum length of an answer that can be generated. This is needed because the start "
        "and end predictions are not conditioned on one another.")
    parser.add_argument(
        "--verbose_logging", action='store_true',
        help="If true, all of the warnings related to data processing will be printed. "
        "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X updates steps.")
    parser.add_argument('--save_steps', type=int, default=50,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints", action='store_true',
        help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument(
        '--overwrite_cache', action='store_true',
        help="Overwrite the cached training and evaluation sets")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        '--fp16', action='store_true',
        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    # The two sentences used to be concatenated without a separating space.
    parser.add_argument(
        '--fp16_opt_level', type=str, default='O1',
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
        "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument('--server_ip', type=str, default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port', type=str, default='',
                        help="Can be used for distant debugging.")
    parser.add_argument("--do_calibration", action='store_true',
                        help="Whether to do calibration.")
    parser.add_argument("--do_int8_inference", action='store_true',
                        help="Whether to run int8 inference.")
    parser.add_argument("--do_fp32_inference", action='store_true',
                        help="Whether to run fp32 inference.")
    parser.add_argument("--mkldnn_eval", action='store_true',
                        help="evaluation with MKLDNN")
    parser.add_argument(
        "--tune", action='store_true',
        help="run Low Precision Optimization Tool to tune int8 acc.")
    parser.add_argument("--task_name", default=None, type=str, required=True,
                        help="SQuAD task")
    parser.add_argument("--warmup", type=int, default=5,
                        help="warmup for performance")
    parser.add_argument('-i', "--iter", default=0, type=int,
                        help='For accuracy measurement only.')
    parser.add_argument('--config', type=str, default='conf.yaml',
                        help="yaml config file")
    parser.add_argument('--benchmark', dest='benchmark', action='store_true',
                        help='run benchmark')
    parser.add_argument('-r', "--accuracy_only", dest='accuracy_only',
                        action='store_true',
                        help='For accuracy measurement only.')
    # Help text previously advertised "./" although the default is
    # "./saved_results".
    parser.add_argument(
        "--tuned_checkpoint", default='./saved_results', type=str,
        metavar='PATH',
        help='path to checkpoint tuned by Low Precision Optimization Tool '
        '(default: ./saved_results)')
    # Help text previously duplicated the --benchmark description.
    parser.add_argument(
        '--int8', dest='int8', action='store_true',
        help='load the int8 model from --tuned_checkpoint when running '
        '--benchmark or --accuracy_only')
    return parser


def _suffix_result_keys(result, global_step):
    """Return a copy of ``result`` with ``_<global_step>`` appended to every key.

    Keys are left unchanged when ``global_step`` is empty.
    """
    suffix = '_{}'.format(global_step) if global_step else ''
    return {key + suffix: value for key, value in result.items()}


def main():
    """Train, evaluate, tune, benchmark or quantize a model on SQuAD.

    Parses the command line, configures device/distributed state and logging,
    loads the pretrained config/tokenizer/model, then runs the stages selected
    by the flags: ``--do_train``, and under ``--do_eval`` (per checkpoint)
    fp32 evaluation, ``--tune``, ``--benchmark``/``--accuracy_only``,
    ``--do_calibration`` and ``--do_int8_inference``.

    Returns:
        dict: Evaluation metrics collected across checkpoints, keyed by metric
        name (suffixed with ``_<global_step>`` when several checkpoints are
        evaluated).  ``None`` is returned when ``--do_int8_inference`` is
        requested but the quantized model directory does not exist.

    Raises:
        ValueError: If the output directory is non-empty while training
            without ``--overwrite_output_dir``, or (inside the tuning
            callback) if the evaluation result has no known accuracy key.
        ImportError: If ``--fp16`` is set and ``apex`` is not installed.
        SystemExit: With code 0 after the ``--tune`` or
            ``--benchmark``/``--accuracy_only`` stage finishes.
    """
    # ``sys.exit`` replaces the bare ``exit`` builtin, which is only injected
    # by the ``site`` module and is not guaranteed to exist.
    import sys

    args = _build_squad_arg_parser().parse_args()

    # Last non-empty path component of the model name identifies the
    # predictions file.
    model_tag = list(filter(None, args.model_name_or_path.split('/'))).pop()
    args.predict_file = os.path.join(
        args.output_dir,
        'predictions_{}_{}.txt'.format(model_tag, str(args.max_seq_length)))

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Every quantization-related mode loads the model with mix_qkv enabled.
    mix_qkv = bool(args.do_calibration or args.do_int8_inference or args.tune)

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:
        # Initializes the distributed backend which will take care of
        # synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank, device, args.n_gpu, bool(args.local_rank != -1),
        args.fp16)

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will
        # download model & vocab
        torch.distributed.barrier()

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None)
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=config,
        mix_qkv=mix_qkv,
        cache_dir=args.cache_dir if args.cache_dir else None)

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will
        # download model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16
    # execution of torch.einsum if args.fp16 is set. Otherwise it'll default
    # to "promote" mode, and we'll get fp32 operations. Note that running
    # `--fp16_opt_level="O2"` will remove the need for this code, but it is
    # still valid.
    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, 'einsum')
        except ImportError as err:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            ) from err

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                evaluate=False,
                                                output_examples=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Save the trained model and the tokenizer
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed; exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        if args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir, exist_ok=True)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using
        # `save_pretrained()`. They can then be reloaded using
        # `from_pretrained()`.
        # Take care of distributed/parallel training
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the
        # trained model
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.output_dir,
                                            force_download=True,
                                            mix_qkv=mix_qkv)
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints
    # (sub-directories) in a directory
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = [
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME,
                              recursive=True))
            ]
            # Reduce model loading logs
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)

        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # The step suffix is only used when several checkpoints are
            # evaluated.
            global_step = checkpoint.split(
                '-')[-1] if len(checkpoints) > 1 else ""

            if args.mkldnn_eval or args.do_fp32_inference:
                # Reload the model
                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True)
                model.to(args.device)

                # Evaluate
                result, _ = evaluate(args,
                                     model,
                                     tokenizer,
                                     prefix=global_step)
                results.update(_suffix_result_keys(result, global_step))

            if args.tune:

                def eval_func_for_lpot(model):
                    """Evaluate ``model`` and return its first known accuracy metric."""
                    result, _ = evaluate(args, model, tokenizer)
                    for key in sorted(result.keys()):
                        logger.info(" %s = %s", key, str(result[key]))
                    bert_task_acc_keys = [
                        'best_f1', 'f1', 'mcc', 'spearmanr', 'acc'
                    ]
                    for key in bert_task_acc_keys:
                        if key in result:
                            logger.info("Finally Eval {}:{}".format(
                                key, result[key]))
                            return result[key]
                    # Previously this fell through to `return acc` with
                    # `acc` unbound, raising an opaque UnboundLocalError.
                    raise ValueError(
                        "evaluate() returned none of the expected accuracy "
                        "keys: {}".format(bert_task_acc_keys))

                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True,
                                                    mix_qkv=True)
                model.to(args.device)
                dataset = load_and_cache_examples(args,
                                                  tokenizer,
                                                  evaluate=True,
                                                  output_examples=False)
                args.eval_batch_size = args.per_gpu_eval_batch_size * max(
                    1, args.n_gpu)
                eval_task = "squad"
                from lpot import Quantization, common
                quantizer = Quantization(args.config)
                dataset = quantizer.dataset('bert',
                                            dataset=dataset,
                                            task=eval_task,
                                            model_type=args.model_type)
                quantizer.model = common.Model(model)
                quantizer.calib_dataloader = common.DataLoader(
                    dataset, batch_size=args.eval_batch_size)
                quantizer.eval_func = eval_func_for_lpot
                q_model = quantizer()
                q_model.save(args.tuned_checkpoint)
                sys.exit(0)

            if args.benchmark or args.accuracy_only:
                model = model_class.from_pretrained(checkpoint, mix_qkv=True)
                model.to(args.device)

                if args.int8:
                    from lpot.utils.pytorch import load
                    new_model = load(
                        os.path.abspath(
                            os.path.expanduser(args.tuned_checkpoint)),
                        model)
                else:
                    new_model = model
                result, _ = evaluate(args,
                                     new_model,
                                     tokenizer,
                                     prefix=global_step)
                sys.exit(0)

            if args.do_calibration:
                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True,
                                                    mix_qkv=True)
                model.to(args.device)
                model.qconfig = default_per_channel_qconfig
                propagate_qconfig_(model)
                add_observer_(model)
                # Calibration pass, then convert the observed model in place.
                evaluate(args,
                         model,
                         tokenizer,
                         prefix=global_step,
                         calibration=True)
                convert(model, inplace=True)
                quantized_model_path = "squad" + str(
                    global_step) + "_quantized_model"
                os.makedirs(quantized_model_path, exist_ok=True)
                model.save_pretrained(quantized_model_path)
                result, _ = evaluate(args,
                                     model,
                                     tokenizer,
                                     prefix=global_step)
                results.update(_suffix_result_keys(result, global_step))

            if args.do_int8_inference:
                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True,
                                                    mix_qkv=True)
                model.to(args.device)
                model.qconfig = default_per_channel_qconfig
                propagate_qconfig_(model)
                add_observer_(model)
                convert(model, inplace=True)
                quantized_model_path = "squad" + str(
                    global_step) + "_quantized_model"
                if not os.path.exists(quantized_model_path):
                    logger.info("Please run calibration first!")
                    # NOTE(review): returns None here rather than `results`;
                    # kept as-is so existing callers see the same value.
                    return
                model_bin_file = os.path.join(quantized_model_path,
                                              "pytorch_model.bin")
                state_dict = torch.load(model_bin_file)
                model.load_state_dict(state_dict)
                print(model)
                with torch.autograd.profiler.profile() as prof:
                    result, _ = evaluate(args,
                                         model,
                                         tokenizer,
                                         prefix=global_step)
                print(prof.key_averages().table(sort_by="cpu_time_total"))
                results.update(_suffix_result_keys(result, global_step))

    logger.info("Results: {}".format(results))

    return results