Code Example #1
    def test_disable_matmul_fusion(self):
        g = tf.Graph()
        with g.as_default():

            x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
            y_data = np.array([[1, 2], [3, 4]], dtype=np.float32)  # np.float is removed in recent NumPy
            x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
            y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
            z = tf.matmul(x, y, name='no_quant_matmul')
            z = tf.nn.relu6(z, name='op_to_store')
            found_quantized_matmul = False

            with tf.Session() as sess:
                sess.run(z, feed_dict={x: x_data, y: y_data})
                float_graph_def = sess.graph.as_graph_def()

                from lpot import Quantization, common
                quantizer = Quantization('fake_yaml.yaml')
                dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
                quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
                quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
                quantizer.model = float_graph_def
                output_graph = quantizer()

                for i in output_graph.graph_def.node:
                    if i.op == 'QuantizedMatMulWithBiasAndDequantize' and i.name == 'op_to_store':
                        found_quantized_matmul = True
                        break
            self.assertEqual(found_quantized_matmul, False)
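
Most of these tests read a small config such as 'fake_yaml.yaml' that the test
files generate during setup. A minimal sketch of what such a config could look
like, written out from Python; the keys follow the lpot 1.x YAML schema, but
the exact values are illustrative assumptions, not the real test fixtures:

    fake_yaml = """
    model:
      name: fake_yaml
      framework: tensorflow
      inputs: x
      outputs: op_to_store
    device: cpu
    tuning:
      accuracy_criterion:
        relative: 0.01
      exit_policy:
        timeout: 0
      workspace:
        path: saved
    """
    with open('fake_yaml.yaml', 'w') as f:
        f.write(fake_yaml)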
Code Example #2
    def test_first_matmul_biasadd_relu_fusion(self):
        x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
        y_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
        x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
        y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
        z = tf.matmul(x, y)
        z = tf.nn.bias_add(z, [1, 2])
        z = tf.nn.relu(z, name='op_to_store')

        with tf.Session() as sess:

            sess.run(z, feed_dict={x: x_data, y: y_data})
            float_graph_def = sess.graph.as_graph_def()

            from lpot import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
            quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
            quantizer.model = float_graph_def
            output_graph = quantizer()

            found_quantized_matmul = False
            for i in output_graph.graph_def.node:
                if i.op == 'QuantizeV2' and i.name == 'MatMul_eightbit_quantize_x' and i.attr["T"].type == dtypes.quint8:
                    found_quantized_matmul = True
                    break

            self.assertEqual(found_quantized_matmul, True)
Code Example #3
    def test_loss_calculation(self):
        from lpot.strategy.tpe import TpeTuneStrategy
        from lpot import Quantization, common

        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.model = self.constant_graph

        testObject = TpeTuneStrategy(quantizer.model, quantizer.conf,
                                     quantizer.calib_dataloader)
        testObject._calculate_loss_function_scaling_components(
            0.01, 2, testObject.loss_function_config)
        # check if latency difference between min and max corresponds to 10 points of loss function
        tmp_val = testObject.calculate_loss(0.01, 2,
                                            testObject.loss_function_config)
        tmp_val2 = testObject.calculate_loss(0.01, 1,
                                             testObject.loss_function_config)
        self.assertEqual(int(tmp_val2 - tmp_val), 10)
        # check if 1% of acc difference corresponds to 10 points of loss function
        tmp_val = testObject.calculate_loss(0.02, 2,
                                            testObject.loss_function_config)
        tmp_val2 = testObject.calculate_loss(0.03, 2,
                                             testObject.loss_function_config)
        self.assertEqual(int(tmp_val2 - tmp_val), 10)
Code Example #4
File: test_adaptor_pytorch.py  Project: mbasnet1/lpot
 def test_tuning_ipex(self):
     from lpot import Quantization
     model = torchvision.models.resnet18()
     model = MODELS['pytorch_ipex'](model)
     quantizer = Quantization('ipex_yaml.yaml')
     dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
     quantizer.model = common.Model(model)
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantizer.eval_dataloader = common.DataLoader(dataset)
     lpot_model = quantizer()
     lpot_model.save("./saved")
     new_model = MODELS['pytorch_ipex'](model.model, {
         "workspace_path": "./saved"
     })
     new_model.model.to(ipex.DEVICE)
     try:
         script_model = torch.jit.script(new_model.model)
     except Exception:  # fall back to tracing when scripting fails
         script_model = torch.jit.trace(
             new_model.model,
             torch.randn(10, 3, 224, 224).to(ipex.DEVICE))
     from lpot import Benchmark
     evaluator = Benchmark('ipex_yaml.yaml')
     evaluator.model = common.Model(script_model)
     evaluator.b_dataloader = common.DataLoader(dataset)
     results = evaluator()
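
This snippet leans on imports from the surrounding test file. A plausible
sketch of them; the IPEX import name and the MODELS registry location are
assumptions based on lpot 1.x and the 1.x Intel Extension for PyTorch:

    import torch
    import torchvision
    import intel_pytorch_extension as ipex  # assumed 1.x import exposing ipex.DEVICE
    from lpot import common
    from lpot.model.model import MODELS    # assumed home of the framework-model registry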
Code Example #5
File: test_adaptor_pytorch.py  Project: mbasnet1/lpot
    def test_quantization_saved(self):
        from lpot.utils.pytorch import load

        model = copy.deepcopy(self.model)

        for fake_yaml in ['qat_yaml.yaml', 'ptq_yaml.yaml']:
            if fake_yaml == 'ptq_yaml.yaml':
                model.eval().fuse_model()
            quantizer = Quantization(fake_yaml)
            dataset = quantizer.dataset('dummy', (100, 3, 256, 256),
                                        label=True)
            quantizer.model = common.Model(model)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            if fake_yaml == 'qat_yaml.yaml':
                quantizer.q_func = q_func
            q_model = quantizer()
            q_model.save('./saved')
            # Load configure and weights by lpot.utils
            saved_model = load("./saved", model)
            eval_func(saved_model)
        from lpot import Benchmark
        evaluator = Benchmark('ptq_yaml.yaml')
        # Load configure and weights by lpot.model
        evaluator.model = common.Model(model)
        evaluator.b_dataloader = common.DataLoader(dataset)
        results = evaluator()
        evaluator.model = common.Model(model)
        fp32_results = evaluator()
        self.assertTrue(
            (fp32_results['accuracy'][0] - results['accuracy'][0]) < 0.01)
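
q_func and eval_func are defined elsewhere in this test file. Hypothetical
minimal stand-ins that satisfy the interfaces used above (a q_func standing in
for the QAT training loop, an eval_func returning an accuracy number); the
input shape and the constant accuracy are illustrative only:

    import torch

    def q_func(model):
        # a single dummy forward pass stands in for the QAT training loop
        model.train()
        model(torch.randn(1, 3, 256, 256))
        return model

    def eval_func(model):
        model.eval()
        with torch.no_grad():
            model(torch.randn(1, 3, 256, 256))
        return 0.0  # placeholder accuracy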
Code Example #6
    def test_dump_tensor_to_disk(self):
        import tensorflow.compat.v1 as tf
        tf.disable_v2_behavior()
        from lpot import Quantization, common

        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy',
                                    shape=(100, 30, 30, 1),
                                    label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.model = self.constant_graph
        quantizer()

        with open(self.calibration_log_path) as f:
            data = f.readlines()

        found_min_str = False
        found_max_str = False
        for i in data:
            if i.find('__print__;__max') != -1:
                found_max_str = True
            if i.find('__print__;__min') != -1:
                found_min_str = True

        self.assertEqual(os.path.exists(self.calibration_log_path), True)
        self.assertGreater(len(data), 1)
        self.assertEqual(found_min_str, True)
        self.assertEqual(found_max_str, True)
Code Example #7
File: test_mse.py  Project: mbasnet1/lpot
 def test_ru_mse_max_trials(self):
     from lpot import Quantization, common
     quantizer = Quantization('fake_yaml2.yaml')
     dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True)
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantizer.eval_dataloader = common.DataLoader(dataset)
     quantizer.model = self.constant_graph
     quantizer()
Code Example #8
 def test_autodump(self):
     from lpot import Quantization, common
     quantizer = Quantization('fake_yaml3.yaml')
     dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True)
     quantizer.eval_dataloader = common.DataLoader(dataset)
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantizer.model = self.constant_graph
     output_graph = quantizer()
Code Example #9
    def test_conv_fusion_with_last_matmul(self):
        x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
        top_relu = tf.nn.relu(x)
        # paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
        # x_pad = tf.pad(top_relu, paddings, "CONSTANT")
        conv_weights = tf.compat.v1.get_variable(
            "weight", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv = tf.nn.conv2d(top_relu,
                            conv_weights,
                            strides=[1, 2, 2, 1],
                            padding="VALID")
        normed = tf.compat.v1.layers.batch_normalization(conv)

        relu = tf.nn.relu(normed)
        pooling = tf.nn.max_pool(relu,
                                 ksize=1,
                                 strides=[1, 2, 2, 1],
                                 padding="SAME")
        reshape = tf.reshape(pooling, [-1, 3136])

        y_data = np.random.random([3136, 1])

        y = tf.constant(y_data, dtype=tf.float32, shape=[3136, 1])
        z = tf.matmul(reshape, y)
        y_data_1 = np.random.random([1, 1])
        y_1 = tf.constant(y_data_1, dtype=tf.float32, shape=[1, 1])

        z_2nd_matmul = tf.matmul(z, y_1)
        relu6 = tf.nn.relu6(z_2nd_matmul, name='op_to_store')

        out_name = relu6.name.split(':')[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])

            from lpot import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy',
                                        shape=(100, 56, 56, 16),
                                        label=True)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer()

            quantize_v2_count = 0
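            # note: the loop breaks on the first match, so this effectively
            # asserts that at least one QuantizeV2 node exists in the graph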
            for i in output_graph.graph_def.node:
                if i.op == 'QuantizeV2':
                    quantize_v2_count += 1
                    break

            self.assertEqual(quantize_v2_count, 1)
Code Example #10
    def test_tensorflow_graph_meta_pass(self):

        x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
        top_relu = tf.nn.relu(x)
        conv_weights = tf.compat.v1.get_variable(
            "weight", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv = tf.nn.conv2d(top_relu,
                            conv_weights,
                            strides=[1, 2, 2, 1],
                            padding="VALID")
        normed = tf.compat.v1.layers.batch_normalization(conv)

        relu = tf.nn.relu(normed)
        sq = tf.squeeze(relu, [0])
        reshape = tf.reshape(sq, [1, 27, 27, 16])
        conv_weights2 = tf.compat.v1.get_variable(
            "weight2", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv2 = tf.nn.conv2d(reshape,
                             conv_weights2,
                             strides=[1, 2, 2, 1],
                             padding="VALID")
        normed2 = tf.compat.v1.layers.batch_normalization(conv2)

        relu6 = tf.nn.relu6(normed2, name='op_to_store')

        out_name = relu6.name.split(':')[0]

        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])
            from lpot import Quantization, common

            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy',
                                        shape=(100, 56, 56, 16),
                                        label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer()
            quantize_count = 0
            dequantize_count = 0

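            # the meta-op pass treats Squeeze/Reshape as pass-through, so a
            # single QuantizeV2/Dequantize pair should span the quantized region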
            for i in output_graph.graph_def.node:
                if i.op == 'QuantizeV2':
                    quantize_count += 1
                if i.op == 'Dequantize':
                    dequantize_count += 1

            self.assertEqual(quantize_count, 1)
            self.assertEqual(dequantize_count, 1)
Code Example #11
File: test_bayesian.py  Project: mbasnet1/lpot
    def test_run_bayesian_max_trials(self):

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml2.yaml')
        dataset = quantizer.dataset('dummy',
                                    shape=(1, 224, 224, 3),
                                    label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = self.test_graph
        output_graph = quantizer()
Code Example #12
    def test_conv_biasadd_addv2_relu_fusion(self):
        x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
        top_relu = tf.nn.relu(x)
        paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
        x_pad = tf.pad(top_relu, paddings, "CONSTANT")
        conv_weights = tf.compat.v1.get_variable(
            "weight", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv = tf.nn.conv2d(x_pad,
                            conv_weights,
                            strides=[1, 2, 2, 1],
                            padding="VALID")
        normed = tf.compat.v1.layers.batch_normalization(conv)
        # relu = tf.nn.relu(normed)

        conv_weights2 = tf.compat.v1.get_variable(
            "weight2", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv2 = tf.nn.conv2d(top_relu,
                             conv_weights2,
                             strides=[1, 2, 2, 1],
                             padding="SAME")
        normed2 = tf.compat.v1.layers.batch_normalization(conv2)
        # relu2 = tf.nn.relu(normed2)
        add = tf.raw_ops.AddV2(x=normed, y=normed2, name='addv2')
        relu = tf.nn.relu(add)
        relu6 = tf.nn.relu6(relu, name='op_to_store')

        out_name = relu6.name.split(':')[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])

            from lpot import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy',
                                        shape=(100, 56, 56, 16),
                                        label=True)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer()

            found_conv_fusion = False

            for i in output_graph.graph_def.node:
                if i.op == 'QuantizedConv2DWithBiasSignedSumAndReluAndRequantize':
                    found_conv_fusion = True
                    break

            self.assertEqual(found_conv_fusion, True)
Code Example #13
File: test_tensorboard.py  Project: mbasnet1/lpot
    def test_run_basic_one_trial(self):
        from lpot import Quantization, common

        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', (1, 224, 224, 3), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.model = self.constant_graph
        quantizer()

        self.assertGreater(len(os.listdir("./runs/eval")), 2)
Code Example #14
    def test_disable_scale_propagation(self):
        x = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input")
        conv_weights = tf.compat.v1.get_variable(
            "weight", [2, 2, 1, 1],
            initializer=tf.compat.v1.random_normal_initializer())
        conv_bias = tf.compat.v1.get_variable(
            "bias", [1], initializer=tf.compat.v1.random_normal_initializer())

        x = tf.nn.relu(x)
        conv = tf.nn.conv2d(x,
                            conv_weights,
                            strides=[1, 2, 2, 1],
                            padding="SAME",
                            name='last')
        normed = tf.compat.v1.layers.batch_normalization(conv)

        relu = tf.nn.relu(normed)
        pool = tf.nn.avg_pool(relu,
                              ksize=1,
                              strides=[1, 2, 2, 1],
                              padding="SAME")
        conv1 = tf.nn.conv2d(pool,
                             conv_weights,
                             strides=[1, 2, 2, 1],
                             padding="SAME",
                             name='last')
        conv_bias = tf.nn.bias_add(conv1, conv_bias)
        x = tf.nn.relu(conv_bias)
        final_node = tf.nn.relu(x, name='op_to_store')

        out_name = final_node.name.split(':')[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])
            from lpot import Quantization, common

            quantizer = Quantization(
                'fake_yaml_disable_scale_propagation.yaml')
            dataset = quantizer.dataset('dummy',
                                        shape=(100, 30, 30, 1),
                                        label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer()

            max_freezed_out = []
            for i in output_graph.graph_def.node:
                if i.op == 'QuantizedConv2DWithBiasAndReluAndRequantize':
                    max_freezed_out.append(i.input[-1])
            self.assertEqual(2, len(set(max_freezed_out)))
Code Example #15
File: test_bf16_convert.py  Project: mbasnet1/lpot
    def test_bf16_rnn(self):
        os.environ['FORCE_BF16'] = '1'
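        # FORCE_BF16=1 lets the lpot TensorFlow adaptor attempt bf16 conversion
        # even on hardware without native bf16 support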

        inp = tf.keras.layers.Input(shape=(None, 4))
        lstm_1 = tf.keras.layers.LSTM(units=10, return_sequences=True)(inp)
        dropout_1 = tf.keras.layers.Dropout(0.2)(lstm_1)
        lstm_2 = tf.keras.layers.LSTM(units=10,
                                      return_sequences=False)(dropout_1)
        dropout_2 = tf.keras.layers.Dropout(0.2)(lstm_2)
        out = tf.keras.layers.Dense(1)(dropout_2)
        model = tf.keras.models.Model(inputs=inp, outputs=out)

        model.compile(loss="mse", optimizer=tf.keras.optimizers.RMSprop())

        # input_names = [t.name.split(":")[0] for t in model.inputs]
        output_names = [t.name.split(":")[0] for t in model.outputs]

        q_data = np.random.randn(64, 10, 4)
        label = np.random.randn(64, 1)
        model.predict(q_data)

        sess = tf.keras.backend.get_session()

        graph = sess.graph

        from tensorflow.python.framework import graph_util
        graph_def = graph_util.convert_variables_to_constants(
            sess,
            graph.as_graph_def(),
            output_names,
        )
        quant_data = (q_data, label)
        evl_data = (q_data, label)

        from lpot import Quantization, common

        quantizer = Quantization('fake_bf16_rnn.yaml')
        quantizer.calib_dataloader = common.DataLoader(
            dataset=list(zip(quant_data[0], quant_data[1])))
        quantizer.eval_dataloader = common.DataLoader(
            dataset=list(zip(evl_data[0], evl_data[1])))
        quantizer.model = graph_def
        quantized_model = quantizer()

        convert_to_bf16_flag = False
        for i in quantized_model.graph_def.node:
            if i.name == 'lstm/while/MatMul_3' and \
                i.attr['T'].type == dtypes.bfloat16.as_datatype_enum:
                convert_to_bf16_flag = True

        self.assertEqual(convert_to_bf16_flag, True)
Code Example #16
    def test_fold_pad_conv2(self):
        x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
        paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
        x_pad = tf.pad(x, paddings, "CONSTANT")
        conv_weights = tf.compat.v1.get_variable(
            "weight", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv = tf.nn.conv2d(x_pad,
                            conv_weights,
                            strides=[1, 2, 2, 1],
                            padding="VALID")
        normed = tf.compat.v1.layers.batch_normalization(conv)
        relu = tf.nn.relu(normed)

        paddings2 = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
        x_pad2 = tf.pad(x, paddings2, "CONSTANT")
        conv_weights2 = tf.compat.v1.get_variable(
            "weight2", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv2 = tf.nn.conv2d(x_pad2,
                             conv_weights2,
                             strides=[1, 2, 2, 1],
                             padding="VALID")
        normed2 = tf.compat.v1.layers.batch_normalization(conv2)
        relu2 = tf.nn.relu(normed2)
        add = tf.math.add(relu, relu2, name='op_to_store')
        out_name = add.name.split(':')[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])
            from lpot import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy',
                                        shape=(100, 56, 56, 16),
                                        label=True)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer()
            found_pad = False

            if tf.__version__ >= "2.0.0":
                for i in output_graph.graph_def.node:
                    if i.op == 'Pad':
                        found_pad = True
                        break
                self.assertEqual(found_pad, True)
Code Example #17
    def test_no_input_output_config(self):
        g = GraphAnalyzer()
        g.graph = self.input_graph
        g.parse_graph()

        float_graph_def = g.dump_graph()
        from lpot import Quantization, common

        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', shape=(20, 224, 224, 3), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.model = float_graph_def
        output_graph = quantizer()
        self.assertGreater(len(output_graph.graph_def.node), 0)
Code Example #18
File: test_adaptor_pytorch.py  Project: mbasnet1/lpot
 def test_tensor_dump(self):
     model = copy.deepcopy(self.lpot_model)
     model.model.eval().fuse_model()
     quantizer = Quantization('dump_yaml.yaml')
     dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True)
     quantizer.model = common.Model(model.model)
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantizer.eval_func = eval_func
     quantizer()
     self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))
     quantizer.eval_dataloader = common.DataLoader(dataset)
     quantizer()
     self.assertTrue(os.path.exists('runs/eval/baseline_acc0.0'))
Code Example #19
    def test_invalid_input_output_config(self):
        g = GraphAnalyzer()
        g.graph = self.input_graph
        g.parse_graph()

        float_graph_def = g.dump_graph()
        from lpot import Quantization, common

        quantizer = Quantization('fake_yaml_2.yaml')
        dataset = quantizer.dataset('dummy', shape=(20, 224, 224, 3), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
        quantizer.model = float_graph_def
        model = quantizer()
        # will detect the right inputs/outputs
        self.assertNotEqual(model.input_node_names, ['x'])
        self.assertNotEqual(model.output_node_names, ['op_to_store'])
Code Example #20
File: test_bf16_convert.py  Project: mbasnet1/lpot
    def test_bf16_fallback(self):
        os.environ['FORCE_BF16'] = '1'

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy',
                                    shape=(1, 224, 224, 3),
                                    label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = self.test_graph
        output_graph = quantizer()
        cast_op_count = 0
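        # bf16 conversion inserts Cast ops at fp32/bf16 boundaries, so at least
        # one Cast indicates that some nodes were converted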
        for node in output_graph.graph_def.node:
            if node.op == 'Cast':
                cast_op_count += 1
        self.assertTrue(cast_op_count >= 1)
Code Example #21
    def test_enable_first_quantization(self):
        x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
        top_relu = tf.nn.relu(x)
        paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]])
        x_pad = tf.pad(top_relu, paddings, "CONSTANT")
        conv_weights = tf.compat.v1.get_variable(
            "weight", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv = tf.nn.conv2d(x_pad,
                            conv_weights,
                            strides=[1, 2, 2, 1],
                            padding="VALID")
        normed = tf.compat.v1.layers.batch_normalization(conv)

        relu = tf.nn.relu(normed)

        relu6 = tf.nn.relu6(relu, name='op_to_store')

        out_name = relu6.name.split(':')[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])
            from lpot import Quantization, common

            quantizer = Quantization(
                'fake_yaml_enable_first_quantization.yaml')
            dataset = quantizer.dataset('dummy',
                                        shape=(100, 56, 56, 16),
                                        label=True)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer()

            found_fp32_conv = False

            for i in output_graph.graph_def.node:
                if i.op == 'Conv2D':
                    found_fp32_conv = True
                    break

            self.assertEqual(found_fp32_conv, False)
Code Example #22
def main():

    import lpot
    from lpot import common  # common.Model / common.DataLoader are used below
    quantizer = lpot.Quantization('./conf.yaml')
    dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
    quantizer.model = common.Model(
        './model/public/rfcn-resnet101-coco-tf/model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/'
    )
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantized_model = quantizer()
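
A natural follow-up, using the save API the other examples call on the
returned model object (the output path here is illustrative):

    quantized_model.save('./quantized_model')  # hypothetical path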
Code Example #23
    def test_autosave(self):
        from lpot import Quantization, common
        from lpot.utils.utility import get_size

        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy', (100, 256, 256, 1), label=True)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.model = self.constant_graph
        quantizer()

        q_model = quantizer()

        quantizer.model = self.constant_graph_1

        q_model_1 = quantizer()

        self.assertTrue((get_size(q_model_1.sess.graph) -
                         get_size(q_model.sess.graph)) > 0)
Code Example #24
    def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self):
        g = tf.Graph()
        with g.as_default():

            x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
            y_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
            x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
            y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
            z = tf.matmul(x, y)
            biasadd = tf.nn.bias_add(z, [1, 2])
            biasadd1 = tf.nn.bias_add(biasadd, [1, 1])

            y1 = tf.constant(x_data, dtype=tf.float32, shape=[2, 2])
            matmul1 = tf.matmul(biasadd1, y1)

            biasadd2 = tf.nn.bias_add(matmul1, [1, 1])

            z = tf.nn.softmax(biasadd2, name='op_to_store')
            found_quantized_matmul = False
            if tf.version.VERSION < "2.2.0":
                found_quantized_matmul = False
            else:
                with tf.Session() as sess:
                    sess.run(z, feed_dict={x: x_data, y: y_data})
                    float_graph_def = sess.graph.as_graph_def()

                    from lpot import Quantization, common
                    quantizer = Quantization('fake_yaml.yaml')
                    dataset = quantizer.dataset('dummy', shape=(2, 2), label=True)
                    quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2)
                    quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2)
                    quantizer.model = float_graph_def
                    output_graph = quantizer()

                    count = 0
                    for i in output_graph.graph_def.node:
                        if i.op == 'QuantizedMatMulWithBiasAndDequantize':
                            count += 1
                    found_quantized_matmul = bool(count > 1)
            self.assertEqual(found_quantized_matmul, False)
Code Example #25
    def test_register_metric_postprocess(self):
        import PIL.Image
        image = np.array(PIL.Image.open(self.image_path))
        resize_image = np.resize(image, (224, 224, 3))
        mean = [123.68, 116.78, 103.94]
        resize_image = resize_image - mean
        images = np.expand_dims(resize_image, axis=0)
        labels = [768]
        from lpot import Benchmark, Quantization, common
        from lpot.data.transforms.imagenet_transform import LabelShift
        from lpot.metric.metric import TensorflowTopK

        evaluator = Benchmark('fake_yaml.yaml')
        evaluator.postprocess = common.Postprocess(LabelShift,
                                                   'label_benchmark',
                                                   label_shift=1)
        evaluator.metric = common.Metric(TensorflowTopK, 'topk_benchmark')
        evaluator.b_dataloader = common.DataLoader(
            dataset=list(zip(images, labels)))
        evaluator.model = self.pb_path
        result = evaluator()
        acc, batch_size, result_list = result['accuracy']
        self.assertEqual(acc, 0.0)

        quantizer = Quantization('fake_yaml.yaml')
        quantizer.postprocess = common.Postprocess(LabelShift,
                                                   'label_quantize',
                                                   label_shift=1)
        quantizer.metric = common.Metric(TensorflowTopK, 'topk_quantize')

        evaluator = Benchmark('fake_yaml.yaml')
        evaluator.metric = common.Metric(TensorflowTopK, 'topk_second')

        evaluator.b_dataloader = common.DataLoader(
            dataset=list(zip(images, labels)))
        evaluator.model = self.pb_path
        result = evaluator()
        acc, batch_size, result_list = result['accuracy']
        self.assertEqual(acc, 0.0)
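
common.Metric also accepts user-defined metric classes. A minimal sketch of
the interface these tests rely on; update/reset/result follow the lpot
custom-metric convention, and the top-1 logic is illustrative:

    import numpy as np

    class MyTopK:
        def __init__(self):
            self.correct = 0
            self.total = 0

        def update(self, preds, labels):
            # count top-1 hits for a batch of class scores
            pred_ids = np.argmax(np.asarray(preds), axis=-1)
            self.correct += int(np.sum(pred_ids == np.asarray(labels)))
            self.total += len(pred_ids)

        def reset(self):
            self.correct = 0
            self.total = 0

        def result(self):
            return self.correct / max(self.total, 1)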
Code Example #26
    def test_tensorflow_concat_quantization(self):

        output_graph_def = read_graph(self.pb_path)

        from lpot import Quantization, common
        quantizer = Quantization('fake_yaml.yaml')
        dataset = quantizer.dataset('dummy',
                                    shape=(100, 299, 299, 3),
                                    label=True)
        quantizer.eval_dataloader = common.DataLoader(dataset)
        quantizer.calib_dataloader = common.DataLoader(dataset)
        quantizer.model = output_graph_def
        output_graph = quantizer()
        found_quantized_concat_node = False

        target_concat_node_name = 'v0/cg/incept_v3_a0/concat_eightbit_quantized_concatv2'
        from lpot.adaptor.tf_utils.graph_rewriter.graph_util import GraphAnalyzer
        cur_graph = GraphAnalyzer()
        cur_graph.graph = output_graph.graph_def
        graph_info = cur_graph.parse_graph()
        found_quantized_concat_node = target_concat_node_name in graph_info

        self.assertEqual(found_quantized_concat_node, True)
        min_out, max_out = [], []
        for input_conv_name in graph_info[
                target_concat_node_name].node.input[:4]:
            # print (input_conv_name, graph_info[input_conv_name].node.input)
            min_freezed_out_name = graph_info[input_conv_name].node.input[-2]
            max_freezed_out_name = graph_info[input_conv_name].node.input[-1]
            min_freezed_out_value = (graph_info[min_freezed_out_name].node.
                                     attr['value'].tensor.float_val)[0]
            max_freezed_out_value = (graph_info[max_freezed_out_name].node.
                                     attr['value'].tensor.float_val)[0]
            min_out.append(min_freezed_out_value)
            max_out.append(max_freezed_out_value)

        self.assertEqual(len(set(min_out)), 1)
        self.assertEqual(len(set(max_out)), 1)
Code Example #27
 def test_quantizate(self):
     from lpot import Quantization, common
     for fake_yaml in ["static_yaml.yaml", "dynamic_yaml.yaml"]:
         quantizer = Quantization(fake_yaml)
         dataset = quantizer.dataset("dummy", (100, 3, 224, 224),
                                     low=0.,
                                     high=1.,
                                     label=True)
         quantizer.calib_dataloader = common.DataLoader(dataset)
         quantizer.eval_dataloader = common.DataLoader(dataset)
         quantizer.model = common.Model(self.rn50_model)
         q_model = quantizer()
         eval_func(q_model)
     for fake_yaml in ["non_MSE_yaml.yaml"]:
         quantizer = Quantization(fake_yaml)
         dataset = quantizer.dataset("dummy", (100, 3, 224, 224),
                                     low=0.,
                                     high=1.,
                                     label=True)
         quantizer.calib_dataloader = common.DataLoader(dataset)
         quantizer.eval_dataloader = common.DataLoader(dataset)
         quantizer.model = common.Model(self.mb_v2_model)
         q_model = quantizer()
         eval_func(q_model)
Code Example #28
File: main.py  Project: mbasnet1/lpot
def main(_):
    graph = load_graph(FLAGS.input_graph)
    if FLAGS.mode == 'tune':
        from lpot import Quantization, common
        quantizer = Quantization(FLAGS.config)
        ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file)
        quantizer.calib_dataloader = common.DataLoader(ds, collate_fn=collate_fn, \
                                                 batch_size=FLAGS.batch_size)
        quantizer.model = common.Model(graph)
        quantizer.eval_func = eval_func
        q_model = quantizer()
        try:
            q_model.save(FLAGS.output_model)
        except Exception as e:
            print("Failed to save model due to {}".format(str(e)))
    elif FLAGS.mode == 'benchmark':
        eval_func(graph, FLAGS.iters)
    elif FLAGS.mode == 'accuracy':
        eval_func(graph, -1)
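
Dataset and collate_fn come from the surrounding script. A hypothetical
collate_fn of the shape this DataLoader call expects, padding a batch of
variable-length token-id sequences; purely illustrative:

    import numpy as np

    def collate_fn(batch):
        """Pad token-id sequences in a batch to a common length."""
        inputs = [np.asarray(ids) for ids, _ in batch]
        max_len = max(len(ids) for ids in inputs)
        padded = np.zeros((len(inputs), max_len), dtype=np.int32)
        for i, ids in enumerate(inputs):
            padded[i, :len(ids)] = ids
        labels = [label for _, label in batch]
        return padded, labels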
Code Example #29
    def test_conv_fusion_with_last_conv(self):
        x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input")
        top_relu = tf.nn.relu(x)
        conv_weights = tf.compat.v1.get_variable(
            "weight", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv = tf.nn.conv2d(top_relu,
                            conv_weights,
                            strides=[1, 2, 2, 1],
                            padding="VALID")
        normed = tf.compat.v1.layers.batch_normalization(conv)

        relu = tf.nn.relu(normed)
        pooling = tf.nn.max_pool(relu,
                                 ksize=1,
                                 strides=[1, 2, 2, 1],
                                 padding="SAME")
        conv_weights_2 = tf.compat.v1.get_variable(
            "weight2", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        conv2 = tf.nn.conv2d(pooling,
                             conv_weights_2,
                             strides=[1, 2, 2, 1],
                             padding="VALID")
        conv_weights_3 = tf.compat.v1.get_variable(
            "weight3", [3, 3, 16, 16],
            initializer=tf.compat.v1.random_normal_initializer())
        relu2 = tf.nn.relu(conv2)
        conv3 = tf.nn.conv2d(relu2,
                             conv_weights_3,
                             strides=[1, 2, 2, 1],
                             padding="VALID")

        relu3 = tf.nn.relu(conv3)
        relu6 = tf.nn.relu6(relu3, name='op_to_store')

        out_name = relu6.name.split(':')[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=sess.graph_def,
                output_node_names=[out_name])

            from lpot import Quantization, common
            quantizer = Quantization('fake_yaml.yaml')
            dataset = quantizer.dataset('dummy',
                                        shape=(100, 56, 56, 16),
                                        label=True)
            quantizer.eval_dataloader = common.DataLoader(dataset)
            quantizer.calib_dataloader = common.DataLoader(dataset)
            quantizer.model = output_graph_def
            output_graph = quantizer()

            quantize_v2_count = 0
            for i in output_graph.graph_def.node:
                if i.op == 'QuantizeV2':
                    quantize_v2_count += 1
                    break

            self.assertEqual(quantize_v2_count, 1)
Code Example #30
File: run_squad_tune.py  Project: mbasnet1/lpot
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--model_type",
                        default=None,
                        type=str,
                        required=True,
                        help="Model type selected in the list: " +
                        ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(ALL_MODELS))
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model checkpoints and predictions will be written."
    )

    # Other parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        help=
        "The input data dir. Should contain the .json files for the task. If not specified, will run with tensorflow_datasets."
    )
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3")

    parser.add_argument(
        '--version_2_with_negative',
        action='store_true',
        help=
        'If true, the SQuAD examples contain some that do not have an answer.')
    parser.add_argument(
        '--null_score_diff_threshold',
        type=float,
        default=0.0,
        help=
        "If null_score - best_non_null is greater than the threshold predict null."
    )

    parser.add_argument(
        "--max_seq_length",
        default=384,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded."
    )
    parser.add_argument(
        "--doc_stride",
        default=128,
        type=int,
        help=
        "When splitting up a long document into chunks, how much stride to take between chunks."
    )
    parser.add_argument(
        "--max_query_length",
        default=64,
        type=int,
        help=
        "The maximum number of tokens for the question. Questions longer than this will "
        "be truncated to this length.")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--evaluate_during_training",
        action='store_true',
        help="Rul evaluation during training at each logging step.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")

    parser.add_argument("--per_gpu_train_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help=
        "If > 0: set total number of training steps to perform. Override num_train_epochs."
    )
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument(
        "--n_best_size",
        default=20,
        type=int,
        help=
        "The total number of n-best predictions to generate in the nbest_predictions.json output file."
    )
    parser.add_argument(
        "--max_answer_length",
        default=30,
        type=int,
        help=
        "The maximum length of an answer that can be generated. This is needed because the start "
        "and end predictions are not conditioned on one another.")
    parser.add_argument(
        "--verbose_logging",
        action='store_true',
        help=
        "If true, all of the warnings related to data processing will be printed. "
        "A number of warnings are expected for a normal SQuAD evaluation.")

    parser.add_argument('--logging_steps',
                        type=int,
                        default=50,
                        help="Log every X updates steps.")
    parser.add_argument('--save_steps',
                        type=int,
                        default=50,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action='store_true',
        help=
        "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number"
    )
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir',
                        action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument(
        '--overwrite_cache',
        action='store_true',
        help="Overwrite the cached training and evaluation sets")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        '--fp16',
        action='store_true',
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit"
    )
    parser.add_argument(
        '--fp16_opt_level',
        type=str,
        default='O1',
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument("--do_calibration",
                        action='store_true',
                        help="Whether to do calibration.")
    parser.add_argument("--do_int8_inference",
                        action='store_true',
                        help="Whether to run int8 inference.")
    parser.add_argument("--do_fp32_inference",
                        action='store_true',
                        help="Whether to run fp32 inference.")
    parser.add_argument("--mkldnn_eval",
                        action='store_true',
                        help="evaluation with MKLDNN")
    parser.add_argument(
        "--tune",
        action='store_true',
        help="run Low Precision Optimization Tool to tune int8 acc.")
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="SQuAD task")
    parser.add_argument("--warmup",
                        type=int,
                        default=5,
                        help="warmup for performance")
    parser.add_argument('-i',
                        "--iter",
                        default=0,
                        type=int,
                        help='For accuracy measurement only.')
    parser.add_argument('--config',
                        type=str,
                        default='conf.yaml',
                        help="yaml config file")
    parser.add_argument('--benchmark',
                        dest='benchmark',
                        action='store_true',
                        help='run benchmark')
    parser.add_argument('-r',
                        "--accuracy_only",
                        dest='accuracy_only',
                        action='store_true',
                        help='For accuracy measurement only.')
    parser.add_argument(
        "--tuned_checkpoint",
        default='./saved_results',
        type=str,
        metavar='PATH',
        help=
        'path to checkpoint tuned by Low Precision Optimization Tool (default: ./saved_results)'
    )
    parser.add_argument('--int8',
                        dest='int8',
                        action='store_true',
                        help='run benchmark')

    args = parser.parse_args()

    args.predict_file = os.path.join(
        args.output_dir, 'predictions_{}_{}.txt'.format(
            list(filter(None, args.model_name_or_path.split('/'))).pop(),
            str(args.max_seq_length)))

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir
    ) and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    mix_qkv = False
    if args.do_calibration or args.do_int8_inference or args.tune:
        mix_qkv = True

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank, device, args.n_gpu, bool(args.local_rank != -1),
        args.fp16)

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None)
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=config,
        mix_qkv=mix_qkv,
        cache_dir=args.cache_dir if args.cache_dir else None)

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, 'einsum')
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                evaluate=False,
                                                output_examples=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Save the trained model and the tokenizer
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = model.module if hasattr(
            model,
            'module') else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.output_dir,
                                            force_download=True,
                                            mix_qkv=mix_qkv)
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce model loading logs

        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split(
                '-')[-1] if len(checkpoints) > 1 else ""
            if args.mkldnn_eval or args.do_fp32_inference:
                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True)
                model.to(args.device)

                # Evaluate
                result, _ = evaluate(args,
                                     model,
                                     tokenizer,
                                     prefix=global_step)
                result = dict(
                    (k + ('_{}'.format(global_step) if global_step else ''), v)
                    for k, v in result.items())
                results.update(result)

            if args.tune:

                def eval_func_for_lpot(model):
                    result, _ = evaluate(args, model, tokenizer)
                    for key in sorted(result.keys()):
                        logger.info("  %s = %s", key, str(result[key]))
                    bert_task_acc_keys = [
                        'best_f1', 'f1', 'mcc', 'spearmanr', 'acc'
                    ]
                    for key in bert_task_acc_keys:
                        if key in result.keys():
                            logger.info("Finally Eval {}:{}".format(
                                key, result[key]))
                            acc = result[key]
                            break
                    return acc

                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True,
                                                    mix_qkv=True)
                model.to(args.device)
                dataset = load_and_cache_examples(args,
                                                  tokenizer,
                                                  evaluate=True,
                                                  output_examples=False)
                args.eval_batch_size = args.per_gpu_eval_batch_size * max(
                    1, args.n_gpu)
                eval_task = "squad"
                from lpot import Quantization, common
                quantizer = Quantization(args.config)
                dataset = quantizer.dataset('bert',
                                            dataset=dataset,
                                            task=eval_task,
                                            model_type=args.model_type)
                quantizer.model = common.Model(model)
                quantizer.calib_dataloader = common.DataLoader(
                    dataset, batch_size=args.eval_batch_size)
                quantizer.eval_func = eval_func_for_lpot
                q_model = quantizer()
                q_model.save(args.tuned_checkpoint)
                exit(0)

            if args.benchmark or args.accuracy_only:
                model = model_class.from_pretrained(checkpoint, mix_qkv=True)
                model.to(args.device)
                if args.int8:
                    from lpot.utils.pytorch import load
                    new_model = load(
                        os.path.abspath(
                            os.path.expanduser(args.tuned_checkpoint)), model)
                else:
                    new_model = model
                result, _ = evaluate(args,
                                     new_model,
                                     tokenizer,
                                     prefix=global_step)
                exit(0)

            if args.do_calibration:
                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True,
                                                    mix_qkv=True)
                model.to(args.device)
                model.qconfig = default_per_channel_qconfig
                propagate_qconfig_(model)
                add_observer_(model)
                # Evaluate
                evaluate(args,
                         model,
                         tokenizer,
                         prefix=global_step,
                         calibration=True)
                convert(model, inplace=True)
                quantized_model_path = "squad" + str(
                    global_step) + "_quantized_model"
                if not os.path.exists(quantized_model_path):
                    os.makedirs(quantized_model_path)
                model.save_pretrained(quantized_model_path)
                result, _ = evaluate(args,
                                     model,
                                     tokenizer,
                                     prefix=global_step)
                result = dict(
                    (k + ('_{}'.format(global_step) if global_step else ''), v)
                    for k, v in result.items())
                results.update(result)
            if args.do_int8_inference:
                model = model_class.from_pretrained(checkpoint,
                                                    force_download=True,
                                                    mix_qkv=True)
                model.to(args.device)
                model.qconfig = default_per_channel_qconfig
                propagate_qconfig_(model)
                add_observer_(model)
                convert(model, inplace=True)
                quantized_model_path = "squad" + str(
                    global_step) + "_quantized_model"
                if not os.path.exists(quantized_model_path):
                    logger.info("Please run calibration first!")
                    return
                model_bin_file = os.path.join(quantized_model_path,
                                              "pytorch_model.bin")
                state_dict = torch.load(model_bin_file)
                model.load_state_dict(state_dict)
                print(model)
                with torch.autograd.profiler.profile() as prof:
                    result, _ = evaluate(args,
                                         model,
                                         tokenizer,
                                         prefix=global_step)
                print(prof.key_averages().table(sort_by="cpu_time_total"))
                result = dict(
                    (k + ('_{}'.format(global_step) if global_step else ''), v)
                    for k, v in result.items())
                results.update(result)
    logger.info("Results: {}".format(results))

    return results