Example 1
    def testModelEndToEnd(self, model_fn):
        # 1. Check whether quantized model graph can be constructed.
        model = model_fn(self)
        model = quantize.quantize_model(model)

        # 2. Sanity check to ensure basic training on random data works.
        x_train, y_train = self._create_test_data(model)
        model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=100)

        x_test, y_test = self._create_test_data(model)

        y_tf = model.predict(x_test)

        # 3. Ensure conversion to TFLite works.
        _, tflite_file = tempfile.mkstemp('.tflite')
        print('TFLite File: ', tflite_file)
        with quantize.quantize_scope():
            utils.convert_keras_to_tflite(model, tflite_file)

        # 4. Verify input runs on converted model.
        y_tfl = self._execute_tflite(tflite_file, x_test, y_test)

        # 5. Verify results are the same in TF and TFL.
        # TODO(pulkitb): Temporarily raise tolerances since some rounding
        # changes in x86 kernels are causing values to differ by 'scale'.
        self.assertAllClose(y_tf, y_tfl, atol=1e-1, rtol=1e-1)
    def _test_equivalent_to_tflite(self, model, is_tflite_quantized=False):
        _, keras_file = tempfile.mkstemp('.h5')
        _, tflite_file = tempfile.mkstemp('.tflite')

        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['accuracy'])

        model.fit(np.random.uniform(0, 1, size=[self.batch_size, 10, 10, 3]),
                  np.random.uniform(0, 10, size=[self.batch_size, 8, 8, 2]),
                  epochs=1,
                  callbacks=[])

        # Prepare for inference.
        inp = np.random.uniform(0, 1, size=[self.batch_size, 10, 10, 3])
        inp = inp.astype(np.float32)

        # TensorFlow inference.
        tf_out = model.predict(inp)

        if is_tflite_quantized:
            scale, zero_point = self._compute_quantization_params(model)

            # TFLite input needs to be quantized.
            inp = inp * 255
            inp = inp.astype(np.uint8)

        # TensorFlow Lite inference.
        tf.keras.models.save_model(model, keras_file)
        with quantize.quantize_scope():
            utils.convert_keras_to_tflite(
                keras_file,
                tflite_file,
                custom_objects={'_ConvBatchNorm2D': _ConvBatchNorm2D},
                is_quantized=is_tflite_quantized)

        interpreter = tf.lite.Interpreter(model_path=tflite_file)
        interpreter.allocate_tensors()
        input_index = interpreter.get_input_details()[0]['index']
        output_index = interpreter.get_output_details()[0]['index']

        interpreter.set_tensor(input_index, inp)
        interpreter.invoke()
        tflite_out = interpreter.get_tensor(output_index)

        if is_tflite_quantized:
            # Dequantize outputs.
            tflite_out = [scale * (x - zero_point) for x in tflite_out]
            # The quantized output can be off by one quantization step, and we
            # cannot tighten this further: there is a long-standing mismatch
            # between TensorFlow and TFLite (dating back to contrib.quantize).
            self.assertAllClose(tf_out, tflite_out, atol=scale)
        else:
            # Taken from testFoldFusedBatchNorms from
            # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference_test.py#L230
            self.assertAllClose(tf_out, tflite_out, rtol=1e-04, atol=1e-06)
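
The _compute_quantization_params helper used above is not shown in this excerpt. Below is a minimal sketch of what it could compute, assuming the standard uint8 asymmetric (affine) quantization scheme that matches the inp * 255 quantize step and the scale * (x - zero_point) dequantize step in the test; the function names and defaults are illustrative assumptions, not the library's API.

import numpy as np


def compute_quantization_params(real_min, real_max, quant_min=0, quant_max=255):
    # Illustrative sketch of standard asymmetric (affine) quantization params:
    # scale maps the real range onto the integer range, and zero_point is the
    # integer that represents real 0.0.
    real_min = min(real_min, 0.0)  # the representable range must include 0.0
    real_max = max(real_max, 0.0)
    scale = (real_max - real_min) / (quant_max - quant_min)
    zero_point = int(round(quant_min - real_min / scale))
    zero_point = max(quant_min, min(quant_max, zero_point))
    return scale, zero_point


def quantize_uint8(x, scale, zero_point):
    # Real values -> uint8, the inverse of the dequantize step in the test.
    return np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)


# For inputs in [0, 1] this gives scale = 1/255 and zero_point = 0, which is
# why the test can simply use inp * 255 as its quantized input.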
Example 3
    def testMnistAccuracyinTFLite(self):
        num_classes = 10
        train_data, test_data, input_shape = test_utils.get_preprocessed_mnist_data(
            num_classes=num_classes)
        x_train, y_train = train_data
        x_test, y_test = test_data

        def linear(x):
            return x

        l = keras.layers
        model = keras.Sequential([
            QuantizeEmulate(l.Conv2D(32, 5, padding='same', activation='relu'),
                            input_shape=input_shape,
                            **self.params),
            l.MaxPooling2D((2, 2), (2, 2), padding='same'),
            QuantizeEmulate(l.Conv2D(64, 5, padding='same', activation='relu'),
                            **self.params),
            l.MaxPooling2D((2, 2), (2, 2), padding='same'),
            l.Flatten(),
            QuantizeEmulate(l.Dense(1024, activation='relu'), **self.params),
            l.Dropout(0.4),
            # TODO(alanchiao): fuse softmax once we've handled it.
            # Once we use QuantizeAwareActivation, pre/post activations should be
            # handled. Add a dummy activation to force insertion of a quant operator.
            QuantizeEmulate(l.Dense(num_classes, activation=linear),
                            **self.params),
            l.Softmax(),
        ])

        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['accuracy'])

        model.fit(x_train,
                  y_train,
                  batch_size=128,
                  epochs=1,
                  validation_data=(x_test, y_test))

        tf_accuracy = model.evaluate(x_test, y_test, verbose=0)[1]

        # High enough to validate that training is happening: significantly
        # better than the 0.1 accuracy of random guessing.
        self.assertGreater(tf_accuracy, 0.4)

        _, keras_file = tempfile.mkstemp('.h5')
        _, tflite_file = tempfile.mkstemp('.tflite')

        keras.models.save_model(model, keras_file)
        utils.convert_keras_to_tflite(keras_file, tflite_file,
                                      {'linear': linear})
        tflite_accuracy = test_utils.eval_mnist_tflite(tflite_file,
                                                       is_quantized=True)

        self.assertAlmostEqual(tf_accuracy, tflite_accuracy, delta=0.01)
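
The test_utils.eval_mnist_tflite helper is not shown here. A rough sketch of how such a TFLite accuracy evaluation could be written with tf.lite.Interpreter follows; the function name and signature are assumptions for illustration, and the real helper may preprocess or quantize inputs differently.

import numpy as np
import tensorflow as tf


def eval_tflite_accuracy(tflite_file, x_test, y_test):
    # Illustrative sketch: run every test image through the TFLite interpreter
    # and compare the arg-max prediction against the one-hot label.
    interpreter = tf.lite.Interpreter(model_path=tflite_file)
    interpreter.allocate_tensors()
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    num_correct = 0
    for image, label in zip(x_test, y_test):
        # The interpreter expects a batch dimension on the input.
        interpreter.set_tensor(input_index,
                               np.expand_dims(image, axis=0).astype(np.float32))
        interpreter.invoke()
        prediction = interpreter.get_tensor(output_index)
        num_correct += int(np.argmax(prediction) == np.argmax(label))
    return num_correct / len(x_test)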
Example 4
    def testProductionModelConversionToTFLite(self):
        # Small input shape to keep the test running quickly.
        model = tf.keras.applications.mobilenet.MobileNet(weights=None,
                                                          input_shape=(32, 32,
                                                                       3))

        annotated = quantize_annotate(model)
        quantized_model = quantize_apply(annotated)

        _, tflite_file = tempfile.mkstemp('.tflite')

        with quantize.quantize_scope():
            utils.convert_keras_to_tflite(quantized_model, tflite_file)
    def testQuantizesMnist(self):
        if not compat.is_v1_apis():
            return

        model = test_utils_mnist.sequential_model()
        x_train, y_train, x_test, y_test = test_utils_mnist.preprocessed_data()

        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['accuracy'])
        model.fit(x_train, y_train, batch_size=500)
        _, model_accuracy = model.evaluate(x_test, y_test, verbose=0)

        quantized_model = quantize.quantize_model(model)
        quantized_model.compile(loss='categorical_crossentropy',
                                optimizer='sgd',
                                metrics=['accuracy'])

        quantized_model.fit(x_train, y_train, batch_size=500)
        _, quantized_model_accuracy = quantized_model.evaluate(x_test,
                                                               y_test,
                                                               verbose=0)

        self.assertGreater(quantized_model_accuracy, 0.6)

        _, quantized_tflite_file = tempfile.mkstemp('.tflite')

        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(
                model=quantized_model,
                output_path=quantized_tflite_file,
                is_quantized=True)
        quantized_model_tflite_accuracy = test_utils_mnist.eval_tflite(
            quantized_tflite_file)

        # Ensure the accuracy of the quantized TF and TFLite models is similar to
        # the original model. There is no clear way to measure quantization quality,
        # but for MNIST, results that differ by a lot likely indicate an error in
        # quantization.
        self.assertAllClose(model_accuracy,
                            quantized_model_accuracy,
                            rtol=0.2,
                            atol=0.2)
        self.assertAllClose(quantized_model_accuracy,
                            quantized_model_tflite_accuracy,
                            rtol=0.2,
                            atol=0.2)
    def testQuantizeSingleLayer_ProducesFullIntegerModel_TF1(
            self, layer_type, kwargs):
        if not compat.is_v1_apis():
            return

        if 'input_shape' not in kwargs:
            kwargs['input_shape'] = (5, )

        layer = layer_type(**kwargs)
        model = tf.keras.Sequential([layer])
        quantized_model = quantize.quantize_model(model)

        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(model=quantized_model,
                                               output_path=None,
                                               is_quantized=True,
                                               inference_type=tf.uint8,
                                               inference_input_type=tf.uint8,
                                               input_quant_params=(0., 1.))
Example 7
  def testModelEndToEnd(self, model_type):
    # 1. Check whether quantized model graph can be constructed.
    model = self._get_model(model_type)
    model = quantize.quantize_model(model)

    # 2. Sanity check to ensure basic training on random data works.
    x_train, y_train = self._create_test_data(model)
    model.compile(
        loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    model.fit(x_train, y_train)

    # 3. Ensure conversion to TFLite works.
    _, tflite_file = tempfile.mkstemp('.tflite')
    print('TFLite File: ', tflite_file)
    with quantize.quantize_scope():
      utils.convert_keras_to_tflite(model, tflite_file)

    # 4. Verify input runs on converted model.
    self._verify_tflite(tflite_file, x_train, y_train)
  def testEquivalentToTFLite(self):
    model = self._get_folded_batchnorm_model()

    _, keras_file = tempfile.mkstemp('.h5')
    _, tflite_file = tempfile.mkstemp('.tflite')

    model.compile(
        loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

    model.fit(
        np.random.uniform(0, 1, size=[1, 10, 10, 3]),
        np.random.uniform(0, 10, size=[1, 8, 8, 2]),
        epochs=1,
        callbacks=[])

    # Prepare for inference.
    inp = np.random.uniform(0, 1, size=[1, 10, 10, 3])
    inp = inp.astype(np.float32)

    # TensorFlow inference.
    tf_out = model.predict(inp)

    # TensorFlow Lite inference.
    tf.keras.models.save_model(model, keras_file)
    utils.convert_keras_to_tflite(
        keras_file,
        tflite_file,
        custom_objects={'_ConvBatchNorm2D': _ConvBatchNorm2D},
        is_quantized=False)

    interpreter = tf.lite.Interpreter(model_path=tflite_file)
    interpreter.allocate_tensors()
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    interpreter.set_tensor(input_index, inp)
    interpreter.invoke()
    tflite_out = interpreter.get_tensor(output_index)

    # Taken from testFoldFusedBatchNorms from
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference_test.py#L230
    self.assertAllClose(tf_out, tflite_out, rtol=1e-04, atol=1e-06)
    def testQuantizeSingleLayer_ProducesFullIntegerModel_TF2(
            self, layer_type, kwargs):
        # "FullInteger" in the sense that ignores inputs and outputs.
        if compat.is_v1_apis():
            return

        if 'input_shape' not in kwargs:
            kwargs['input_shape'] = (5, )

        layer = layer_type(**kwargs)
        model = tf.keras.Sequential([layer])
        quantized_model = quantize.quantize_model(model)

        _, quantized_tflite_file = tempfile.mkstemp('.tflite')

        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(
                model=quantized_model,
                output_path=quantized_tflite_file,
                is_quantized=True,
                input_quant_params=(0., 1.),
                experimental_new_converter=True)

        interpreter = tf.lite.Interpreter(model_path=quantized_tflite_file)
        interpreter.allocate_tensors()

        input_tensor_details = interpreter.get_input_details()
        self.assertEqual(input_tensor_details[0]['dtype'], np.float32)

        output_tensor_details = interpreter.get_output_details()
        self.assertEqual(output_tensor_details[0]['dtype'], np.float32)

        tensor_details = interpreter.get_tensor_details()
        float_tensor_details = [
            t for t in tensor_details if t['dtype'] == np.float32
        ]
        # Only the input and output are float. The rest are integer.
        #
        # TODO(tfmot): update this test to use the full-integer path when available,
        # so that float_tensor_details has length 0.
        self.assertLen(float_tensor_details, 2)
    def testQuantizeSingleLayer_ProducesFullIntegerModel_TF1(
            self, layer_type, kwargs):
        if not compat.is_v1_apis():
            return

        if 'input_shape' not in kwargs:
            kwargs['input_shape'] = (5, )

        layer = layer_type(**kwargs)
        model = tf.keras.Sequential([layer])
        quantized_model = quantize.quantize_model(model)

        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(
                model=quantized_model,
                output_path=None,
                is_quantized=True,
                inference_type=tf.uint8,
                inference_input_type=tf.uint8,
                input_quant_params=(0., 1.),
                # Set to False so that errors are raised when FakeQuants are not
                # placed everywhere needed to create a full-integer model. No
                # errors are raised when set to True.
                experimental_new_converter=False)
Example 11
    def _test_equal_tf_and_tflite_outputs(self,
                                          tf_model,
                                          is_tflite_quantized=False):
        _, tflite_file = tempfile.mkstemp('.tflite')

        batched_input_shape = self._get_batched_input_shape()
        output_shape = self._get_output_shape()

        tf_model.compile(loss='categorical_crossentropy',
                         optimizer='sgd',
                         metrics=['accuracy'])

        tf_model.fit(np.random.uniform(0, 1, size=batched_input_shape),
                     np.random.uniform(0, 10, size=output_shape),
                     epochs=1,
                     callbacks=[])
        # Prepare for inference.
        inp = np.random.uniform(0, 1, size=batched_input_shape)
        inp = inp.astype(np.float32)

        if is_tflite_quantized:
            real_min = keras.backend.eval(
                tf_model.layers[-1]._activation_min_var)
            real_max = keras.backend.eval(
                tf_model.layers[-1]._activation_max_var)
            scale, zero_point = self._get_asymmetric_quant_params(
                real_min, real_max, -128.0, 127.0)

            # TFLite input needs to be quantized.
            real_input_min = 0.0
            real_input_max = 1.0
            inp_scale, inp_zp = self._get_asymmetric_quant_params(
                real_input_min, real_input_max, -128.0, 127.0)

            inp8 = np.round(inp / inp_scale + inp_zp)
            inp8 = inp8.astype(np.int8)

            # Dequant
            inp = (inp8.astype(np.float32) - inp_zp) * inp_scale

        # TensorFlow inference.
        tf_out = tf_model.predict(inp)

        # TensorFlow Lite inference.
        with quantize.quantize_scope():
            utils.convert_keras_to_tflite(
                tf_model,
                tflite_file,
                custom_objects={
                    '_ConvBatchNorm2D': _ConvBatchNorm2D,
                    '_DepthwiseConvBatchNorm2D': _DepthwiseConvBatchNorm2D,
                },
                is_quantized=is_tflite_quantized,
                inference_input_type=tf.lite.constants.INT8)

        interpreter = tf.lite.Interpreter(model_path=tflite_file)
        interpreter.allocate_tensors()
        input_index = interpreter.get_input_details()[0]['index']
        output_index = interpreter.get_output_details()[0]['index']

        if is_tflite_quantized:
            interpreter.set_tensor(input_index, inp8)
        else:
            interpreter.set_tensor(input_index, inp)

        interpreter.invoke()
        tflite_out = interpreter.get_tensor(output_index)

        if is_tflite_quantized:
            # Dequantize outputs.
            tflite_out = [scale * (x - zero_point) for x in tflite_out]

            # TODO(pulkitb): The quantized DConv test somehow has a single value
            # (0.065% of total values) that is off by one scale step. Investigate
            # further and introduce stricter testing by removing atol=scale.
            self.assertAllClose(tf_out, tflite_out, atol=scale)
        else:
            # Taken from testFoldFusedBatchNorms from
            # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference_test.py#L230
            self.assertAllClose(tf_out, tflite_out, rtol=1e-04, atol=1e-06)
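
To make the int8 path above concrete: with an input known to lie in [0, 1] and the quantized range [-128, 127], the asymmetric parameters work out to inp_scale = 1/255 and inp_zp = -128, so the quantize/dequantize round trip applied to inp behaves as in this worked sketch (illustration only, not part of the original test).

import numpy as np

# Worked example of the int8 round trip used for the interpreter input above.
inp_scale = (1.0 - 0.0) / (127.0 - (-128.0))        # = 1/255
inp_zp = -128.0 - 0.0 / inp_scale                    # = -128.0

inp = np.array([0.0, 0.5, 1.0], dtype=np.float32)
inp8 = np.round(inp / inp_scale + inp_zp).astype(np.int8)   # [-128, 0, 127]
# Dequantizing recovers the input up to one quantization step:
dequant = (inp8.astype(np.float32) - inp_zp) * inp_scale    # [0.0, ~0.502, 1.0]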