def testModelEndToEnd(self, model_fn):
  # 1. Check whether quantized model graph can be constructed.
  model = model_fn(self)
  model = quantize.quantize_model(model)

  # 2. Sanity check to ensure basic training on random data works.
  x_train, y_train = self._create_test_data(model)
  model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])
  model.fit(x_train, y_train, epochs=100)

  x_test, y_test = self._create_test_data(model)
  y_tf = model.predict(x_test)

  # 3. Ensure conversion to TFLite works.
  _, tflite_file = tempfile.mkstemp('.tflite')
  print('TFLite File: ', tflite_file)
  with quantize.quantize_scope():
    utils.convert_keras_to_tflite(model, tflite_file)

  # 4. Verify input runs on converted model.
  y_tfl = self._execute_tflite(tflite_file, x_test, y_test)

  # 5. Verify results are the same in TF and TFL.
  # TODO(pulkitb): Temporarily raise tolerances since some rounding
  # changes in x86 kernels are causing values to differ by 'scale'.
  self.assertAllClose(y_tf, y_tfl, atol=1e-1, rtol=1e-1)
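# For reference, a minimal sketch of what the `_create_test_data` helper used
# above could look like. The real helper is not shown here, so the shape
# handling and the `num_samples` parameter are assumptions based on the call
# sites: random inputs and labels matching the model's input/output shapes.
def _create_test_data(self, model, num_samples=32):
  input_shape = (num_samples,) + tuple(model.input_shape[1:])
  output_shape = (num_samples,) + tuple(model.output_shape[1:])
  x = np.random.uniform(0, 1, size=input_shape).astype(np.float32)
  y = np.random.uniform(0, 1, size=output_shape).astype(np.float32)
  return x, y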
def _test_equivalent_to_tflite(self, model, is_tflite_quantized=False):
  _, keras_file = tempfile.mkstemp('.h5')
  _, tflite_file = tempfile.mkstemp('.tflite')

  model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

  model.fit(
      np.random.uniform(0, 1, size=[self.batch_size, 10, 10, 3]),
      np.random.uniform(0, 10, size=[self.batch_size, 8, 8, 2]),
      epochs=1,
      callbacks=[])

  # Prepare for inference.
  inp = np.random.uniform(0, 1, size=[self.batch_size, 10, 10, 3])
  inp = inp.astype(np.float32)

  # TensorFlow inference.
  tf_out = model.predict(inp)

  if is_tflite_quantized:
    scale, zero_point = self._compute_quantization_params(model)
    # TFLite input needs to be quantized.
    inp = inp * 255
    inp = inp.astype(np.uint8)

  # TensorFlow Lite inference.
  tf.keras.models.save_model(model, keras_file)
  with quantize.quantize_scope():
    utils.convert_keras_to_tflite(
        keras_file,
        tflite_file,
        custom_objects={'_ConvBatchNorm2D': _ConvBatchNorm2D},
        is_quantized=is_tflite_quantized)

  interpreter = tf.lite.Interpreter(model_path=tflite_file)
  interpreter.allocate_tensors()

  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']

  interpreter.set_tensor(input_index, inp)
  interpreter.invoke()
  tflite_out = interpreter.get_tensor(output_index)

  if is_tflite_quantized:
    # Dequantize outputs.
    tflite_out = [scale * (x - zero_point) for x in tflite_out]

    # Off by 1 in quantized output. Notably we cannot reduce this. There is
    # an existing mismatch between TensorFlow and TFLite (from
    # contrib.quantize days).
    self.assertAllClose(tf_out, tflite_out, atol=scale)
  else:
    # Taken from testFoldFusedBatchNorms from
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference_test.py#L230
    self.assertAllClose(tf_out, tflite_out, rtol=1e-04, atol=1e-06)
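# The `inp * 255` input quantization above relies on the input range being
# [0, 1]: for uint8 values in [0, 255], the asymmetric parameters work out to
# scale = 1/255 and zero_point = 0, so multiplying by 255 yields the quantized
# value directly. A quick standalone sanity check of that round trip
# (`_compute_quantization_params`, which supplies the output-side scale and
# zero point, is not shown here):
inp = np.random.uniform(0, 1, size=[1, 10, 10, 3]).astype(np.float32)
quantized = (inp * 255).astype(np.uint8)
dequantized = quantized.astype(np.float32) / 255.0
# Truncation error is bounded by one quantization step.
np.testing.assert_allclose(inp, dequantized, atol=1.0 / 255.0)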
def testMnistAccuracyInTFLite(self):
  num_classes = 10
  train_data, test_data, input_shape = test_utils.get_preprocessed_mnist_data(
      num_classes=num_classes)
  x_train, y_train = train_data
  x_test, y_test = test_data

  def linear(x):
    return x

  l = keras.layers
  model = keras.Sequential([
      QuantizeEmulate(
          l.Conv2D(32, 5, padding='same', activation='relu'),
          input_shape=input_shape,
          **self.params),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      QuantizeEmulate(
          l.Conv2D(64, 5, padding='same', activation='relu'), **self.params),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      l.Flatten(),
      QuantizeEmulate(l.Dense(1024, activation='relu'), **self.params),
      l.Dropout(0.4),
      # TODO(alanchiao): fuse softmax once we've handled it.
      # Once we use QuantizeAwareActivation, pre/post activation should be
      # handled. Adding a dummy activation to force adding of a quant operator.
      QuantizeEmulate(l.Dense(num_classes, activation=linear), **self.params),
      l.Softmax(),
  ])

  model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
  model.fit(
      x_train,
      y_train,
      batch_size=128,
      epochs=1,
      validation_data=(x_test, y_test))

  tf_accuracy = model.evaluate(x_test, y_test, verbose=0)[1]
  # High enough to validate that training is happening, and significantly
  # better than the 0.1 accuracy of random guessing.
  self.assertGreater(tf_accuracy, 0.4)

  _, keras_file = tempfile.mkstemp('.h5')
  _, tflite_file = tempfile.mkstemp('.tflite')

  keras.models.save_model(model, keras_file)
  utils.convert_keras_to_tflite(keras_file, tflite_file, {'linear': linear})
  tflite_accuracy = test_utils.eval_mnist_tflite(
      tflite_file, is_quantized=True)

  self.assertAlmostEqual(tf_accuracy, tflite_accuracy, delta=0.01)
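# For context, a sketch of the kind of evaluation `test_utils.eval_mnist_tflite`
# performs. The actual helper lives in test_utils and may differ; this
# stand-in simply runs the test set through the interpreter one sample at a
# time and computes top-1 accuracy against one-hot labels.
def eval_tflite_accuracy(tflite_file, x_test, y_test):
  interpreter = tf.lite.Interpreter(model_path=tflite_file)
  interpreter.allocate_tensors()
  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']

  correct = 0
  for x, y in zip(x_test, y_test):
    # The interpreter expects a batch dimension on each sample.
    interpreter.set_tensor(
        input_index, np.expand_dims(x, axis=0).astype(np.float32))
    interpreter.invoke()
    prediction = np.argmax(interpreter.get_tensor(output_index))
    correct += int(prediction == np.argmax(y))
  return correct / len(x_test)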
def testProductionModelConversionToTFLite(self):
  # Small input shape to keep the test running quickly.
  model = tf.keras.applications.mobilenet.MobileNet(
      weights=None, input_shape=(32, 32, 3))

  annotated = quantize_annotate(model)
  quantized_model = quantize_apply(annotated)

  _, tflite_file = tempfile.mkstemp('.tflite')
  with quantize.quantize_scope():
    utils.convert_keras_to_tflite(quantized_model, tflite_file)
def testQuantizesMnist(self):
  if not compat.is_v1_apis():
    return

  model = test_utils_mnist.sequential_model()
  x_train, y_train, x_test, y_test = test_utils_mnist.preprocessed_data()

  model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
  model.fit(x_train, y_train, batch_size=500)
  _, model_accuracy = model.evaluate(x_test, y_test, verbose=0)

  quantized_model = quantize.quantize_model(model)
  quantized_model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
  quantized_model.fit(x_train, y_train, batch_size=500)
  _, quantized_model_accuracy = quantized_model.evaluate(
      x_test, y_test, verbose=0)
  self.assertGreater(quantized_model_accuracy, 0.6)

  _, quantized_tflite_file = tempfile.mkstemp('.tflite')
  with quantize.quantize_scope():
    test_utils.convert_keras_to_tflite(
        model=quantized_model,
        output_path=quantized_tflite_file,
        is_quantized=True)
  quantized_model_tflite_accuracy = test_utils_mnist.eval_tflite(
      quantized_tflite_file)

  # Ensure the accuracies of the quantized TF and TFLite models are similar
  # to the original model's. There is no clear way to measure quantization
  # quality, but for MNIST, results that differ significantly likely indicate
  # an error in quantization.
  self.assertAllClose(
      model_accuracy, quantized_model_accuracy, rtol=0.2, atol=0.2)
  self.assertAllClose(
      quantized_model_accuracy,
      quantized_model_tflite_accuracy,
      rtol=0.2,
      atol=0.2)
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF1(
    self, layer_type, kwargs):
  if not compat.is_v1_apis():
    return
  if 'input_shape' not in kwargs:
    kwargs['input_shape'] = (5,)

  layer = layer_type(**kwargs)
  model = tf.keras.Sequential([layer])
  quantized_model = quantize.quantize_model(model)

  with quantize.quantize_scope():
    test_utils.convert_keras_to_tflite(
        model=quantized_model,
        output_path=None,
        is_quantized=True,
        inference_type=tf.uint8,
        inference_input_type=tf.uint8,
        input_quant_params=(0., 1.))
def testModelEndToEnd(self, model_type):
  # 1. Check whether quantized model graph can be constructed.
  model = self._get_model(model_type)
  model = quantize.quantize_model(model)

  # 2. Sanity check to ensure basic training on random data works.
  x_train, y_train = self._create_test_data(model)
  model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
  model.fit(x_train, y_train)

  # 3. Ensure conversion to TFLite works.
  _, tflite_file = tempfile.mkstemp('.tflite')
  print('TFLite File: ', tflite_file)
  with quantize.quantize_scope():
    utils.convert_keras_to_tflite(model, tflite_file)

  # 4. Verify input runs on converted model.
  self._verify_tflite(tflite_file, x_train, y_train)
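# A minimal sketch of the `_verify_tflite` check used above. The helper itself
# is not shown, so this is an assumption based on the call site: it verifies
# that the converted model accepts the test inputs and produces outputs of the
# expected shape, without asserting numerical equivalence.
def _verify_tflite(self, tflite_file, x_test, y_test):
  interpreter = tf.lite.Interpreter(model_path=tflite_file)
  interpreter.allocate_tensors()
  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']

  # Run a single sample through the interpreter to confirm inference works.
  interpreter.set_tensor(input_index, x_test[:1].astype(np.float32))
  interpreter.invoke()
  output = interpreter.get_tensor(output_index)
  self.assertEqual(output.shape[-1], y_test.shape[-1])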
def testEquivalentToTFLite(self):
  model = self._get_folded_batchnorm_model()

  _, keras_file = tempfile.mkstemp('.h5')
  _, tflite_file = tempfile.mkstemp('.tflite')

  model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

  model.fit(
      np.random.uniform(0, 1, size=[1, 10, 10, 3]),
      np.random.uniform(0, 10, size=[1, 8, 8, 2]),
      epochs=1,
      callbacks=[])

  # Prepare for inference.
  inp = np.random.uniform(0, 1, size=[1, 10, 10, 3])
  inp = inp.astype(np.float32)

  # TensorFlow inference.
  tf_out = model.predict(inp)

  # TensorFlow Lite inference.
  tf.keras.models.save_model(model, keras_file)
  utils.convert_keras_to_tflite(
      keras_file,
      tflite_file,
      custom_objects={'_ConvBatchNorm2D': _ConvBatchNorm2D},
      is_quantized=False)

  interpreter = tf.lite.Interpreter(model_path=tflite_file)
  interpreter.allocate_tensors()

  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']

  interpreter.set_tensor(input_index, inp)
  interpreter.invoke()
  tflite_out = interpreter.get_tensor(output_index)

  # Taken from testFoldFusedBatchNorms from
  # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference_test.py#L230
  self.assertAllClose(tf_out, tflite_out, rtol=1e-04, atol=1e-06)
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF2(
    self, layer_type, kwargs):
  # "FullInteger" in the sense that the check ignores the input and output
  # tensors, which remain float.
  if compat.is_v1_apis():
    return
  if 'input_shape' not in kwargs:
    kwargs['input_shape'] = (5,)

  layer = layer_type(**kwargs)
  model = tf.keras.Sequential([layer])
  quantized_model = quantize.quantize_model(model)

  _, quantized_tflite_file = tempfile.mkstemp('.tflite')
  with quantize.quantize_scope():
    test_utils.convert_keras_to_tflite(
        model=quantized_model,
        output_path=quantized_tflite_file,
        is_quantized=True,
        input_quant_params=(0., 1.),
        experimental_new_converter=True)

  interpreter = tf.lite.Interpreter(model_path=quantized_tflite_file)
  interpreter.allocate_tensors()

  input_tensor_details = interpreter.get_input_details()
  self.assertEqual(input_tensor_details[0]['dtype'], np.float32)
  output_tensor_details = interpreter.get_output_details()
  self.assertEqual(output_tensor_details[0]['dtype'], np.float32)

  tensor_details = interpreter.get_tensor_details()
  float_tensor_details = [
      t for t in tensor_details if t['dtype'] == np.float32
  ]
  # Only the input and outputs are float. The rest are integer.
  #
  # TODO(tfmot): update this test to use the full-integer path when
  # available, so that float_tensor_details has length 0.
  self.assertLen(float_tensor_details, 2)
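# Once the full-integer path mentioned in the TODO is available, the check
# above could be tightened so that no float tensors remain at all. A sketch of
# that stricter check, meant to live inside the test body; treating int8/int32
# as the expected weight/bias dtypes is an assumption:
#
#   allowed_dtypes = (np.int8, np.int32)
#   non_integer = [
#       t for t in tensor_details if t['dtype'] not in allowed_dtypes
#   ]
#   self.assertEmpty(non_integer)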
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF1(
    self, layer_type, kwargs):
  if not compat.is_v1_apis():
    return
  if 'input_shape' not in kwargs:
    kwargs['input_shape'] = (5,)

  layer = layer_type(**kwargs)
  model = tf.keras.Sequential([layer])
  quantized_model = quantize.quantize_model(model)

  with quantize.quantize_scope():
    test_utils.convert_keras_to_tflite(
        model=quantized_model,
        output_path=None,
        is_quantized=True,
        inference_type=tf.uint8,
        inference_input_type=tf.uint8,
        input_quant_params=(0., 1.),
        # Set to False to throw errors when FakeQuants are not placed
        # everywhere needed to create a full-integer model. Errors are not
        # thrown when set to True.
        experimental_new_converter=False)
def _test_equal_tf_and_tflite_outputs(self,
                                      tf_model,
                                      is_tflite_quantized=False):
  _, tflite_file = tempfile.mkstemp('.tflite')

  batched_input_shape = self._get_batched_input_shape()
  output_shape = self._get_output_shape()

  tf_model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

  tf_model.fit(
      np.random.uniform(0, 1, size=batched_input_shape),
      np.random.uniform(0, 10, size=output_shape),
      epochs=1,
      callbacks=[])

  # Prepare for inference.
  inp = np.random.uniform(0, 1, size=batched_input_shape)
  inp = inp.astype(np.float32)

  if is_tflite_quantized:
    real_min = keras.backend.eval(tf_model.layers[-1]._activation_min_var)
    real_max = keras.backend.eval(tf_model.layers[-1]._activation_max_var)
    scale, zero_point = self._get_asymmetric_quant_params(
        real_min, real_max, -128.0, 127.0)

    # TFLite input needs to be quantized.
    real_input_min = 0.0
    real_input_max = 1.0
    inp_scale, inp_zp = self._get_asymmetric_quant_params(
        real_input_min, real_input_max, -128.0, 127.0)

    inp8 = np.round(inp / inp_scale + inp_zp)
    inp8 = inp8.astype(np.int8)

    # Dequantize, so the TF model sees the same values TFLite will see.
    inp = (inp8.astype(np.float32) - inp_zp) * inp_scale

  # TensorFlow inference.
  tf_out = tf_model.predict(inp)

  # TensorFlow Lite inference.
  with quantize.quantize_scope():
    utils.convert_keras_to_tflite(
        tf_model,
        tflite_file,
        custom_objects={
            '_ConvBatchNorm2D': _ConvBatchNorm2D,
            '_DepthwiseConvBatchNorm2D': _DepthwiseConvBatchNorm2D,
        },
        is_quantized=is_tflite_quantized,
        inference_input_type=tf.lite.constants.INT8)

  interpreter = tf.lite.Interpreter(model_path=tflite_file)
  interpreter.allocate_tensors()

  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']

  if is_tflite_quantized:
    interpreter.set_tensor(input_index, inp8)
  else:
    interpreter.set_tensor(input_index, inp)
  interpreter.invoke()
  tflite_out = interpreter.get_tensor(output_index)

  if is_tflite_quantized:
    # Dequantize outputs.
    tflite_out = [scale * (x - zero_point) for x in tflite_out]

    # TODO(pulkitb): The DConv quantized test has a single value (0.065% of
    # all values) which is off by 1 scale. Investigate further and introduce
    # stricter testing by removing atol=scale.
    self.assertAllClose(tf_out, tflite_out, atol=scale)
  else:
    # Taken from testFoldFusedBatchNorms from
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference_test.py#L230
    self.assertAllClose(tf_out, tflite_out, rtol=1e-04, atol=1e-06)
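# A minimal sketch of the asymmetric quantization parameters computed by the
# `_get_asymmetric_quant_params` helper used above. The real helper is not
# shown; this follows the standard TFLite asymmetric scheme, with the zero
# point nudged into the representable range.
def _get_asymmetric_quant_params(self, real_min, real_max, qmin, qmax):
  # The real range must include zero so that zero is exactly representable.
  real_min = min(real_min, 0.0)
  real_max = max(real_max, 0.0)
  scale = (real_max - real_min) / (qmax - qmin)
  zero_point = round(qmin - real_min / scale)
  zero_point = int(max(qmin, min(qmax, zero_point)))
  return scale, zero_point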