def testQuantizeModel_Passes(self):
  model = keras.Sequential([
      keras.layers.Dense(10, input_shape=(5,)),
      keras.layers.Dropout(0.4)
  ])
  quantize.quantize_model(model)
def testSerialization_TFCheckpoint(self):
  model = test_utils.build_simple_dense_model()
  quantized_model = quantize.quantize_model(model)
  self._train_model(quantized_model)

  _, tf_weights = tempfile.mkstemp('.tf')
  quantized_model.save_weights(tf_weights)

  same_architecture_model = test_utils.build_simple_dense_model()
  same_architecture_model = quantize.quantize_model(same_architecture_model)
  same_architecture_model.load_weights(tf_weights)

  self._assert_outputs_equal(quantized_model, same_architecture_model)
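# _train_model, _assert_outputs_equal, and _assert_models_equal are helpers
# on the test class that are not shown in this excerpt. A minimal sketch of
# plausible implementations, assuming random training data and a simple
# prediction comparison; the originals may differ, and _create_test_data is
# sketched further below:
def _train_model(self, model):
  x_train, y_train = self._create_test_data(model)
  model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])
  model.fit(x_train, y_train, epochs=1)

def _assert_outputs_equal(self, model1, model2):
  # Two models with the same architecture and weights should predict
  # identically on the same input.
  x = np.random.uniform(size=(2,) + model1.input_shape[1:])
  self.assertAllClose(model1.predict(x), model2.predict(x))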
def testModelEndToEnd(self, model_fn):
  # 1. Check whether quantized model graph can be constructed.
  model = model_fn(self)
  model = quantize.quantize_model(model)

  # 2. Sanity check to ensure basic training on random data works.
  x_train, y_train = self._create_test_data(model)
  model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])
  model.fit(x_train, y_train, epochs=100)

  x_test, y_test = self._create_test_data(model)
  y_tf = model.predict(x_test)

  # 3. Ensure conversion to TFLite works.
  _, tflite_file = tempfile.mkstemp('.tflite')
  print('TFLite File: ', tflite_file)
  with quantize.quantize_scope():
    utils.convert_keras_to_tflite(model, tflite_file)

  # 4. Verify input runs on converted model.
  y_tfl = self._execute_tflite(tflite_file, x_test, y_test)

  # 5. Verify results are the same in TF and TFL.
  # TODO(pulkitb): Temporarily raise tolerances since some rounding
  # changes in x86 kernels are causing values to differ by 'scale'.
  self.assertAllClose(y_tf, y_tfl, atol=1e-1, rtol=1e-1)
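# _create_test_data and _execute_tflite are also class helpers not shown
# here. A rough sketch under the assumption of a single input/output and
# random data; the actual helpers may shape or batch data differently:
def _create_test_data(self, model, samples=32):
  x = np.random.uniform(size=(samples,) + model.input_shape[1:])
  y = np.random.uniform(size=(samples,) + model.output_shape[1:])
  return x.astype(np.float32), y.astype(np.float32)

def _execute_tflite(self, tflite_file, x_test, y_test):
  # Runs every test sample through the TFLite interpreter one at a time.
  interpreter = tf.lite.Interpreter(model_path=tflite_file)
  interpreter.allocate_tensors()
  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']
  outputs = []
  for x in x_test:
    interpreter.set_tensor(input_index, np.expand_dims(x, 0))
    interpreter.invoke()
    outputs.append(interpreter.get_tensor(output_index)[0])
  return np.array(outputs)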
def testSerialization_KerasModel(self):
  model = test_utils.build_simple_dense_model()
  quantized_model = quantize.quantize_model(model)
  self._train_model(quantized_model)

  _, model_file = tempfile.mkstemp('.h5')
  tf.keras.models.save_model(quantized_model, model_file)

  with quantize.quantize_scope():
    loaded_model = tf.keras.models.load_model(model_file)

  self._assert_models_equal(quantized_model, loaded_model)
def testSerialization_SavedModel(self):
  if compat.is_v1_apis():
    return

  model = test_utils.build_simple_dense_model()
  quantized_model = quantize.quantize_model(model)
  self._train_model(quantized_model)

  model_dir = tempfile.mkdtemp()
  tf.keras.models.save_model(quantized_model, model_dir)
  loaded_model = tf.keras.models.load_model(model_dir)

  self._assert_outputs_equal(quantized_model, loaded_model)
def testQuantizesMnist(self):
  if not compat.is_v1_apis():
    return

  model = test_utils_mnist.sequential_model()
  x_train, y_train, x_test, y_test = test_utils_mnist.preprocessed_data()

  model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
  model.fit(x_train, y_train, batch_size=500)
  _, model_accuracy = model.evaluate(x_test, y_test, verbose=0)

  quantized_model = quantize.quantize_model(model)
  quantized_model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
  quantized_model.fit(x_train, y_train, batch_size=500)
  _, quantized_model_accuracy = quantized_model.evaluate(
      x_test, y_test, verbose=0)

  self.assertGreater(quantized_model_accuracy, 0.6)

  _, quantized_tflite_file = tempfile.mkstemp('.tflite')
  with quantize.quantize_scope():
    test_utils.convert_keras_to_tflite(
        model=quantized_model,
        output_path=quantized_tflite_file,
        is_quantized=True)
  quantized_model_tflite_accuracy = test_utils_mnist.eval_tflite(
      quantized_tflite_file)

  # Ensure accuracy for the quantized TF and TFLite models is similar to the
  # original model. There is no clear way to measure quantization accuracy
  # loss, but for MNIST, results which differ a lot likely suggest an error
  # in quantization.
  self.assertAllClose(
      model_accuracy, quantized_model_accuracy, rtol=0.2, atol=0.2)
  self.assertAllClose(
      quantized_model_accuracy, quantized_model_tflite_accuracy,
      rtol=0.2, atol=0.2)
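# test_utils_mnist.eval_tflite is not shown in this excerpt. A plausible
# sketch, assuming a float-input TFLite model evaluated sample-by-sample
# for top-1 accuracy; a fully-integer model would instead need uint8 inputs
# with scale/zero-point handling:
def eval_tflite(tflite_file):
  _, _, x_test, y_test = preprocessed_data()
  interpreter = tf.lite.Interpreter(model_path=tflite_file)
  interpreter.allocate_tensors()
  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']
  correct = 0
  for x, y in zip(x_test, y_test):
    interpreter.set_tensor(input_index, np.expand_dims(x, 0).astype(np.float32))
    interpreter.invoke()
    if np.argmax(interpreter.get_tensor(output_index)) == np.argmax(y):
      correct += 1
  return correct / len(x_test)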
def testSerialization_TF1SavedModel(self):
  if not compat.is_v1_apis():
    return

  model = test_utils.build_simple_dense_model()
  quantized_model = quantize.quantize_model(model)
  self._train_model(quantized_model)

  saved_model_dir = tempfile.mkdtemp()
  with quantize.quantize_scope():
    tf.keras.experimental.export_saved_model(quantized_model, saved_model_dir)

  with quantize.quantize_scope():
    loaded_model = tf.keras.experimental.load_from_saved_model(saved_model_dir)

  self._assert_outputs_equal(quantized_model, loaded_model)
def testModelEndToEnd(self, model_type):
  # 1. Check whether quantized model graph can be constructed.
  model = self._get_model(model_type)
  model = quantize.quantize_model(model)

  # 2. Sanity check to ensure basic training on random data works.
  x_train, y_train = self._create_test_data(model)
  model.compile(
      loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
  model.fit(x_train, y_train)

  # 3. Ensure conversion to TFLite works.
  _, tflite_file = tempfile.mkstemp('.tflite')
  print('TFLite File: ', tflite_file)
  with quantize.quantize_scope():
    utils.convert_keras_to_tflite(model, tflite_file)

  # 4. Verify input runs on converted model.
  self._verify_tflite(tflite_file, x_train, y_train)
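# _verify_tflite is presumably a thin wrapper over the TFLite execution
# helper sketched after the first testModelEndToEnd above. A hypothetical
# sketch that only checks the converted model runs and produces one output
# per input:
def _verify_tflite(self, tflite_file, x_test, y_test):
  y_tfl = self._execute_tflite(tflite_file, x_test, y_test)
  self.assertLen(y_tfl, len(x_test))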
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF2(
    self, layer_type, kwargs):
  # "FullInteger" in the sense that it ignores inputs and outputs.
  if compat.is_v1_apis():
    return

  if 'input_shape' not in kwargs:
    kwargs['input_shape'] = (5,)

  layer = layer_type(**kwargs)
  model = tf.keras.Sequential([layer])
  quantized_model = quantize.quantize_model(model)

  _, quantized_tflite_file = tempfile.mkstemp('.tflite')
  with quantize.quantize_scope():
    test_utils.convert_keras_to_tflite(
        model=quantized_model,
        output_path=quantized_tflite_file,
        is_quantized=True,
        input_quant_params=(0., 1.),
        experimental_new_converter=True)

  interpreter = tf.lite.Interpreter(model_path=quantized_tflite_file)
  interpreter.allocate_tensors()

  input_tensor_details = interpreter.get_input_details()
  self.assertEqual(input_tensor_details[0]['dtype'], np.float32)

  output_tensor_details = interpreter.get_output_details()
  self.assertEqual(output_tensor_details[0]['dtype'], np.float32)

  tensor_details = interpreter.get_tensor_details()
  float_tensor_details = [
      t for t in tensor_details if t['dtype'] == np.float32
  ]
  # Only the inputs and outputs are float. The rest are integer.
  #
  # TODO(tfmot): update this test to use the full-integer path when
  # available, so that float_tensor_details has length 0.
  self.assertLen(float_tensor_details, 2)
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF1(
    self, layer_type, kwargs):
  if not compat.is_v1_apis():
    return

  if 'input_shape' not in kwargs:
    kwargs['input_shape'] = (5,)

  layer = layer_type(**kwargs)
  model = tf.keras.Sequential([layer])
  quantized_model = quantize.quantize_model(model)

  with quantize.quantize_scope():
    test_utils.convert_keras_to_tflite(
        model=quantized_model,
        output_path=None,
        is_quantized=True,
        inference_type=tf.uint8,
        inference_input_type=tf.uint8,
        input_quant_params=(0., 1.),
        # Set to False to throw errors when FakeQuants are not placed
        # everywhere needed to create a full-integer model. Errors are
        # not thrown when set to True.
        experimental_new_converter=False)
def testQuantizeLayer_Fails(self):
  layer = keras.layers.Dense(10, input_shape=(5,))

  with self.assertRaises(ValueError):
    quantize.quantize_model(layer)
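# quantize_model accepts only a tf.keras Model, not a bare layer. To
# quantize a subset of layers, tfmot's public annotate APIs are the
# supported route. A minimal sketch using quantize_annotate_layer and
# quantize_apply; the layer choices here are illustrative:
import tensorflow_model_optimization as tfmot

annotate = tfmot.quantization.keras.quantize_annotate_layer
annotated_model = tf.keras.Sequential([
    annotate(tf.keras.layers.Dense(10, input_shape=(5,))),
    tf.keras.layers.Dropout(0.4),  # left unannotated, stays float
])
quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)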