def testEndToEnd(self):
    """Test End to End clustering.

    Clusters a two-layer dense model, fits it for a single step, strips the
    clustering wrappers, converts the result to TFLite and hands the written
    file to `self._verify_tflite` together with `self.x_train`.
    """
    original_model = keras.Sequential([
        layers.Dense(2, input_shape=(2,)),
        layers.Dense(2),
    ])
    clustered_model = cluster.cluster_weights(original_model, **self.params)
    clustered_model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer="adam",
        metrics=["accuracy"],
    )
    clustered_model.fit(x=self.dataset_generator(), steps_per_epoch=1)
    stripped_model = cluster.strip_clustering(clustered_model)

    # mkstemp() hands back an already-open OS-level descriptor along with the
    # path. Only the path is needed here, so close the descriptor right away
    # instead of leaking it.
    tflite_fd, tflite_file = tempfile.mkstemp(".tflite")
    os.close(tflite_fd)
    keras_fd, keras_file = tempfile.mkstemp(".h5")
    os.close(keras_fd)

    try:
        if not compat.is_v1_apis():
            converter = tf.lite.TFLiteConverter.from_keras_model(stripped_model)
        else:
            # The v1 converter is built from a saved Keras file.
            tf.keras.models.save_model(stripped_model, keras_file)
            converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_file)

        tflite_model = converter.convert()
        with open(tflite_file, "wb") as f:
            f.write(tflite_model)

        self._verify_tflite(tflite_file, self.x_train)
    finally:
        # Remove the temp files even when conversion or verification fails.
        os.remove(keras_file)
        os.remove(tflite_file)
def testReturnsConfig_KerasLayer(self):
    """Check the registry's quantize config for a Dense layer.

    Only runs under the TF 1.X APIs. Verifies the weights/activations and
    quantizers reported by the config, then checks that the config's setters
    replace the layer's kernel and activation.
    """
    if compat.is_v1_apis():
        net = keras.Sequential([l.Dense(2, input_shape=(3,))])
        dense = net.layers[0]

        config = self.quantize_registry.get_quantize_config(dense)

        weight_pairs = config.get_weights_and_quantizers(dense)
        weights, weight_quantizers = self._convert_list(weight_pairs)
        activation_pairs = config.get_activations_and_quantizers(dense)
        activations, activation_quantizers = self._convert_list(
            activation_pairs)

        self._assert_weight_quantizers(weight_quantizers)
        self.assertEqual([dense.kernel], weights)

        self._assert_activation_quantizers(activation_quantizers)
        self.assertEqual([dense.activation], activations)

        # Swap in replacement kernel/activation through the config.
        kernel_shape = dense.kernel.shape.as_list()
        new_kernel = keras.backend.variable(np.ones(kernel_shape))
        new_activation = keras.activations.relu

        config.set_quantize_weights(dense, [new_kernel])
        config.set_quantize_activations(dense, [new_activation])

        self._assert_kernel_equality(dense.kernel, new_kernel)
        self.assertEqual(dense.activation, new_activation)
def testQuantizedEquivalentToQuantizedTFLite(self):
    """Compare a quantized folded-batchnorm model against quantized TFLite.

    Does nothing unless the TF 1.X APIs are active.
    """
    if compat.is_v1_apis():
        quantized_model = self._get_folded_batchnorm_model(is_quantized=True)
        self._test_equal_tf_and_tflite_outputs(
            quantized_model, is_tflite_quantized=True)
def convert_keras_to_tflite(model,
                            output_path,
                            custom_objects=None,
                            is_quantized=True,
                            inference_type=None,
                            inference_input_type=None,
                            input_quant_params=(-128., 255.),
                            experimental_new_converter=True):
    """Convert Keras model to TFLite.

    Args:
      model: Keras model to convert.
      output_path: Path the serialized TFLite model is written to. Nothing is
        written when it is None.
      custom_objects: Optional dict forwarded to the TF 1.X
        `from_keras_model_file` loader. Defaults to an empty dict.
      is_quantized: Whether to configure the converter for quantization.
      inference_type: TF 1.X path only. Overrides the default INT8 inference
        type when truthy.
      inference_input_type: TF 1.X path only. Overrides the default FLOAT
        inference input type when truthy.
      input_quant_params: TF 1.X path only. (mean, std_dev) pair installed as
        the quantized input stats of the first input array.
      experimental_new_converter: Value assigned to
        `converter.experimental_new_converter`.

    Returns:
      The value returned by `converter.convert()`.
    """
    # Function-scope import: `os` is only needed for temp-file housekeeping.
    import os

    if custom_objects is None:
        custom_objects = {}

    keras_file = None
    try:
        if not compat.is_v1_apis():
            converter = tf.lite.TFLiteConverter.from_keras_model(model)
        else:
            # mkstemp() returns an open descriptor plus the path. Only the
            # path is used, so close the descriptor instead of leaking it.
            keras_fd, keras_file = tempfile.mkstemp('.h5')
            os.close(keras_fd)
            tf.keras.models.save_model(model, keras_file)
            converter = tf.lite.TFLiteConverter.from_keras_model_file(
                keras_file, custom_objects=custom_objects)

        converter.experimental_new_converter = experimental_new_converter

        if is_quantized:
            if not compat.is_v1_apis():
                converter.optimizations = [tf.lite.Optimize.DEFAULT]
            else:
                converter.inference_type = tf.lite.constants.INT8
                converter.inference_input_type = tf.lite.constants.FLOAT
                # TODO(tfmot): should be able to make everything use the
                # same inference_type in TF 1.X tests.
                if inference_type:
                    converter.inference_type = inference_type
                if inference_input_type:
                    converter.inference_input_type = inference_input_type

                input_arrays = converter.get_input_arrays()
                converter.quantized_input_stats = {
                    input_arrays[0]: input_quant_params
                }  # mean, std_dev values for float to quantized int8 values.

        tflite_model = converter.convert()
    finally:
        # The intermediate .h5 file is only an input to the converter; remove
        # it whether or not conversion succeeded.
        if keras_file is not None:
            os.remove(keras_file)

    if output_path is not None:
        with open(output_path, 'wb') as f:
            f.write(tflite_model)

    return tflite_model
def _log_pruning_metrics(self, logs, prefix, step):
    """Write the entries of `logs` as summaries for the given `step`.

    Under the TF 1.X APIs this defers to `self._write_custom_summaries`;
    otherwise each entry is emitted as a scalar through `self._file_writer`.
    `prefix` is accepted but not used here.
    """
    if not compat.is_v1_apis():
        with self._file_writer.as_default():
            for metric_name, metric_value in logs.items():
                tf.summary.scalar(metric_name, metric_value, step=step)

            self._file_writer.flush()
        return

    # Safely depend on TF 1.X private API given
    # no more 1.X releases.
    self._write_custom_summaries(step, logs)
def testSetsQuantizeWeights(self):
    """Check that `set_quantize_weights` replaces a dense layer's kernel.

    Does nothing unless the TF 1.X APIs are active.
    """
    if compat.is_v1_apis():
        dense = self._simple_dense_layer()
        kernel_shape = dense.kernel.shape.as_list()
        replacement_kernel = K.variable(np.ones(kernel_shape))

        config = tflite_quantize_registry.TFLiteQuantizeConfig(
            ['kernel'], ['activation'], False)
        config.set_quantize_weights(dense, [replacement_kernel])

        self._assert_kernel_equality(dense.kernel, replacement_kernel)
def __init__(self, log_dir, update_freq='epoch', **kwargs):
    """Validate `log_dir`, initialise the base class and set up the writer.

    Args:
      log_dir: Non-empty string naming the log directory.
      update_freq: Forwarded unchanged to the base-class constructor.
      **kwargs: Forwarded unchanged to the base-class constructor.

    Raises:
      ValueError: If `log_dir` is not a string or is empty.
    """
    log_dir_is_valid = isinstance(log_dir, six.string_types) and bool(log_dir)
    if not log_dir_is_valid:
        message = (
            '`log_dir` must be a non-empty string. You passed `log_dir`='
            '{input}.'.format(input=log_dir))
        raise ValueError(message)

    super(PruningSummaries, self).__init__(
        log_dir=log_dir, update_freq=update_freq, **kwargs)

    if compat.is_v1_apis():
        return

    # TF 2.X
    metrics_dir = self.log_dir + '/metrics'
    self._file_writer = tf.summary.create_file_writer(metrics_dir)
def testSerialization_SavedModel(self):
    """Round-trip a trained quantized model through save_model/load_model.

    Does nothing when the TF 1.X APIs are active.
    """
    if not compat.is_v1_apis():
        base_model = test_utils.build_simple_dense_model()
        quantized_model = quantize.quantize_model(base_model)
        self._train_model(quantized_model)

        export_dir = tempfile.mkdtemp()
        tf.keras.models.save_model(quantized_model, export_dir)
        restored_model = tf.keras.models.load_model(export_dir)

        self._assert_outputs_equal(quantized_model, restored_model)
def _log_pruning_metrics(self, logs, prefix, step):
    """Write the entries of `logs` as summaries for the given `step`.

    Under the TF 1.X APIs this defers to `self._write_custom_summaries`.
    Otherwise each entry is emitted as a scalar through a summary writer
    rooted at `<self.log_dir>/metrics`.

    Args:
      logs: Mapping of summary name to value.
      prefix: Accepted but not used here.
      step: Step value attached to every scalar.
    """
    if compat.is_v1_apis():
        # Safely depend on TF 1.X private API given
        # no more 1.X releases.
        self._write_custom_summaries(step, logs)
        return

    # TF 2.X
    # Reuse one writer across calls instead of creating a new one every time
    # metrics are logged. An existing `_file_writer` attribute is honoured.
    file_writer = getattr(self, '_file_writer', None)
    if file_writer is None:
        file_writer = tf.summary.create_file_writer(self.log_dir + '/metrics')
        self._file_writer = file_writer

    # Scope the default writer to this block. `set_as_default()` would leave
    # this writer installed as the process-wide default after returning.
    with file_writer.as_default():
        for name, value in logs.items():
            tf.summary.scalar(name, value, step=step)

        file_writer.flush()
def __init__(self, log_dir='logs', cluster_update_freq='epoch', **kwargs):
    """Validate `log_dir`, initialise the base class and create the writer.

    Args:
      log_dir: Non-empty string naming the log directory.
      cluster_update_freq: Stored on the instance; the value 'batch' is
        stored as 1, any other value is stored unchanged.
      **kwargs: Forwarded unchanged to the base-class constructor.

    Raises:
      ValueError: If `log_dir` is not a string or is empty.
    """
    # Validate before delegating: the base constructor also receives
    # `log_dir`, so checking afterwards means a bad value has already been
    # consumed by the time this error can be raised.
    if not isinstance(log_dir, str) or not log_dir:
        raise ValueError(
            '`log_dir` must be a non-empty string. You passed `log_dir`='
            '{input}.'.format(input=log_dir))

    super(ClusteringSummaries, self).__init__(log_dir=log_dir, **kwargs)

    self.cluster_update_freq = (
        1 if cluster_update_freq == 'batch' else cluster_update_freq)

    if compat.is_v1_apis():  # TF 1.X
        self.writer = tf.compat.v1.summary.FileWriter(log_dir)
    else:  # TF 2.X
        self.writer = tf.summary.create_file_writer(log_dir)

    self.continuous_batch = 0
def testSerialization_TF1SavedModel(self):
    """Round-trip a trained quantized model through the experimental
    SavedModel export/load functions.

    Does nothing unless the TF 1.X APIs are active.
    """
    if compat.is_v1_apis():
        base_model = test_utils.build_simple_dense_model()
        quantized_model = quantize.quantize_model(base_model)
        self._train_model(quantized_model)

        export_dir = tempfile.mkdtemp()

        # Export and load each run inside their own quantize scope.
        with quantize.quantize_scope():
            tf.keras.experimental.export_saved_model(
                quantized_model, export_dir)

        with quantize.quantize_scope():
            restored_model = tf.keras.experimental.load_from_saved_model(
                export_dir)

        self._assert_outputs_equal(quantized_model, restored_model)
def testQuantizesMnist(self):
    """Train, quantize and convert an MNIST model, comparing accuracies.

    Only runs under the TF 1.X APIs. Fits the float model, fits its quantized
    counterpart, converts the latter to TFLite and checks that the three
    accuracies stay close to each other.
    """
    # Function-scope import: `os` is only needed for temp-file housekeeping.
    import os

    if not compat.is_v1_apis():
        return

    model = test_utils_mnist.sequential_model()
    x_train, y_train, x_test, y_test = test_utils_mnist.preprocessed_data()

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=500)
    _, model_accuracy = model.evaluate(x_test, y_test, verbose=0)

    quantized_model = quantize.quantize_model(model)
    quantized_model.compile(loss='categorical_crossentropy',
                            optimizer='sgd',
                            metrics=['accuracy'])
    quantized_model.fit(x_train, y_train, batch_size=500)
    _, quantized_model_accuracy = quantized_model.evaluate(
        x_test, y_test, verbose=0)

    self.assertGreater(quantized_model_accuracy, 0.6)

    # mkstemp() returns an open descriptor plus the path. Only the path is
    # used, so close the descriptor instead of leaking it.
    tflite_fd, quantized_tflite_file = tempfile.mkstemp('.tflite')
    os.close(tflite_fd)
    try:
        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(
                model=quantized_model,
                output_path=quantized_tflite_file,
                is_quantized=True)
        quantized_model_tflite_accuracy = test_utils_mnist.eval_tflite(
            quantized_tflite_file)
    finally:
        # Remove the temp file even if conversion or evaluation fails.
        os.remove(quantized_tflite_file)

    # Ensure accuracy for quantized TF and TFLite models are similar to original
    # model. There is no clear way to measure quantization, but for MNIST
    # results which differ a lot likely suggest an error in quantization.
    self.assertAllClose(model_accuracy,
                        quantized_model_accuracy,
                        rtol=0.2,
                        atol=0.2)
    self.assertAllClose(quantized_model_accuracy,
                        quantized_model_tflite_accuracy,
                        rtol=0.2,
                        atol=0.2)
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF1(
        self, layer_type, kwargs):
    """Quantize a single-layer model and convert it with uint8 inference.

    Does nothing unless the TF 1.X APIs are active. `kwargs` receives a
    default `input_shape` of (5,) when it has none.
    """
    if compat.is_v1_apis():
        kwargs.setdefault('input_shape', (5,))

        single_layer_model = tf.keras.Sequential([layer_type(**kwargs)])
        quantized_model = quantize.quantize_model(single_layer_model)

        conversion_options = dict(
            model=quantized_model,
            output_path=None,
            is_quantized=True,
            inference_type=tf.uint8,
            inference_input_type=tf.uint8,
            input_quant_params=(0., 1.),
        )
        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(**conversion_options)
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF2(
        self, layer_type, kwargs):
    """Quantize a single-layer model, convert it and inspect tensor dtypes.

    Does nothing when the TF 1.X APIs are active. `kwargs` receives a default
    `input_shape` of (5,) when it has none.
    """
    # "FullInteger" in the sense that ignores inputs and outputs.

    # Function-scope import: `os` is only needed for temp-file housekeeping.
    import os

    if compat.is_v1_apis():
        return

    if 'input_shape' not in kwargs:
        kwargs['input_shape'] = (5,)

    layer = layer_type(**kwargs)
    model = tf.keras.Sequential([layer])
    quantized_model = quantize.quantize_model(model)

    # mkstemp() returns an open descriptor plus the path. Only the path is
    # used, so close the descriptor instead of leaking it.
    tflite_fd, quantized_tflite_file = tempfile.mkstemp('.tflite')
    os.close(tflite_fd)
    try:
        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(
                model=quantized_model,
                output_path=quantized_tflite_file,
                is_quantized=True,
                input_quant_params=(0., 1.),
                experimental_new_converter=True)

        interpreter = tf.lite.Interpreter(model_path=quantized_tflite_file)
        interpreter.allocate_tensors()

        input_tensor_details = interpreter.get_input_details()
        self.assertEqual(input_tensor_details[0]['dtype'], np.float32)

        output_tensor_details = interpreter.get_output_details()
        self.assertEqual(output_tensor_details[0]['dtype'], np.float32)

        tensor_details = interpreter.get_tensor_details()
        float_tensor_details = [
            t for t in tensor_details if t['dtype'] == np.float32
        ]
        # Only the input and outputs are float. The rest are integer.
        #
        # TODO(tfmot): update this test to use the full-integer path when
        # available, so that float_tensor_details should be length 0.
        self.assertLen(float_tensor_details, 2)
    finally:
        # Remove the temp file even if conversion or an assertion fails.
        os.remove(quantized_tflite_file)
def testReturnsConfig_KerasRNNLayer(self):
    """Check the registry's quantize config for an LSTM layer.

    Only runs under the TF 1.X APIs. Verifies that the config reports the
    cell's kernel/recurrent kernel and activation/recurrent activation.
    """
    if compat.is_v1_apis():
        net = keras.Sequential([l.LSTM(2, input_shape=(3, 2))])
        lstm = net.layers[0]

        config = self.quantize_registry.get_quantize_config(lstm)

        weight_pairs = config.get_weights_and_quantizers(lstm)
        weights, weight_quantizers = self._convert_list(weight_pairs)
        activation_pairs = config.get_activations_and_quantizers(lstm)
        activations, activation_quantizers = self._convert_list(
            activation_pairs)

        self._assert_weight_quantizers(weight_quantizers)
        expected_weights = [lstm.cell.kernel, lstm.cell.recurrent_kernel]
        self.assertEqual(expected_weights, weights)

        self._assert_activation_quantizers(activation_quantizers)
        expected_activations = [
            lstm.cell.activation, lstm.cell.recurrent_activation
        ]
        self.assertEqual(expected_activations, activations)
def testQuantizeSingleLayer_ProducesFullIntegerModel_TF1(
        self, layer_type, kwargs):
    """Quantize a single-layer model and convert it with the old converter.

    Does nothing unless the TF 1.X APIs are active. `kwargs` receives a
    default `input_shape` of (5,) when it has none.
    """
    if compat.is_v1_apis():
        kwargs.setdefault('input_shape', (5,))

        single_layer_model = tf.keras.Sequential([layer_type(**kwargs)])
        quantized_model = quantize.quantize_model(single_layer_model)

        conversion_options = dict(
            model=quantized_model,
            output_path=None,
            is_quantized=True,
            inference_type=tf.uint8,
            inference_input_type=tf.uint8,
            input_quant_params=(0., 1.),
            # Set to False to throw errors when FakeQuants are
            # not placed everywhere to create full-integer model. Errors
            # are not thrown when set to True.
            experimental_new_converter=False,
        )
        with quantize.quantize_scope():
            test_utils.convert_keras_to_tflite(**conversion_options)
def testEquivalentToFloatTFLite(self):
    """Compare a non-quantized folded-batchnorm model against TFLite.

    Does nothing unless the TF 1.X APIs are active.
    """
    if compat.is_v1_apis():
        float_model = self._get_folded_batchnorm_model(is_quantized=False)
        self._test_equal_tf_and_tflite_outputs(float_model)
def testSupports_KerasRNNLayers(self):
    """Check that the registry reports support for LSTM and GRU layers.

    Does nothing unless the TF 1.X APIs are active.
    """
    if compat.is_v1_apis():
        lstm_supported = self.quantize_registry.supports(l.LSTM(10))
        self.assertTrue(lstm_supported)

        gru_supported = self.quantize_registry.supports(l.GRU(10))
        self.assertTrue(gru_supported)