def testPreprocessStreamInferenceModeTFandTFLite(self,
                                                 preprocess,
                                                 feature_type,
                                                 model_name='gru'):
    """Validate streaming conversion of a model with given preprocessing.

    Builds the model selected by `model_name` with the requested
    `preprocess` and `feature_type` and checks that it can be converted to
    stream inference mode with TFLite (external state) and with TF
    (external and internal state).

    Args:
      preprocess: value assigned to `params.preprocess`.
      feature_type: value assigned to `params.feature_type`.
      model_name: key into `model_params.HOTWORD_MODEL_PARAMS`.
    """
    import copy  # local import keeps this block self-contained

    # HOTWORD_MODEL_PARAMS is module-level state shared by every test that
    # reads it; work on a private copy so the overrides below do not leak.
    params = copy.deepcopy(model_params.HOTWORD_MODEL_PARAMS[model_name])

    # set parameters to test
    params.preprocess = preprocess
    params.feature_type = feature_type
    params = model_flags.update_flags(params)

    # create model
    model = models.MODELS[params.model_name](params)

    # convert TF non streaming model to TFLite streaming inference
    # with external states
    self.assertTrue(
        utils.model_to_tflite(self.sess, model, params,
                              modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE))

    # convert TF non streaming model to TF streaming with external states
    self.assertTrue(
        utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE))

    # convert TF non streaming model to TF streaming with internal states
    self.assertTrue(
        utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE))
def testToNonStreamInferenceTFandTFLite(self, model_name='svdf'):
    """Check conversion to non streaming inference with TF and TFLite.

    Args:
      model_name: key into `_HOTWORD_MODEL_PARAMS` selecting the model.
    """
    params = model_flags.update_flags(_HOTWORD_MODEL_PARAMS[model_name])

    # build the model from its registered factory
    build_model = models.MODELS[params.model_name]
    model = build_model(params)

    # TF non streaming model -> TF non streaming inference model
    # (it will disable dropouts)
    tf_inference_model = utils.to_streaming_inference(
        model, params, Modes.NON_STREAM_INFERENCE)
    self.assertTrue(tf_inference_model)

    # TF non streaming model -> TFLite non streaming inference
    tflite_inference_model = utils.model_to_tflite(
        self.sess, model, params, Modes.NON_STREAM_INFERENCE)
    self.assertTrue(tflite_inference_model)
def test_ds_tc_resnet_stream_tflite(self):
    """Compare TFLite external-state streaming output with non streaming."""
    tflite_model = utils.model_to_tflite(
        self.sess, self.model, self.params,
        Modes.STREAM_EXTERNAL_STATE_INFERENCE)

    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    # before processing new test sequence we reset model state:
    # one zero-filled array per interpreter input
    zero_inputs = [
        np.zeros(detail['shape'], dtype=np.float32)
        for detail in interpreter.get_input_details()
    ]

    stream_out = test.run_stream_inference_classification_tflite(
        self.params, interpreter, self.input_data, zero_inputs)

    self.assertAllClose(stream_out, self.non_stream_out, atol=1e-5)
def _testTFLite(self,
                preprocess='raw',
                feature_type='mfcc_op',
                model_name='svdf'):
    """Check that a model converts to non streaming TFLite.

    Args:
      preprocess: value assigned to `params.preprocess`.
      feature_type: value assigned to `params.feature_type`.
      model_name: key into `model_params.HOTWORD_MODEL_PARAMS`.
    """
    import copy  # local import keeps this block self-contained

    # HOTWORD_MODEL_PARAMS is module-level state; copy the entry so the
    # overrides below stay local to this call instead of leaking into
    # every later user of the same model name.
    params = copy.deepcopy(model_params.HOTWORD_MODEL_PARAMS[model_name])

    # set parameters to test
    params.preprocess = preprocess
    params.feature_type = feature_type
    params = model_flags.update_flags(params)

    # create model
    model = models.MODELS[params.model_name](params)

    # convert TF non streaming model to TFLite non streaming inference
    self.assertTrue(
        utils.model_to_tflite(self.sess, model, params,
                              Modes.NON_STREAM_INFERENCE))
def convert_model_tflite(flags,
                         folder,
                         mode,
                         fname,
                         weights_name='best_weights',
                         optimizations=None):
    """Convert model to streaming and non streaming TFLite.

    The model is rebuilt from `flags`, its weights are loaded from
    `<flags.train_dir>/<weights_name>`, and the converted model is written to
    `<flags.train_dir>/<folder>/<fname>`. `IOError`, `ValueError`,
    `AttributeError`, `RuntimeError` and `TypeError` raised while converting
    or writing are logged as warnings instead of propagated, so that the
    caller can continue.

    Side effects: resets the default graph, installs a new Keras session and
    modifies `flags` in place (`batch_size` and `approximate_gelu`).

    Args:
      flags: model and data settings
      folder: folder where converted model will be saved
      mode: inference mode
      fname: file name of converted model
      weights_name: file name with model weights
      optimizations: list of optimization options
    """
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    tf.keras.backend.set_session(sess)
    tf.keras.backend.set_learning_phase(0)
    flags.batch_size = 1  # set batch size for inference
    flags.approximate_gelu = True  # use approximate GELU in TFLite

    model = models.MODELS[flags.model_name](flags)
    weights_path = os.path.join(flags.train_dir, weights_name)
    model.load_weights(weights_path).expect_partial()

    path_model = os.path.join(flags.train_dir, folder)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(path_model, exist_ok=True)

    # convert trained model to TFLite in the requested mode;
    # to finish other tests we do not stop program if exception happen here
    try:
        # Convert BEFORE opening the output file: opening with 'wb' creates
        # (or truncates) the file, so converting inside the `with` would
        # leave an empty file behind whenever conversion fails.
        tflite_model = utils.model_to_tflite(sess, model, flags, mode,
                                             path_model, optimizations)
        with open(os.path.join(path_model, fname), 'wb') as fd:
            fd.write(tflite_model)
    except IOError as e:
        logging.warning('FAILED to write file: %s', e)
    except (ValueError, AttributeError, RuntimeError, TypeError) as e:
        logging.warning('FAILED to convert to mode %s, tflite: %s', mode, e)
def _testTFLite(self,
                preprocess='raw',
                feature_type='mfcc_tf',
                use_tf_fft=False,
                model_name='svdf'):
    """Check that a model converts to non streaming TFLite.

    Args:
      preprocess: value assigned to `params.preprocess`.
      feature_type: value assigned to `params.feature_type`.
      use_tf_fft: value assigned to `params.use_tf_fft`.
      model_name: key into `model_params.HOTWORD_MODEL_PARAMS`.
    """
    import copy  # local import keeps this block self-contained

    # HOTWORD_MODEL_PARAMS is module-level state; copy the entry so the
    # overrides below (including the shortened clip duration) stay local
    # to this call instead of leaking into later users of the same entry.
    params = copy.deepcopy(model_params.HOTWORD_MODEL_PARAMS[model_name])
    params.clip_duration_ms = 100  # make it shorter for testing

    # set parameters to test
    params.preprocess = preprocess
    params.feature_type = feature_type
    params.use_tf_fft = use_tf_fft
    params = model_flags.update_flags(params)

    # create model
    model = models.MODELS[params.model_name](params)

    # convert TF non streaming model to TFLite non streaming inference
    self.assertTrue(
        utils.model_to_tflite(self.sess, model, params,
                              modes.Modes.NON_STREAM_INFERENCE))
def testToStreamInferenceModeTFandTFLite(self, model_name='gru'):
    """Check conversion to every streaming inference mode.

    Covers TFLite with external state, TF with external state and TF with
    internal state.

    Args:
      model_name: key into `_HOTWORD_MODEL_PARAMS` selecting the model.
    """
    params = model_flags.update_flags(_HOTWORD_MODEL_PARAMS[model_name])

    # build the model from its registered factory
    build_model = models.MODELS[params.model_name]
    model = build_model(params)

    # TF non streaming model -> TFLite streaming inference, external states
    tflite_external = utils.model_to_tflite(
        self.sess, model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)
    self.assertTrue(tflite_external)

    # TF non streaming model -> TF streaming, external states
    tf_external = utils.to_streaming_inference(
        model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)
    self.assertTrue(tf_external)

    # TF non streaming model -> TF streaming, internal states
    tf_internal = utils.to_streaming_inference(
        model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
    self.assertTrue(tf_internal)
def test_ds_tc_resnet_stream_internal_tflite(self):
    """Compare TFLite internal-state streaming output with non streaming."""
    test_utils.set_seed(123)
    tf.keras.backend.set_learning_phase(0)

    params = utils.ds_tc_resnet_model_params(True)
    model = ds_tc_resnet.model(params)
    model.summary()

    audio_shape = (params.batch_size, params.desired_samples)
    audio = np.random.rand(*audio_shape)

    # reference output: non streaming inference
    expected = model.predict(audio)

    # no session is passed for the internal-state conversion
    tflite_model = utils.model_to_tflite(
        None, model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)

    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    actual = inference.run_stream_inference_classification_tflite(
        params, interpreter, audio, input_states=None)

    self.assertAllClose(actual, expected, atol=1e-5)
def test_model_to_tflite(self):
    """Stateless graphs are supported by TFLite: conversion must succeed."""
    tflite_model = utils.model_to_tflite(self.sess, self.model, self.flags)
    self.assertTrue(tflite_model)
def test_cnn_model_end_to_end(self):
    """Train a quantized CNN and compare streaming TFLite with TF output.

    A small CNN is trained with quantization enabled, converted to a
    streaming (external state) TFLite model with post training
    quantization, and its streaming output is compared with the TF
    non streaming prediction.
    """
    session_config = tf1.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf1.Session(config=session_config)
    tf1.keras.backend.set_session(sess)

    test_utils.set_seed(123)

    # data parameters
    num_time_bins = 12
    feature_size = 12

    # model params.
    total_stride = 2
    params = test_utils.Params([total_stride], 0)
    params.model_name = 'cnn'
    params.cnn_filters = '2'
    params.cnn_kernel_size = '(3,3)'
    params.cnn_act = "'relu'"
    params.cnn_dilation_rate = '(1,1)'
    params.cnn_strides = '(2,2)'
    params.dropout1 = 0.5
    params.units2 = ''
    params.act2 = ''
    params.label_count = 2
    params.return_softmax = True
    params.quantize = 1  # apply quantization aware training
    params.data_shape = (num_time_bins, feature_size)
    params.preprocess = 'custom'

    model = cnn.model(params)
    model.summary()

    # training data doubles as validation data
    train_images, train_labels = test_utils.generate_data(
        img_size_y=num_time_bins, img_size_x=feature_size, n_samples=32)

    # create and train quantization aware model in non streaming mode
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'])
    model.fit(
        train_images,
        train_labels,
        epochs=1,
        validation_data=(train_images, train_labels))
    model.summary()

    # one test image
    sample_image = train_images[:1,]

    # run tf non streaming inference
    expected_output = model.predict(sample_image)

    # specify input data shape for streaming mode
    params.data_shape = (total_stride, feature_size)
    # TODO(rybakov) add params structure for model with no feature extractor

    # prepare tf streaming model and use it to generate
    # representative_dataset
    with quantize.quantize_scope():
        stream_quantized_model = utils.to_streaming_inference(
            model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)

    calibration_data = prepare_calibration_data(
        stream_quantized_model, total_stride, sample_image)

    def representative_dataset(dtype):
        """Return a generator function yielding calibration samples."""

        def _representative_dataset_gen():
            for sample in calibration_data:
                yield [
                    sample[0].astype(dtype),  # input audio packet
                    sample[1].astype(dtype),  # conv state
                    sample[2].astype(dtype),  # flatten state
                ]

        return _representative_dataset_gen

    # convert streaming quantization aware model to tflite
    # and apply post training quantization
    with quantize.quantize_scope():
        tflite_streaming_model = utils.model_to_tflite(
            sess,
            model,
            params,
            Modes.STREAM_EXTERNAL_STATE_INFERENCE,
            optimizations=[tf.lite.Optimize.DEFAULT],
            inference_type=tf.int8,
            experimental_new_quantizer=True,
            representative_dataset=representative_dataset(np.float32))

    # run tflite in streaming mode and compare output logits with tf
    interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
    interpreter.allocate_tensors()

    # one zero-filled array per interpreter input
    zero_states = [
        np.zeros(detail['shape'], dtype=np.float32)
        for detail in interpreter.get_input_details()
    ]

    stream_out_tflite = inference.run_stream_inference_classification_tflite(
        params, interpreter, sample_image, zero_states)

    self.assertAllClose(stream_out_tflite, expected_output, atol=0.001)
def test_streaming_on_1d_data_strides(self, stride):
    """Tests Conv2DTranspose on 1d in streaming mode with different strides.

    The non streaming TF output is compared with TF internal-state streaming
    and with TFLite external-state streaming.

    Args:
      stride: controls the upscaling factor
    """
    tf1.reset_default_graph()
    session_config = tf1.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf1.Session(config=session_config)
    tf1.keras.backend.set_session(sess)

    # model and data parameters
    step = 1  # amount of data fed into streaming model on every iteration
    params = test_utils.Params([step], clip_duration_ms=0.25)

    # prepare input data: [batch, time, 1, channels]
    inp_audio = np.random.rand(1, params.desired_samples, 1,
                               self.input_channels)

    # prepare non-streaming model
    model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 1),
        strides=(stride, 1),
        channels=self.input_channels)
    model.summary()

    # set weights with bias: all-ones kernel and a constant 0.5 bias
    for layer in model.layers:
        if not isinstance(layer, tf.keras.layers.Conv2DTranspose):
            continue
        kernel = np.ones(layer.weights[0].shape)
        bias = np.zeros(layer.weights[1].shape) + 0.5
        layer.set_weights([kernel, bias])

    params.data_shape = (1, 1, self.input_channels)

    # prepare streaming model
    model_stream = utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run inference
    non_stream_out = model.predict(inp_audio)
    stream_out = inference.run_stream_inference(params, model_stream,
                                                inp_audio)
    self.assertAllClose(stream_out, non_stream_out)

    # Convert TF non-streaming model to TFLite external-state streaming model.
    tflite_streaming_model = utils.model_to_tflite(
        sess, model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)
    self.assertTrue(tflite_streaming_model)

    # Run TFLite external-state streaming inference.
    interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
    interpreter.allocate_tensors()

    # before processing test sequence we create model state:
    # one zero-filled array per interpreter input
    input_states = [
        np.zeros(detail['shape'], dtype=np.float32)
        for detail in interpreter.get_input_details()
    ]

    stream_out_tflite_external_st = inference.run_stream_inference_tflite(
        params, interpreter, inp_audio, input_states, concat=True)

    # compare streaming TFLite with external-state vs TF non-streaming
    self.assertAllClose(stream_out_tflite_external_st, non_stream_out)