def test_ds_tc_resnet_stream_tflite(self):
  """Checks TFLite streaming inference with external state.

  The model is converted to TFLite in STREAM_EXTERNAL_STATE_INFERENCE mode,
  executed on self.input_data starting from all-zero interpreter inputs and
  the result is compared with self.non_stream_out.
  """
  converted_model = utils.model_to_tflite(
      self.sess, self.model, self.params,
      Modes.STREAM_EXTERNAL_STATE_INFERENCE)

  tflite_interpreter = tf.lite.Interpreter(model_content=converted_model)
  tflite_interpreter.allocate_tensors()

  # Model state is reset before the test sequence is processed: one zero
  # filled float32 array is created per interpreter input.
  zero_inputs = [
      np.zeros(info['shape'], dtype=np.float32)
      for info in tflite_interpreter.get_input_details()
  ]

  streamed = inference.run_stream_inference_classification_tflite(
      self.params, tflite_interpreter, self.input_data, zero_inputs)

  self.assertAllClose(streamed, self.non_stream_out, atol=1e-5)
def test_ds_tc_resnet_stream_internal_tflite(self):
  """Checks TFLite streaming inference with internal state.

  Builds a fresh ds_tc_resnet model, runs it in non streaming mode on random
  input, converts it to TFLite in STREAM_INTERNAL_STATE_INFERENCE mode and
  verifies that the streaming output matches the non streaming one.
  """
  test_utils.set_seed(123)
  tf.keras.backend.set_learning_phase(0)

  model_params = utils.ds_tc_resnet_model_params(True)
  keras_model = ds_tc_resnet.model(model_params)
  keras_model.summary()

  audio = np.random.rand(model_params.batch_size,
                         model_params.desired_samples)

  # Reference output: the whole sequence processed in non streaming mode.
  expected = keras_model.predict(audio)

  converted_model = utils.model_to_tflite(
      None, keras_model, model_params, Modes.STREAM_INTERNAL_STATE_INFERENCE)

  tflite_interpreter = tf.lite.Interpreter(model_content=converted_model)
  tflite_interpreter.allocate_tensors()

  # No external states are passed in this mode (input_states=None).
  streamed = inference.run_stream_inference_classification_tflite(
      model_params, tflite_interpreter, audio, input_states=None)

  self.assertAllClose(streamed, expected, atol=1e-5)
def test_cnn_model_end_to_end(self):
  """Trains a quantization aware cnn model and checks its TFLite streaming.

  Steps: train the model in non streaming mode, build a streaming version to
  produce calibration data, convert to TFLite with post training
  quantization, run TFLite in streaming mode and compare its output with the
  non streaming TF output.
  """
  session_config = tf1.ConfigProto()
  session_config.gpu_options.allow_growth = True
  sess = tf1.Session(config=session_config)
  tf1.keras.backend.set_session(sess)
  test_utils.set_seed(123)

  # data parameters
  num_time_bins = 12
  feature_size = 12

  # model params.
  total_stride = 2
  params = test_utils.Params([total_stride], 0)
  model_settings = {
      'model_name': 'cnn',
      'cnn_filters': '2',
      'cnn_kernel_size': '(3,3)',
      'cnn_act': "'relu'",
      'cnn_dilation_rate': '(1,1)',
      'cnn_strides': '(2,2)',
      'dropout1': 0.5,
      'units2': '',
      'act2': '',
      'label_count': 2,
      'return_softmax': True,
      'quantize': 1,  # apply quantization aware training
      'data_shape': (num_time_bins, feature_size),
      'preprocess': 'custom',
  }
  for attr_name, attr_value in model_settings.items():
    setattr(params, attr_name, attr_value)

  model = cnn.model(params)
  model.summary()

  # prepare training and testing data (testing reuses the training set)
  train_images, train_labels = test_utils.generate_data(
      img_size_y=num_time_bins, img_size_x=feature_size, n_samples=32)
  test_images, test_labels = train_images, train_labels

  # create and train quantization aware model in non streaming mode
  model.compile(
      optimizer='adam',
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      metrics=['accuracy'])
  model.fit(
      train_images,
      train_labels,
      epochs=1,
      validation_data=(test_images, test_labels))
  model.summary()

  # one test image
  train_image = train_images[:1,]

  # run tf non streaming inference
  non_stream_output_tf = model.predict(train_image)

  # specify input data shape for streaming mode
  params.data_shape = (total_stride, feature_size)
  # TODO(rybakov) add params structure for model with no feature extractor

  # prepare tf streaming model and use it to generate representative_dataset
  with quantize.quantize_scope():
    stream_quantized_model = utils.to_streaming_inference(
        model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)

  calibration_data = prepare_calibration_data(stream_quantized_model,
                                              total_stride, train_image)

  def representative_dataset(dtype):
    """Returns a generator function over calibration samples cast to dtype."""

    def _representative_dataset_gen():
      for sample in calibration_data:
        audio_packet, conv_state, flatten_state = (
            sample[0], sample[1], sample[2])
        yield [
            audio_packet.astype(dtype),
            conv_state.astype(dtype),
            flatten_state.astype(dtype),
        ]

    return _representative_dataset_gen

  # convert streaming quantization aware model to tflite
  # and apply post training quantization
  with quantize.quantize_scope():
    tflite_streaming_model = utils.model_to_tflite(
        sess,
        model,
        params,
        Modes.STREAM_EXTERNAL_STATE_INFERENCE,
        optimizations=[tf.lite.Optimize.DEFAULT],
        inference_type=tf.int8,
        experimental_new_quantizer=True,
        representative_dataset=representative_dataset(np.float32))

  # run tflite in streaming mode and compare output logits with tf
  interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
  interpreter.allocate_tensors()

  # one zero filled float32 array per interpreter input
  input_states = [
      np.zeros(info['shape'], dtype=np.float32)
      for info in interpreter.get_input_details()
  ]

  stream_out_tflite = inference.run_stream_inference_classification_tflite(
      params, interpreter, train_image, input_states)

  self.assertAllClose(stream_out_tflite, non_stream_output_tf, atol=0.001)
def tflite_stream_state_external_model_accuracy(
    flags,
    folder,
    tflite_model_name='stream_state_external.tflite',
    accuracy_name='tflite_stream_state_external_model_accuracy.txt',
    reset_state=False):
  """Measures accuracy of a TFLite streaming model with external state.

  The model is run on the 'testing' split one sequence at a time. The
  resulting accuracy is logged, written to a text file next to the model and
  returned.

  Args:
    flags: model and data settings
    folder: name of the folder (inside flags.train_dir) holding the model
    tflite_model_name: file name of the tflite model
    accuracy_name: name of the file, created in the model folder, where the
      accuracy is stored
    reset_state: if True, the states are zeroed before every test sequence,
      so no state is carried from one sequence to the next. This is a non
      streaming testing environment; in real streaming the state is
      transferred between sequences.

  Returns:
    accuracy in percent
  """
  tf.reset_default_graph()
  session_config = tf.ConfigProto()
  session_config.gpu_options.allow_growth = True
  sess = tf.Session(config=session_config)
  tf.keras.backend.set_session(sess)
  model_dir = os.path.join(flags.train_dir, folder)

  logging.info('tflite stream model state external with reset_state %d',
               reset_state)

  audio_processor = input_data.AudioProcessor(flags)
  set_size = audio_processor.set_size('testing')

  interpreter = tf.lite.Interpreter(
      model_path=os.path.join(model_dir, tflite_model_name))
  interpreter.allocate_tensors()

  input_details = interpreter.get_input_details()
  output_details = interpreter.get_output_details()
  num_inputs = len(input_details)

  # One zero filled float32 array per interpreter input.
  inputs = [
      np.zeros(detail['shape'], dtype=np.float32) for detail in input_details
  ]

  total_accuracy = 0.0
  count = 0.0
  inference_batch_size = 1
  for i in range(0, set_size, inference_batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        inference_batch_size, i, flags, 0.0, 0.0, 0, 'testing', 0.0, 0.0, sess)

    # Optionally zero the states before a new test sequence; with the reset
    # enabled this is no longer a real streaming mode.
    if reset_state:
      for s, detail in enumerate(input_details):
        inputs[s] = np.zeros(detail['shape'], dtype=np.float32)

    if flags.preprocess == 'raw':
      out_tflite = inference.run_stream_inference_classification_tflite(
          flags, interpreter, test_fingerprints, inputs)
    else:
      for t in range(test_fingerprints.shape[1]):
        # Take the next frame from the stream and keep the time axis:
        # [batch, time=1, feature]
        frame = np.expand_dims(test_fingerprints[:, t, :], axis=1)
        frame = frame.astype(np.float32)

        # Input data goes to index 0, the states to indexes 1...
        interpreter.set_tensor(input_details[0]['index'], frame)
        for s in range(1, num_inputs):
          interpreter.set_tensor(input_details[s]['index'], inputs[s])

        interpreter.invoke()

        # Classification output of the current step.
        out_tflite = interpreter.get_tensor(output_details[0]['index'])

        # Output states become the input states of the next inference cycle.
        # The function `get_tensor()` returns a copy of the tensor data.
        # Use `tensor()` in order to get a pointer to the tensor.
        for s in range(1, num_inputs):
          inputs[s] = interpreter.get_tensor(output_details[s]['index'])

    out_tflite_argmax = np.argmax(out_tflite)

    total_accuracy += (test_ground_truth[0] == out_tflite_argmax)
    count += 1
    if i and i % 200 == 0:
      logging.info(
          'tflite test accuracy, stream model state external = %f %d out of %d',
          total_accuracy * 100 / count, i, set_size)

  total_accuracy = total_accuracy / count
  accuracy_percent = total_accuracy * 100
  logging.info(
      'tflite Final test accuracy, stream model state external = %.2f%% (N=%d)',
      accuracy_percent, set_size)

  with open(os.path.join(model_dir, accuracy_name), 'wt') as fd:
    fd.write('%f on set_size %d' % (accuracy_percent, set_size))
  return accuracy_percent