def setUp(self):
  super(UtilsTest, self).setUp()
  tf1.reset_default_graph()
  config = tf1.ConfigProto()
  config.gpu_options.allow_growth = True
  self.sess = tf1.Session(config=config)
  tf1.keras.backend.set_session(self.sess)
def setUp(self):
  super(UtilsTest, self).setUp()
  tf1.reset_default_graph()
  self.sess = tf1.Session()
  tf1.keras.backend.set_session(self.sess)
  self.flags = Flags()

  # speech feature extractor parameters:
  # 30 ms windows (480 samples) with a 20 ms stride (320 samples) at 16 kHz
  self.flags.desired_samples = 16000
  self.flags.window_size_ms = 30.0
  self.flags.window_stride_ms = 20.0
  self.flags.sample_rate = 16000.0
  self.flags.window_stride_samples = 320
  self.flags.window_size_samples = 480
  self.flags.label_count = 3
  self.flags.preemph = 0.0
  self.flags.window_type = 'hann'

  # mel/DCT feature parameters
  self.flags.mel_num_bins = 40
  self.flags.mel_lower_edge_hertz = 20
  self.flags.mel_upper_edge_hertz = 4000
  self.flags.fft_magnitude_squared = False
  self.flags.dct_num_features = 10
  self.flags.use_tf_fft = False

  # dnn model parameters
  self.flags.units1 = '32'
  self.flags.act1 = "'relu'"
  self.flags.pool_size = 2
  self.flags.strides = 2
  self.flags.dropout1 = 0.1
  self.flags.units2 = '256,256'
  self.flags.act2 = "'relu','relu'"

  self.flags.train_dir = FLAGS.test_tmpdir
  self.flags.mel_non_zero_only = 1
  self.flags.batch_size = 1

  self.model = dnn.model(self.flags)
  self.model.summary()
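# Note: `Flags` used above is not defined in this excerpt. A minimal stand-in
# consistent with how the test populates it (a plain attribute container,
# assumed here rather than taken from the original source) would be:
class Flags(object):
  """Simple attribute bag standing in for parsed command-line flags."""
  pass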
def setUp(self):
  super(InverseSTFTTest, self).setUp()
  test_utils.set_seed(123)
  self.frame_size = 32
  self.frame_step = 8

  # layer definition
  inverse_stft_layer = inverse_stft.InverseSTFT(self.frame_size,
                                                self.frame_step)

  # prepare input stft data
  input_audio = tf.random.uniform((1, 256), maxval=1.0)
  signal_stft_tf = tf.signal.stft(
      input_audio,
      inverse_stft_layer.frame_size,
      inverse_stft_layer.frame_step,
      inverse_stft_layer.fft_size,
      window_fn=inverse_stft_layer.synthesis_window_fn,
      pad_end=False)
  with tf1.Session() as sess:
    self.signal_stft = sess.run(signal_stft_tf)

  self.feature_size = self.signal_stft.shape[-1]

  # create istft model and run non stream inference
  input_tf = tf.keras.layers.Input(
      shape=self.signal_stft.shape[1:3], batch_size=1, dtype=tf.complex64)
  net = inverse_stft_layer(input_tf)
  model_non_stream = tf.keras.models.Model(input_tf, net)
  self.non_stream_out = model_non_stream.predict(self.signal_stft)
def test_streaming_on_2d_data_strides(self, stride):
  """Tests Conv2DTranspose on 2d in streaming mode with different strides.

  Args:
    stride: controls the upscaling factor
  """
  tf1.reset_default_graph()
  config = tf1.ConfigProto()
  config.gpu_options.allow_growth = True
  sess = tf1.Session(config=config)
  tf1.keras.backend.set_session(sess)

  # model and data parameters
  step = 1  # amount of data fed into streaming model on every iteration
  params = test_utils.Params([step], clip_duration_ms=0.25)
  input_features = 3

  # prepare input data: [batch, time, features, channels]
  x = np.random.rand(1, params.desired_samples, input_features,
                     self.input_channels)
  inp_audio = x

  # prepare non-streaming model
  model = conv2d_transpose_model(
      params,
      filters=1,
      kernel_size=(3, 3),
      strides=(stride, stride),
      features=input_features,
      channels=self.input_channels)
  model.summary()

  # set weights with bias
  for layer in model.layers:
    if isinstance(layer, tf.keras.layers.Conv2DTranspose):
      layer.set_weights([
          np.ones(layer.weights[0].shape),
          np.zeros(layer.weights[1].shape) + 0.5
      ])

  params.data_shape = (1, input_features, self.input_channels)

  # prepare streaming model
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # run inference
  non_stream_out = model.predict(inp_audio)
  stream_out = inference.run_stream_inference(params, model_stream, inp_audio)

  self.assertAllClose(stream_out, non_stream_out)
def test_streaming_inference_external_state(self):
  with tf1.Session() as sess:
    output_non_stream_np, model_tf = self._run_non_stream_model()

    # input data for streaming stateless model
    input_tensors = [
        tf.keras.layers.Input(
            shape=(1, self.input_data.shape[2]),
            batch_size=self.batch_size,
            dtype=tf.float32)
    ]

    # convert non-streaming model to a streaming one with external state
    mode = modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE
    model_stream = utils.convert_to_inference_model(model_tf, input_tensors,
                                                    mode)

    # validate that the model is convertible to tflite
    converter = tf1.lite.TFLiteConverter.from_session(sess,
                                                      model_stream.inputs,
                                                      model_stream.outputs)
    self.assertTrue(converter.convert())

    inputs = []
    for s in range(len(model_stream.inputs)):
      inputs.append(np.zeros(model_stream.inputs[s].shape, dtype=np.float32))

    # streaming emulation: loop over every element in time
    for i in range(self.input_data.shape[1]):
      input_batch_np = self.input_data[:, i, :]
      input_batch_np = np.expand_dims(input_batch_np, 1)
      inputs[0] = input_batch_np
      outputs = model_stream.predict(inputs)

      # feed output states back as input states for the next step
      # (input_states_np = output_states_np)
      for s in range(1, len(model_stream.inputs)):
        inputs[s] = outputs[s]

      for b in range(self.input_data.shape[0]):  # loop over batch
        self.assertAllClose(outputs[0][b][0], output_non_stream_np[b][i])
def setUp(self):
  super(DsTcResnetTest, self).setUp()
  config = tf1.ConfigProto()
  config.gpu_options.allow_growth = True
  self.sess = tf1.Session(config=config)
  tf1.keras.backend.set_session(self.sess)
  tf.keras.backend.set_learning_phase(0)
  test_utils.set_seed(123)

  self.params = utils.ds_tc_resnet_model_params(True)

  self.model = ds_tc_resnet.model(self.params)
  self.model.summary()

  self.input_data = np.random.rand(self.params.batch_size,
                                   self.params.desired_samples)

  # run non streaming inference
  self.non_stream_out = self.model.predict(self.input_data)
def test_cnn_model_end_to_end(self):
  config = tf1.ConfigProto()
  config.gpu_options.allow_growth = True
  sess = tf1.Session(config=config)
  tf1.keras.backend.set_session(sess)
  test_utils.set_seed(123)

  # data parameters
  num_time_bins = 12
  feature_size = 12

  # model params
  total_stride = 2
  params = test_utils.Params([total_stride], 0)
  params.model_name = 'cnn'
  params.cnn_filters = '2'
  params.cnn_kernel_size = '(3,3)'
  params.cnn_act = "'relu'"
  params.cnn_dilation_rate = '(1,1)'
  params.cnn_strides = '(2,2)'
  params.dropout1 = 0.5
  params.units2 = ''
  params.act2 = ''

  params.label_count = 2
  params.return_softmax = True
  params.quantize = 1  # apply quantization aware training

  params.data_shape = (num_time_bins, feature_size)
  params.preprocess = 'custom'

  model = cnn.model(params)
  model.summary()

  # prepare training and testing data
  train_images, train_labels = test_utils.generate_data(
      img_size_y=num_time_bins, img_size_x=feature_size, n_samples=32)
  test_images = train_images
  test_labels = train_labels

  # create and train quantization aware model in non streaming mode
  model.compile(
      optimizer='adam',
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      metrics=['accuracy'])
  model.fit(
      train_images,
      train_labels,
      epochs=1,
      validation_data=(test_images, test_labels))
  model.summary()

  # one test image
  train_image = train_images[:1,]

  # run tf non streaming inference
  non_stream_output_tf = model.predict(train_image)

  # specify input data shape for streaming mode
  params.data_shape = (total_stride, feature_size)
  # TODO(rybakov) add params structure for model with no feature extractor

  # prepare tf streaming model and use it to generate representative_dataset
  with quantize.quantize_scope():
    stream_quantized_model = utils.to_streaming_inference(
        model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)

  calibration_data = prepare_calibration_data(stream_quantized_model,
                                              total_stride, train_image)

  def representative_dataset(dtype):
    def _representative_dataset_gen():
      for i in range(len(calibration_data)):
        yield [
            calibration_data[i][0].astype(dtype),  # input audio packet
            calibration_data[i][1].astype(dtype),  # conv state
            calibration_data[i][2].astype(dtype)  # flatten state
        ]

    return _representative_dataset_gen

  # convert streaming quantization aware model to tflite
  # and apply post training quantization
  with quantize.quantize_scope():
    tflite_streaming_model = utils.model_to_tflite(
        sess,
        model,
        params,
        Modes.STREAM_EXTERNAL_STATE_INFERENCE,
        optimizations=[tf.lite.Optimize.DEFAULT],
        inference_type=tf.int8,
        experimental_new_quantizer=True,
        representative_dataset=representative_dataset(np.float32))

  # run tflite in streaming mode and compare output logits with tf
  interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
  interpreter.allocate_tensors()
  input_states = []
  for detail in interpreter.get_input_details():
    input_states.append(np.zeros(detail['shape'], dtype=np.float32))
  stream_out_tflite = inference.run_stream_inference_classification_tflite(
      params, interpreter, train_image, input_states)

  self.assertAllClose(stream_out_tflite, non_stream_output_tf, atol=0.001)
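# `prepare_calibration_data` above is a helper not shown in this excerpt. A
# hedged sketch of what it is assumed to do, inferred from how the test indexes
# its result ([audio packet, conv state, flatten state] per step): run the
# external-state streaming model packet by packet and record each call's exact
# inputs for TFLite post-training quantization calibration.
def prepare_calibration_data(model_stream, stride, input_data):
  calibration_data = []
  # external states start as zeros, matching the streaming model input shapes
  states = [
      np.zeros(inp.shape, dtype=np.float32) for inp in model_stream.inputs[1:]
  ]
  for i in range(input_data.shape[1] // stride):
    packet = input_data[:, i * stride:(i + 1) * stride]
    # record the tensors fed to the model at this step
    calibration_data.append([packet] + states)
    outputs = model_stream.predict([packet] + states)
    # returned states become the input states of the next step
    states = outputs[1:]
  return calibration_data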
def setUp(self):
  super(DsTcResnetTest, self).setUp()
  config = tf1.ConfigProto()
  config.gpu_options.allow_growth = True
  self.sess = tf1.Session(config=config)
  tf1.keras.backend.set_session(self.sess)
  test_utils.set_seed(123)
  tf.keras.backend.set_learning_phase(0)

  # model parameters
  model_name = 'ds_tc_resnet'
  self.params = model_params.HOTWORD_MODEL_PARAMS[model_name]
  self.params.clip_duration_ms = 160
  self.params.window_size_ms = 4.0
  self.params.window_stride_ms = 2.0
  self.params.wanted_words = 'a,b,c'
  self.params.ds_padding = "'causal','causal','causal'"
  self.params.ds_filters = '8,8,4'
  self.params.ds_repeat = '1,1,1'
  self.params.ds_residual = '0,1,1'  # residual cannot be applied with stride
  self.params.ds_kernel_size = '3,3,3'
  self.params.ds_stride = '2,1,1'  # streaming conv with stride
  self.params.ds_dilation = '1,1,1'
  self.params.ds_pool = '1,2,1'  # streaming conv with pool
  self.params.ds_filter_separable = '1,1,1'

  # convert ms to samples and compute labels count
  self.params = model_flags.update_flags(self.params)

  # compute total stride: with the settings above, pools '1,2,1' contribute
  # a factor of 2 and strides '2,1,1' another factor of 2,
  # so total_stride = 2 * 2 = 4
  pools = utils.parse(self.params.ds_pool)
  strides = utils.parse(self.params.ds_stride)
  time_stride = [1]
  for pool in pools:
    if pool > 1:
      time_stride.append(pool)
  for stride in strides:
    if stride > 1:
      time_stride.append(stride)
  total_stride = np.prod(time_stride)

  # override input data shape for streaming model with stride/pool
  self.params.data_stride = total_stride
  self.params.data_frame_padding = 'causal'

  # set desired number of frames in model
  frames_number = 16
  frames_per_call = total_stride
  frames_number = (frames_number // frames_per_call) * frames_per_call
  # number of input audio samples required to produce one output frame
  framing_stride = max(
      self.params.window_stride_samples,
      max(0,
          self.params.window_size_samples -
          self.params.window_stride_samples))
  signal_size = framing_stride * frames_number

  # desired number of samples in the input data to train non streaming model
  self.params.desired_samples = signal_size
  self.params.batch_size = 1

  self.model = ds_tc_resnet.model(self.params)
  self.model.summary()

  self.input_data = np.random.rand(self.params.batch_size,
                                   self.params.desired_samples)

  # run non-streaming inference
  self.non_stream_out = self.model.predict(self.input_data)
def test_streaming_on_1d_data_strides(self, stride):
  """Tests Conv2DTranspose on 1d in streaming mode with different strides.

  Args:
    stride: controls the upscaling factor
  """
  tf1.reset_default_graph()
  config = tf1.ConfigProto()
  config.gpu_options.allow_growth = True
  sess = tf1.Session(config=config)
  tf1.keras.backend.set_session(sess)

  # model and data parameters
  step = 1  # amount of data fed into streaming model on every iteration
  params = test_utils.Params([step], clip_duration_ms=0.25)

  # prepare input data: [batch, time, 1, channels]
  x = np.random.rand(1, params.desired_samples, 1, self.input_channels)
  inp_audio = x

  # prepare non-streaming model
  model = conv2d_transpose_model(
      params,
      filters=1,
      kernel_size=(3, 1),
      strides=(stride, 1),
      channels=self.input_channels)
  model.summary()

  # set weights with bias
  for layer in model.layers:
    if isinstance(layer, tf.keras.layers.Conv2DTranspose):
      layer.set_weights([
          np.ones(layer.weights[0].shape),
          np.zeros(layer.weights[1].shape) + 0.5
      ])

  params.data_shape = (1, 1, self.input_channels)

  # prepare streaming model
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # run inference
  non_stream_out = model.predict(inp_audio)
  stream_out = inference.run_stream_inference(params, model_stream, inp_audio)
  self.assertAllClose(stream_out, non_stream_out)

  # Convert TF non-streaming model to TFLite external-state streaming model.
  tflite_streaming_model = utils.model_to_tflite(
      sess, model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)
  self.assertTrue(tflite_streaming_model)

  # Run TFLite external-state streaming inference.
  interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
  interpreter.allocate_tensors()
  input_details = interpreter.get_input_details()
  input_states = []
  # before processing test sequence we create model state
  for s in range(len(input_details)):
    input_states.append(np.zeros(input_details[s]['shape'], dtype=np.float32))
  stream_out_tflite_external_st = inference.run_stream_inference_tflite(
      params, interpreter, inp_audio, input_states, concat=True)

  # compare streaming TFLite with external-state vs TF non-streaming
  self.assertAllClose(stream_out_tflite_external_st, non_stream_out)
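# `conv2d_transpose_model` used by both stride tests above is not shown in
# this excerpt. A hedged sketch consistent with the call sites: one
# Conv2DTranspose cell wrapped in the library's streaming-aware wrapper
# (assuming `from kws_streaming.layers import stream`); the exact padding and
# wrapper arguments are assumptions, not the original implementation.
def conv2d_transpose_model(params, filters, kernel_size, strides,
                           features=1, channels=1):
  """Builds a one-layer Conv2DTranspose model on [batch, time, features, channels]."""
  input_tf = tf.keras.layers.Input(
      shape=(params.desired_samples, features, channels), batch_size=1)
  # stream.Stream makes the cell convertible by utils.to_streaming_inference
  net = stream.Stream(
      cell=tf.keras.layers.Conv2DTranspose(
          filters=filters,
          kernel_size=kernel_size,
          strides=strides,
          padding='same'))(input_tf)
  return tf.keras.models.Model(input_tf, net)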