def test_streaming_strides(self, stride):
    """Check streaming vs non-streaming output of the Conv1DTranspose model.

    Args:
      stride: stride forwarded to the model; controls the upscaling factor.
    """
    # Amount of data fed into the streaming model on every iteration.
    samples_per_step = 1
    params = test_utils.Params([samples_per_step], clip_duration_ms=0.25)

    # Input is the ramp 0..desired_samples-1 with a leading batch dimension.
    audio = np.expand_dims(np.arange(params.desired_samples), 0)

    # Non-streaming reference model.
    reference_model = conv1d_transpose_model(
        params, filters=1, kernel_size=3, stride=stride)
    reference_model.summary()

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run both models on the same input and compare the results.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)
    self.assertAllClose(actual, expected)
def test_streaming_on_2d_data_strides(self, stride):
    """Check streaming vs non-streaming Conv2DTranspose on 2d input.

    Args:
      stride: used for both entries of the model's `strides` pair; controls
        the upscaling factor.
    """
    # Start from a clean graph and install a session with GPU memory growth
    # enabled as the Keras backend session.
    tf1.reset_default_graph()
    session_config = tf1.ConfigProto()
    session_config.gpu_options.allow_growth = True
    tf1.keras.backend.set_session(tf1.Session(config=session_config))

    # Amount of data fed into the streaming model on every iteration.
    samples_per_step = 1
    params = test_utils.Params([samples_per_step], clip_duration_ms=0.25)
    num_features = 3
    num_channels = self.input_channels

    # Random input laid out as [batch, time, features, channels].
    audio = np.random.rand(
        1, params.desired_samples, num_features, num_channels)

    # Non-streaming reference model.
    reference_model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 3),
        strides=(stride, stride),
        features=num_features,
        channels=num_channels)
    reference_model.summary()

    # Give every Conv2DTranspose layer deterministic weights: the first
    # weight tensor is all ones, the second one (the bias) is all 0.5.
    transpose_layers = [
        layer for layer in reference_model.layers
        if isinstance(layer, tf.keras.layers.Conv2DTranspose)
    ]
    for layer in transpose_layers:
        first_shape = layer.weights[0].shape
        second_shape = layer.weights[1].shape
        layer.set_weights(
            [np.ones(first_shape), np.zeros(second_shape) + 0.5])

    # Data shape has to be overridden before the streaming conversion.
    params.data_shape = (1, num_features, num_channels)

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run both models on the same input and compare the results.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)
    self.assertAllClose(actual, expected)
def test_residual(self, step, padding, delay_also_in_non_streaming):
    """Check a residual connection around conv layers in streaming mode.

    Args:
      step: single entry of the list passed to test_utils.Params.
      padding: padding value used for both conv layers.
      delay_also_in_non_streaming: forwarded to residual_model; also selects
        how the non-streaming output is trimmed before the comparison.
    """
    # Two conv layers; only the kernel sizes differ between them.
    layer_filters = [1, 1]
    layer_kernel_sizes = [5, 3]
    layer_activations = ['elu', 'elu']
    layer_use_bias = [False, False]
    layer_padding = [padding, padding]

    params = test_utils.Params([step], clip_duration_ms=2)

    # Input is the ramp 0..desired_samples-1 with a leading batch dimension.
    audio = np.expand_dims(np.arange(params.desired_samples), 0)

    # Non-streaming model; residual_model also returns the summed delay.
    reference_model, sum_delay = residual_model(
        params, layer_filters, layer_kernel_sizes, layer_activations,
        layer_use_bias, layer_padding, delay_also_in_non_streaming)
    reference_model.summary()

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run both models on the same input.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)

    # Keep the first batch entry and channel 0, cut to the common length.
    channel = 0
    expected = expected[0, :, channel]
    actual = actual[0, :, channel]
    min_len = min(actual.shape[0], expected.shape[0])
    actual = actual[0:min_len]
    expected = expected[0:min_len]

    shift = 1
    if delay_also_in_non_streaming:
        # Delay was also applied in non-streaming, as well as streaming mode.
        start, stop = shift + sum_delay, min_len
    else:
        # Only drop `shift` leading samples and the trailing `sum_delay`.
        start, stop = shift, min_len - sum_delay
    expected = expected[start:stop]

    # The streaming output always skips its first sum_delay + shift samples.
    actual = actual[sum_delay + shift:]

    self.assertAllEqual(expected.shape, (31 - sum_delay,))
    self.assertAllClose(actual, expected)
def test_average_pooling_stream(self):
    """Check streaming AveragePooling2D against non-streaming and global avg.

    The pooling cell is wrapped in stream.Stream, run in non-streaming and
    streaming mode, and the last streaming result is also compared with
    GlobalAveragePooling2D applied to the same input.
    """
    params = test_utils.Params([1])
    params.desired_samples = 5

    batch_size = 1
    time_dim = params.desired_samples  # time dim (will not be averaged out)
    pooled_dim = 3  # this dim will be averaged out and become 1
    feature_dim = 16  # feature dim

    # Override data shape for streaming mode testing.
    params.preprocess = 'custom'
    params.data_shape = (1, pooled_dim, feature_dim)

    audio = np.random.rand(batch_size, time_dim, pooled_dim, feature_dim)

    # Non-streaming model: average pooling wrapped into a Stream layer.
    inputs = tf.keras.layers.Input(
        shape=(time_dim, pooled_dim, feature_dim), batch_size=batch_size)
    pooling_cell = average_pooling2d.AveragePooling2D(
        kernel_size=(time_dim, pooled_dim), padding='valid')
    pooled = stream.Stream(
        cell=pooling_cell, use_one_step=False, pad_time_dim='causal')(inputs)
    reference_model = tf.keras.Model(inputs, pooled)
    reference_model.summary()

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run inference and compare streaming vs non streaming.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)
    self.assertAllClose(actual, expected)

    # Global average pooling over the same input.
    global_pooled = tf.keras.layers.GlobalAveragePooling2D()(inputs)
    global_model = tf.keras.Model(inputs, global_pooled)
    global_model.summary()
    global_expected = global_model.predict(audio)

    # Last result in streaming output has to match the global average.
    self.assertAllClose(actual[0, -1, 0, :], global_expected[0, :])
def test_stream_strided_convolution(self, get_model, conv_cell):
    """Test streaming convolutional layers with striding and dilation.

    Args:
      get_model: callable building the model under test; it is invoked with
        params, conv_cell and the per-layer settings defined below.
      conv_cell: convolution cell forwarded to get_model.
    """
    # Per-layer settings of the four conv layers.
    layer_filters = [1] * 4
    layer_kernel_sizes = [3] * 4
    layer_activations = ['linear', 'linear', 'elu', 'elu']
    layer_dilations = [1, 1, 1, 2]
    layer_strides = [2, 1, 3, 1]
    layer_use_bias = [False] * 4

    # Input: cosine with uniform noise on top, shaped (1, desired_samples).
    params = test_utils.Params(layer_strides)
    sample_index = np.arange(params.desired_samples)
    frequency = 2.0
    phase = (2.0 * np.pi / params.desired_samples) * frequency * sample_index
    noise = np.random.rand(1, params.desired_samples) * 0.5
    audio = np.cos(phase) + noise

    # Non-streaming reference model.
    reference_model = get_model(
        params, conv_cell, layer_filters, layer_kernel_sizes,
        layer_activations, layer_dilations, layer_strides, layer_use_bias)
    reference_model.summary()

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run both models on the same input.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)

    # Keep the first batch entry and channel 0, cut to the common length.
    channel = 0
    expected = expected[0, :, channel]
    actual = actual[0, :, channel]
    common_len = min(actual.shape[0], expected.shape[0])
    actual = actual[0:common_len]
    expected = expected[0:common_len]

    self.assertAllEqual(expected.shape, (42,))
    self.assertAllClose(actual, expected)
def test_residual(self, step):
    """Check the residual model in streaming vs non-streaming mode.

    Args:
      step: single entry of the list passed to test_utils.Params.
    """
    # Per-layer settings of the four causal conv layers.
    layer_filters = [1] * 4
    layer_kernel_sizes = [3] * 4
    layer_activations = ['linear', 'linear', 'elu', 'elu']
    layer_use_bias = [False] * 4
    layer_padding = ['causal'] * 4

    params = test_utils.Params([step], clip_duration_ms=2)

    # Input: cosine with uniform noise on top, shaped (1, desired_samples).
    sample_index = np.arange(params.desired_samples)
    frequency = 2.0
    phase = (2.0 * np.pi / params.desired_samples) * frequency * sample_index
    noise = np.random.rand(1, params.desired_samples) * 0.5
    audio = np.cos(phase) + noise

    # Non-streaming reference model.
    reference_model = residual_model(
        params, layer_filters, layer_kernel_sizes, layer_activations,
        layer_use_bias, layer_padding)
    reference_model.summary()

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run both models on the same input.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)

    # Keep the first batch entry and channel 0, cut to the common length.
    channel = 0
    expected = expected[0, :, channel]
    actual = actual[0, :, channel]
    common_len = min(actual.shape[0], expected.shape[0])
    actual = actual[0:common_len]
    expected = expected[0:common_len]

    self.assertAllEqual(expected.shape, (32,))
    self.assertAllClose(actual, expected)
def test_stream_framing(self, batch_frames, window_stride_samples):
    """Check DataFrame in streaming mode against the non-streaming model.

    Args:
      batch_frames: number of frames produced by one call in streaming mode
      window_stride_samples: stride of sliding window
    """
    params = Params(
        batch_frames=batch_frames,
        window_stride_samples=window_stride_samples)

    # Input is the ramp 0..desired_samples-1 with a leading batch dimension.
    audio = np.expand_dims(np.arange(params.desired_samples), 0)

    # Non-streaming model: a single causally padded DataFrame layer.
    padding = 'causal'
    inputs = tf.keras.Input(
        shape=(params.desired_samples,), batch_size=1, dtype=tf.float32)
    framing_layer = data_frame.DataFrame(
        frame_size=params.window_size_samples,
        frame_step=params.window_stride_samples,
        use_one_step=False,
        padding=padding)
    reference_model = tf.keras.Model(inputs, framing_layer(inputs))
    reference_model.summary()

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run both models on the same input and compare the results.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)
    self.assertAllClose(actual, expected)
def test_stream_strided_convolution(self, get_model, conv_cell):
    """Test streaming convolutional layers with striding and dilation.

    Three outputs are produced from the same input: the non-streaming model
    from get_model, its streaming conversion, and a model built by
    conv_model_keras_native. All models share the same seeded initializers.

    Args:
      get_model: callable building the model under test; it is invoked with
        params, conv_cell, the per-layer settings and the initializer kwargs.
      conv_cell: convolution cell class; SeparableConv1D gets depthwise and
        pointwise initializers, anything else a kernel initializer.
    """
    # Per-layer settings of the four conv layers.
    layer_filters = [1] * 4
    layer_kernel_sizes = [3] * 4
    layer_activations = ['linear', 'linear', 'elu', 'elu']
    layer_dilations = [1, 1, 1, 2]
    layer_strides = [2, 1, 3, 1]
    layer_use_bias = [False] * 4

    # Input: cosine with uniform noise on top, shaped (1, desired_samples).
    params = test_utils.Params(layer_strides)
    sample_index = np.arange(params.desired_samples)
    frequency = 2.0
    phase = (2.0 * np.pi / params.desired_samples) * frequency * sample_index
    noise = np.random.rand(1, params.desired_samples) * 0.5
    audio = np.cos(phase) + noise

    # Seeded initializers, passed unchanged to both model builders.
    if conv_cell == tf.keras.layers.SeparableConv1D:
        initializer_kwargs = {
            'depthwise_initializer':
                tf.keras.initializers.GlorotUniform(seed=123),
            'pointwise_initializer':
                tf.keras.initializers.GlorotUniform(seed=456),
        }
    else:
        initializer_kwargs = {
            'kernel_initializer':
                tf.keras.initializers.GlorotUniform(seed=123),
        }

    # Keras native model.
    native_model = conv_model_keras_native(
        params, conv_cell, layer_filters, layer_kernel_sizes,
        layer_activations, layer_dilations, layer_strides, layer_use_bias,
        **initializer_kwargs)
    native_model.summary()

    # Non-streaming model under test.
    reference_model = get_model(
        params, conv_cell, layer_filters, layer_kernel_sizes,
        layer_activations, layer_dilations, layer_strides, layer_use_bias,
        **initializer_kwargs)
    reference_model.summary()

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Run all three models on the same input.
    expected = reference_model.predict(audio)
    native_expected = native_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)

    # Keep the first batch entry and channel 0. The common length is taken
    # from the streaming and non-streaming outputs only.
    channel = 0
    expected = expected[0, :, channel]
    native_expected = native_expected[0, :, channel]
    actual = actual[0, :, channel]
    common_len = min(actual.shape[0], expected.shape[0])
    actual = actual[0:common_len]
    native_expected = native_expected[0:common_len]
    expected = expected[0:common_len]

    # NOTE(review): `/` gives a float length here; confirm whether `//` is
    # intended for the expected shape.
    expected_len = params.desired_samples / np.prod(layer_strides)
    self.assertAllEqual(expected.shape, (expected_len,))

    with self.subTest(name='stream_vs_non_stream'):
        self.assertAllClose(actual, expected)

    with self.subTest(name='non_stream_vs_native'):
        self.assertAllClose(expected, native_expected)
def test_streaming_on_1d_data_strides(self, stride):
    """Check streaming Conv2DTranspose on 1d input, in TF and in TFLite.

    The non-streaming output is compared with TF internal-state streaming
    and with a TFLite external-state streaming model.

    Args:
      stride: stride along the first kernel axis (the second stride is 1);
        controls the upscaling factor.
    """
    # Start from a clean graph and install a session with GPU memory growth
    # enabled; the session is reused below for the TFLite conversion.
    tf1.reset_default_graph()
    session_config = tf1.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf1.Session(config=session_config)
    tf1.keras.backend.set_session(sess)

    # Amount of data fed into the streaming model on every iteration.
    samples_per_step = 1
    params = test_utils.Params([samples_per_step], clip_duration_ms=0.25)
    num_channels = self.input_channels

    # Random input laid out as [batch, time, 1, channels].
    audio = np.random.rand(1, params.desired_samples, 1, num_channels)

    # Non-streaming reference model.
    reference_model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 1),
        strides=(stride, 1),
        channels=num_channels)
    reference_model.summary()

    # Give every Conv2DTranspose layer deterministic weights: the first
    # weight tensor is all ones, the second one (the bias) is all 0.5.
    transpose_layers = [
        layer for layer in reference_model.layers
        if isinstance(layer, tf.keras.layers.Conv2DTranspose)
    ]
    for layer in transpose_layers:
        first_shape = layer.weights[0].shape
        second_shape = layer.weights[1].shape
        layer.set_weights(
            [np.ones(first_shape), np.zeros(second_shape) + 0.5])

    # Data shape has to be overridden before the streaming conversion.
    params.data_shape = (1, 1, num_channels)

    # Same model converted with the internal-state streaming mode.
    streaming_model = utils.to_streaming_inference(
        reference_model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    streaming_model.summary()

    # Compare TF streaming with TF non-streaming.
    expected = reference_model.predict(audio)
    actual = test.run_stream_inference(params, streaming_model, audio)
    self.assertAllClose(actual, expected)

    # Convert TF non-streaming model to TFLite external-state streaming model.
    tflite_model_content = utils.model_to_tflite(
        sess, reference_model, params,
        modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)
    self.assertTrue(tflite_model_content)

    # Run TFLite external-state streaming inference.
    interpreter = tf.lite.Interpreter(model_content=tflite_model_content)
    interpreter.allocate_tensors()

    # Before processing the test sequence, create a zero-filled float32
    # array for every interpreter input.
    initial_states = [
        np.zeros(detail['shape'], dtype=np.float32)
        for detail in interpreter.get_input_details()
    ]

    tflite_actual = test.run_stream_inference_tflite(
        params, interpreter, audio, initial_states, concat=True)

    # Compare streaming TFLite with external-state vs TF non-streaming.
    self.assertAllClose(tflite_actual, expected)