    def test_dynamic_shape(self):
        # model and data parameters
        params = test_utils.Params([1], clip_duration_ms=0.25)

        # prepare input data
        x = np.arange(10)
        inp_audio = x
        inp_audio = np.expand_dims(inp_audio, 0)  # add batch dim

        # prepare non stream model
        params.desired_samples = None
        model = conv1d_transpose_model(params,
                                       filters=1,
                                       kernel_size=3,
                                       stride=1)
        model.summary()

        # run inference on input with dynamic shape
        model.predict(inp_audio)

        with self.assertRaisesRegex(
                ValueError, 'in streaming mode time dimension of input packet '
                'should not be dynamic: TFLite limitation'):
            # streaming model expected to fail on input data with dynamic shape
            params.data_shape = (None, )
            utils.to_streaming_inference(model, params,
                                         Modes.STREAM_INTERNAL_STATE_INFERENCE)
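The conv1d_transpose_model helper is used but not defined in these snippets. Below is a minimal sketch of what it plausibly looks like, assuming the kws_streaming stream.Stream wrapper around a Keras Conv1DTranspose cell (the names and wiring are assumptions, not the library's exact code):

import tensorflow as tf
from kws_streaming.layers import stream


def conv1d_transpose_model(params, filters, kernel_size, stride):
  # [batch, time] -> [batch, time, 1] so Conv1DTranspose sees a channel dim
  inputs = tf.keras.layers.Input(
      shape=(params.desired_samples,), batch_size=1)
  net = tf.keras.backend.expand_dims(inputs)
  # stream.Stream makes the cell streamable; 'valid' padding is an assumption
  net = stream.Stream(
      cell=tf.keras.layers.Conv1DTranspose(
          filters=filters, kernel_size=kernel_size,
          strides=stride, padding='valid'))(net)
  return tf.keras.Model(inputs, net)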
Example #2
  def test_dynamic_shape(self):
    # model and data parameters
    params = test_utils.Params([1], clip_duration_ms=0.25)

    # prepare input data
    x = np.random.rand(1, params.desired_samples, 1, self.input_channels)
    inp_audio = x

    # prepare non stream model
    params.desired_samples = None
    model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 1),
        strides=(1, 1),
        channels=self.input_channels)
    model.summary()

    # run inference on input with dynamic shape
    model.predict(inp_audio)

    with self.assertRaisesRegex(
        ValueError, 'in streaming mode time dimension of input packet '
        'should not be dynamic: TFLite limitation'):
      # streaming model expected to fail on input data with dynamic shape
      params.data_shape = (None, 1, self.input_channels)
      utils.to_streaming_inference(
          model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
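Likewise, a sketch of the conv2d_transpose_model helper, with the features/channels arguments inferred from the call sites in these examples (the real helper may differ):

import tensorflow as tf
from kws_streaming.layers import stream


def conv2d_transpose_model(params, filters, kernel_size, strides,
                           features=1, channels=1):
  # input layout inferred from the tests: [batch, time, features, channels]
  inputs = tf.keras.layers.Input(
      shape=(params.desired_samples, features, channels), batch_size=1)
  net = stream.Stream(
      cell=tf.keras.layers.Conv2DTranspose(
          filters=filters, kernel_size=kernel_size,
          strides=strides, padding='valid'))(inputs)
  return tf.keras.Model(inputs, net)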
Example #3
  def testPreprocessStreamInferenceModeTFandTFLite(self,
                                                   preprocess,
                                                   feature_type,
                                                   model_name='gru'):
    # Validates that a model with different preprocessing settings
    # can be converted to streaming inference mode with TF and TFLite.
    params = model_params.HOTWORD_MODEL_PARAMS[model_name]
    # set parameters to test
    params.preprocess = preprocess
    params.feature_type = feature_type
    params = model_flags.update_flags(params)

    # create model
    model = models.MODELS[params.model_name](params)

    # convert TF non streaming model to TFLite streaming inference
    # with external states
    self.assertTrue(utils.model_to_tflite(
        self.sess, model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE))

    # convert TF non streaming model to TF streaming with external states
    self.assertTrue(utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE))

    # convert TF non streaming model to TF streaming with internal states
    self.assertTrue(utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE))
Example #4
  def test_to_streaming_inference(self):
    """Validate that model can be converted to any streaming mode with TF."""
    model_non_streaming = utils.to_streaming_inference(
        self.model, self.flags, Modes.NON_STREAM_INFERENCE)
    self.assertTrue(model_non_streaming)
    model_streaming_ext_state = utils.to_streaming_inference(
        self.model, self.flags, Modes.STREAM_EXTERNAL_STATE_INFERENCE)
    self.assertTrue(model_streaming_ext_state)
    model_streaming_int_state = utils.to_streaming_inference(
        self.model, self.flags, Modes.STREAM_INTERNAL_STATE_INFERENCE)
    self.assertTrue(model_streaming_int_state)
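These snippets reference the mode enum both as a bare Modes and as modes.Modes. A plausible import block they assume, following the google-research kws_streaming package layout (the exact module paths are an assumption):

# Imports these snippets appear to assume; the module paths follow the
# google-research kws_streaming package layout and are an assumption here.
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1

from kws_streaming.layers import modes
from kws_streaming.layers.modes import Modes  # some snippets alias the enum
from kws_streaming.models import utils
from kws_streaming.train import inference
from kws_streaming.train import test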
Example #5

    def test_streaming_strides(self, stride):
        """Test Conv1DTranspose layer in streaming mode with different strides.

        Args:
            stride: controls the upscaling factor
        """

        # model and data parameters
        step = 1  # amount of data fed into streaming model on every iteration
        params = test_utils.Params([step], clip_duration_ms=0.25)

        # prepare input data
        x = np.arange(params.desired_samples)
        inp_audio = x
        inp_audio = np.expand_dims(inp_audio, 0)  # add batch dim

        # prepare non stream model
        model = conv1d_transpose_model(params,
                                       filters=1,
                                       kernel_size=3,
                                       stride=stride)
        model.summary()

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # run inference
        non_stream_out = model.predict(inp_audio)
        stream_out = test.run_stream_inference(params, model_stream, inp_audio)

        self.assertAllClose(stream_out, non_stream_out)
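run_stream_inference is called here as test.run_stream_inference and elsewhere as inference.run_stream_inference, but is never shown. A minimal sketch of what such a helper is expected to do, assuming params.data_shape[0] holds the number of samples per streaming packet:

import numpy as np


def run_stream_inference(params, model_stream, inp_audio):
  # number of samples fed to the streaming model per call (assumption)
  step = params.data_shape[0]
  stream_out = None
  start, end = 0, step
  while end <= inp_audio.shape[1]:
    # feed one packet and append its output along the time axis
    packet_out = model_stream.predict(inp_audio[:, start:end])
    stream_out = packet_out if stream_out is None else np.concatenate(
        [stream_out, packet_out], axis=1)
    start, end = end, end + step
  return stream_out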
Example #6

    def testStreaming(self, input_frames):
        params = test_utils.Params([1])

        # shape of input data in the inference streaming mode (excluding batch size)
        params.data_shape = (1, self.feature_size)
        params.step = input_frames

        # prepare non streaming model
        inverse_stft_layer = inverse_stft.InverseSTFT(
            self.frame_size, self.frame_step, use_one_step=(input_frames == 1))
        input_tf = tf.keras.layers.Input(shape=self.signal_stft.shape[1:3],
                                         batch_size=1,
                                         dtype=tf.complex64)
        net = inverse_stft_layer(input_tf)
        model_non_stream = tf.keras.models.Model(input_tf, net)
        self.non_stream_out = model_non_stream.predict(self.signal_stft)

        # convert it to streaming model
        model_stream = utils.to_streaming_inference(
            model_non_stream, params,
            modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # run streaming inference
        stream_out = inference.run_stream_inference(params, model_stream,
                                                    self.signal_stft)

        # the last few output samples are missing in streaming mode, so
        # compare only the common prefix
        stream_output_length = stream_out.shape[1]
        self.assertAllClose(stream_out,
                            self.non_stream_out[:, 0:stream_output_length])
Example #7
    def test_delay_internal_state(self, delay_also_in_non_streaming):
        """Test delay layer with internal state."""

        # model and data parameters
        params = test_utils.Params([1], clip_duration_ms=1)

        # prepare non stream model
        time_delay = 3
        model = delay_model(params, time_delay, delay_also_in_non_streaming)
        model.summary()

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # fill the buffer
        for i in range(time_delay):
            output = model_stream.predict([i + 1])
            self.assertAllEqual(output[0, 0, 0], 0)

        # now get the data with delay
        for i in range(time_delay):
            output = model_stream.predict([0])
            self.assertAllEqual(output[0, 0, 0], i + 1)
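delay_model is also undefined in these snippets. A plausible sketch, assuming a kws_streaming Delay layer with an also_in_non_streaming option (both names are assumptions based on the call site):

import tensorflow as tf
from kws_streaming.layers import delay


def delay_model(params, time_delay, also_in_non_streaming):
  inputs = tf.keras.layers.Input(
      shape=(params.desired_samples,), batch_size=1)
  net = tf.keras.backend.expand_dims(inputs)  # [batch, time, 1]
  # Delay shifts the signal by time_delay samples; the layer and option
  # names are assumptions inferred from the test above.
  net = delay.Delay(
      delay=time_delay,
      also_in_non_streaming=also_in_non_streaming)(net)
  return tf.keras.Model(inputs, net)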
Example #8
    def testStreaming(self, input_samples):
        # prepare non streaming model
        stft_layer = stft.STFT(self.frame_size,
                               self.frame_step,
                               mode=modes.Modes.TRAINING,
                               inference_batch_size=1,
                               padding='causal')
        input_tf = tf.keras.layers.Input(shape=(self.input_signal.shape[1], ),
                                         batch_size=1)
        net = stft_layer(input_tf)
        model_non_stream = tf.keras.models.Model(input_tf, net)

        params = test_utils.Params([1])
        # shape of input data in the inference streaming mode (excluding batch size)
        params.data_shape = (input_samples * stft_layer.frame_step, )
        params.step = input_samples

        # convert it to streaming model
        model_stream = utils.to_streaming_inference(
            model_non_stream, params,
            modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # run streaming inference and compare it with default stft
        stream_out = inference.run_stream_inference(params, model_stream,
                                                    self.input_signal)
        stream_output_length = stream_out.shape[1]
        self.assertAllClose(stream_out, self.stft_out[:,
                                                      0:stream_output_length])
Example #9
    def test_transposed_conv(self):
        """Test transposed and standard conv model with 'same' padding."""
        test_utils.set_seed(123)

        # model and data parameters
        cnn_filters = [1, 1]
        cnn_kernel_size = [5, 3]
        cnn_act = ['linear', 'linear']
        cnn_use_bias = [False, False]
        cnn_paddings = ['same', 'same']
        trans_paddings = ['same', 'causal']
        params = test_utils.Params([1], clip_duration_ms=2)

        # prepare input data
        x = np.arange(params.desired_samples)
        inp_audio = x
        inp_audio = np.expand_dims(inp_audio, 0)

        # prepare non stream model
        model = transposed_conv_model(params, cnn_filters, cnn_kernel_size,
                                      cnn_act, cnn_use_bias, cnn_paddings,
                                      trans_paddings)
        # set random weights
        all_weights = []
        for w in model.get_weights():
            if isinstance(w, np.ndarray):
                shape = w.shape
                new_w = np.random.rand(*shape)
                all_weights.append(new_w)
            else:
                all_weights.append(True)
        model.set_weights(all_weights)
        model.summary()
        non_stream_out = model.predict(inp_audio)

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()
        stream_out = inference.run_stream_inference(params, model_stream,
                                                    inp_audio)

        # shift defines the index after which data in streaming mode becomes
        # valid: streaming mode uses ring buffers initialized with zeros, and
        # several cycles are needed before they fill with real data.
        shift = 2
        # the total conv delay is (5//2) * 2 + 3//2 = 5
        # (there is no delay from the k=3 s=2 transposed convs, 'same' or 'causal'),
        # and the explicit Delay layers add an additional same amount.
        total_delay = 10
        # normalize output data and compare them
        non_stream_out = non_stream_out[0, shift:-(total_delay), ]
        stream_out = stream_out[0, total_delay + shift:, ]

        self.assertAllClose(stream_out, non_stream_out)
Example #10
  def test_ds_tc_resnet_stream(self):

    # prepare tf streaming model
    model_stream = utils.to_streaming_inference(
        self.model, self.params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run streaming inference
    stream_out = test.run_stream_inference_classification(
        self.params, model_stream, self.input_data)
    self.assertAllClose(stream_out, self.non_stream_out, atol=1e-5)
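run_stream_inference_classification differs from the regression helper sketched earlier in that only the final prediction matters. A sketch under the same packet-size assumption:

def run_stream_inference_classification(params, model_stream, inp_audio):
  step = params.data_shape[0]  # samples per streaming packet (assumption)
  stream_output_prediction = None
  start, end = 0, step
  while end <= inp_audio.shape[1]:
    # internal states persist between calls, so only the last output matters
    stream_output_prediction = model_stream.predict(inp_audio[:, start:end])
    start, end = end, end + step
  return stream_output_prediction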
Example #11
  def test_streaming_on_2d_data_strides(self, stride):
    """Tests Conv2DTranspose on 2d in streaming mode with different strides.

    Args:
        stride: controls the upscaling factor
    """

    tf1.reset_default_graph()
    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(sess)

    # model and data parameters
    step = 1  # amount of data fed into streaming model on every iteration
    params = test_utils.Params([step], clip_duration_ms=0.25)

    input_features = 3
    # prepare input data: [batch, time, features, channels]
    x = np.random.rand(1, params.desired_samples, input_features,
                       self.input_channels)
    inp_audio = x

    # prepare non-streaming model
    model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 3),
        strides=(stride, stride),
        features=input_features,
        channels=self.input_channels)
    model.summary()

    # set deterministic weights: all-ones kernel and constant 0.5 bias
    for layer in model.layers:
      if isinstance(layer, tf.keras.layers.Conv2DTranspose):
        layer.set_weights([
            np.ones(layer.weights[0].shape),
            np.zeros(layer.weights[1].shape) + 0.5
        ])

    params.data_shape = (1, input_features, self.input_channels)

    # prepare streaming model
    model_stream = utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run inference
    non_stream_out = model.predict(inp_audio)
    stream_out = inference.run_stream_inference(params, model_stream, inp_audio)

    self.assertAllClose(stream_out, non_stream_out)
Example #12
    def test_residual(self, step, padding, delay_also_in_non_streaming):
        """Test residual connection in streaming mode with conv layer."""

        # model and data parameters
        cnn_filters = [1, 1]
        cnn_kernel_size = [5, 3]
        cnn_act = ['elu', 'elu']
        cnn_use_bias = [False, False]
        cnn_padding = [padding, padding]
        params = test_utils.Params([step], clip_duration_ms=2)

        # prepare input data
        x = np.arange(params.desired_samples)
        inp_audio = x
        inp_audio = np.expand_dims(inp_audio, 0)

        # prepare non stream model
        model, sum_delay = residual_model(params, cnn_filters, cnn_kernel_size,
                                          cnn_act, cnn_use_bias, cnn_padding,
                                          delay_also_in_non_streaming)
        model.summary()

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # run inference
        non_stream_out = model.predict(inp_audio)
        stream_out = inference.run_stream_inference(params, model_stream,
                                                    inp_audio)

        # normalize output data and compare them
        channel = 0
        non_stream_out = non_stream_out[0, :, channel]
        stream_out = stream_out[0, :, channel]

        min_len = min(stream_out.shape[0], non_stream_out.shape[0])
        stream_out = stream_out[0:min_len]
        non_stream_out = non_stream_out[0:min_len]

        shift = 1
        if delay_also_in_non_streaming:
            # Delay was also applied in non-streaming, as well as streaming mode.
            non_stream_out = non_stream_out[shift + sum_delay:min_len]
        else:
            non_stream_out = non_stream_out[shift:min_len - sum_delay]
        stream_out = stream_out[sum_delay + shift:]

        self.assertAllEqual(non_stream_out.shape, (31 - sum_delay, ))
        self.assertAllClose(stream_out, non_stream_out)
Example #13
    def testToStreamInferenceModeTFandTFLite(self, model_name='gru'):
        """Validate that model can be converted to any streaming inference mode."""
        params = _HOTWORD_MODEL_PARAMS[model_name]
        params = model_flags.update_flags(params)

        # create model
        model = models.MODELS[params.model_name](params)

        # convert TF non streaming model to TFLite streaming inference
        # with external states
        self.assertTrue(
            utils.model_to_tflite(self.sess, model, params,
                                  Modes.STREAM_EXTERNAL_STATE_INFERENCE))

        # convert TF non streaming model to TF streaming with external states
        self.assertTrue(
            utils.to_streaming_inference(
                model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE))

        # convert TF non streaming model to TF streaming with internal states
        self.assertTrue(
            utils.to_streaming_inference(
                model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE))
Example #14
  def test_average_pooling_stream(self):

    # prepare input data
    params = test_utils.Params([1])
    params.desired_samples = 5

    batch_size = 1
    time1 = params.desired_samples  # time dim (will not be averaged out)
    time2 = 3  # this dim will be averaged out and become 1
    feature = 16  # feature dim

    # override data shape for streaming mode testing
    params.preprocess = 'custom'
    params.data_shape = (1, time2, feature)

    inp_audio = np.random.rand(batch_size, time1, time2, feature)
    inputs = tf.keras.layers.Input(
        shape=(time1, time2, feature), batch_size=batch_size)

    net = stream.Stream(
        cell=average_pooling2d.AveragePooling2D(
            kernel_size=(time1, time2),
            padding='valid'),
        use_one_step=False,
        pad_time_dim='causal')(inputs)

    model = tf.keras.Model(inputs, net)
    model.summary()

    # prepare streaming model
    model_stream = utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run inference and compare streaming vs non streaming
    non_stream_out = model.predict(inp_audio)
    stream_out = test.run_stream_inference(params, model_stream, inp_audio)
    self.assertAllClose(stream_out, non_stream_out)

    net = tf.keras.layers.GlobalAveragePooling2D()(inputs)
    model_global = tf.keras.Model(inputs, net)
    model_global.summary()

    global_out = model_global.predict(inp_audio)
    # the last frame of the streaming output must equal the global average
    self.assertAllClose(stream_out[0, -1, 0, :], global_out[0, :])
Example #15
    def testToNonStreamInferenceTFandTFLite(self, model_name='svdf'):
        """Validate that model can be converted to non stream inference mode."""
        params = _HOTWORD_MODEL_PARAMS[model_name]
        params = model_flags.update_flags(params)

        # create model
        model = models.MODELS[params.model_name](params)

        # convert TF non streaming model to TF non streaming inference model
        # it will disable dropouts
        self.assertTrue(
            utils.to_streaming_inference(model, params,
                                         Modes.NON_STREAM_INFERENCE))

        # convert TF non streaming model to TFLite non streaming inference
        self.assertTrue(
            utils.model_to_tflite(self.sess, model, params,
                                  Modes.NON_STREAM_INFERENCE))
Example #16
    def test_external_streaming_shapes(self, model_name):
        params = model_params.HOTWORD_MODEL_PARAMS[model_name]
        params = model_flags.update_flags(params)
        model = models.MODELS[params.model_name](params)
        external_model = utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)

        # The first 'n' inputs correspond to the 'n' inputs that the model takes
        # in non-streaming mode. The rest of the input tensors represent the
        # internal states for each layer in the model.
        inputs = [
            np.zeros(shape, dtype=np.float32)
            for shape in external_model.input_shapes
        ]
        outputs = external_model.predict(inputs)
        for output, expected_shape in zip(outputs,
                                          external_model.output_shapes):
            self.assertEqual(output.shape, expected_shape)
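For external-state models the caller owns the states between calls. A compact, hypothetical driver loop showing the feedback pattern (the full version is spelled out in the accuracy function of Example #21 below):

import numpy as np

# `external_model` and `packets` are placeholders: input 0 is the audio
# packet, the remaining inputs/outputs are the per-layer states.
inputs = [
    np.zeros(shape, dtype=np.float32)
    for shape in external_model.input_shapes
]
for packet in packets:
  inputs[0] = packet
  outputs = external_model.predict(inputs)
  for s in range(1, len(inputs)):
    inputs[s] = outputs[s]  # feed output states back as next input states
prediction = np.argmax(outputs[0])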
Example #17
    def test_stream_strided_convolution(self, get_model, conv_cell):
        # Test streaming convolutional layers with striding, dilation.
        cnn_filters = [1, 1, 1, 1]
        cnn_kernel_size = [3, 3, 3, 3]
        cnn_act = ['linear', 'linear', 'elu', 'elu']
        cnn_dilation_rate = [1, 1, 1, 2]
        cnn_strides = [2, 1, 3, 1]
        cnn_use_bias = [False, False, False, False]

        # prepare input data
        params = test_utils.Params(cnn_strides)
        x = np.arange(params.desired_samples)
        frequency = 2.0
        inp_audio = np.cos((2.0 * np.pi / params.desired_samples) * frequency *
                           x) + np.random.rand(1, params.desired_samples) * 0.5

        # prepare non stream model
        model = get_model(params, conv_cell, cnn_filters, cnn_kernel_size,
                          cnn_act, cnn_dilation_rate, cnn_strides,
                          cnn_use_bias)
        model.summary()

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # run inference
        non_stream_out = model.predict(inp_audio)
        stream_out = test.run_stream_inference(params, model_stream, inp_audio)

        # normalize output data and compare them
        channel = 0
        non_stream_out = non_stream_out[0, :, channel]
        stream_out = stream_out[0, :, channel]

        min_len = min(stream_out.shape[0], non_stream_out.shape[0])
        stream_out = stream_out[0:min_len]
        non_stream_out = non_stream_out[0:min_len]
        self.assertAllEqual(non_stream_out.shape, (42, ))
        self.assertAllClose(stream_out, non_stream_out)
Example #18
    def test_residual(self, step):

        # model and data parameters
        cnn_filters = [1, 1, 1, 1]
        cnn_kernel_size = [3, 3, 3, 3]
        cnn_act = ['linear', 'linear', 'elu', 'elu']
        cnn_use_bias = [False, False, False, False]
        cnn_padding = ['causal', 'causal', 'causal', 'causal']
        params = test_utils.Params([step], clip_duration_ms=2)

        # prepare input data
        x = np.arange(params.desired_samples)
        frequency = 2.0
        inp_audio = np.cos((2.0 * np.pi / params.desired_samples) * frequency *
                           x) + np.random.rand(1, params.desired_samples) * 0.5

        # prepare non stream model
        model = residual_model(params, cnn_filters, cnn_kernel_size, cnn_act,
                               cnn_use_bias, cnn_padding)
        model.summary()

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # run inference
        non_stream_out = model.predict(inp_audio)
        stream_out = test.run_stream_inference(params, model_stream, inp_audio)

        # normalize output data and compare them
        channel = 0
        non_stream_out = non_stream_out[0, :, channel]
        stream_out = stream_out[0, :, channel]

        min_len = min(stream_out.shape[0], non_stream_out.shape[0])
        stream_out = stream_out[0:min_len]
        non_stream_out = non_stream_out[0:min_len]
        self.assertAllEqual(non_stream_out.shape, (32, ))
        self.assertAllClose(stream_out, non_stream_out)
Example #19

  def test_stream_framing(self, batch_frames, window_stride_samples):
    """Test DataFrame in streaming mode with different batch_frames and stride.

    Args:
        batch_frames: number of frames produced by one call in streaming mode
        window_stride_samples: stride of sliding window
    """

    # data parameters
    params = Params(
        batch_frames=batch_frames, window_stride_samples=window_stride_samples)

    # prepare input data
    input_audio = np.arange(params.desired_samples)
    input_audio = np.expand_dims(input_audio, 0)  # add batch dim

    # prepare non stream model
    padding = 'causal'
    inputs = tf.keras.Input(
        shape=(params.desired_samples,), batch_size=1, dtype=tf.float32)
    net = inputs
    net = data_frame.DataFrame(
        frame_size=params.window_size_samples,
        frame_step=params.window_stride_samples,
        use_one_step=False,
        padding=padding)(
            net)
    model = tf.keras.Model(inputs, net)
    model.summary()

    # prepare streaming model
    model_stream = utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run inference
    non_stream_out = model.predict(input_audio)
    stream_out = test.run_stream_inference(params, model_stream, input_audio)
    self.assertAllClose(stream_out, non_stream_out)
Example #20
    def test_conv(self):
        """Test conv model with 'same' padding."""

        # model and data parameters
        cnn_filters = [1, 1, 1]
        cnn_kernel_size = [5, 3, 5]
        cnn_act = ['elu', 'elu', 'elu']
        cnn_use_bias = [False, False, False]
        cnn_padding = ['same', 'causal', 'same']
        params = test_utils.Params([1], clip_duration_ms=2)

        # prepare input data
        x = np.arange(params.desired_samples)
        inp_audio = x
        inp_audio = np.expand_dims(inp_audio, 0)

        # prepare non stream model
        model, sum_delay, sum_shift = conv_model(params, cnn_filters,
                                                 cnn_kernel_size, cnn_act,
                                                 cnn_use_bias, cnn_padding)
        model.summary()
        non_stream_out = model.predict(inp_audio)

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()
        stream_out = inference.run_stream_inference(params, model_stream,
                                                    inp_audio)

        shift = sum_shift + 1
        # normalize output data and compare them
        non_stream_out = non_stream_out[0, shift:-(sum_delay), ]
        stream_out = stream_out[0, sum_delay + shift:, ]

        self.assertAllClose(stream_out, non_stream_out)
Example #21
def tf_stream_state_external_model_accuracy(
        flags,
        folder,
        weights_name='best_weights',
        accuracy_name='stream_state_external_model_accuracy_sub_set.txt',
        reset_state=False,
        max_test_samples=1000):
    """Compute accuracy of streamable model with external state using TF.

  Args:
      flags: model and data settings
      folder: folder name where accuracy report will be stored
      weights_name: file name with model weights
      accuracy_name: file name for storing accuracy in path + accuracy_name
      reset_state: reset state between testing sequences.
        If True - then it is non streaming testing environment: state will be
          reseted on every test and will not be transferred to another one (as
          it is done in real streaming).
      max_test_samples: max number of test samples. In this mode model is slow
        with TF because of batch size 1, so accuracy is computed on subset of
        testing data
  Returns:
    accuracy
  """
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    tf.keras.backend.set_session(sess)

    audio_processor = input_data.AudioProcessor(flags)
    set_size = audio_processor.set_size('testing')
    set_size = np.minimum(max_test_samples, set_size)
    inference_batch_size = 1
    tf.keras.backend.set_learning_phase(0)
    flags.batch_size = inference_batch_size  # set batch size
    model = models.MODELS[flags.model_name](flags)
    weights_path = os.path.join(flags.train_dir, weights_name)
    model.load_weights(weights_path).expect_partial()
    model_stream = utils.to_streaming_inference(
        model, flags, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)

    logging.info('tf stream model state external with reset_state %d',
                 reset_state)

    inputs = []
    for s in range(len(model_stream.inputs)):
        inputs.append(np.zeros(model_stream.inputs[s].shape, dtype=np.float32))

    total_accuracy = 0.0
    count = 0.0
    inference_batch_size = 1
    for i in range(0, set_size, inference_batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            inference_batch_size, i, flags, 0.0, 0.0, 0, 'testing', 0.0, 0.0,
            sess)

        if reset_state:
            for s in range(len(model_stream.inputs)):
                inputs[s] = np.zeros(model_stream.inputs[s].shape,
                                     dtype=np.float32)

        if flags.preprocess == 'raw':
            start = 0
            end = flags.window_stride_samples
            # iterate over time samples with stride = window_stride_samples
            while end <= test_fingerprints.shape[1]:
                # get new frame from stream of data
                stream_update = test_fingerprints[:, start:end]

                # update indexes of streamed updates
                start = end
                end = start + flags.window_stride_samples

                # set input audio data (by default input data at index 0)
                inputs[0] = stream_update

                # run inference
                outputs = model_stream.predict(inputs)

                # get output states and set it back to input states
                # which will be fed in the next inference cycle
                for s in range(1, len(model_stream.inputs)):
                    inputs[s] = outputs[s]

                stream_output_arg = np.argmax(outputs[0])
        else:
            # iterate over frames
            for t in range(test_fingerprints.shape[1]):
                # get new frame from stream of data
                stream_update = test_fingerprints[:, t, :]

                # [batch, time=1, feature]
                stream_update = np.expand_dims(stream_update, axis=1)

                # set input audio data (by default input data at index 0)
                inputs[0] = stream_update

                # run inference
                outputs = model_stream.predict(inputs)

                # get output states and set it back to input states
                # which will be fed in the next inference cycle
                for s in range(1, len(model_stream.inputs)):
                    inputs[s] = outputs[s]

                stream_output_arg = np.argmax(outputs[0])
        total_accuracy = total_accuracy + (test_ground_truth[0]
                                           == stream_output_arg)
        count = count + 1
        if i % 200 == 0 and i:
            logging.info(
                'tf test accuracy, stream model state external = %.2f%% %d out of %d',
                *(total_accuracy * 100 / count, i, set_size))

    total_accuracy = total_accuracy / count
    logging.info(
        'TF Final test accuracy of stream model state external = %.2f%% (N=%d)',
        *(total_accuracy * 100, set_size))

    path = os.path.join(flags.train_dir, folder)
    if not os.path.exists(path):
        os.makedirs(path)

    fname_summary = 'model_summary_stream_state_external'
    utils.save_model_summary(model_stream,
                             path,
                             file_name=fname_summary + '.txt')

    tf.keras.utils.plot_model(model_stream,
                              to_file=os.path.join(path,
                                                   fname_summary + '.png'),
                              show_shapes=True,
                              expand_nested=True)

    with open(os.path.join(path, accuracy_name), 'wt') as fd:
        fd.write('%f on set_size %d' % (total_accuracy * 100, set_size))
    return total_accuracy * 100
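A hypothetical invocation of this function, assuming flags came from model_flags.update_flags and flags.train_dir contains a trained 'best_weights' checkpoint:

# Hypothetical invocation; names of flags and folders are placeholders.
accuracy = tf_stream_state_external_model_accuracy(
    flags, folder='accuracies', reset_state=True, max_test_samples=1000)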
Example #22
def tf_stream_state_internal_model_accuracy(
        flags,
        folder,
        weights_name='best_weights',
        accuracy_name='tf_stream_state_internal_model_accuracy_sub_set.txt',
        max_test_samples=1000):
    """Compute accuracy of streamable model with internal state using TF.

  Testign model with batch size 1 can be slow, so accuracy is evaluated
  on subset of data with size max_test_samples
  Args:
      flags: model and data settings
      folder: folder name where accuracy report will be stored
      weights_name: file name with model weights
      accuracy_name: file name for storing accuracy in path + accuracy_name
      max_test_samples: max number of test samples. In this mode model is slow
        with TF because of batch size 1, so accuracy is computed on subset of
        testing data
  Returns:
    accuracy
  """
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    tf.keras.backend.set_session(sess)

    logging.info('tf stream model state internal without state resetting '
                 'between testing sequences')

    audio_processor = input_data.AudioProcessor(flags)
    set_size = audio_processor.set_size('testing')
    set_size = np.minimum(max_test_samples, set_size)
    inference_batch_size = 1
    tf.keras.backend.set_learning_phase(0)
    flags.batch_size = inference_batch_size  # set batch size
    model = models.MODELS[flags.model_name](flags)
    weights_path = os.path.join(flags.train_dir, weights_name)
    model.load_weights(weights_path).expect_partial()

    model_stream = utils.to_streaming_inference(
        model, flags, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)

    total_accuracy = 0.0
    count = 0.0
    for i in range(0, set_size, inference_batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            inference_batch_size, i, flags, 0.0, 0.0, 0, 'testing', 0.0, 0.0,
            sess)

        if flags.preprocess == 'raw':
            stream_output_prediction = run_stream_inference_classification(
                flags, model_stream, test_fingerprints)
            stream_output_arg = np.argmax(stream_output_prediction)
        else:
            # iterate over frames
            for t in range(test_fingerprints.shape[1]):
                # get new frame from stream of data
                stream_update = test_fingerprints[:, t, :]

                # [batch, time=1, feature]
                stream_update = np.expand_dims(stream_update, axis=1)

                # classification result of a current frame
                stream_output_prediction = model_stream.predict(stream_update)
                stream_output_arg = np.argmax(stream_output_prediction)

        total_accuracy = total_accuracy + (test_ground_truth[0]
                                           == stream_output_arg)
        count = count + 1
        if i % 200 == 0 and i:
            logging.info(
                'tf test accuracy, stream model state internal = %.2f%% %d out of %d',
                *(total_accuracy * 100 / count, i, set_size))

    total_accuracy = total_accuracy / count
    logging.info(
        'TF Final test accuracy of stream model state internal = %.2f%% (N=%d)',
        *(total_accuracy * 100, set_size))

    path = os.path.join(flags.train_dir, folder)
    if not os.path.exists(path):
        os.makedirs(path)

    fname_summary = 'model_summary_stream_state_internal'
    utils.save_model_summary(model_stream,
                             path,
                             file_name=fname_summary + '.txt')

    tf.keras.utils.plot_model(model_stream,
                              to_file=os.path.join(path,
                                                   fname_summary + '.png'),
                              show_shapes=True,
                              expand_nested=True)

    with open(os.path.join(path, accuracy_name), 'wt') as fd:
        fd.write('%f on set_size %d' % (total_accuracy * 100, set_size))
    return total_accuracy * 100
Example #23
  def test_streaming_on_1d_data_strides(self, stride):
    """Tests Conv2DTranspose on 1d in streaming mode with different strides.

    Args:
        stride: controls the upscaling factor
    """

    tf1.reset_default_graph()
    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(sess)

    # model and data parameters
    step = 1  # amount of data fed into streaming model on every iteration
    params = test_utils.Params([step], clip_duration_ms=0.25)

    # prepare input data: [batch, time, 1, channels]
    x = np.random.rand(1, params.desired_samples, 1, self.input_channels)
    inp_audio = x

    # prepare non-streaming model
    model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 1),
        strides=(stride, 1),
        channels=self.input_channels)
    model.summary()

    # set deterministic weights: all-ones kernel and constant 0.5 bias
    for layer in model.layers:
      if isinstance(layer, tf.keras.layers.Conv2DTranspose):
        layer.set_weights([
            np.ones(layer.weights[0].shape),
            np.zeros(layer.weights[1].shape) + 0.5
        ])

    params.data_shape = (1, 1, self.input_channels)

    # prepare streaming model
    model_stream = utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run inference
    non_stream_out = model.predict(inp_audio)
    stream_out = inference.run_stream_inference(params, model_stream, inp_audio)

    self.assertAllClose(stream_out, non_stream_out)

    # Convert TF non-streaming model to TFLite external-state streaming model.
    tflite_streaming_model = utils.model_to_tflite(
        sess, model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)
    self.assertTrue(tflite_streaming_model)

    # Run TFLite external-state streaming inference.
    interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()

    input_states = []
    # before processing the test sequence, create zero-initialized model states
    for s in range(len(input_details)):
      input_states.append(np.zeros(input_details[s]['shape'], dtype=np.float32))

    stream_out_tflite_external_st = inference.run_stream_inference_tflite(
        params, interpreter, inp_audio, input_states, concat=True)

    # compare streaming TFLite with external-state vs TF non-streaming
    self.assertAllClose(stream_out_tflite_external_st, non_stream_out)
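run_stream_inference_tflite is not shown in these examples. A plausible sketch of the TFLite external-state loop it implements; the assumptions that tensor index 0 carries the audio packet and that input and output state orderings match mirror the TF loop in Example #21:

import numpy as np


def run_stream_inference_tflite(params, interpreter, inp_audio,
                                input_states, concat=True):
  input_details = interpreter.get_input_details()
  output_details = interpreter.get_output_details()
  step = params.data_shape[0]  # samples per packet (assumption)
  stream_out = None
  start, end = 0, step
  while end <= inp_audio.shape[1]:
    # tensor 0 is assumed to be the audio packet; the rest are states
    input_states[0] = inp_audio[:, start:end].astype(np.float32)
    for s in range(len(input_details)):
      interpreter.set_tensor(input_details[s]['index'], input_states[s])
    interpreter.invoke()
    packet_out = interpreter.get_tensor(output_details[0]['index'])
    # output states are assumed to line up with input states by index
    for s in range(1, len(input_details)):
      input_states[s] = interpreter.get_tensor(output_details[s]['index'])
    stream_out = packet_out if stream_out is None else np.concatenate(
        [stream_out, packet_out], axis=1)
    start, end = end, end + step
  return stream_out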
Example #24
    def test_stream_strided_convolution(self, get_model, conv_cell):
        # Test streaming convolutional layers with striding, dilation.
        cnn_filters = [1, 1, 1, 1]
        cnn_kernel_size = [3, 3, 3, 3]
        cnn_act = ['linear', 'linear', 'elu', 'elu']
        cnn_dilation_rate = [1, 1, 1, 2]
        cnn_strides = [2, 1, 3, 1]
        cnn_use_bias = [False, False, False, False]

        # prepare input data
        params = test_utils.Params(cnn_strides)
        x = np.arange(params.desired_samples)
        frequency = 2.0
        inp_audio = np.cos((2.0 * np.pi / params.desired_samples) * frequency *
                           x) + np.random.rand(1, params.desired_samples) * 0.5

        if conv_cell == tf.keras.layers.SeparableConv1D:
            kwargs = dict(
                depthwise_initializer=tf.keras.initializers.GlorotUniform(
                    seed=123),
                pointwise_initializer=tf.keras.initializers.GlorotUniform(
                    seed=456))
        else:
            kwargs = dict(
                kernel_initializer=tf.keras.initializers.GlorotUniform(
                    seed=123))

        # Prepare Keras native model.
        model_native = conv_model_keras_native(params, conv_cell, cnn_filters,
                                               cnn_kernel_size, cnn_act,
                                               cnn_dilation_rate, cnn_strides,
                                               cnn_use_bias, **kwargs)
        model_native.summary()

        # prepare non stream model
        model = get_model(params, conv_cell, cnn_filters, cnn_kernel_size,
                          cnn_act, cnn_dilation_rate, cnn_strides,
                          cnn_use_bias, **kwargs)
        model.summary()

        # prepare streaming model
        model_stream = utils.to_streaming_inference(
            model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream.summary()

        # run inference
        non_stream_out = model.predict(inp_audio)
        native_out = model_native.predict(inp_audio)
        stream_out = test.run_stream_inference(params, model_stream, inp_audio)

        # normalize output data and compare them
        channel = 0
        non_stream_out = non_stream_out[0, :, channel]
        native_out = native_out[0, :, channel]
        stream_out = stream_out[0, :, channel]

        min_len = min(stream_out.shape[0], non_stream_out.shape[0])
        stream_out = stream_out[0:min_len]
        native_out = native_out[0:min_len]
        non_stream_out = non_stream_out[0:min_len]
        self.assertAllEqual(non_stream_out.shape,
                            (params.desired_samples // np.prod(cnn_strides),))

        with self.subTest(name='stream_vs_non_stream'):
            self.assertAllClose(stream_out, non_stream_out)

        with self.subTest(name='non_stream_vs_native'):
            self.assertAllClose(non_stream_out, native_out)
Example #25
  def test_cnn_model_end_to_end(self):

    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(sess)
    test_utils.set_seed(123)

    # data parameters
    num_time_bins = 12
    feature_size = 12

    # model params.
    total_stride = 2
    params = test_utils.Params([total_stride], 0)
    params.model_name = 'cnn'
    params.cnn_filters = '2'
    params.cnn_kernel_size = '(3,3)'
    params.cnn_act = "'relu'"
    params.cnn_dilation_rate = '(1,1)'
    params.cnn_strides = '(2,2)'
    params.dropout1 = 0.5
    params.units2 = ''
    params.act2 = ''

    params.label_count = 2
    params.return_softmax = True
    params.quantize = 1  # apply quantization aware training

    params.data_shape = (num_time_bins, feature_size)
    params.preprocess = 'custom'

    model = cnn.model(params)
    model.summary()

    # prepare training and testing data
    train_images, train_labels = test_utils.generate_data(
        img_size_y=num_time_bins, img_size_x=feature_size, n_samples=32)
    test_images = train_images
    test_labels = train_labels

    # create and train quantization aware model in non streaming mode
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'])
    model.fit(
        train_images,
        train_labels,
        epochs=1,
        validation_data=(test_images, test_labels))
    model.summary()

    # one test image
    train_image = train_images[:1,]

    # run tf non streaming inference
    non_stream_output_tf = model.predict(train_image)

    # specify input data shape for streaming mode
    params.data_shape = (total_stride, feature_size)
    # TODO(rybakov) add params structure for model with no feature extractor

    # prepare tf streaming model and use it to generate representative_dataset
    with quantize.quantize_scope():
      stream_quantized_model = utils.to_streaming_inference(
          model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)

    calibration_data = prepare_calibration_data(stream_quantized_model,
                                                total_stride, train_image)

    def representative_dataset(dtype):
      def _representative_dataset_gen():
        for i in range(len(calibration_data)):
          yield [
              calibration_data[i][0].astype(dtype),  # input audio packet
              calibration_data[i][1].astype(dtype),  # conv state
              calibration_data[i][2].astype(dtype)  # flatten state
          ]

      return _representative_dataset_gen

    # convert streaming quantization aware model to tflite
    # and apply post training quantization
    with quantize.quantize_scope():
      tflite_streaming_model = utils.model_to_tflite(
          sess, model, params,
          Modes.STREAM_EXTERNAL_STATE_INFERENCE,
          optimizations=[tf.lite.Optimize.DEFAULT],
          inference_type=tf.int8,
          experimental_new_quantizer=True,
          representative_dataset=representative_dataset(np.float32))

    # run tflite in streaming mode and compare output logits with tf
    interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
    interpreter.allocate_tensors()
    input_states = []
    for detail in interpreter.get_input_details():
      input_states.append(np.zeros(detail['shape'], dtype=np.float32))
    stream_out_tflite = inference.run_stream_inference_classification_tflite(
        params, interpreter, train_image, input_states)
    self.assertAllClose(stream_out_tflite, non_stream_output_tf, atol=0.001)
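prepare_calibration_data is also undefined here. A plausible sketch: run the external-state streaming model over the test image in stride-sized packets and record the inputs seen at each step, so the representative dataset covers realistic state values (the exact helper may differ):

import numpy as np


def prepare_calibration_data(model_stream, step, input_data):
  calibration_data = []
  inputs = [
      np.zeros(inp.shape, dtype=np.float32) for inp in model_stream.inputs
  ]
  start, end = 0, step
  while end <= input_data.shape[1]:
    inputs[0] = input_data[:, start:end]
    # record a copy of every (audio, state...) tuple seen during streaming
    calibration_data.append([np.copy(x) for x in inputs])
    outputs = model_stream.predict(inputs)
    for s in range(1, len(model_stream.inputs)):
      inputs[s] = outputs[s]
    start, end = end, end + step
  return calibration_data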