Example #1
    def testPreprocessStreamInferenceModeTFandTFLite(self,
                                                     preprocess,
                                                     feature_type,
                                                     model_name='gru'):
        # Validate that model with different preprocessing
        # can be converted to stream inference mode with TF and TFLite.
        params = model_params.HOTWORD_MODEL_PARAMS[model_name]
        # set parameters to test
        params.preprocess = preprocess
        params.feature_type = feature_type
        params = model_flags.update_flags(params)

        # create model
        model = models.MODELS[params.model_name](params)

        # convert TF non streaming model to TFLite streaming inference
        # with external states
        self.assertTrue(
            utils.model_to_tflite(self.sess, model, params,
                                  modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE))

        # convert TF non streaming model to TF streaming with external states
        self.assertTrue(
            utils.to_streaming_inference(
                model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE))

        # convert TF non streaming model to TF streaming with internal states
        self.assertTrue(
            utils.to_streaming_inference(
                model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE))
Example #2
    def _RunGetDataTest(self, preprocess, window_size_ms):
        tmp_dir = self.get_temp_dir()
        wav_dir = os.path.join(tmp_dir, "wavs")
        os.mkdir(wav_dir)
        self._SaveWavFolders(wav_dir, ["a", "b", "c"], 100)
        background_dir = os.path.join(wav_dir, "_background_noise_")
        os.mkdir(background_dir)
        wav_data = self._GetWavData()
        for i in range(10):
            file_path = os.path.join(background_dir,
                                     "background_audio_%d.wav" % i)
            self._SaveTestWavFile(file_path, wav_data)
        flags = self._GetDefaultFlags()
        flags.window_size_ms = window_size_ms
        flags.preprocess = preprocess
        flags.train_dir = tmp_dir
        flags.data_dir = wav_dir
        flags = model_flags.update_flags(flags)
        with self.cached_session() as sess:
            audio_processor = input_data.AudioProcessor(flags)
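            # Positional args to get_data (names assumed from the
            # speech_commands-style API this processor derives from):
            # how_many, offset, flags, background_frequency,
            # background_volume_range, time_shift, mode,
            # resample/volume-augmentation offsets, sess.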
            result_data, result_labels = audio_processor.get_data(
                10, 0, flags, 0.3, 0.1, 100, "training", 0.0, 0.0, sess)

            self.assertLen(result_data, 10)
            self.assertLen(result_labels, 10)
Example #3
    def test_model_to_saved(self, model_name='dnn'):
        """SavedModel supports both stateless and stateful graphs."""
        params = model_params.HOTWORD_MODEL_PARAMS[model_name]
        params = model_flags.update_flags(params)

        # create model
        model = models.MODELS[params.model_name](params)
        utils.model_to_saved(model, params, FLAGS.test_tmpdir)
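
The exported SavedModel can be loaded back with standard TF APIs; a minimal follow-up sketch (assuming model_to_saved wrote to the same FLAGS.test_tmpdir path used above):

loaded = tf.saved_model.load(FLAGS.test_tmpdir)
print(list(loaded.signatures))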
Example #4
def ds_tc_resnet_model_params(use_tf_fft=False):
  """Generate parameters for ds_tc_resnet model."""

  # model parameters
  model_name = 'ds_tc_resnet'
  params = model_params.HOTWORD_MODEL_PARAMS[model_name]
  params.causal_data_frame_padding = 1  # causal padding on DataFrame
  params.clip_duration_ms = 160
  params.use_tf_fft = use_tf_fft
  params.mel_non_zero_only = not use_tf_fft
  params.feature_type = 'mfcc_tf'
  params.window_size_ms = 5.0
  params.window_stride_ms = 2.0
  params.wanted_words = 'a,b,c'
  params.ds_padding = "'causal','causal','causal','causal'"
  params.ds_filters = '4,4,4,2'
  params.ds_repeat = '1,1,1,1'
  params.ds_residual = '0,1,1,1'  # no residuals on strided layers
  params.ds_kernel_size = '3,3,3,1'
  params.ds_dilation = '1,1,1,1'
  params.ds_stride = '2,1,1,1'  # streaming conv with stride
  params.ds_pool = '1,2,1,1'  # streaming conv with pool
  params.ds_filter_separable = '1,1,1,1'

  # convert ms to samples and compute labels count
  params = model_flags.update_flags(params)

  # compute total stride
  pools = model_utils.parse(params.ds_pool)
  strides = model_utils.parse(params.ds_stride)
  time_stride = [1]
  for pool in pools:
    if pool > 1:
      time_stride.append(pool)
  for stride in strides:
    if stride > 1:
      time_stride.append(stride)
  total_stride = np.prod(time_stride)
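  # e.g. with ds_pool='1,2,1,1' and ds_stride='2,1,1,1' as set above,
  # time_stride collects [1, 2, 2], so total_stride = 4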

  # override input data shape for streaming model with stride/pool
  params.data_stride = total_stride
  params.data_shape = (total_stride * params.window_stride_samples,)

  # set desired number of frames in model
  frames_number = 16
  frames_per_call = total_stride
  frames_number = (frames_number // frames_per_call) * frames_per_call
  # number of input audio samples required to produce one output frame
  framing_stride = max(
      params.window_stride_samples,
      max(0, params.window_size_samples -
          params.window_stride_samples))
  signal_size = framing_stride * frames_number
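  # e.g. at a 16 kHz sample rate (an assumption; the rate comes from flags),
  # window_size_samples = 80 and window_stride_samples = 32, so
  # framing_stride = max(32, 48) = 48 and signal_size = 48 * 16 = 768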

  # desired number of samples in the input data to train non streaming model
  params.desired_samples = signal_size
  params.batch_size = 1
  return params
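
A minimal usage sketch for the function above, assuming the ds_tc_resnet module used in Example #11 is importable:

params = ds_tc_resnet_model_params(use_tf_fft=False)
model = ds_tc_resnet.model(params)
model.summary()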
Example #5
  def testTrain(self, split_data):
    input_flags = self._GetDefaultFlags(split_data)
    input_flags = model_flags.update_flags(input_flags)
    train.train(input_flags)
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(input_flags.train_dir, 'graph.pbtxt')))
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(input_flags.train_dir, 'labels.txt')))
    self.assertTrue(
        tf.io.gfile.exists(
            os.path.join(input_flags.train_dir, 'accuracy_last.txt')))
Example #6
def get_model_with_default_params(model_name, mode=None):
  """Creates a model with the params specified in HOTWORD_MODEL_PARAMS."""
  if model_name not in model_params.HOTWORD_MODEL_PARAMS:
    raise KeyError(
        "Expected 'model_name' to be one of "
        f"{model_params.HOTWORD_MODEL_PARAMS.keys} but got '{model_name}'.")
  params = model_params.HOTWORD_MODEL_PARAMS[model_name]
  params = model_flags.update_flags(params)
  model = kws_models.MODELS[params.model_name](params)
  if mode is not None:
    model = to_streaming_inference(model, flags=params, mode=mode)
  return model
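
A brief usage sketch, assuming the kws_streaming modules referenced above (model_params, kws_models, modes) are already imported:

model = get_model_with_default_params('dnn')  # non-streaming Keras model
stream_model = get_model_with_default_params(
    'dnn', mode=modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)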
Example #7
  def _GetDefaultFlags(self, split_data):
    params = model_params.dnn_params()
    params.data_dir = (
        self._PrepareDummyTrainingData()
        if split_data == 1 else self._PrepareDummyTrainingDataSplit())
    params.wanted_words = 'a,b,c'
    params.split_data = split_data
    params.summaries_dir = self._PrepareDummyDir('summaries' + str(split_data))
    params.train_dir = self._PrepareDummyDir('train' + str(split_data))
    params.how_many_training_steps = '2'
    params.learning_rate = '0.01'
    params.eval_step_interval = 1
    params.save_step_interval = 1
    params.clip_duration_ms = 100
    params.batch_size = 1
    return model_flags.update_flags(params)
Example #8
    def _testTFLite(self,
                    preprocess='raw',
                    feature_type='mfcc_op',
                    model_name='svdf'):
        params = model_params.HOTWORD_MODEL_PARAMS[model_name]
        params.clip_duration_ms = 100  # make it shorter for testing

        # set parameters to test
        params.preprocess = preprocess
        params.feature_type = feature_type
        params = model_flags.update_flags(params)

        # create model
        model = models.MODELS[params.model_name](params)

        # convert TF non streaming model to TFLite non streaming inference
        self.assertTrue(
            utils.model_to_tflite(self.sess, model, params,
                                  modes.Modes.NON_STREAM_INFERENCE))
Example #9
def main(_):
    # Update flags
    flags = model_flags.update_flags(FLAGS)

    if flags.train:
        # Create model folders where logs and model will be stored
        os.makedirs(flags.train_dir)
        os.mkdir(flags.summaries_dir)

        # Model training
        train.train(flags)
    else:
        if not os.path.isdir(flags.train_dir):
            raise ValueError(
                'model is not trained; set "--train 1" and retrain it')

    # write all flag settings to JSON
    with open(os.path.join(flags.train_dir, 'flags.json'), 'wt') as f:
        json.dump(flags.__dict__, f)

    # convert to SavedModel
    test.convert_model_saved(flags, 'non_stream',
                             modes.Modes.NON_STREAM_INFERENCE)
    try:
        test.convert_model_saved(flags, 'stream_state_internal',
                                 modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    except (ValueError, IndexError) as e:
        logging.info('FAILED to run TF streaming: %s', e)

    logging.info('run TF non streaming model accuracy evaluation')
    # with TF
    folder_name = 'tf'
    test.tf_non_stream_model_accuracy(flags, folder_name)

    # with TF.
    # We can apply a non-streaming model to streaming data by running inference
    # every 200 ms (for example), so that total latency is similar to a
    # streaming model executed every 20 ms.
    # To measure the impact of this sampling on model accuracy,
    # we introduce time_shift_ms during accuracy evaluation.
    # Convert milliseconds to samples:
    time_shift_samples = int(
        (flags.time_shift_ms * flags.sample_rate) / model_flags.MS_PER_SECOND)
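    # e.g. time_shift_ms=100 at a 16 kHz sample rate (both come from flags;
    # 16 kHz is an assumption) gives time_shift_samples = 1600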
    test.tf_non_stream_model_accuracy(
        flags,
        folder_name,
        time_shift_samples,
        accuracy_name='tf_non_stream_model_sampling_stream_accuracy.txt')

    name2opt = {
        '': None,
        'quantize_opt_for_size_': [tf.lite.Optimize.DEFAULT],
    }

    for opt_name, optimizations in name2opt.items():

        if (opt_name and flags.feature_type == 'mfcc_tf'
                and flags.preprocess == 'raw'):
            logging.info(
                'feature type mfcc_tf needs quantization aware training '
                'for quantization - it is not implemented')
            continue

        folder_name = opt_name + 'tflite_non_stream'
        file_name = 'non_stream.tflite'
        mode = modes.Modes.NON_STREAM_INFERENCE
        test.convert_model_tflite(flags,
                                  folder_name,
                                  mode,
                                  file_name,
                                  optimizations=optimizations)
        test.tflite_non_stream_model_accuracy(flags, folder_name, file_name)

        # These models use bi-RNNs, so they are not streamable by default.
        # Models that use striding or pooling are also not supported for
        # streaming yet.
        non_streamable_models = {'att_mh_rnn', 'att_rnn', 'tc_resnet'}

        model_is_streamable = True
        if flags.model_name in non_streamable_models:
            model_is_streamable = False
        # The models below can use striding in the time dimension,
        # which is currently unsupported for streaming.
        elif flags.model_name == 'cnn':
            for strides in model_utils.parse(flags.cnn_strides):
                if strides[0] > 1:
                    model_is_streamable = False
                    break
        elif flags.model_name == 'ds_cnn':
            if model_utils.parse(flags.cnn1_strides)[0] > 1:
                model_is_streamable = False
            for strides in model_utils.parse(flags.dw2_strides):
                if strides[0] > 1:
                    model_is_streamable = False
                    break

        # set input data shape for testing inference in streaming mode
        flags.data_shape = modes.get_input_data_shape(
            flags, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)

        # if model can be streamed, then run conversion/evaluation in streaming mode
        if model_is_streamable:
            # ---------------- TF streaming model accuracy evaluation ----------------
            # Streaming model with external state evaluation using TF with state reset
            if not opt_name:
                logging.info(
                    'run TF evaluation only without optimization/quantization')
                try:
                    folder_name = 'tf'
                    test.tf_stream_state_external_model_accuracy(
                        flags,
                        folder_name,
                        accuracy_name=
                        'stream_state_external_model_accuracy_sub_set_reset1.txt',
                        reset_state=True
                    )  # with state reset between test sequences

                    # Streaming (with external state) evaluation using TF no state reset
                    test.tf_stream_state_external_model_accuracy(
                        flags,
                        folder_name,
                        accuracy_name=
                        'stream_state_external_model_accuracy_sub_set_reset0.txt',
                        reset_state=False)  # without state reset

                    # Streaming (with internal state) evaluation using TF no state reset
                    test.tf_stream_state_internal_model_accuracy(
                        flags, folder_name)
                except (ValueError, IndexError) as e:
                    logging.info('FAILED to run TF streaming: %s', e)

            logging.info('run TFlite streaming model accuracy evaluation')
            try:
                # convert model to TFlite
                folder_name = opt_name + 'tflite_stream_state_external'
                file_name = 'stream_state_external.tflite'
                mode = modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE
                test.convert_model_tflite(flags,
                                          folder_name,
                                          mode,
                                          file_name,
                                          optimizations=optimizations)

                # Streaming model accuracy evaluation with TFLite with state reset
                test.tflite_stream_state_external_model_accuracy(
                    flags,
                    folder_name,
                    file_name,
                    accuracy_name=
                    'tflite_stream_state_external_model_accuracy_reset1.txt',
                    reset_state=True)

                # Streaming model accuracy evaluation with TFLite without state reset
                test.tflite_stream_state_external_model_accuracy(
                    flags,
                    folder_name,
                    file_name,
                    accuracy_name=
                    'tflite_stream_state_external_model_accuracy_reset0.txt',
                    reset_state=False)
            except (ValueError, IndexError) as e:
                logging.info('FAILED to run TFLite streaming: %s', e)
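
For context, a typical absl entry point for a script like this (a sketch; the flag definitions and the app import are not shown on this page):

from absl import app

if __name__ == '__main__':
    app.run(main)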
Example #10
    def __init__(self, batch_size=512, version=1, preprocess="raw"):

        # Set the PATH to the data sets (for example Speech Commands V2).
        # They can be downloaded from
        # https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.01.tar.gz
        # https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz
        # https://docs.google.com/uc?export=download&id=1OAN3h4uffi5HS7eb7goklWeI2XPm1jCS
        # Files should be downloaded and then extracted into the
        # google-speech-commands directory.
        dataset = "google-speech-commands"
        DATA_PATH = os.path.join("data", dataset, "data{}".format(version))

        FLAGS = model_params.Params()
        FLAGS.data_dir = DATA_PATH
        FLAGS.verbosity = logging.ERROR

        # set wanted words for V2_35 dataset
        if version == 3:
            FLAGS.wanted_words = 'visual,wow,learn,backward,dog,two,left,happy,nine,go,up,bed,stop,one,zero,tree,seven,on,four,bird,right,eight,no,six,forward,house,marvin,sheila,five,off,three,down,cat,follow,yes'
            FLAGS.split_data = 0

        # set speech feature extractor properties
        FLAGS.mel_upper_edge_hertz = 7600
        FLAGS.window_size_ms = 30.0
        FLAGS.window_stride_ms = 10.0
        FLAGS.mel_num_bins = 80
        FLAGS.dct_num_features = 40
        FLAGS.feature_type = 'mfcc_tf'
        FLAGS.preprocess = preprocess

        # For numerical correctness of the streaming vs non-streaming models
        # set it to 1; for real streaming use cases set it to 0.
        FLAGS.causal_data_frame_padding = 0

        FLAGS.use_tf_fft = True
        FLAGS.mel_non_zero_only = not FLAGS.use_tf_fft

        # data augmentation parameters
        FLAGS.resample = 0.15
        FLAGS.time_shift_ms = 100
        FLAGS.use_spec_augment = 1
        FLAGS.time_masks_number = 2
        FLAGS.time_mask_max_size = 25
        FLAGS.frequency_masks_number = 2
        FLAGS.frequency_mask_max_size = 7
        FLAGS.pick_deterministically = 1

        self.flags = model_flags.update_flags(FLAGS)

        import absl  # ideally a module-level import
        absl.logging.set_verbosity(self.flags.verbosity)

        self.flags.batch_size = batch_size
        self.time_shift_samples = int(
            (self.flags.time_shift_ms * self.flags.sample_rate) / 1000)

        tf1.disable_eager_execution()
        config = tf1.ConfigProto(device_count={'GPU': 0})
        self.sess = tf1.Session(config=config)
        # tf1.keras.backend.set_session(self.sess)

        self.audio_processor = input_data.AudioProcessor(self.flags)
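
A usage sketch; the enclosing class name is not shown in this snippet, so SpeechCommandsData below is a hypothetical placeholder, and the get_data argument order mirrors Example #2:

data = SpeechCommandsData(batch_size=128, version=2, preprocess="raw")
train_x, train_y = data.audio_processor.get_data(
    data.flags.batch_size, 0, data.flags, 0.0, 0.0,
    data.time_shift_samples, 'training', 0.0, 0.0, data.sess)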
Example #11
  def setUp(self):
    super(DsTcResnetTest, self).setUp()

    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(self.sess)
    test_utils.set_seed(123)
    tf.keras.backend.set_learning_phase(0)

    # model parameters
    model_name = 'ds_tc_resnet'
    self.params = model_params.HOTWORD_MODEL_PARAMS[model_name]
    self.params.clip_duration_ms = 160
    self.params.window_size_ms = 4.0
    self.params.window_stride_ms = 2.0
    self.params.wanted_words = 'a,b,c'
    self.params.ds_padding = "'causal','causal','causal'"
    self.params.ds_filters = '8,8,4'
    self.params.ds_repeat = '1,1,1'
    self.params.ds_residual = '0,1,1'  # residual cannot be applied with stride
    self.params.ds_kernel_size = '3,3,3'
    self.params.ds_stride = '2,1,1'  # streaming conv with stride
    self.params.ds_dilation = '1,1,1'
    self.params.ds_pool = '1,2,1'  # streaming conv with pool
    self.params.ds_filter_separable = '1,1,1'

    # convert ms to samples and compute labels count
    self.params = model_flags.update_flags(self.params)

    # compute total stride
    pools = utils.parse(self.params.ds_pool)
    strides = utils.parse(self.params.ds_stride)
    time_stride = [1]
    for pool in pools:
      if pool > 1:
        time_stride.append(pool)
    for stride in strides:
      if stride > 1:
        time_stride.append(stride)
    total_stride = np.prod(time_stride)

    # override input data shape for streaming model with stride/pool
    self.params.data_stride = total_stride
    self.params.data_frame_padding = 'causal'

    # set desired number of frames in model
    frames_number = 16
    frames_per_call = total_stride
    frames_number = (frames_number // frames_per_call) * frames_per_call
    # number of input audio samples required to produce one output frame
    framing_stride = max(
        self.params.window_stride_samples,
        max(0, self.params.window_size_samples -
            self.params.window_stride_samples))
    signal_size = framing_stride * frames_number

    # desired number of samples in the input data to train non streaming model
    self.params.desired_samples = signal_size

    self.params.batch_size = 1
    self.model = ds_tc_resnet.model(self.params)
    self.model.summary()

    self.input_data = np.random.rand(self.params.batch_size,
                                     self.params.desired_samples)

    # run non streaming inference
    self.non_stream_out = self.model.predict(self.input_data)
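
As a follow-up sketch, the non-streaming output above can be checked against streaming inference (assuming kws_streaming's modes and inference helpers are importable; model, params, input_data, and non_stream_out stand in for the self attributes set above):

stream_model = utils.to_streaming_inference(
    model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
stream_out = inference.run_stream_inference_classification(
    params, stream_model, input_data)
np.testing.assert_allclose(non_stream_out, stream_out, atol=1e-5)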