Example no. 1
0
  def read_and_preprocess_batched_audio(
      self, ks,
      exs):
    """Reads, resamples, optionally chunks, and batches audio examples.

    Each example is read and resampled to `self._target_sample_rate`. If
    `self._chunk_len` is set, audio at least that long is split into
    fixed-length chunks, and the chunks of all examples are flattened into a
    single list. Each resulting audio is converted to model-input features
    and stacked into one batch.

    Args:
      ks: Keys identifying each example; zipped with `exs` and passed to
        `self._read_audio_and_resample`.
      exs: Examples to read audio from.

    Returns:
      A tuple `(batched_model_input, audios, sr)`:
        batched_model_input: np.ndarray of stacked per-audio features, with
          the batch on axis 0.
        audios: The list of (possibly chunked) 1-D audio arrays.
        sr: The target sample rate used.

    Raises:
      ValueError: If any audio has more than one dimension, if the feature
        shapes are not all identical, or if there is nothing to batch.
    """
    audios = []
    for k, ex in zip(ks, exs):
      audio = self._read_audio_and_resample(k, ex)
      # Features and stacking below assume mono, 1-D audio.
      if audio.ndim > 1:
        raise ValueError(f'Audio was too many dims: {audio.ndim}')
      audios.append(audio)
    sr = self._target_sample_rate

    # Do some chunking.
    if self._chunk_len:
      logging.info('Chunk len: %s', self._chunk_len)
      chunked_audios = []
      for audio in audios:
        if audio.shape[0] >= self._chunk_len:
          chunk = utils.get_chunked_audio_fn(audio, self._chunk_len)
        else:
          # NOTE(review): axis -1 produces shape (len, 1); confirm this
          # matches the (num_chunks, chunk_len) layout presumably produced
          # by get_chunked_audio_fn. The shape check below will raise if
          # short and long audios are mixed in one batch.
          chunk = np.expand_dims(audio, -1)
        chunked_audios.append(chunk)
      # Flatten all chunks from all examples into one list of audios.
      audios = np.concatenate(chunked_audios, axis=0)
      audios = [audios[i] for i in range(audios.shape[0])]

    # Convert audio to features, if required.
    model_inputs = [self._audio_to_features(a, sr) for a in audios]
    # Fail fast with a clear message; np.stack([]) would raise a less
    # informative ValueError below.
    if not model_inputs:
      raise ValueError('No audio examples to batch.')
    # All feature arrays must share one shape to be stackable; hoist the
    # expected shape instead of re-indexing inside the loop.
    expected_shape = model_inputs[0].shape
    for model_input in model_inputs:
      if model_input.shape != expected_shape:
        raise ValueError(f'Model input shapes not the same: {model_inputs}')
    # Shapes are all identical at this point, so log once rather than
    # once per input.
    logging.info('model_input shape: %s', expected_shape)
    batched_model_input = np.stack(model_inputs, axis=0)

    return batched_model_input, audios, sr
Example no. 2
0
    def tfex_to_chunked_audio(self, k, ex):
        """Reads audio for one tf.Example and optionally chunks it.

        Args:
            k: Key identifying the example; passed through to
                `self.read_and_preprocess_audio`.
            ex: The tf.Example to read audio from.

        Returns:
            A tuple `(model_input, sample_rate)`, where `model_input` has
            been split into fixed-length chunks along axis 0 when
            `self._chunk_len` is set and the input is long enough.
        """
        # Read audio from tf.Example, get the sample rate, resample if necessary,
        # and convert to model inputs (if necessary).
        model_input, sample_rate = self.read_and_preprocess_audio(k, ex)

        # Do some chunking.
        if self._chunk_len:
            logging.info('Chunk len: %s', self._chunk_len)
            if model_input.shape[0] >= self._chunk_len:
                model_input = utils.get_chunked_audio_fn(
                    model_input, self._chunk_len)
            # Bug fix: the original log line ended at 'after chunking: ' and
            # never reported the value it referred to; include the shape.
            logging.info('model_input after chunking: %s', model_input.shape)

        return model_input, sample_rate