def _preprocess_audio(audio_file_path, audio_featurizer, normalize):
    """Read an audio file and turn it into a spectrogram feature.

    Args:
        audio_file_path: path handed to `soundfile.read`.
        audio_featurizer: object supplying the `sample_rate`, `stride_ms`
            and `window_ms` settings used for the spectrogram computation.
        normalize: when truthy, the spectrogram is passed through
            `_normalize_audio_feature` before being returned.

    Returns:
        The spectrogram feature with one extra axis inserted at position 2.
    """
    # Only the first element returned by soundfile.read is used here.
    samples, _ = soundfile.read(audio_file_path)

    spectrogram_settings = (
        audio_featurizer.sample_rate,
        audio_featurizer.stride_ms,
        audio_featurizer.window_ms,
    )
    spectrogram = featurizer.compute_spectrogram_feature(
        samples, *spectrogram_settings)

    # Optional feature normalization.
    spectrogram = (
        _normalize_audio_feature(spectrogram) if normalize else spectrogram)

    # Conv2D input needs a channel dimension, so append one.
    return np.expand_dims(spectrogram, axis=2)
Beispiel #2
0
def _preprocess_audio(audio_file_path, audio_featurizer, normalize):
  """Compute the spectrogram feature of a single audio file.

  Args:
    audio_file_path: location of the audio file, read via `soundfile.read`.
    audio_featurizer: provides `sample_rate`, `stride_ms` and `window_ms`,
      which are forwarded to `featurizer.compute_spectrogram_feature`.
    normalize: if truthy, run `_normalize_audio_feature` on the spectrogram.

  Returns:
    The (optionally normalized) spectrogram, expanded with a new axis at
    index 2.
  """
  waveform, _ = soundfile.read(audio_file_path)

  rate = audio_featurizer.sample_rate
  stride = audio_featurizer.stride_ms
  window = audio_featurizer.window_ms
  spec = featurizer.compute_spectrogram_feature(
      waveform, rate, stride, window)

  # The channel dimension for conv2D input is added on both paths.
  if not normalize:
    return np.expand_dims(spec, axis=2)
  return np.expand_dims(_normalize_audio_feature(spec), axis=2)
Beispiel #3
0
def _preprocess_audio(audio_file_path, audio_sample_rate, audio_featurizer,
                      normalize):
    """Load the audio file in memory and compute spectrogram feature.

    Args:
        audio_file_path: path of the audio file passed to `wavfile.read`.
        audio_sample_rate: sample rate the file is required to have.
        audio_featurizer: object providing `frame_length`, `frame_step` and
            `fft_length` for the spectrogram computation.
        normalize: if truthy, apply `_normalize_audio_feature` to the
            computed feature.

    Returns:
        The feature produced by `featurizer.compute_spectrogram_feature`,
        normalized when `normalize` is set.

    Raises:
        AssertionError: if the sample rate read from the file differs from
            `audio_sample_rate`.
    """
    tf.logging.info(
        "Extracting spectrogram feature for {}".format(audio_file_path))
    sample_rate, data = wavfile.read(audio_file_path)

    # Checked explicitly instead of with a bare `assert`, which is removed
    # under `python -O` and would let mismatched audio through silently.
    # AssertionError is kept so existing callers see the same exception type.
    if sample_rate != audio_sample_rate:
        raise AssertionError(
            "Sample rate mismatch for {}: expected {}, got {}".format(
                audio_file_path, audio_sample_rate, sample_rate))

    # Non-float samples are converted to float32 and scaled by the maximum
    # value of their original dtype.
    if data.dtype not in [np.float32, np.float64]:
        data = data.astype(np.float32) / np.iinfo(data.dtype).max

    feature = featurizer.compute_spectrogram_feature(
        data, audio_featurizer.frame_length, audio_featurizer.frame_step,
        audio_featurizer.fft_length)
    if normalize:
        feature = _normalize_audio_feature(feature)
    return feature