예제 #1
0
 def testSpectrogramToMelMatrixChecksFrequencyBounds(self):
     # Exercises the band-edge validation of SpectrogramToMelMatrix: each row
     # below is one call, run in order, that either must succeed or must
     # raise ValueError.
     shared_kwargs = dict(num_spectrogram_bins=513,
                          audio_sample_rate=22050,
                          num_mel_bins=20)
     # (lower_edge_hertz, upper_edge_hertz, must_raise)
     edge_cases = (
         # Lower edge must be >= 0, but 0 itself is accepted.
         (0.0, 4000.0, False),
         (-1.0, 4000.0, True),
         # Upper edge must be <= Nyquist (22050 / 2), but Nyquist is accepted.
         (20.0, 11025.0, False),
         (20.0, 16000.0, True),
         # The two edges must be separated by a positive gap.
         (20.0, 20.0, True),
     )
     for lower_hz, upper_hz, must_raise in edge_cases:
         call_kwargs = dict(shared_kwargs,
                            lower_edge_hertz=lower_hz,
                            upper_edge_hertz=upper_hz)
         if not must_raise:
             mfcc_mel.SpectrogramToMelMatrix(**call_kwargs)
             continue
         with self.assertRaises(ValueError):
             mfcc_mel.SpectrogramToMelMatrix(**call_kwargs)
예제 #2
0
 def testMelSpectrumAgreesWithGoldenValues(self):
   # Mirrors dsp/mfcc:mel_spectrum_test: project a synthetic sqrt-ramp
   # spectrum through the mel matrix and compare against stored values.
   num_bins = 513
   golden_mel_values = np.array(
       [7.422619, 10.30330648, 13.72703292, 17.24158686, 21.35253118,
        25.77781089, 31.30624108, 37.05877236, 43.9436536, 51.80306637,
        60.79867148, 71.14363376, 82.90910141, 96.50069158, 112.08428368,
        129.96721968, 150.4277597, 173.74997634, 200.86037462, 231.59802942])
   # Single-row "spectrogram" whose bins are sqrt(1), sqrt(2), ..., sqrt(513).
   ramp = np.arange(1, num_bins + 1)
   spectrum_row = np.sqrt(ramp)[np.newaxis, :]
   mel_weights = mfcc_mel.SpectrogramToMelMatrix(
       num_spectrogram_bins=num_bins,
       audio_sample_rate=22050,
       num_mel_bins=20,
       lower_edge_hertz=20.0,
       upper_edge_hertz=4000.0)
   projected = np.dot(spectrum_row, mel_weights)
   np.testing.assert_array_almost_equal(golden_mel_values, projected[0, :])
예제 #3
0
def build_mel_calculation_graph(waveform_input,
                                sample_rate=16000,
                                window_length_seconds=0.025,
                                hop_length_seconds=0.010,
                                num_mel_bins=64,
                                lower_edge_hz=125.0,
                                upper_edge_hz=7500.0,
                                frame_width=96,
                                frame_hop=10,
                                tflite_compatible=False):
    """Build a TF graph to go from waveform to mel spectrum patches.

    Args:
      waveform_input: 1D Tensor which will be filled with 16 kHz waveform as
        tf.float32.
      sample_rate: Scalar giving the sampling rate of the waveform.  Only
        16 kHz is acceptable at present.
      window_length_seconds: Duration of window used for each Fourier
        transform.
      hop_length_seconds: Time shift between successive analysis time frames.
      num_mel_bins: The number of mel frequency bins to calculate.
      lower_edge_hz: Frequency boundary at bottom edge of mel mapping.
      upper_edge_hz: Frequency boundary at top edge of mel mapping.
      frame_width: The number of successive time frames to include in each
        patch.
      frame_hop: The frame advance between successive patches.
      tflite_compatible: Avoid ops not currently supported in tflite.

    Returns:
      Tensor holding [num_patches, frame_width, num_mel_bins]
      log-mel-spectrogram patches.
    """
    # `waveform_input` is a [?] vector as a tensor.
    # `magnitude_spectrogram` is a [?, fft_length/2 + 1] tensor of spectrograms.
    # Derive the dependent parameters.
    window_length_samples = int(round(window_length_seconds * sample_rate))
    hop_length_samples = int(round(hop_length_seconds * sample_rate))
    # Round the window length up to a power of two for the FFT size.
    fft_length = 2**int(
        math.ceil(math.log(window_length_samples) / math.log(2.0)))
    if tflite_compatible:
        magnitude_spectrogram = _stft_magnitude_tflite(waveform_input,
                                                       window_length_samples,
                                                       hop_length_samples,
                                                       fft_length)
    else:
        magnitude_spectrogram = _stft_magnitude_full_tf(
            waveform_input, window_length_samples, hop_length_samples,
            fft_length)

    # Warp the linear-scale, magnitude spectrograms into the mel-scale.
    # `tf.compat.dimension_value` returns the static size whether the shape
    # entry is a TF1 `Dimension` object or a plain int (TF2 behavior), whereas
    # the `.value` attribute only exists on `Dimension`.
    num_spectrogram_bins = tf.compat.dimension_value(
        magnitude_spectrogram.shape[-1])
    if tflite_compatible:
        # Keyword arguments keep this call independent of the positional
        # parameter order of SpectrogramToMelMatrix.
        linear_to_mel_weight_matrix = tf.constant(
            mfcc_mel.SpectrogramToMelMatrix(
                num_mel_bins=num_mel_bins,
                num_spectrogram_bins=num_spectrogram_bins,
                audio_sample_rate=sample_rate,
                lower_edge_hertz=lower_edge_hz,
                upper_edge_hertz=upper_edge_hz).astype(np.float32),
            name='linear_to_mel_matrix')
    else:
        # In full tf, the mel weight matrix is calculated at run time within the
        # TF graph.  This avoids including a matrix of 64 x 256 float values (i.e.,
        # 100 kB or more, depending on the representation) in the exported graph.
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=num_mel_bins,
            num_spectrogram_bins=num_spectrogram_bins,
            sample_rate=sample_rate,
            lower_edge_hertz=lower_edge_hz,
            upper_edge_hertz=upper_edge_hz)

    mel_spectrogram = tf.matmul(magnitude_spectrogram,
                                linear_to_mel_weight_matrix,
                                name='mel_spectrogram')
    # Small additive offset so the log of an all-zero mel bin stays finite.
    log_offset = 0.001
    # `tf.math.log` is available in both TF1 and TF2; the top-level `tf.log`
    # alias was removed in TF2.
    log_mel_spectrogram = tf.math.log(mel_spectrogram + log_offset,
                                      name='log_mel_spectrogram')
    # log_mel_spectrogram is a [?, num_mel_bins] gram.
    if tflite_compatible:
        features = _fixed_frame(log_mel_spectrogram,
                                frame_length=frame_width,
                                frame_step=frame_hop,
                                first_axis=True)
    else:
        features = tf.signal.frame(log_mel_spectrogram,
                                   frame_length=frame_width,
                                   frame_step=frame_hop,
                                   axis=0)
    # features is [num_patches, frame_width, num_mel_bins].
    return features