Beispiel #1
0
def extract_features(signals, sample_rates, feattype, spec_kwargs, melspec_kwargs, mfcc_kwargs, db_spec_kwargs, feat_scale_kwargs, window_norm_kwargs):
    """Compute features of type `feattype` for one batch of signals.

    Every batch first goes through `audio_features.spectrograms`. Depending on
    `feattype` the result is then refined:

    * "melspectrogram": `audio_features.melspectrograms`.
    * "logmelspectrogram": as above, followed by `log(x + 1e-6)`.
    * "mfcc": as above, followed by
      `tf.signal.mfccs_from_log_mel_spectrograms`, keeping coefficients
      `coef_begin:coef_end` of the last axis (defaults 1 and 13, read from
      `mfcc_kwargs`).
    * "db_spectrogram": `audio_features.power_to_db`.
    * anything else: the spectrogram is used unchanged.

    Finally, `features.feature_scaling` and `features.window_normalization`
    are applied, in that order, whenever their kwargs dicts are non-empty.
    After every stage the result is asserted to be finite.

    Args:
        signals: Rank-2 batch of signals (asserted), i.e. shape [B, N].
        sample_rates: Per-signal sample rates; all must equal the first one.
        feattype: Name of the feature type, see above.
        spec_kwargs: Keyword arguments for `audio_features.spectrograms`.
        melspec_kwargs: Keyword arguments for `audio_features.melspectrograms`.
        mfcc_kwargs: Mapping optionally holding "coef_begin" and "coef_end".
        db_spec_kwargs: Keyword arguments for `audio_features.power_to_db`.
        feat_scale_kwargs: Keyword arguments for `features.feature_scaling`;
            scaling is skipped when falsy.
        window_norm_kwargs: Keyword arguments for
            `features.window_normalization`; skipped when falsy.

    Returns:
        The extracted (and optionally scaled/normalized) features.
    """
    tf.debugging.assert_rank(
        signals,
        2,
        message="Input signals for feature extraction must be batches of mono signals without channels, i.e. of shape [B, N] where B is batch size and N number of samples.",
    )
    tf.debugging.assert_equal(
        sample_rates,
        [sample_rates[0]],
        message="Different sample rates in a single batch not supported, all signals in the same batch should have the same sample rate.",
    )
    # TODO batches with different sample rates (probably not worth the effort)
    rate = sample_rates[0]

    feats = audio_features.spectrograms(signals, rate, **spec_kwargs)
    tf.debugging.assert_all_finite(feats, "spectrogram failed")

    # Each later stage builds on the previous one, so the stage tuples are
    # nested: every log-based type is also mel-based, and "mfcc" is both.
    mel_based = ("melspectrogram", "logmelspectrogram", "mfcc")
    log_based = ("logmelspectrogram", "mfcc")

    if feattype in mel_based:
        feats = audio_features.melspectrograms(feats, sample_rate=rate, **melspec_kwargs)
        tf.debugging.assert_all_finite(feats, "melspectrogram failed")
    if feattype in log_based:
        # Small offset keeps log() finite for zero-valued mel bins.
        feats = tf.math.log(feats + 1e-6)
        tf.debugging.assert_all_finite(feats, "logmelspectrogram failed")
    if feattype == "mfcc":
        first_coef = mfcc_kwargs.get("coef_begin", 1)
        last_coef = mfcc_kwargs.get("coef_end", 13)
        all_coefs = tf.signal.mfccs_from_log_mel_spectrograms(feats)
        feats = all_coefs[..., first_coef:last_coef]
        tf.debugging.assert_all_finite(feats, "mfcc failed")
    if feattype == "db_spectrogram":
        feats = audio_features.power_to_db(feats, **db_spec_kwargs)
        tf.debugging.assert_all_finite(feats, "db_spectrogram failed")

    if feat_scale_kwargs:
        feats = features.feature_scaling(feats, **feat_scale_kwargs)
        tf.debugging.assert_all_finite(feats, "feature scaling failed")
    if window_norm_kwargs:
        feats = features.window_normalization(feats, **window_norm_kwargs)
        tf.debugging.assert_all_finite(feats, "window normalization failed")
    return feats
 def test_linear_to_mel(self):
     """Check `audio.linear_to_mel` output for several mel bin counts.

     For every file in `audiofiles` the spectrogram of the signal is
     converted with 10, 25, ..., 85 mel bins. Each result must be free of
     NaNs, keep the number of rows of the input spectrogram, and have
     exactly `num_mel_bins` columns.
     """
     for path in audiofiles:
         s, r = audio.read_wav(path)
         # The spectrogram does not depend on num_mel_bins, so compute it
         # once per file instead of once per inner iteration.
         powspecs = audio.spectrograms(np.expand_dims(s, 0), r)
         expected_rows = powspecs[0].shape[0]
         for num_mel_bins in range(10, 100, 15):
             melspec = audio.linear_to_mel(powspecs, r, num_mel_bins=num_mel_bins)[0]
             assert not np.isnan(melspec.numpy()).any()
             assert melspec.shape[0] == expected_rows
             assert melspec.shape[1] == num_mel_bins
 def test_spectrograms(self):
     """Check `audio.spectrograms` over a grid of frame lengths and FFT sizes.

     Frame lengths of 20..100 ms (half-overlapping frames) are combined with
     FFT lengths 256..2048; combinations where the FFT is shorter than the
     frame are skipped. Each spectrogram must be NaN-free, have
     `len(signal) // step_frames - 1` rows and `n_fft // 2 + 1` columns.
     """
     for wav_path in audiofiles:
         samples, rate = audio.read_wav(wav_path)
         for frame_ms in range(20, 101, 20):
             for fft_size in (256, 512, 1024, 2048):
                 # Only FFT sizes that can hold a whole frame are tested.
                 if fft_size >= audio.ms_to_frames(rate, frame_ms):
                     hop_ms = frame_ms // 2
                     batch_spec = audio.spectrograms(
                         np.expand_dims(samples, 0),
                         rate,
                         frame_length_ms=frame_ms,
                         frame_step_ms=hop_ms,
                         fft_length=fft_size,
                     )
                     spec = batch_spec[0]
                     assert not np.isnan(spec.numpy()).any()
                     expected_rows = samples.shape[0] // audio.ms_to_frames(rate, hop_ms) - 1
                     assert spec.shape[0] == expected_rows
                     expected_cols = fft_size // 2 + 1
                     assert spec.shape[1] == expected_cols
Beispiel #4
0
def logmelspectrograms(signals, rate, *, num_mel_bins=40, log_offset=1e-6):
    """Return the natural log of the mel spectrograms of `signals`.

    The signals are passed through `spectrograms`, then `linear_to_mel`,
    and finally `tf.math.log` after adding a small offset.

    Args:
        signals: Input forwarded unchanged to `spectrograms`.
        rate: Sample rate forwarded to `spectrograms` and `linear_to_mel`.
        num_mel_bins: Number of mel bins requested from `linear_to_mel`.
            Defaults to 40, the value that was previously hard-coded.
        log_offset: Constant added before taking the logarithm so that
            zero-valued bins do not produce -inf. Defaults to 1e-6, the
            value that was previously hard-coded.

    Returns:
        The element-wise log of the offset mel spectrograms.
    """
    powspecs = spectrograms(signals, rate)
    melspecs = linear_to_mel(powspecs, rate, num_mel_bins=num_mel_bins)
    return tf.math.log(melspecs + log_offset)
Beispiel #5
0
def compute_mag_spec(signal, sample_rate):
    """Call `audio_feat.spectrograms` on `signal` with `power=1.0`.

    The signal is wrapped in a one-element list before the call.
    `frame_length_ms` and `frame_step_ms` are not parameters; they are read
    from the surrounding scope.
    """
    single_signal_batch = [signal]
    return audio_feat.spectrograms(
        single_signal_batch,
        sample_rate,
        frame_length_ms,
        frame_step_ms,
        power=1.0,
    )