예제 #1
0
def test_spectral_bandwidth_synthetic():
    """Yield bandwidth checks for a spectrogram whose energy sits in one bin.

    With all energy confined to a single frequency bin the spectral bandwidth
    is expected to be zero, whichever frequency grid, normalization flag or
    order ``p`` is used.
    """
    active_bin = 5

    def __test(S, freq, sr, n_fft, norm, p):
        # sr and n_fft are carried along for the test id only; they are unused.
        bw = librosa.feature.spectral_bandwidth(S=S, freq=freq, norm=norm, p=p)

        assert not np.any(bw)

    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    # Fake spectrogram: 10 frames, one non-zero row.
    sr, n_fft = 22050, 1024
    S = np.zeros((1 + n_fft // 2, 10))
    S[active_bin, :] = 1.0

    for norm in (False, True):
        for p in (1, 2):
            tail = (sr, n_fft, norm, p)

            # Implicit (default) frequencies
            yield (__test, S, None) + tail

            # Explicit FFT bin frequencies
            yield (__test, S, librosa.fft_frequencies(sr=sr, n_fft=n_fft)) + tail

            # Rescaled frequencies
            yield (__test, S,
                   3 * librosa.fft_frequencies(sr=sr, n_fft=n_fft)) + tail

            # Random frequencies, a different set for every frame
            yield (__test, S, np.random.randn(*S.shape)) + tail
예제 #2
0
def test_spectral_bandwidth_onecol():
    """Yield shape checks for a one-column spectrogram.

    Regression coverage for https://github.com/librosa/librosa/issues/552,
    a failure when the spectrogram has a single column.
    """

    def __test(S, freq):
        bw = librosa.feature.spectral_bandwidth(S=S, freq=freq)

        assert bw.shape == (1, 1)

    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    # Fake spectrogram: a single frame with one non-zero bin.
    sr, n_fft = 22050, 1024
    S = np.zeros((1 + n_fft // 2, 1))
    S[5, :] = 1.0

    # Implicit (default) frequencies
    yield __test, S, None

    # Explicit FFT bin frequencies
    yield __test, S, librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Rescaled frequencies
    yield __test, S, 3 * librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Random frequencies for the single frame
    yield __test, S, np.random.randn(*S.shape)
예제 #3
0
def test_spectral_bandwidth_synthetic():
    """Yield bandwidth checks for a spectrogram whose energy sits in one bin.

    With all energy confined to a single frequency bin the spectral bandwidth
    is expected to be zero, whichever frequency grid, normalization flag or
    order ``p`` is used.
    """
    active_bin = 5

    def __test(S, freq, sr, n_fft, norm, p):
        # sr and n_fft are carried along for the test id only; they are unused.
        bw = librosa.feature.spectral_bandwidth(S=S, freq=freq, norm=norm, p=p)

        assert not np.any(bw)

    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    # Fake spectrogram: 10 frames, one non-zero row.
    sr, n_fft = 22050, 1024
    S = np.zeros((1 + n_fft // 2, 10))
    S[active_bin, :] = 1.0

    for norm in (False, True):
        for p in (1, 2):
            tail = (sr, n_fft, norm, p)

            # Implicit (default) frequencies
            yield (__test, S, None) + tail

            # Explicit FFT bin frequencies
            yield (__test, S, librosa.fft_frequencies(sr=sr, n_fft=n_fft)) + tail

            # Rescaled frequencies
            yield (__test, S,
                   3 * librosa.fft_frequencies(sr=sr, n_fft=n_fft)) + tail

            # Random frequencies, a different set for every frame
            yield (__test, S, np.random.randn(*S.shape)) + tail
예제 #4
0
def test_spectral_bandwidth_onecol():
    """Yield shape checks for a one-column spectrogram.

    Regression coverage for https://github.com/librosa/librosa/issues/552,
    a failure when the spectrogram has a single column.
    """

    def __test(S, freq):
        bw = librosa.feature.spectral_bandwidth(S=S, freq=freq)

        assert bw.shape == (1, 1)

    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    # Fake spectrogram: a single frame with one non-zero bin.
    sr, n_fft = 22050, 1024
    S = np.zeros((1 + n_fft // 2, 1))
    S[5, :] = 1.0

    # Implicit (default) frequencies
    yield __test, S, None

    # Explicit FFT bin frequencies
    yield __test, S, librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Rescaled frequencies
    yield __test, S, 3 * librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # Random frequencies for the single frame
    yield __test, S, np.random.randn(*S.shape)
예제 #5
0
def plot_stft(freq_domain_data, sr):
    """Plot STFT magnitudes against the FFT bin frequencies, interactively.

    Prints the bin frequencies (for ``FRAME_SIZE``-point FFTs at rate ``sr``)
    and the data shape, then repeatedly shows the magnitude plot, asking the
    user after each one whether to continue.
    """
    bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=FRAME_SIZE)
    print('Frequencies: ', bin_freqs)
    print(freq_domain_data.shape)

    # NOTE(review): the loop counter is never used, so every pass draws the
    # same full-matrix plot; confirm whether a per-frame plot was intended.
    for _ in range(bin_freqs.shape[0]):
        plt.plot(bin_freqs, np.abs(freq_domain_data[:, :]))
        plt.show()

        # Anything other than y/Y stops the loop.
        if input('Continue?(Y/N):') not in ('y', 'Y'):
            break
예제 #6
0
def plot_melfilters(sr=16000, n_fft=512, n_mels=10, fmin=0, fmax=None):
    """Draw a mel filter bank with and without normalization.

    Two stacked axes are produced: the top one shows the filters built with
    ``norm=None``, the bottom one the filters built with ``norm=1``. Both are
    plotted against the FFT bin frequencies.

    Returns:
        The matplotlib figure holding both axes.
    """
    # Everything except `norm` is shared between the two filter banks.
    shared = dict(sr=sr, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
    mel = librosa.filters.mel(norm=None, **shared)
    mel_norm = librosa.filters.mel(norm=1, **shared)
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    fig, axes = plt.subplots(2, figsize=(16, 8))
    base_ax, norm_ax = axes

    base_ax.plot(freqs, mel.T)
    base_ax.set_title('Mel filters')

    norm_ax.plot(freqs, mel_norm.T)
    norm_ax.set_title('Mel filters normalized (area=1)')

    # Only the bottom axis carries x ticks and the frequency label.
    base_ax.tick_params(
        axis='x', which='both', bottom=False, top=False, labelbottom=False)
    norm_ax.set_xlabel('Frequency (Hz)')

    return fig
예제 #7
0
def test_salience_multi_static(s_multi, filter_peaks):
    """Check that multi-channel salience matches per-channel salience.

    The salience of the stacked input must agree, channel by channel, with
    the salience computed on each channel alone, and the two channels must
    not produce the same result.
    """
    S, sr = s_multi

    # Options shared by all three salience calls.
    options = dict(
        freqs=librosa.fft_frequencies(sr=sr),
        harmonics=[0.5, 1, 2, 3],
        kind="slinear",
        filter_peaks=filter_peaks,
        fill_value=0,
    )

    sal_all = librosa.salience(S, **options)
    sal_0 = librosa.salience(S[0], **options)
    sal_1 = librosa.salience(S[1], **options)

    assert np.allclose(sal_all[0], sal_0)
    assert np.allclose(sal_all[1], sal_1)
    assert not np.allclose(sal_0, sal_1)
예제 #8
0
    def get_spectrogram(self):
        """
        Build the spectrogram that is aligned with the tempogram.

        The dB spectrogram is mean filtered first. Its dimensions are then
        changed to match the tempogram's row count, by sampling rows at
        log-spaced positions with 2-D linear interpolation. The result is
        rescaled to [0, 1] and multiplied by ``self.spectr_mult``.

        :return: Manipulated Spectrogram
        """
        spec_db = librosa.amplitude_to_db(self.stft, ref=np.max)
        spec_db = librosa.decompose.nn_filter(spec_db, aggregate=np.mean)
        n_rows = spec_db.shape[0]
        n_cols = spec_db.shape[1]

        # Log-spaced frequencies from 1 up to the highest FFT bin frequency,
        # one per tempogram row.
        freqs = librosa.fft_frequencies(sr=self.sample_rate)
        log_spaced = np.logspace(start=0,
                                 stop=np.log(freqs[-1]) / np.log(10),
                                 num=self.tempo_final.shape[0],
                                 endpoint=True)

        # Map those frequencies linearly onto row positions 0 .. n_rows.
        lowest = np.min(log_spaced)
        highest = np.max(log_spaced)
        row_positions = (log_spaced - lowest) / (highest - lowest) * n_rows

        time_indices = np.arange(0, n_cols)
        resampler = interpolate.interp2d(time_indices,
                                         np.arange(0, n_rows),
                                         spec_db,
                                         kind='linear')
        resampled = resampler(time_indices, row_positions)

        # Min-max normalize before applying the multiplier.
        floor = np.min(resampled)
        ceiling = np.max(resampled)
        normalized = (resampled - floor) / (ceiling - floor)
        return normalized * self.spectr_mult
예제 #9
0
def test_spectral_rolloff_synthetic():
    """Yield roll-off checks for a flat (all-ones) spectrogram.

    For a flat spectrum the roll-off frequency is expected to be the bin
    frequency found at ``floor(pct * n_bins)``.
    """
    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    sr, n_fft = 22050, 2048

    def __test(S, freq, pct):
        rolloff = librosa.feature.spectral_rolloff(
            S=S, sr=sr, freq=freq, roll_percent=pct)

        # Fall back to the default grid when none was passed in.
        grid = librosa.fft_frequencies(sr=sr, n_fft=n_fft) if freq is None else freq

        idx = np.floor(pct * grid.shape[0]).astype(int)
        assert np.allclose(rolloff, grid[idx])

    S = np.ones((1 + n_fft // 2, 10))

    for pct in (0.25, 0.5, 0.95):
        # Implicit frequencies
        yield __test, S, None, pct

        # Explicit frequencies
        yield __test, S, librosa.fft_frequencies(sr=sr, n_fft=n_fft), pct

        # Time-varying frequencies, increasing along the bin axis
        yield (__test, S,
               np.cumsum(np.abs(np.random.randn(*S.shape)), axis=0), pct)
예제 #10
0
def phase_angle_inc_generator(_fftFrameSize, _hop_length, sampleRate):
    """Return, per FFT bin, the phase advance in radians over one hop.

    Each bin's centre frequency is turned into a period in samples; the hop
    length divided by that period, times 2*pi, is the phase increment for
    the bin. The first bin's increment is forced to 0.
    """
    bin_freqs = librosa.fft_frequencies(sr=sampleRate, n_fft=_fftFrameSize)

    # Replace the first bin's frequency with 1 so the division below cannot
    # divide by zero.
    bin_freqs[0] = 1
    period_in_samples = sampleRate / bin_freqs

    # Placeholder period for the first bin; its increment is zeroed below.
    period_in_samples[0] = _fftFrameSize

    # Fraction of a period covered by one hop, expressed as an angle.
    phase_step = (_hop_length / period_in_samples) * 2 * np.pi
    phase_step[0] = 0.0

    return phase_step
예제 #11
0
def test_spectral_centroid_synthetic():
    """Yield centroid checks for a spectrogram whose energy sits in one bin.

    With all energy in bin ``k`` the spectral centroid is expected to equal
    the frequency assigned to that bin, whichever frequency grid is used.
    """
    k = 5

    def __test(S, freq, sr, n_fft):
        cent = librosa.feature.spectral_centroid(S=S, freq=freq)

        if freq is None:
            freq = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

        assert np.allclose(cent, freq[k])

    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    # Construct a fake spectrogram: 10 frames, one non-zero bin.
    sr = 22050
    n_fft = 1024
    S = np.zeros((1 + n_fft // 2, 10))
    S[k, :] = 1.0

    # Implicit frequencies
    yield __test, S, None, sr, n_fft

    # Explicit frequencies
    freq = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    yield __test, S, freq, sr, n_fft

    # Rescaled frequencies. Build a NEW array: scaling in place would also
    # change the array already yielded above, so a runner that collects all
    # cases before running them would never test the unscaled grid.
    freq = 3 * freq
    yield __test, S, freq, sr, n_fft

    # Random frequencies, a different set for every frame
    freq = np.random.randn(*S.shape)
    yield __test, S, freq, sr, n_fft
예제 #12
0
def rms_energy_infra(filenames,
                     *,
                     frame_length: int = 500,
                     hop_length: int = None,
                     threshold: int = 20):
    """Compute per-frame RMS energy of the low-frequency band of audio files.

    Each file is loaded at its native sampling rate, transformed with an
    STFT, and every bin whose frequency exceeds ``threshold`` is zeroed
    before the RMS is taken.

    Args:
        filenames: iterable of paths accepted by ``librosa.load``.
        frame_length: analysis window length in milliseconds.
        hop_length: hop in milliseconds; defaults to a quarter of
            ``frame_length``.
        threshold: bins with a frequency above this value are discarded.

    Returns:
        A list with one ``float32`` column array of shape ``(n_frames, 1)``
        per input file.
    """
    import librosa

    if hop_length is None:
        hop_length = frame_length / 4

    res = []
    for fn in filenames:
        # sr must be passed by keyword: newer librosa releases reject it as a
        # positional argument. sr=None keeps the file's native rate.
        y, fs = librosa.load(fn, sr=None)

        # Convert the millisecond settings to sample counts for this file.
        frame_length_used = int(fs * frame_length / 1000)
        hop_length_used = int(fs * hop_length / 1000)

        S, _ = librosa.magphase(
            librosa.stft(y,
                         n_fft=frame_length_used,
                         hop_length=hop_length_used))

        # Keep only the bins at or below the threshold frequency.
        freqs = librosa.fft_frequencies(sr=fs, n_fft=frame_length_used)
        S[freqs > threshold, :] = 0

        rms = librosa.feature.rms(S=S,
                                  frame_length=frame_length_used,
                                  hop_length=hop_length_used)
        res.append(rms.reshape(-1, 1).astype('float32'))

    return res
예제 #13
0
def test_spectral_rolloff_synthetic():
    """Yield roll-off checks for a flat (all-ones) spectrogram.

    For a flat spectrum the roll-off frequency is expected to be the bin
    frequency found at ``floor(pct * n_bins)``.
    """
    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    sr, n_fft = 22050, 2048

    def __test(S, freq, pct):
        rolloff = librosa.feature.spectral_rolloff(
            S=S, sr=sr, freq=freq, roll_percent=pct)

        # Fall back to the default grid when none was passed in.
        grid = librosa.fft_frequencies(sr=sr, n_fft=n_fft) if freq is None else freq

        idx = np.floor(pct * grid.shape[0]).astype(int)
        assert np.allclose(rolloff, grid[idx])

    S = np.ones((1 + n_fft // 2, 10))

    for pct in (0.25, 0.5, 0.95):
        # Implicit frequencies
        yield __test, S, None, pct

        # Explicit frequencies
        yield __test, S, librosa.fft_frequencies(sr=sr, n_fft=n_fft), pct

        # Time-varying frequencies, increasing along the bin axis
        yield (__test, S,
               np.cumsum(np.abs(np.random.randn(*S.shape)), axis=0), pct)
예제 #14
0
    def __test(S, freq, sr, n_fft):
        """Assert that the centroid of ``S`` equals the frequency of bin ``k``.

        ``k`` comes from the enclosing scope, which is not visible here.
        """
        cent = librosa.feature.spectral_centroid(S=S, freq=freq)

        # With no explicit grid, compare against the default FFT frequencies.
        expected_grid = freq
        if expected_grid is None:
            expected_grid = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

        assert np.allclose(cent, expected_grid[k])
예제 #15
0
    def __call__(self, sample):
        """Keep only the STFT bins near the harmonics of the reference pitch.

        The waveform in ``sample['wav']`` is resampled to 22050 Hz. For each
        STFT frame the reference frequency from ``sample['gt']`` is looked up
        by interpolation, and for each of ``self.n_harmonics`` harmonics the
        closest FFT bin is copied to the output spectrum when it lies within
        the tolerance. The result is inverted back to audio, divided by its
        maximum and stored under ``sample['wav']`` or ``sample[self.new_key]``.

        Returns:
            The same ``sample`` mapping, updated in place.
        """
        y, sr = sample['wav']
        # Optional librosa arguments are passed by keyword throughout: newer
        # librosa releases reject them positionally.
        y, sr = librosa.resample(y, orig_sr=sr, target_sr=22050), 22050
        ref_times, ref_freqs = sample['gt']

        fft_f = librosa.fft_frequencies(sr=sr, n_fft=self.n_fft)
        ref_frames = librosa.time_to_frames(ref_times,
                                            sr=sr,
                                            hop_length=self.hop_length,
                                            n_fft=self.n_fft)
        # Frames outside the annotated range get a reference frequency of 0.
        f_interp = interp1d(ref_frames,
                            ref_freqs,
                            fill_value=0.0,
                            bounds_error=False)

        fft = librosa.stft(y, n_fft=self.n_fft, hop_length=self.hop_length)
        filtered = np.zeros(fft.shape, dtype=fft.dtype)

        # Relative tolerance around a bin frequency: 2**(1/24) - 1.
        tolerance = 2**(1 / 24) - 1

        for frame in range(fft.shape[1]):
            freq = f_interp(frame)
            for i in range(self.n_harmonics):
                target = freq * (i + 1)
                idx = np.argmin(np.abs(fft_f - target))
                if np.abs(fft_f[idx] - target) < fft_f[idx] * tolerance:
                    filtered[idx, frame] = fft[idx, frame]
                else:
                    # NOTE(review): this zeroes the SOURCE frame, so later
                    # harmonics of this frame copy zeros while bins already
                    # copied remain; confirm this is the intent.
                    fft[:, frame] = 0

        y = librosa.istft(filtered, hop_length=self.hop_length)
        y = y / np.max(y)
        if self.new_key is None:
            sample['wav'] = y, sr
        else:
            sample[self.new_key] = y, sr
        return sample
예제 #16
0
def test_spectral_centroid_synthetic():
    """Yield centroid checks for a spectrogram whose energy sits in one bin.

    With all energy in bin ``k`` the spectral centroid is expected to equal
    the frequency assigned to that bin, whichever frequency grid is used.
    """
    k = 5

    def __test(S, freq, sr, n_fft):
        cent = librosa.feature.spectral_centroid(S=S, freq=freq)

        if freq is None:
            freq = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

        assert np.allclose(cent, freq[k])

    # srand() is defined elsewhere; presumably it seeds the RNG used below.
    srand()

    # Construct a fake spectrogram: 10 frames, one non-zero bin.
    sr = 22050
    n_fft = 1024
    S = np.zeros((1 + n_fft // 2, 10))
    S[k, :] = 1.0

    # Implicit frequencies
    yield __test, S, None, sr, n_fft

    # Explicit frequencies
    freq = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    yield __test, S, freq, sr, n_fft

    # Rescaled frequencies. Build a NEW array: scaling in place would also
    # change the array already yielded above, so a runner that collects all
    # cases before running them would never test the unscaled grid.
    freq = 3 * freq
    yield __test, S, freq, sr, n_fft

    # Random frequencies, a different set for every frame
    freq = np.random.randn(*S.shape)
    yield __test, S, freq, sr, n_fft
예제 #17
0
    def __test(S, freq, sr, n_fft):
        """Assert that the centroid of ``S`` equals the frequency of bin ``k``.

        ``k`` comes from the enclosing scope, which is not visible here.
        """
        cent = librosa.feature.spectral_centroid(S=S, freq=freq)

        # With no explicit grid, compare against the default FFT frequencies.
        expected_grid = freq
        if expected_grid is None:
            expected_grid = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

        assert np.allclose(cent, expected_grid[k])
예제 #18
0
def freq_slice(fmin, fmax, sr, n_fft):
    '''Calculate the slice needed to select a frequency band.

    Arguments:
        fmin, fmax (int): the frequency bounds; a falsy value (``None`` or
            0) leaves that side of the slice open
        sr (int): the sample rate
        n_fft (int): the fft size

    Returns:
        slice(i[fmin], i[fmax]), where ``i[fmin]`` is the first bin at or
        above ``fmin`` and ``i[fmax]`` is the last bin strictly below
        ``fmax``.

    Raises:
        ParameterError: if ``sr`` or ``n_fft`` is missing, ``fmin`` is
            negative, or ``fmax`` exceeds the Nyquist frequency.
    '''
    if not sr or not n_fft:
        raise ParameterError("You must set a sr=({}) and n_fft=({})".format(
            sr, n_fft))

    if fmin and fmin < 0:
        raise ParameterError("fmin={} must be nonnegative".format(fmin))

    nyquist = sr / 2
    if fmax and fmax > nyquist:
        # Report the Nyquist frequency itself, not the sample rate.
        raise ParameterError(
            "fmax={} must be smaller than nyquist, f={}".format(fmax, nyquist))

    fft_frequencies = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    bin_start = np.where(fft_frequencies >= fmin)[0][0] if fmin else None
    # NOTE(review): as a slice stop this index is exclusive, so the last bin
    # below fmax is itself left out; confirm that is intended.
    bin_stop = np.where(fft_frequencies < fmax)[0][-1] if fmax else None
    return slice(bin_start, bin_stop)
예제 #19
0
def old_stuff():
    """backup: old lingering stuff

    Estimates an F0 series for a hard-coded WAV file, overlays it (plus two
    scaled copies) on a log-frequency dB spectrogram and saves the figure to
    ``plot.png``.
    """

    y, f0 = get_f0_series('/home/rafa/dev/sound/440-10-partials/440-10-partials.wav')

    # Overlay F0 over a spectrogram

    import matplotlib.pyplot as plt
    import numpy as np
    import librosa.display

    amplitude = np.abs(librosa.stft(y))
    spectrum = librosa.amplitude_to_db(amplitude, ref=np.max)

    fig, ax = plt.subplots()
    img = librosa.display.specshow(spectrum, x_axis='time', y_axis='log', ax=ax)
    ax.set(title='pYIN fundamental frequency estimation')
    fig.colorbar(img, ax=ax, format="%+2.f dB")

    # The F0 track is drawn between two copies scaled by the FIFTY_CENTS_*
    # factors (defined elsewhere).
    times = librosa.times_like(f0)
    ax.plot(times, f0*FIFTY_CENTS_BWD, label='bwd', color='red', linewidth=1)
    ax.plot(times, f0, label='f0', color='cyan', linewidth=1)
    ax.plot(times, f0*FIFTY_CENTS_FWD, label='fwd', color='red', linewidth=1)
    ax.legend(loc='upper right')
    fig.savefig('plot.png')
예제 #20
0
def compute_loudness(audio,
                     sample_rate=16000,
                     frame_rate=50,
                     n_fft=2048,
                     range_db=120.0,
                     ref_db=20.7):
    """Perceptual loudness in dB, relative to white noise, amplitude=1.

    Args:
        audio: tensor. Shape [batch_size, audio_length] or [audio_length].
        sample_rate: Audio sample rate in Hz.
        frame_rate: Rate of loudness frames in Hz.
        n_fft: Fft window size.
        range_db: Dynamic range of loudness in decibels. The minimum loudness
            (per frequency bin) corresponds to -range_db.
        ref_db: Reference maximum perceptual loudness as given by
            (A_weighting + 10 * log10(abs(stft(audio))**2.0). The default
            corresponds to white noise with amplitude=1.0 and n_fft=2048.
            There is a slight dependence on fft_size due to different
            granularity of perceptual weighting.

    Returns:
        Loudness in decibels. Shape [batch_size, n_frames] or [n_frames,].
    """
    # Single examples temporarily get a batch dimension.
    is_1d = len(audio.shape) == 1
    if is_1d:
        audio = audio[None, :]

    # STFT, laid out as (batch, frequency_bins, n_frames).
    hop_length = sample_rate // frame_rate
    s = torch.stft(audio, n_fft=n_fft, hop_length=hop_length)

    # Per-bin level in dB. The epsilon inside the sqrt avoids the NaN
    # gradient of sqrt(0); amp() is defined elsewhere.
    amplitude = torch.sqrt(amp(s) + 1e-5)
    power_db = 20.0 * torch.log10(amplitude + 1e-5)

    # Perceptual (A) weighting, broadcast over batch and frames.
    frequencies = librosa.fft_frequencies(sr=sample_rate, n_fft=n_fft)
    a_weighting = librosa.A_weighting(frequencies)[None, :, None]
    weights = torch.from_numpy(a_weighting.astype(np.float32)).to(audio.device)
    loudness = power_db + weights

    # Apply the reference level, then limit the dynamic range.
    loudness = torch.clamp(loudness - ref_db, min=-range_db)

    # Average over frequency bins.
    loudness = torch.mean(loudness, dim=1)

    # Drop the temporary batch dimension again.
    if is_1d:
        loudness = loudness[0]

    # Expected number of loudness frames; the duration may be fractional.
    n_secs = audio.shape[-1] / float(sample_rate)
    expected_len = int(n_secs * frame_rate)

    # Pad with the `-range_db` noise floor, or trim, to that length.
    return pad_or_trim_to_expected_length(loudness, expected_len, -range_db)
예제 #21
0
def filterbank_log(sr, n_freq, n_bins=84, bins_per_octave=12,
                   fmin=None, spread=0.125):  # pragma: no cover
    """[np] Approximate a constant-Q filter bank for a fixed-window STFT.

    Each filter is a log-normal window centered at the corresponding frequency.

    Note: `logfrequency` in librosa 0.4 (deprecated), so copy-and-pasted,
        `tuning` was removed, `n_freq` instead of `n_fft`.

    Parameters
    ----------
    sr : number > 0 [scalar]
        audio sampling rate

    n_freq : int > 0 [scalar]
        number of frequency bins

    n_bins : int > 0 [scalar]
        Number of bins.  Defaults to 84 (7 octaves).

    bins_per_octave : int > 0 [scalar]
        Number of bins per octave. Defaults to 12 (semitones).

    fmin : float > 0 [scalar]
        Minimum frequency bin. Defaults to `C1 ~= 32.70`

    spread : float > 0 [scalar]
        Spread of each filter, as a fraction of a bin.

    Returns
    -------
    C : np.ndarray [shape=(n_bins, n_freq)]
        log-frequency filter bank, cast to ``K.floatx()``.
    """

    if fmin is None:
        fmin = 32.70319566

    # What's the shape parameter for our log-normal filters?
    sigma = float(spread) / bins_per_octave

    # Construct the output matrix
    basis = np.zeros((n_bins, n_freq))

    # Get log frequencies of bins, skipping the first bin (log2 of it is not
    # taken). Arguments are passed by keyword because newer librosa releases
    # reject them positionally.
    fft_freqs = librosa.fft_frequencies(sr=sr, n_fft=(n_freq - 1) * 2)
    log_freqs = np.log2(fft_freqs[1:])

    for i in range(n_bins):
        # What's the center (median) frequency of this filter?
        c_freq = fmin * (2.0 ** (float(i) / bins_per_octave))

        # Place a log-normal window around c_freq
        basis[i, 1:] = np.exp(-0.5 * ((log_freqs - np.log2(c_freq)) / sigma) ** 2
                              - np.log2(sigma) - log_freqs)

    # Normalize the filters
    basis = librosa.util.normalize(basis, norm=1, axis=1)

    return basis.astype(K.floatx())
예제 #22
0
def test_spectral_centroid_synthetic(S_ideal, freq):
    """Check that the centroid of ``S_ideal`` is the frequency of bin 5.

    When no frequency grid is given, the default FFT frequencies at 22050 Hz
    are used for the comparison, with the FFT size derived from the number
    of rows in ``S_ideal``.
    """
    n_bins = S_ideal.shape[0]
    cent = librosa.feature.spectral_centroid(S=S_ideal, freq=freq)

    if freq is None:
        # n_bins = 1 + n_fft / 2, solved for n_fft.
        freq = librosa.fft_frequencies(sr=22050, n_fft=2 * (n_bins - 1))

    assert np.allclose(cent, freq[5])
예제 #23
0
def ambix_power_map_freq(audio, audio_rate, freq_lims):
    """Compute the power map of ``audio`` restricted to a frequency band.

    Each element of ``audio`` is band-limited in the STFT domain, keeping
    only bins strictly between ``freq_lims[0]`` and ``freq_lims[1]``, and
    the filtered signals are passed to ``ambix_power_map``.

    Args:
        audio: iterable of 1-D signals (presumably the ambisonic channels).
        audio_rate: sampling rate of ``audio``.
        freq_lims: pair ``(low, high)`` of band limits.

    Returns:
        Whatever ``ambix_power_map`` returns for the filtered audio.
    """
    import librosa

    # The band mask depends only on the rate and the limits, so build it
    # once. The rate is passed by keyword because newer librosa releases
    # reject it positionally.
    bin_freqs = librosa.fft_frequencies(sr=audio_rate)
    band = (bin_freqs > freq_lims[0]) & (bin_freqs < freq_lims[1])
    mask = band.astype(float)[:, np.newaxis]

    # Filter out frequencies outside the band, channel by channel.
    audio_masked = np.stack(
        [librosa.core.istft(librosa.core.stft(a) * mask) for a in audio], 0)

    # Compute source map
    audio_maps = ambix_power_map(audio_masked, audio_rate=audio_rate, outp_rate=5, angular_res=5.)
    return audio_maps
예제 #24
0
 def __init__(self, sr, n_fft, min_db):
     """Store ``min_db`` and precompute fixed A-weighting values.

     The weights are computed for the FFT bin frequencies of ``sr`` and
     ``n_fft`` and kept as a parameter with gradients disabled.
     """
     super().__init__()
     self.min_db = min_db

     # A tiny offset is added to every bin frequency before weighting
     # (presumably to keep the first bin away from exactly zero).
     bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
     weights = torch.from_numpy(librosa.A_weighting(bin_freqs + 1e-10))
     self.a_weighting = torch.nn.Parameter(data=weights, requires_grad=False)
예제 #25
0
def relacion_fundamental_harmonicos(file):
    """Return a ratio of harmonic energies for an audio file.

    The magnitude STFT is interpolated at harmonics 1 to 10; the result is
    the summed energy of entry 1 divided by the summed energy of entries 2
    onwards.
    """
    y, sr = librosa.load(file)
    harmonics = list(range(1, 11))
    magnitudes = np.abs(librosa.stft(y))
    S_harm = librosa.interp_harmonics(
        magnitudes, librosa.fft_frequencies(sr=sr), harmonics, axis=0)

    # NOTE(review): entry 0 (harmonic 1) is not used; the numerator is
    # entry 1. Confirm this matches the intended "fundamental" ratio.
    numerator = np.sum(S_harm[1])
    denominator = np.sum(S_harm[2:])
    return numerator / denominator
예제 #26
0
파일: backend.py 프로젝트: timgates42/kapre
def filterbank_log(sample_rate, n_freq, n_bins=84, bins_per_octave=12, f_min=None, spread=0.125):
    """A function that returns a approximation of constant-Q filter banks for a fixed-window STFT.
    Each filter is a log-normal window centered at the corresponding frequency.

    Args:
        sample_rate (`int`): audio sampling rate
        n_freq (`int`): number of the input frequency bins. E.g., `n_fft / 2 + 1`
        n_bins (`int`): number of the resulting log-frequency bins.  Defaults to 84 (7 octaves).
        bins_per_octave (`int`): number of bins per octave. Defaults to 12 (semitones).
        f_min (`float`): lowest frequency that is going to be included in the log filterbank. Defaults to `C1 ~= 32.70`
        spread (`float`): spread of each filter, as a fraction of a bin.

    Returns:
        (`Tensor`): log-frequency filterbanks. Shape=`(n_freq, n_bins)`

    Raises:
        RuntimeError: if the highest filter frequency exceeds `sample_rate // 2`.

    Note:
        The code is originally from `logfrequency` in librosa 0.4 (deprecated) and copy-and-pasted.
        `tuning` parameter was removed and we use `n_freq` instead of `n_fft`.
    """

    if f_min is None:
        f_min = 32.70319566

    f_max = f_min * 2 ** (n_bins / bins_per_octave)
    if f_max > sample_rate // 2:
        # Adjacent literals are concatenated as-is, so every fragment that
        # continues a sentence must end with a space. The reported limit is
        # half the sample rate, matching the check above.
        raise RuntimeError(
            'Maximum frequency of log filterbank should be lower or equal to the maximum '
            'frequency of the input (defined by its sample rate), '
            'but f_max=%f and maximum frequency is %f. \n'
            'Fix it by reducing n_bins, increasing bins_per_octave and/or reducing f_min.\n'
            'You can also do it by increasing sample_rate but it means you need to upsample '
            'the input audio data, too.' % (f_max, sample_rate / 2)
        )

    # What's the shape parameter for our log-normal filters?
    sigma = float(spread) / bins_per_octave

    # Construct the output matrix
    basis = np.zeros((n_bins, n_freq))

    # Get log frequencies of bins, skipping the first bin. Arguments are
    # passed by keyword because newer librosa releases reject them
    # positionally.
    fft_freqs = librosa.fft_frequencies(sr=sample_rate, n_fft=(n_freq - 1) * 2)
    log_freqs = np.log2(fft_freqs[1:])

    for i in range(n_bins):
        # What's the center (median) frequency of this filter?
        c_freq = f_min * (2.0 ** (float(i) / bins_per_octave))

        # Place a log-normal window around c_freq
        basis[i, 1:] = np.exp(
            -0.5 * ((log_freqs - np.log2(c_freq)) / sigma) ** 2 - np.log2(sigma) - log_freqs
        )

    # Normalize the filters
    basis = librosa.util.normalize(basis, norm=1, axis=1)
    basis = basis.astype(K.floatx())

    # Transposed so the result is laid out as (n_freq, n_bins).
    return tf.convert_to_tensor(basis.T)
예제 #27
0
def extract_n_freq(N, audio, sample_rate=44100, n_fft=2048, use_stft=False):
    """Return the top ``N`` frequency/amplitude pairs above 40 for ``audio``.

    The audio is normalized and cleaned, transformed by ``_get_fft_data``,
    and its strongest components are selected by ``_get_n_top`` using the
    FFT bin frequencies for ``sample_rate`` and ``n_fft``. Pairs whose
    frequency is not above 40 are dropped before the final normalization.

    ``use_stft`` is accepted but not used by this function.
    """
    audio = _clean_input(_normalize(audio))
    fft_data = _get_fft_data(audio)

    # Passed by keyword: newer librosa releases reject positional arguments.
    bin_to_freq = librosa.fft_frequencies(sr=sample_rate, n_fft=n_fft)

    top_N = _get_n_top(fft_data, N, bin_to_freq)
    top_N = [(freq, amp) for freq, amp in top_N if freq > 40]
    return _normalize_N_top(top_N)
예제 #28
0
def poly_S(poly_coeffs, poly_freq):
    """Evaluate a polynomial on a frequency grid.

    ``poly_coeffs[i]`` is the coefficient of ``poly_freq**i``. When
    ``poly_freq`` is None the default ``librosa.fft_frequencies()`` grid is
    used. The result keeps the grid's first dimension and flattens the rest
    into the second axis.
    """
    if poly_freq is None:
        poly_freq = librosa.fft_frequencies()

    # Accumulate term by term, in the grid's own dtype.
    total = np.zeros_like(poly_freq)
    for power, coeff in enumerate(poly_coeffs):
        total += coeff * poly_freq**power

    n_rows = poly_freq.shape[0]
    return total.reshape((n_rows, -1))
예제 #29
0
    def __test(S, freq, pct):
        """Assert the roll-off of ``S`` is the frequency at ``floor(pct * n_bins)``.

        ``sr`` and ``n_fft`` come from the enclosing scope, which is not
        visible here.
        """
        rolloff = librosa.feature.spectral_rolloff(
            S=S, sr=sr, freq=freq, roll_percent=pct)

        # Fall back to the default grid when none was passed in.
        grid = librosa.fft_frequencies(sr=sr, n_fft=n_fft) if freq is None else freq

        idx = np.floor(pct * grid.shape[0]).astype(int)
        assert np.allclose(rolloff, grid[idx])
예제 #30
0
def perceptual_weights():
    """A-weighted frequency-dependent perceptual loudness weights"""
    bin_freqs = librosa.fft_frequencies(sr=torchcrepe.SAMPLE_RATE,
                                        n_fft=torchcrepe.WINDOW_SIZE)

    # A_weighting warns for nearly inaudible frequencies but ends up
    # defaulting to -100 db there, which is fine for our purposes, so the
    # RuntimeWarning is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        # Column vector of weights, shifted by the reference level.
        weights = librosa.A_weighting(bin_freqs)[:, None] - REF_DB
    return weights
예제 #31
0
    def __test(S, freq, pct):
        """Assert the roll-off of ``S`` is the frequency at ``floor(pct * n_bins)``.

        ``sr`` and ``n_fft`` come from the enclosing scope, which is not
        visible here.
        """
        rolloff = librosa.feature.spectral_rolloff(
            S=S, sr=sr, freq=freq, roll_percent=pct)

        # Fall back to the default grid when none was passed in.
        grid = librosa.fft_frequencies(sr=sr, n_fft=n_fft) if freq is None else freq

        idx = np.floor(pct * grid.shape[0]).astype(int)
        assert np.allclose(rolloff, grid[idx])
예제 #32
0
def test_freq_ticks():
    """Yield checks for ``librosa.display.frequency_ticks``.

    Each case verifies the number of ticks and labels, that the reported
    unit is consistent with the largest frequency, and that the ticks are of
    the tick class for the requested axis. A final case expects a
    ``ParameterError`` for an invalid axis.
    """

    def __test(locs, freqs, n_ticks, axis):

        # Without explicit frequencies the locations are the frequencies.
        args = [locs] if freqs is None else [locs, freqs]
        fmax = max(args[-1])

        fig = plt.figure()
        (ticks, labels), fmt = librosa.display.frequency_ticks(
            *args, axis=axis, n_ticks=n_ticks)
        plt.close(fig)

        expected_count = len(locs) if n_ticks is None else n_ticks
        eq_(len(ticks), expected_count)
        eq_(len(labels), expected_count)

        # Largest frequency allowed for each unit below GHz.
        upper_bounds = {'mHz': 1e1, 'Hz': 1e4, 'kHz': 1e7, 'MHz': 1e10}
        if fmt in upper_bounds:
            assert fmax <= upper_bounds[fmt]
        elif fmt == 'GHz':
            assert fmax > 1e10
        else:
            raise ValueError('Incorrect fmt={}'.format(fmt))

        if axis == 'x':
            cls = matplotlib.axis.XTick
        elif axis == 'y':
            cls = matplotlib.axis.YTick
        else:
            raise ValueError('Incorrect axis={}'.format(axis))

        assert all([isinstance(tick, cls) for tick in ticks])

    for sr in (1e-3, 1e1, 1e3, 1e5, 1e8, 1e12):
        locs = librosa.fft_frequencies(sr=sr, n_fft=32)

        for freqs in (None, locs):
            for n_ticks in (3, 5, None):
                for axis in ('x', 'y'):
                    yield __test, locs, freqs, n_ticks, axis

    # Invalid axis; reuses the values left over from the last loop pass.
    yield raises(librosa.ParameterError)(__test), locs, freqs, n_ticks, 23
예제 #33
0
    def compute_loudness(self, n_fft=256):
        """Compute a per-frame loudness curve and windowing bookkeeping.

        Sets ``self.loudness`` (perceptually weighted spectrum summed over
        frequency), ``self.n_points`` (``n_fft / 4``) and ``self.n_windows``
        (audio length divided by ``n_points``, rounded up), then prints the
        last two.
        """
        spectrum = librosa.stft(self.audio_raw, n_fft=n_fft)
        S = np.abs(spectrum * np.conj(spectrum))

        # NOTE(review): S is already a squared magnitude and is squared again
        # here; the frequencies also use librosa's default sample rate.
        # Confirm both are intended.
        weighted = librosa.perceptual_weighting(
            S**2, librosa.fft_frequencies(n_fft=n_fft))
        self.loudness = weighted.sum(axis=0)

        self.n_points = int(n_fft / 4)
        self.n_windows = int(np.ceil(len(self.audio_raw) / self.n_points))

        for template, value in (
                ('processing windows: {}', self.n_windows),
                ('points per windows: {}', self.n_points)):
            print(template.format(value))
예제 #34
0
def __extract_frame_features_single_file(wav_file):
    """Extract a stack of frame-level features from one audio file.

    The file is loaded at ``SR``, transformed with a Hann-windowed STFT
    (``FRAME_LENGTH`` / ``HOP_LENGTH``), and the following features are
    concatenated along the feature axis, in this order: MFCC, MFCC delta,
    zero-crossing rate, RMS energy, spectral centroid, spectral bandwidth,
    spectral flatness and spectral roll-off.

    Returns:
        Array of shape ``(num_features, t)``, ``t`` being the frame count.
    """
    y, sr = librosa.load(str(wav_file), sr=SR, dtype=np.float32)
    D = librosa.stft(y,
                     n_fft=FRAME_LENGTH,
                     hop_length=HOP_LENGTH,
                     window='hann')
    mag, phase = librosa.magphase(D, power=1)

    # NOTE(review): no sr is passed here, so librosa's default sample rate
    # defines the mel filters, and fmax=SR is above SR / 2; confirm intent.
    mel_s = librosa.feature.melspectrogram(S=mag**2, n_mels=40, fmax=SR)
    freq = librosa.fft_frequencies(sr=SR, n_fft=FRAME_LENGTH)

    # MFCC shape: (N_MFCC, t) where t is number of frames
    mfcc_feat = librosa.feature.mfcc(S=librosa.power_to_db(mel_s),
                                     n_mfcc=N_MFCC)

    # `rmse` was renamed to `rms` in later librosa releases; use whichever
    # the installed version provides.
    rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse

    features = [
        mfcc_feat,
        # MFCC delta1, shape: (N_MFCC, t)
        librosa.feature.delta(mfcc_feat),
        # zero crossing rate, shape: (1, t). Options are passed by keyword
        # because newer librosa releases reject them positionally.
        librosa.feature.zero_crossing_rate(y,
                                           frame_length=FRAME_LENGTH,
                                           hop_length=HOP_LENGTH),
        # RMS energy, shape: (1, t)
        rms_fn(S=mag),
        # spectral centroid, shape: (1, t)
        librosa.feature.spectral_centroid(S=mag, freq=freq),
        # spectral bandwidth, shape: (1, t)
        librosa.feature.spectral_bandwidth(S=mag, freq=freq, p=2),
        # spectral flatness, shape: (1, t)
        librosa.feature.spectral_flatness(S=mag),
        # spectral rolloff, shape: (1, t)
        librosa.feature.spectral_rolloff(S=mag, freq=freq, roll_percent=0.85),
    ]

    return np.concatenate(features, axis=0)  # (num_features, t)
예제 #35
0
def test_freq_ticks():
    # Generator test for librosa.display.frequency_ticks: for a range of
    # sampling rates it checks the number of ticks/labels, the unit chosen
    # for the labels, and the type of the returned tick objects.

    # Largest maximum frequency for which each bounded unit may be reported
    unit_ceilings = {'mHz': 1e1, 'Hz': 1e4, 'kHz': 1e7, 'MHz': 1e10}

    def __test(locs, freqs, n_ticks, axis):

        # Frequencies default to the tick locations when not given
        args = [locs] if freqs is None else [locs, freqs]
        fmax = max(args[-1])

        plt.figure()
        (ticks, labels), fmt = librosa.display.frequency_ticks(*args,
                                                               axis=axis,
                                                               n_ticks=n_ticks)

        # n_ticks=None means one tick per location
        expected = len(locs) if n_ticks is None else n_ticks

        eq_(len(ticks), expected)
        eq_(len(labels), expected)

        # The reported unit has to be consistent with the largest frequency
        if fmt == 'GHz':
            assert fmax > 1e10
        elif fmt in unit_ceilings:
            assert fmax <= unit_ceilings[fmt]
        else:
            raise ValueError('Incorrect fmt={}'.format(fmt))

        if axis not in ('x', 'y'):
            raise ValueError('Incorrect axis={}'.format(axis))

        if axis == 'x':
            tick_cls = matplotlib.axis.XTick
        else:
            tick_cls = matplotlib.axis.YTick

        assert all(isinstance(tick, tick_cls) for tick in ticks)

    for sr in [1e-3, 1e1, 1e3, 1e5, 1e8, 1e12]:
        locs = librosa.fft_frequencies(sr=sr, n_fft=32)

        cases = [(f, n, a)
                 for f in [None, locs]
                 for n in [3, 5, None]
                 for a in ['x', 'y']]

        for freqs, n_ticks, axis in cases:
            yield __test, locs, freqs, n_ticks, axis

    # An invalid axis must be rejected; this reuses the arguments left over
    # from the last iteration above.
    yield raises(librosa.ParameterError)(__test), locs, freqs, n_ticks, 23
예제 #36
0
def test_spectral_rolloff_synthetic(S, freq, pct):
    # Checks the roll-off reported for spectrogram S against the frequency
    # found at position floor(pct * n_bins) of the frequency axis.

    sr = 22050
    observed = librosa.feature.spectral_rolloff(S=S, sr=sr, freq=freq,
                                                roll_percent=pct)

    # Without explicit frequencies, fall back to the FFT bin centers implied
    # by the number of rows in S.
    if freq is None:
        freq = librosa.fft_frequencies(sr=sr, n_fft=2 * (S.shape[0] - 1))

    position = np.floor(pct * freq.shape[0]).astype(int)
    expected = freq[position]

    assert np.allclose(observed, expected)
예제 #37
0
    def __test(sr, n_fft):
        # Sanity checks on the bin frequencies returned for (sr, n_fft)
        bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
        lowest, highest = bin_freqs[0], bin_freqs[-1]

        # First bin is DC
        assert lowest == 0

        # Last bin is Nyquist, kept positive for more convenient display
        assert highest == sr / 2.0

        # Uniform spacing: every step equals the first one
        steps = np.diff(bin_freqs)
        assert np.allclose(steps, steps[0])
    def __test(sr, n_fft):
        # Sanity checks on the bin frequencies returned for (sr, n_fft)
        bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
        lowest, highest = bin_freqs[0], bin_freqs[-1]

        # First bin is DC
        eq_(lowest, 0)

        # Last bin is Nyquist, kept positive for more convenient display
        eq_(highest, sr / 2.0)

        # Uniform spacing: every step equals the first one
        steps = np.diff(bin_freqs)
        assert np.allclose(steps, steps[0])
예제 #39
0
def test_poly_features_synthetic():
    # Generator test for librosa.feature.poly_features: spectrograms are
    # built as exact polynomials of the frequency axis, and the fitted
    # coefficients are compared with the ones used to build them.

    srand()
    sr = 22050
    n_fft = 2048

    def __test(S, coeffs, freq):
        # The polynomial order is implied by the number of coefficients
        fitted = librosa.feature.poly_features(S=S, sr=sr, n_fft=n_fft,
                                               order=coeffs.shape[0] - 1,
                                               freq=freq)

        # Rows of `fitted` are reversed so they line up with `coeffs`
        for frame in range(S.shape[-1]):
            assert np.allclose(coeffs, fitted[::-1, frame].squeeze())

    def __make_data(coeffs, freq):
        # Evaluate sum_k coeffs[k] * freq**k, one row per frequency
        poly = sum((c * freq**k for k, c in enumerate(coeffs)),
                   np.zeros_like(freq))
        return poly.reshape((freq.shape[0], -1))

    for order in range(1, 3):
        fft_freq = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
        coeffs = np.atleast_1d(np.arange(1, 1+order))

        # Default frequencies, first implicit and then passed explicitly
        S = __make_data(coeffs, fft_freq)
        yield __test, S, coeffs, None
        yield __test, S, coeffs, fft_freq

        # A non-uniform (squared) frequency axis
        squared_freq = fft_freq**2.0
        S = __make_data(coeffs, squared_freq)
        yield __test, S, coeffs, squared_freq

        # Two-column frequencies, increasing along axis 0
        random_freq = np.cumsum(np.abs(np.random.randn(1 + n_fft//2, 2)),
                                axis=0)
        S = __make_data(coeffs, random_freq)
        yield __test, S, coeffs, random_freq
예제 #40
0
파일: filters.py 프로젝트: BWalburn/librosa
def logfrequency(sr, n_fft, bins_per_octave=12, tuning=0.0, fmin=None, fmax=None, spread=0.125):
    '''Approximate a constant-Q filterbank for a fixed-window STFT.

    Each filter is a log-normal window centered at the corresponding pitch frequency.

    :usage:
        >>> # Simple log frequency filters
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096)

        >>> # Use a narrower frequency range
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, fmin=110, fmax=880)

        >>> # Use narrower filters for sparser response: 5% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.05)
        >>> # Or wider: 50% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.5)

    :parameters:
      - sr : int > 0
          audio sampling rate

      - n_fft : int > 0
          FFT window size

      - bins_per_octave : int > 0
          Number of bins per octave. Defaults to 12 (semitones).

      - tuning : None or float in [-0.5, +0.5]
          Tuning correction parameter, in fractions of a bin.
          ``None`` is treated as ``0.0`` (no correction).

      - fmin : float > 0
          Minimum frequency bin. Defaults to ``C1 ~= 16.35``

      - fmax : float > 0
          Maximum frequency bin. Defaults to ``C9 ~= 4186.01``

      - spread : float > 0
          Spread of each filter, as a fraction of a bin.

    :returns:
      - C : np.ndarray, shape=(ceil(bins_per_octave * log2(fmax / fmin)), 1 + n_fft // 2)
          CQT filter bank.  Each row is scaled to unit Euclidean norm,
          unless it is identically zero.
    '''

    if fmin is None:
        fmin = librosa.midi_to_hz(librosa.note_to_midi('C1'))

    if fmax is None:
        fmax = librosa.midi_to_hz(librosa.note_to_midi('C9'))

    # tuning=None is a documented value; it means "no correction"
    if tuning is None:
        tuning = 0.0

    # Apply tuning correction
    correction = 2.0**(float(tuning) / bins_per_octave)

    # How many bins can we get?
    n_filters = int(np.ceil(bins_per_octave * np.log2(float(fmax) / fmin)))

    # What's the shape parameter for our log-normal filters?
    sigma = float(spread) / bins_per_octave
    log_sigma = np.log2(sigma)

    # Construct the output matrix.
    # Floor division keeps the bin count integral: with true division
    # (Python 3, or `from __future__ import division`) the shape would
    # contain a float, which np.zeros rejects.
    basis = np.zeros((n_filters, 1 + int(n_fft) // 2))

    # Get log frequencies of bins; the DC bin is skipped since log2(0) = -inf
    log_freqs = np.log2(librosa.fft_frequencies(sr=sr, n_fft=n_fft)[1:])

    for i in range(n_filters):
        # What's the center (median) frequency of this filter?
        center_freq = correction * fmin * (2.0**(float(i)/bins_per_octave))

        # Place a log-normal window around center_freq
        # We skip the sqrt(2*pi) normalization because it will wash out below anyway
        z_score = (log_freqs - np.log2(center_freq)) / sigma
        basis[i, 1:] = np.exp(-0.5 * z_score**2 - log_sigma - log_freqs)

        # Normalize each filter (all-zero filters are left untouched)
        c_norm = np.sqrt(np.sum(basis[i]**2))
        if c_norm > 0:
            basis[i] = basis[i] / c_norm

    return basis
예제 #41
0
파일: filters.py 프로젝트: wnstlr/librosa
def mel(sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False):
    """Create a Filterbank matrix to combine FFT bins into Mel-frequency bins

    :usage:
        >>> mel_fb = librosa.filters.mel(22050, 2048)

        >>> # Or clip the maximum frequency to 8KHz
        >>> mel_fb = librosa.filters.mel(22050, 2048, fmax=8000)

    :parameters:
      - sr        : int > 0
          sampling rate of the incoming signal

      - n_fft     : int > 0
          number of FFT components

      - n_mels    : int > 0
          number of Mel bands to generate

      - fmin      : float >= 0
          lowest frequency (in Hz)

      - fmax      : float >= 0
          highest frequency (in Hz).
          If ``None``, use ``fmax = sr / 2.0``

      - htk       : bool
          use HTK formula instead of Slaney

    :returns:
      - M         : np.ndarray, shape=(n_mels, 1 + n_fft // 2)
          Mel transform matrix
    """

    if fmax is None:
        fmax = sr / 2.0

    # Initialize the weights.
    # Floor division gives the same bin count on Python 2 and Python 3.
    n_mels = int(n_mels)
    weights = np.zeros((n_mels, int(1 + n_fft // 2)))

    # Center freqs of each FFT bin
    fftfreqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # 'Center freqs' of mel bands - uniformly spaced between limits.
    # Band i is indexed below with freqs[i], freqs[i+1] and freqs[i+2].
    freqs = librosa.mel_frequencies(n_mels,
                                    fmin=fmin,
                                    fmax=fmax,
                                    htk=htk,
                                    extra=True)

    # Slaney-style mel is scaled to be approx constant energy per channel
    enorm = 2.0 / (freqs[2:n_mels+2] - freqs[:n_mels])

    # `range` rather than `xrange`: the latter does not exist on Python 3
    for i in range(n_mels):
        # lower and upper slopes for all bins
        lower = (fftfreqs - freqs[i]) / (freqs[i+1] - freqs[i])
        upper = (freqs[i+2] - fftfreqs) / (freqs[i+2] - freqs[i+1])

        # .. then intersect them with each other and zero
        weights[i] = np.maximum(0, np.minimum(lower, upper)) * enorm[i]

    return weights
예제 #42
0
파일: filters.py 프로젝트: wnstlr/librosa
def logfrequency(sr, n_fft, n_bins=84, bins_per_octave=12, tuning=0.0,
                 fmin=None, spread=0.125):
    '''Approximate a constant-Q filterbank for a fixed-window STFT.

    Each filter is a log-normal window centered at the corresponding frequency.

    :usage:
        >>> # Simple log frequency filters
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096)

        >>> # Use a narrower frequency range
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096,
        ...                                         n_bins=48, fmin=110)

        >>> # Use narrower filters for sparser response: 5% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.05)
        >>> # Or wider: 50% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.5)

    :parameters:
      - sr : int > 0
          audio sampling rate

      - n_fft : int > 0
          FFT window size

      - n_bins : int > 0
          Number of bins.  Defaults to 84 (7 octaves).

      - bins_per_octave : int > 0
          Number of bins per octave. Defaults to 12 (semitones).

      - tuning : None or float in [-0.5, +0.5]
          Tuning correction parameter, in fractions of a bin.
          ``None`` is treated as ``0.0`` (no correction).

      - fmin : float > 0
          Minimum frequency bin. Defaults to ``C2 ~= 32.70``

      - spread : float > 0
          Spread of each filter, as a fraction of a bin.

    :returns:
      - C : np.ndarray, shape=(n_bins, 1 + n_fft // 2)
          log-frequency filter bank.  Each row is scaled to unit Euclidean
          norm, unless it is identically zero.
    '''

    if fmin is None:
        fmin = librosa.midi_to_hz(librosa.note_to_midi('C2'))

    # tuning=None is a documented value; it means "no correction"
    if tuning is None:
        tuning = 0.0

    # Apply tuning correction
    correction = 2.0**(float(tuning) / bins_per_octave)

    # What's the shape parameter for our log-normal filters?
    sigma = float(spread) / bins_per_octave
    log_sigma = np.log2(sigma)

    # Construct the output matrix.
    # Floor division keeps the bin count integral: with true division
    # (Python 3, or `from __future__ import division`) the shape would
    # contain a float, which np.zeros rejects.
    basis = np.zeros((int(n_bins), 1 + int(n_fft) // 2))

    # Get log frequencies of bins; the DC bin is skipped since log2(0) = -inf
    log_freqs = np.log2(librosa.fft_frequencies(sr=sr, n_fft=n_fft)[1:])

    for i in range(n_bins):
        # What's the center (median) frequency of this filter?
        c_freq = correction * fmin * (2.0**(float(i)/bins_per_octave))

        # Place a log-normal window around c_freq
        z_score = (log_freqs - np.log2(c_freq)) / sigma
        basis[i, 1:] = np.exp(-0.5 * z_score**2 - log_sigma - log_freqs)

        # Normalize each filter (all-zero filters are left untouched)
        c_norm = np.sqrt(np.sum(basis[i]**2))
        if c_norm > 0:
            basis[i] = basis[i] / c_norm

    return basis