Python resample Examples

Programming Language: Python

Namespace/Package Name: librosa.core.audio

Method/Function: resample

Examples at hotexamples.com: 6

Python resample - 6 examples found. These are the top-rated real-world Python examples of librosa.core.audio.resample extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

def __early_downsample(y, sr, hop_length, res_type, n_octaves, nyquist,
                       filter_cutoff, scale):
    '''Decimate `y` by a power of two ahead of the CQT when permitted.

    The number of halvings comes from `__early_downsample_count`.  The
    signal is only touched when that count is positive and `res_type` is
    ``'kaiser_fast'``; otherwise the inputs are handed back unchanged.

    Returns
    -------
    (y, sr, hop_length)
        The (possibly resampled) signal together with the sampling rate and
        hop length that now apply to it.

    Raises
    ------
    ParameterError
        If `y` has fewer samples than the downsampling factor.
    '''
    n_halvings = __early_downsample_count(nyquist, filter_cutoff,
                                          hop_length, n_octaves)

    # Nothing to do unless halvings are available *and* the fast
    # resampler was requested.
    if not (n_halvings > 0) or res_type != 'kaiser_fast':
        return y, sr, hop_length

    factor = 2**(n_halvings)
    hop_length //= factor

    n_samples = len(y)
    if n_samples < factor:
        message = ('Input signal length={:d} is too short for '
                   '{:d}-octave CQT')
        raise ParameterError(message.format(n_samples, n_octaves))

    reduced_sr = sr / float(factor)
    y = audio.resample(y, sr, reduced_sr, res_type=res_type, scale=True)

    if not scale:
        # No length-scaling will happen after the CQT, so the
        # downsampling factor has to be compensated for right here.
        y *= np.sqrt(factor)

    return y, reduced_sr, hop_length

Example #2

Show file

def main():
    """Dereverberate the bundled 8-channel example recording with WPE.

    Reads the eight example WAV files from ``<project_root>/data``,
    resamples them to the rate of the selected parameter set, applies
    `wpe_v5` independently to every STFT frequency bin, and writes the first
    channel of the result to ``<project_root>/data/wpe_out.wav``.
    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft, istft, get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    channels = 8
    parameter_set = 'Katka'

    if parameter_set == 'Katka':
        sampling_rate = 16000
        stft_size = 512
        stft_shift = 128
        delay = 3
        iterations = 5

        def taps_for_bin(f):
            # Same filter order for every frequency bin
            return 10

    elif parameter_set == 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        stft_size = 128
        stft_shift = 64
        delay = 2
        iterations = 2

        def taps_for_bin(f):
            # `center_frequencies` is bound further down, before the first
            # call to this closure.
            if center_frequencies[f] < 800:
                return 18
            if center_frequencies[f] < 1500:
                return 15
            return 12

    else:
        raise ValueError

    data_dir = project_root / 'data'
    file_template = 'AMI_WSJ20-Array1-{}_T10c0201.wav'

    # File names are 1-based, hence the ``d + 1``.
    raw_signals = []
    for d in range(channels):
        wav_path = str(data_dir / file_template.format(d + 1))
        raw_signals.append(sf.read(wav_path)[0])

    resampled = [resample(sig, 16000, sampling_rate) for sig in raw_signals]
    y = np.stack(resampled, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_size, sampling_rate)

    Y = stft(y, size=stft_size, shift=stft_shift)

    X = np.copy(Y)
    _, _, n_freq_bins = Y.shape
    for f in tqdm(range(n_freq_bins), total=n_freq_bins):
        n_taps = taps_for_bin(f)
        X[:, :, f] = wpe_v5(Y[:, :, f],
                            K=n_taps,
                            delay=delay,
                            iterations=iterations)

    x = istft(X, size=stft_size, shift=stft_shift)

    out_path = str(data_dir / 'wpe_out.wav')
    sf.write(out_path, x[0], samplerate=sampling_rate)

Example #3

Show file

File: wpe.py Project: jackdeadman/nara_wpe

def main(channels, sampling_rate, file_template, taps_frequency_dependent,
         delay, iterations):
    """
    User interface for WPE. The defaults of the command line interface are
    suited for example audio files of nara_wpe.

    The input audio is resampled from the sampling rate reported by the
    audio file itself to `sampling_rate`, dereverberated per STFT frequency
    bin with `wpe_v7`, and the first channel of the result is written to
    ``<project_root>/data/wpe_out.wav``.

    Parameters
    ----------
    channels : int
        Number of example files to read. Only used when `file_template` is
        the bundled ``'AMI_WSJ20-Array1-{}_T10c0201.wav'`` template.
    sampling_rate : number
        Target sampling rate for processing and for the written output.
    file_template : str
        Either the bundled example template (one file per channel) or the
        path of a single audio file whose channels are processed together.
    taps_frequency_dependent : bool
        If true, use 18 / 15 / 12 taps for bins whose center frequency is
        below 800 Hz / below 1500 Hz / otherwise. If false, use 10 taps.
    delay, iterations
        Forwarded unchanged to `wpe_v7`.

    Example parameter set 'Yoshioka2012GeneralWPE':
        sampling_rate = 8000
        delay = 2
        iterations = 2

    """
    from nara_wpe import project_root
    import soundfile as sf
    from nara_wpe.utils import stft
    from nara_wpe.utils import istft
    from nara_wpe.utils import get_stft_center_frequencies
    from tqdm import tqdm
    from librosa.core.audio import resample

    stft_options = dict(size=512,
                        shift=128,
                        window_length=None,
                        fading=True,
                        pad=True,
                        symmetric_window=False)

    def get_taps(f, mode=taps_frequency_dependent):
        if mode:
            if center_frequencies[f] < 800:
                taps = 18
            elif center_frequencies[f] < 1500:
                taps = 15
            else:
                taps = 12
        else:
            taps = 10
        return taps

    # Keep the sampling rate that soundfile reports for every recording so
    # that resampling starts from the true rate instead of assuming 16 kHz.
    if file_template == 'AMI_WSJ20-Array1-{}_T10c0201.wav':
        recordings = [
            sf.read(str(project_root / 'data' /
                        file_template.format(d + 1)))
            for d in range(channels)
        ]
    else:
        # always_2d keeps mono files as (frames, 1) so the transpose below
        # yields a one-channel list instead of raising.
        signal, file_sr = sf.read(file_template, always_2d=True)
        recordings = [(channel, file_sr)
                      for channel in signal.transpose(1, 0)]
    signal_list = [
        resample(x_, orig_sr, sampling_rate) for x_, orig_sr in recordings
    ]
    y = np.stack(signal_list, axis=0)

    center_frequencies = get_stft_center_frequencies(stft_options['size'],
                                                     sampling_rate)

    Y = stft(y, **stft_options)

    X = np.copy(Y)
    D, T, F = Y.shape
    for f in tqdm(range(F), total=F):
        taps = get_taps(f)
        X[:, :, f] = wpe_v7(Y[:, :, f],
                            taps=taps,
                            delay=delay,
                            iterations=iterations)

    x = istft(X, size=stft_options['size'], shift=stft_options['shift'])

    out_path = str(project_root / 'data' / 'wpe_out.wav')
    sf.write(out_path, x[0], samplerate=sampling_rate)
    print('Output in {}'.format(out_path))

Example #4

Show file

def icqt(C,
         sr=22050,
         hop_length=512,
         fmin=None,
         n_bins=84,
         bins_per_octave=12,
         filter_scale=1,
         norm=1,
         sparsity=0.01,
         window='hann',
         scale=True,
         pad_mode='reflect',
         use_smoothing=True):
    '''Reconstruct a signal from constant-Q coefficients, octave by octave.

    `C` is split along its first axis into groups of `bins_per_octave` rows.
    Each group is passed through the pseudo-inverse of the CQT filter basis
    (via `__icqt_response`), resampled back to the working sampling rate,
    and the per-octave results are summed.

    Parameters
    ----------
    C : np.ndarray
        Constant-Q coefficients, indexed as ``C[bin, frame]``.  The array
        passed in is not modified.
    sr : number
        Sampling rate handed to the filter construction helpers.
    hop_length : int
        Hop length; halved for every octave below the first.
    fmin : float or None
        Lowest frequency.  ``None`` selects ``note_to_hz('C1')``.
    n_bins : int
        Total number of bins; with `bins_per_octave` it fixes the number of
        octaves that are processed.
    bins_per_octave : int
        Number of rows of `C` that make up one octave.
    filter_scale, norm, sparsity, window
        Forwarded to `__cqt_filter_fft` / `filters.constant_q_lengths`.
    scale : bool
        If true, `C` is multiplied by the square root of the filter lengths
        before inversion.
    pad_mode : str
        Forwarded to `__icqt_response`.
    use_smoothing : bool
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    y : np.ndarray
        Sum of the per-octave reconstructions.

    Raises
    ------
    ParameterError
        If `hop_length` does not have enough factors of two for the number
        of octaves.
    '''

    tuning = 0.0
    # How many octaves are we dealing with?
    n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))
    n_filters = min(bins_per_octave, n_bins)

    # Resolve the default first: the filter lengths below need a real
    # frequency, not None.
    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')

    if scale:
        lengths = filters.constant_q_lengths(sr,
                                             fmin,
                                             n_bins=n_bins,
                                             bins_per_octave=bins_per_octave,
                                             tuning=tuning,
                                             window=window,
                                             filter_scale=filter_scale)
        # Out-of-place on purpose: an in-place `*=` would rescale the
        # caller's array again on every call.
        C = C * np.sqrt(lengths[:, np.newaxis])

    # First thing, get the freqs of the top octave
    freqs = cqt_frequencies(n_bins, fmin,
                            bins_per_octave=bins_per_octave)[-bins_per_octave:]

    fmin_t = np.min(freqs)
    fmax_t = np.max(freqs)

    # Determine required resampling quality
    Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1)
    filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q)
    nyquist = sr / 2.0
    if filter_cutoff < audio.BW_FASTEST * nyquist:
        res_type = 'kaiser_fast'
    else:
        res_type = 'kaiser_best'

    # A placeholder signal is run through the early-downsampling helper only
    # to obtain the sampling rate and hop length it would produce.
    y = np.zeros((1000, ))
    y, sr, hop_length = __early_downsample(y, sr, hop_length, res_type,
                                           n_octaves, nyquist, filter_cutoff,
                                           scale)

    # One slab of `bins_per_octave` rows per octave, highest octave first
    cqt_resp = [
        C[i * bins_per_octave:i * bins_per_octave + bins_per_octave, :]
        for i in range(n_octaves)
    ][::-1]

    if res_type != 'kaiser_fast':

        # Do the top octave before resampling to allow for fast resampling
        fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                               fmin_t,
                                               n_filters,
                                               bins_per_octave,
                                               tuning,
                                               filter_scale,
                                               norm,
                                               sparsity,
                                               window=window)

        fft_basis = np.linalg.pinv(fft_basis)
        # NOTE(review): this `y` is replaced by `y = 0.0` before the octave
        # loop below, so the top-octave response computed here is never
        # added to the output -- confirm whether that is intended.
        y = __icqt_response(cqt_resp[0], n_fft, hop_length, fft_basis,
                            pad_mode)

        fmin_t /= 2
        fmax_t /= 2
        n_octaves -= 1

        filter_cutoff = fmax_t * (1 +
                                  0.5 * filters.window_bandwidth(window) / Q)

        res_type = 'kaiser_fast'

    # Make sure our hop is long enough to support the bottom octave
    num_twos = __num_two_factors(hop_length)
    if num_twos < n_octaves - 1:
        raise ParameterError('hop_length must be a positive integer '
                             'multiple of 2^{0:d} for {1:d}-octave CQT'.format(
                                 n_octaves - 1, n_octaves))

    # Now do the recursive bit
    fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                           fmin_t,
                                           n_filters,
                                           bins_per_octave,
                                           tuning,
                                           filter_scale,
                                           norm,
                                           sparsity,
                                           window=window)

    fft_basis = np.linalg.pinv(fft_basis)

    my_y, my_sr, my_hop = y, sr, hop_length

    y = 0.0
    # Iterate down the octaves
    for i in range(n_octaves):
        lower_octave = i > 0

        if lower_octave:
            # Then re-scale the filters to compensate for downsampling
            fft_basis /= np.sqrt(2)

            my_sr /= 2.0
            my_hop //= 2

        # Compute the inverse filter response for this octave
        my_y = __icqt_response(cqt_resp[i], n_fft, my_hop, fft_basis,
                               pad_mode)

        if lower_octave:
            # Bring the octave back up to the working sampling rate
            my_y = audio.resample(my_y,
                                  my_sr,
                                  sr,
                                  res_type=res_type,
                                  scale=True)

        y += my_y

        print('Octave:', i)
        print('y.size:', my_y.size)
        print('SR:', my_sr)
        print('Hop:', my_hop)
        print('New SR:', sr)
    return y

Example #5

Show file

def icqt(C,
         sr=22050,
         hop_length=512,
         fmin=None,
         bins_per_octave=12,
         tuning=0.0,
         filter_scale=1,
         norm=1,
         sparsity=0.01,
         window='hann',
         scale=True,
         amin=1e-6):
    '''Compute the inverse constant-Q transform.

    Given a constant-Q transform representation `C` of an audio signal `y`,
    this function produces an approximation `y_hat`.

    .. warning:: This implementation is unstable, and subject to change in
                 future versions of librosa.  We recommend that its use be
                 limited to sonification and diagnostic applications.

    Parameters
    ----------
    C : np.ndarray, [shape=(n_bins, n_frames)]
        Constant-Q representation as produced by `core.cqt`
    sr : number > 0 [scalar]
        Sampling rate passed to the filter-bank construction
        (`filters.constant_q` and `filters.constant_q_lengths`).
    hop_length : int > 0 [scalar]
        number of samples between successive frames
    fmin : float > 0 [scalar]
        Minimum frequency. Defaults to C1 ~= 32.70 Hz
    bins_per_octave : int > 0 [scalar]
        Number of bins per octave.  Together with the number of rows of `C`
        this determines how many octaves are reconstructed.
    tuning : float in `[-0.5, 0.5)` [scalar]
        Tuning offset in fractions of a bin (cents).
    filter_scale : float > 0 [scalar]
        Filter scale factor. Small values (<1) use shorter windows
        for improved time resolution.
    norm : {inf, -inf, 0, float > 0}
        Type of norm to use for basis function normalization.
        See `librosa.util.normalize`.
    sparsity : float in [0, 1)
        Sparsify the CQT basis by discarding up to `sparsity`
        fraction of the energy in each basis.
        Set `sparsity=0` to disable sparsification.
        NOTE(review): this argument is not referenced in the function body.
    window : str, tuple, number, or function
        Window specification for the basis filters.
        See `filters.get_window` for details.
    scale : bool
        If `True`, scale the CQT response by square-root the length
        of each channel's filter. This is analogous to `norm='ortho'` in FFT.
        If `False`, do not scale the CQT. This is analogous to `norm=None`
        in FFT.
    amin : float or None
        When applying squared window normalization, sample positions with
        coefficients below `amin` will be left as is.
        If `None`, then `amin` is inferred as the smallest valid floating
        point value.

    Returns
    -------
    y : np.ndarray, [shape=(n_samples), dtype=np.float]
        Audio time-series reconstructed from the CQT representation.

    See Also
    --------
    cqt

    Notes
    -----
    This function caches at level 40.

    Examples
    --------
    Using default parameters

    >>> y, sr = librosa.load(librosa.util.example_audio_file(), duration=15)
    >>> C = librosa.cqt(y=y, sr=sr)
    >>> y_hat = librosa.icqt(C=C, sr=sr)

    Or with a different hop length and frequency resolution:

    >>> hop_length = 256
    >>> bins_per_octave = 12 * 3
    >>> C = librosa.cqt(y=y, sr=sr, hop_length=256, n_bins=7*bins_per_octave,
    ...                 bins_per_octave=bins_per_octave)
    >>> y_hat = librosa.icqt(C=C, sr=sr, hop_length=hop_length,
    ...                 bins_per_octave=bins_per_octave)
    '''
    # Emitted on every call to flag the unstable status of this function
    warnings.warn(
        'librosa.icqt is unstable, and subject to change in future versions. '
        'Please use with caution.')

    n_bins, n_frames = C.shape
    n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))

    if amin is None:
        amin = util.tiny(C)

    if fmin is None:
        fmin = note_to_hz('C1')

    # Frequencies of the top octave only (last `bins_per_octave` entries)
    freqs = cqt_frequencies(n_bins,
                            fmin,
                            bins_per_octave=bins_per_octave,
                            tuning=tuning)[-bins_per_octave:]

    fmin_t = np.min(freqs)

    # Make the filter bank
    # A single one-octave basis, built at the top octave, is reused for
    # every octave in the loop below.
    basis, lengths = filters.constant_q(sr=sr,
                                        fmin=fmin_t,
                                        n_bins=bins_per_octave,
                                        bins_per_octave=bins_per_octave,
                                        filter_scale=filter_scale,
                                        tuning=tuning,
                                        norm=norm,
                                        window=window,
                                        pad_fft=True)
    n_fft = basis.shape[1]

    # The extra factor of lengths**0.5 corrects for within-octave tapering
    basis = basis * np.sqrt(lengths[:, np.newaxis])

    # Estimate the gain per filter
    bdot = basis.conj().dot(basis.T)
    bscale = np.sum(np.abs(bdot), axis=1)

    # Half the filter length; this many samples are dropped from each end
    # of every octave's output.
    n_trim = basis.shape[1] // 2

    # Per-bin divisor applied to C: all ones when `scale` is true, otherwise
    # the square root of each filter length.
    if scale:
        Cnorm = np.ones(n_bins)[:, np.newaxis]
    else:
        Cnorm = filters.constant_q_lengths(sr=sr,
                                           fmin=fmin,
                                           n_bins=n_bins,
                                           bins_per_octave=bins_per_octave,
                                           filter_scale=filter_scale,
                                           tuning=tuning,
                                           window=window)[:, np.newaxis]**0.5

    # Running output; None until the first octave has been processed
    y = None

    # Revised algorithm:
    #   for each octave
    #      upsample old octave
    #      @--numba accelerate this loop?
    #      for each basis
    #         convolve with activation (valid-mode)
    #         divide by window sumsquare
    #         trim and add to total

    # Octaves are visited from the highest index down to 0
    for octave in range(n_octaves - 1, -1, -1):
        # Compute the slice index for the current octave
        # NOTE(review): both bounds carry an extra `- 1`, so for octave 0
        # the slice stops at -1 and the last row of C is never selected --
        # confirm this offset is intended.
        slice_ = slice(-(octave + 1) * bins_per_octave - 1,
                       -(octave) * bins_per_octave - 1)

        # Project onto the basis
        C_oct = C[slice_] / Cnorm[slice_]
        # Use only as many basis rows (taken from the end) as this octave
        # actually has bins.
        basis_oct = basis[-C_oct.shape[0]:]

        y_oct = None

        # Make a dummy activation
        # The hop shrinks by a factor of two per octave index
        oct_hop = hop_length // 2**octave
        n = n_fft + (C_oct.shape[1] - 1) * oct_hop

        for i in range(basis_oct.shape[0] - 1, -1, -1):
            wss = filters.window_sumsquare(window,
                                           n_frames,
                                           hop_length=oct_hop,
                                           win_length=int(lengths[i]),
                                           n_fft=n_fft,
                                           norm=norm)

            wss *= lengths[i]**2

            # Construct the response for this filter
            y_oct_i = np.zeros(n, dtype=C_oct.dtype)
            __activation_fill(y_oct_i, basis_oct[i], C_oct[i], oct_hop)
            # Retain only the real part
            # Only do window normalization for sufficiently large window
            # coefficients
            y_oct_i = y_oct_i.real / np.maximum(amin, wss)

            if y_oct is None:
                y_oct = y_oct_i
            else:
                y_oct += y_oct_i

        # Remove the effects of zero-padding
        # NOTE(review): `i` is the value left over from the inner loop
        # (its last iteration, i.e. 0), so the same `bscale` entry scales
        # the whole octave -- confirm this is intended.
        y_oct = y_oct[n_trim:-n_trim] * bscale[i]

        if y is None:
            y = y_oct
        else:
            # Up-sample the previous buffer and add in the new one
            # Scipy-resampling is fast here, since it's a power-of-two relation
            y = audio.resample(y, 1, 2, scale=True, res_type='scipy') + y_oct

    return y

Example #6

Show file

def cqt(y,
        sr=22050,
        hop_length=512,
        fmin=None,
        n_bins=84,
        bins_per_octave=12,
        tuning=0.0,
        filter_scale=1,
        norm=1,
        sparsity=0.01,
        window='hann',
        scale=True,
        pad_mode='reflect',
        res_type='scipy'):
    '''Compute the constant-Q transform of an audio signal.

    This implementation is based on the recursive sub-sampling method
    described by [1]_.

    .. [1] Schoerkhuber, Christian, and Anssi Klapuri.
        "Constant-Q transform toolbox for music processing."
        7th Sound and Music Computing Conference, Barcelona, Spain. 2010.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series
    sr : number > 0 [scalar]
        sampling rate of `y`
    hop_length : int > 0 [scalar]
        number of samples between successive CQT columns.
    fmin : float > 0 [scalar]
        Minimum frequency. Defaults to C1 ~= 32.70 Hz
    n_bins : int > 0 [scalar]
        Number of frequency bins, starting at `fmin`
    bins_per_octave : int > 0 [scalar]
        Number of bins per octave
    tuning : None or float in `[-0.5, 0.5)`
        Tuning offset in fractions of a bin (cents).
        If `None`, tuning will be automatically estimated from the signal.
    filter_scale : float > 0
        Filter scale factor. Small values (<1) use shorter windows
        for improved time resolution.
    norm : {inf, -inf, 0, float > 0}
        Type of norm to use for basis function normalization.
        See `librosa.util.normalize`.
    sparsity : float in [0, 1)
        Sparsify the CQT basis by discarding up to `sparsity`
        fraction of the energy in each basis.
        Set `sparsity=0` to disable sparsification.
    window : str, tuple, number, or function
        Window specification for the basis filters.
        See `filters.get_window` for details.
    scale : bool
        If `True`, scale the CQT response by square-root the length of
        each channel's filter.  This is analogous to `norm='ortho'` in FFT.
        If `False`, do not scale the CQT. This is analogous to
        `norm=None` in FFT.
    pad_mode : string
        Padding mode for centered frame analysis.
        See also: `librosa.core.stft` and `np.pad`.
    res_type : string
        Resampling mode forwarded to the early-downsampling helper.
        For any value other than `'kaiser_fast'`, the top octave is
        analysed at the incoming sampling rate first, and the remaining
        octaves are then resampled with `'kaiser_fast'`.

    Returns
    -------
    CQT : np.ndarray [shape=(n_bins, t), dtype=np.complex or np.float]
        Constant-Q value each frequency at each time.

    Raises
    ------
    ParameterError
        If `hop_length` is not an integer multiple of
        `2**(n_bins / bins_per_octave)`
        Or if `y` is too short to support the frequency range of the CQT.

    See Also
    --------
    librosa.core.resample
    librosa.util.normalize

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    Generate and plot a constant-Q power spectrum

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> C = np.abs(librosa.cqt(y, sr=sr))
    >>> librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
    ...                          sr=sr, x_axis='time', y_axis='cqt_note')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Constant-Q power spectrum')
    >>> plt.tight_layout()

    Limit the frequency range

    >>> C = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'),
    ...                 n_bins=60))
    >>> C
    array([[  8.827e-04,   9.293e-04, ...,   3.133e-07,   2.942e-07],
           [  1.076e-03,   1.068e-03, ...,   1.153e-06,   1.148e-06],
           ...,
           [  1.042e-07,   4.087e-07, ...,   1.612e-07,   1.928e-07],
           [  2.363e-07,   5.329e-07, ...,   1.294e-07,   1.611e-07]])

    Using a higher frequency resolution

    >>> C = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'),
    ...                 n_bins=60 * 2, bins_per_octave=12 * 2))
    >>> C
    array([[  1.536e-05,   5.848e-05, ...,   3.241e-07,   2.453e-07],
           [  1.856e-03,   1.854e-03, ...,   2.397e-08,   3.549e-08],
           ...,
           [  2.034e-07,   4.245e-07, ...,   6.213e-08,   1.463e-07],
           [  4.896e-08,   5.407e-07, ...,   9.176e-08,   1.051e-07]])
    '''

    # How many octaves are we dealing with?
    n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))
    n_filters = min(bins_per_octave, n_bins)

    # Remembered only for the "too short" error message further down
    len_orig = len(y)

    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')

    if tuning is None:
        tuning = estimate_tuning(y=y, sr=sr)

    # First thing, get the freqs of the top octave
    freqs = cqt_frequencies(n_bins, fmin,
                            bins_per_octave=bins_per_octave)[-bins_per_octave:]

    fmin_t = np.min(freqs)
    fmax_t = np.max(freqs)

    # Determine required resampling quality
    Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1)
    filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q)
    nyquist = sr / 2.0

    # May replace y, sr and hop_length with downsampled equivalents
    y, sr, hop_length = __early_downsample(y, sr, hop_length, res_type,
                                           n_octaves, nyquist, filter_cutoff,
                                           scale)

    # Per-octave responses, collected from the top octave downwards
    cqt_resp = []

    if res_type != 'kaiser_fast':

        # Do the top octave before resampling to allow for fast resampling
        fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                               fmin_t,
                                               n_filters,
                                               bins_per_octave,
                                               tuning,
                                               filter_scale,
                                               norm,
                                               sparsity,
                                               window=window)

        # Compute the CQT filter response and append it to the stack
        cqt_resp.append(
            __cqt_response(y, n_fft, hop_length, fft_basis, pad_mode))

        # The top octave is done; move the target band one octave down
        fmin_t /= 2
        fmax_t /= 2
        n_octaves -= 1

        filter_cutoff = fmax_t * (1 +
                                  0.5 * filters.window_bandwidth(window) / Q)

        # All remaining octaves use the fast resampler
        res_type = 'kaiser_fast'

    # Make sure our hop is long enough to support the bottom octave
    num_twos = __num_two_factors(hop_length)
    if num_twos < n_octaves - 1:
        raise ParameterError('hop_length must be a positive integer '
                             'multiple of 2^{0:d} for {1:d}-octave CQT'.format(
                                 n_octaves - 1, n_octaves))

    # Now do the recursive bit
    fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                           fmin_t,
                                           n_filters,
                                           bins_per_octave,
                                           tuning,
                                           filter_scale,
                                           norm,
                                           sparsity,
                                           window=window)

    # Working copies that are halved on each pass through the loop
    my_y, my_sr, my_hop = y, sr, hop_length

    # Iterate down the octaves
    for i in range(n_octaves):

        # Resample (except first time)
        if i > 0:
            if len(my_y) < 2:
                raise ParameterError('Input signal length={} is too short for '
                                     '{:d}-octave CQT'.format(
                                         len_orig, n_octaves))

            # Halve the sampling rate so the same basis covers the next
            # octave down
            my_y = audio.resample(my_y,
                                  my_sr,
                                  my_sr / 2.0,
                                  res_type=res_type,
                                  scale=True)
            # Then re-scale the filters to compensate for downsampling
            fft_basis[:] *= np.sqrt(2)

            my_sr /= 2.0
            my_hop //= 2

        # Compute the cqt filter response and append to the stack
        cqt_resp.append(
            __cqt_response(my_y, n_fft, my_hop, fft_basis, pad_mode))

    # Combine the per-octave responses into a single array of n_bins rows
    C = __trim_stack(cqt_resp, n_bins)

    if scale:
        # `sr` here is the value returned by the early-downsampling step
        lengths = filters.constant_q_lengths(sr,
                                             fmin,
                                             n_bins=n_bins,
                                             bins_per_octave=bins_per_octave,
                                             tuning=tuning,
                                             window=window,
                                             filter_scale=filter_scale)
        C /= np.sqrt(lengths[:, np.newaxis])

    return C