Esempio n. 1
0
def time_stretch(y, rate, **kwargs):
    '''Time-stretch an audio series by a fixed rate.


    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    rate : float > 0 [scalar]
        Stretch factor.  If `rate > 1`, then the signal is sped up.
        If `rate < 1`, then the signal is slowed down.

    kwargs : additional keyword arguments.
        Forwarded unchanged to both the forward and the inverse STFT.
        See `librosa.core.stft` for details.

    Returns
    -------
    y_stretch : np.ndarray [shape=(round(n/rate),)]
        audio time series stretched by the specified rate

    Raises
    ------
    ParameterError
        if `rate <= 0`

    See Also
    --------
    pitch_shift : pitch shifting
    librosa.core.phase_vocoder : spectrogram phase vocoder
    pyrubberband.pyrb.time_stretch : high-quality time stretching using RubberBand

    Examples
    --------
    Compress to be twice as fast

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_fast = librosa.effects.time_stretch(y, 2.0)

    Or half the original speed

    >>> y_slow = librosa.effects.time_stretch(y, 0.5)

    '''

    if rate <= 0:
        raise ParameterError('rate must be a positive number')

    # Construct the short-term Fourier transform (STFT).
    # The result must not be bound to the name `stft`: assigning to that
    # name would make it local to this function, and the call itself would
    # then fail with UnboundLocalError.
    spectrum = stft(y, **kwargs)

    # Stretch by phase vocoding
    # NOTE(review): a `hop_length` supplied through kwargs is not forwarded
    # to `phase_vocoder` -- confirm its default hop matches the STFT's.
    spectrum_stretch = phase_vocoder(spectrum, rate)

    # Predict the length of y_stretch
    len_stretch = int(round(len(y) / rate))

    # Invert the STFT, trimming/padding to the predicted length
    y_stretch = istft(spectrum_stretch,
                      dtype=y.dtype,
                      length=len_stretch,
                      **kwargs)

    return y_stretch
Esempio n. 2
0
def __cqt_response(y, n_fft, hop_length, fft_basis, mode):
    '''Project the STFT of `y` onto `fft_basis`.

    The STFT is taken with frame size `n_fft`, hop `hop_length`, an
    all-ones window (`np.ones`) and `mode` as the padding mode; the
    return value is `fft_basis.dot(...)` applied to that STFT matrix.
    '''

    # Gather the transform settings in one place
    stft_options = dict(n_fft=n_fft,
                        hop_length=hop_length,
                        window=np.ones,
                        pad_mode=mode)

    spectrum = stft(y, **stft_options)

    # Filter response: basis applied to the STFT matrix
    return fft_basis.dot(spectrum)
Esempio n. 3
0
def hpss(y, **kwargs):
    '''Harmonic-percussive source separation (HPSS).

    Decompose an audio time series into harmonic and percussive components.

    This function automates the STFT->HPSS->ISTFT pipeline, and ensures that
    the output waveforms have equal length to the input waveform `y`.


    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series
    kwargs : additional keyword arguments.
        Forwarded to the spectrogram-level decomposition.
        See `librosa.decompose.hpss` for details.


    Returns
    -------
    y_harmonic : np.ndarray [shape=(n,)]
        audio time series of the harmonic elements

    y_percussive : np.ndarray [shape=(n,)]
        audio time series of the percussive elements

    See Also
    --------
    harmonic : Extract only the harmonic component
    percussive : Extract only the percussive component
    librosa.decompose.hpss : HPSS on spectrograms


    Examples
    --------
    >>> # Extract harmonic and percussive components
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_harmonic, y_percussive = librosa.effects.hpss(y)

    >>> # Get a more isolated percussive component by widening its margin
    >>> y_harmonic, y_percussive = librosa.effects.hpss(y, margin=(1.0,5.0))

    '''
    # This wrapper shares its name with the spectrogram-level routine, so a
    # bare `hpss(...)` call in the body would recurse into this function.
    # Import the spectrogram routine under an unambiguous alias instead.
    from librosa.decompose import hpss as _spectrogram_hpss

    # Compute the STFT matrix.  The result is deliberately not named `stft`:
    # that would shadow the function and raise UnboundLocalError on the call.
    spectrum = stft(y)

    # Decompose into harmonic and percussives
    spec_harm, spec_perc = _spectrogram_hpss(spectrum, **kwargs)

    # Invert the STFTs.  Adjust length to match the input.
    y_harm = fix_length(istft(spec_harm, dtype=y.dtype), len(y))
    y_perc = fix_length(istft(spec_perc, dtype=y.dtype), len(y))

    return y_harm, y_perc
Esempio n. 4
0
def percussive(y, **kwargs):
    '''Extract percussive elements from an audio time-series.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series
    kwargs : additional keyword arguments.
        Forwarded to the spectrogram-level decomposition.
        See `librosa.decompose.hpss` for details.

    Returns
    -------
    y_percussive : np.ndarray [shape=(n,)]
        audio time series of just the percussive portion

    See Also
    --------
    hpss : Separate harmonic and percussive components
    harmonic : Extract only the harmonic component
    librosa.decompose.hpss : HPSS for spectrograms

    Examples
    --------
    >>> # Extract percussive component
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_percussive = librosa.effects.percussive(y)

    >>> # Use a margin > 1.0 for greater percussive separation
    >>> y_percussive = librosa.effects.percussive(y, margin=3.0)

    '''
    # The decomposition must run on the spectrogram, so bind the
    # spectrogram-level routine explicitly; a bare `hpss` may resolve to a
    # time-domain wrapper of the same name.
    from librosa.decompose import hpss as _spectrogram_hpss

    # Compute the STFT matrix.  The result is deliberately not named `stft`:
    # that would shadow the function and raise UnboundLocalError on the call.
    spectrum = stft(y)

    # Remove harmonics: keep the second (percussive) output only
    spec_perc = _spectrogram_hpss(spectrum, **kwargs)[1]

    # Invert the STFT and match the input length
    y_perc = fix_length(istft(spec_perc, dtype=y.dtype), len(y))

    return y_perc
Esempio n. 5
0
def pseudo_cqt(y,
               sr=22050,
               hop_length=512,
               fmin=None,
               n_bins=84,
               bins_per_octave=12,
               tuning=0.0,
               filter_scale=1,
               norm=1,
               sparsity=0.01,
               window='hann',
               scale=True,
               pad_mode='reflect'):
    '''Compute the pseudo constant-Q transform of an audio signal.

    The magnitude STFT of `y` is projected onto the magnitude of a
    frequency-domain CQT filter basis.  A single FFT size, as returned by
    the filter-basis helper, is used for every bin.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    sr : number > 0 [scalar]
        sampling rate of `y`

    hop_length : int > 0 [scalar]
        number of samples between successive CQT columns.

    fmin : float > 0 [scalar]
        Minimum frequency.  When `None`, the frequency of note C1
        (~= 32.70 Hz) is used.

    n_bins : int > 0 [scalar]
        Number of frequency bins, starting at `fmin`

    bins_per_octave : int > 0 [scalar]
        Number of bins per octave

    tuning : None or float in `[-0.5, 0.5)`
        Tuning offset in fractions of a bin (cents).

        If `None`, tuning is estimated from `y` and `sr`.

    filter_scale : float > 0
        Filter scale factor. Larger values use longer windows.

    norm : number
        Normalization setting handed to the filter-basis helper.

    sparsity : float in [0, 1)
        Sparsify the CQT basis by discarding up to `sparsity`
        fraction of the energy in each basis.

        Set `sparsity=0` to disable sparsification.

    window : str, tuple, number, or function
        Window specification for the basis filters.
        See `filters.get_window` for details.

    scale : bool
        If true, the result is divided by `sqrt(n_fft)`.
        Otherwise each bin is multiplied by `sqrt(length / n_fft)`,
        where `length` is that bin's constant-Q filter length.

    pad_mode : string
        Padding mode for centered frame analysis.

        See also: `librosa.core.stft` and `np.pad`.

    Returns
    -------
    CQT : np.ndarray [shape=(n_bins, t)]
        Pseudo Constant-Q energy for each frequency at each time.

    Raises
    ------
    ParameterError
        May be raised by the helper routines, e.g. when `y` is too short
        to support the requested frequency range (not checked here).

    '''

    # C1 is the default lowest frequency
    fmin = note_to_hz('C1') if fmin is None else fmin

    # Estimate the tuning from the signal only when none is provided
    if tuning is None:
        tuning = estimate_tuning(y=y, sr=sr)

    basis, n_fft, _ = __cqt_filter_fft(sr,
                                       fmin,
                                       n_bins,
                                       bins_per_octave,
                                       tuning,
                                       filter_scale,
                                       norm,
                                       sparsity,
                                       hop_length=hop_length,
                                       window=window)

    # Only magnitudes are used on both sides of the projection
    basis_mag = np.abs(basis)
    spec_mag = np.abs(stft(y,
                           n_fft=n_fft,
                           hop_length=hop_length,
                           pad_mode=pad_mode))

    # Project the magnitude STFT onto the pseudo-cqt basis
    pseudo = basis_mag.dot(spec_mag)

    if not scale:
        # Weight each bin by its own filter length
        filter_lengths = constant_q_lengths(sr,
                                            fmin,
                                            n_bins=n_bins,
                                            bins_per_octave=bins_per_octave,
                                            tuning=tuning,
                                            window=window,
                                            filter_scale=filter_scale)

        pseudo *= np.sqrt(filter_lengths[:, np.newaxis] / n_fft)
        return pseudo

    # Uniform scaling by the FFT size
    pseudo /= np.sqrt(n_fft)
    return pseudo
Esempio n. 6
0
def detectOnset(y,
                peakThresh,
                peakWait,
                hop_length=512,
                sr=48000,
                backtrack=False,
                plots=1,
                **kwargs):
    """Basic onset detector.  Locate note onset events by picking peaks in an
    onset strength envelope, and optionally plot the result.

    The onset strength envelope is computed from `y` with a median
    aggregate, then passed to `peak_pick`.


    Parameters
    ----------
    y          : np.ndarray [shape=(n,)]
        audio time series

    peakThresh : float
        Detection threshold; used as the default `delta` argument of
        `peak_pick`.  Larger values give fewer onsets.

    peakWait   : float
        Minimum spacing between detections, in seconds.  Converted to
        frames (`peakWait * sr // hop_length`) and used as the default
        `wait` argument of `peak_pick`.  Longer wait = fewer onsets.

    hop_length : int > 0 [scalar]
        hop length (in samples), used for the onset envelope, the frame
        conversions and the plotted spectrogram

    sr         : number > 0 [scalar]
        sampling rate of `y`

    backtrack  : bool
        If `True`, detected onset events are passed through
        `onset_backtrack` using the onset strength envelope.

        This is primarily useful when using onsets as slice points for
        segmentation.

    plots      : int
        Plot level.  `0` (or less) draws nothing.  Any value `> 0` draws
        the spectrogram above the onset envelope with the selected peaks.
        Values `> 1` additionally plot the raw input signal.

    kwargs     : additional keyword arguments
        Parameters for peak picking (`pre_max`, `post_max`, `pre_avg`,
        `post_avg`, `wait`, `delta`).  Values given here take precedence
        over the defaults derived from `peakThresh` and `peakWait`.

        See `librosa.util.peak_pick` for details.


    Returns
    -------
    onsets_samples : np.ndarray [shape=(n_onsets,)]
        positions of detected onsets, in samples

    onsets_time : np.ndarray [shape=(n_onsets,)]
        positions of detected onsets, in seconds


    See Also
    --------
    onset_strength : compute onset strength per-frame
    onset_backtrack : backtracking onset events
    librosa.util.peak_pick : pick peaks from a time series

    """

    onset_env = onset_strength(y=y,
                               sr=sr,
                               hop_length=hop_length,
                               aggregate=np.median)

    # Peak-picking defaults; any value already present in kwargs wins.
    kwargs.setdefault('pre_max', 0.03 * sr // hop_length)  # 30ms
    kwargs.setdefault('post_max', 0.00 * sr // hop_length + 1)  # 0ms
    kwargs.setdefault('pre_avg', 0.10 * sr // hop_length)  # 100ms
    kwargs.setdefault('post_avg', 0.10 * sr // hop_length + 1)  # 100ms
    kwargs.setdefault('wait', peakWait * sr // hop_length)  # caller-defined
    kwargs.setdefault('delta', peakThresh)

    # Peak pick the onset envelope
    onsets = peak_pick(onset_env, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        onsets = onset_backtrack(onsets, onset_env)

    onsets_samples = frames_to_samples(onsets, hop_length=hop_length)
    onsets_time = frames_to_time(onsets, hop_length=hop_length, sr=sr)

    # // *--- Plot - source signal ---*

    if plots > 1:

        fnum = 3
        pltTitle = 'Input Signals: aSrc_ch1'
        pltXlabel = 'sinArray time-domain wav'
        pltYlabel = 'Magnitude'

        # x-axis spanning the signal: len(y) points from 0 to len(y)
        xaxis = np.linspace(0, len(y), len(y))

        xodplt.xodPlot1D(fnum, y, xaxis, pltTitle, pltXlabel, pltYlabel)

    # // *--- Plot Peak-Picking results vs. Spectrogram ---*

    if plots > 0:

        NFFT = 2048

        # Use the same hop as the onset envelope so that STFT frames and
        # envelope frames line up for any `hop_length`, not only NFFT / 4.
        ySTFT = stft(y, n_fft=NFFT, hop_length=hop_length)

        assert (ySTFT.shape[1] == len(onset_env)
                ), "Number of STFT frames != len onset_env"

        times = frames_to_time(np.arange(len(onset_env)),
                               sr=sr,
                               hop_length=hop_length)

        plt.figure(facecolor='silver', edgecolor='k', figsize=(12, 8))
        ax = plt.subplot(2, 1, 1)
        # sr / hop_length are passed so the spectrogram's time axis agrees
        # with `times` on the shared x-axis below.
        specshow(amplitude_to_db(magphase(ySTFT)[0], ref=np.max),
                 sr=sr,
                 hop_length=hop_length,
                 y_axis='log',
                 x_axis='time',
                 cmap=plt.cm.viridis)
        plt.title('CH1: Spectrogram (STFT)')

        plt.subplot(2, 1, 2, sharex=ax)
        plt.plot(times, onset_env, alpha=0.66, label='Onset strength')
        plt.vlines(times[onsets],
                   0,
                   onset_env.max(),
                   color='r',
                   alpha=0.8,
                   label='Selected peaks')
        plt.legend(frameon=True, framealpha=0.66)
        plt.axis('tight')

        plt.xlabel('time')
        plt.ylabel('Amplitude')
        plt.title('Onset Strength detection & Peak Selection')

        # Lay out after the labels exist so they are accounted for
        plt.tight_layout()

        # Only open a plot window when something was actually drawn
        plt.show()

    return onsets_samples, onsets_time