Example #1
0
def onset_strength(y=None,
                   sr=22050,
                   S=None,
                   detrend=False,
                   centering=True,
                   feature=None,
                   aggregate=None,
                   **kwargs):
    """Spectral-flux onset strength envelope.

    Before the optional centering shift, the value at frame ``t >= 1`` is

    ``aggregate_f max(0, S[f, t] - S[f, t-1])``

    and the value at frame 0 is the aggregate of a zero column.

    When only a time series ``y`` is given, ``S`` is obtained by passing
    the magnitude of ``feature(y=y, sr=sr, **kwargs)`` through
    ``librosa.core.logamplitude``.

    :usage:
        >>> # Mean aggregation with Mel-scaled spectrogram
        >>> o_env = librosa.onset.onset_strength(y, sr)

        >>> # Median aggregation
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      aggregate=np.median)

        >>> # Log-frequency spectrogram instead of Mel
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      feature=librosa.feature.logfsgram)

        >>> # Or Mel spectrogram with customized options
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      n_mels=128,
        ...                                      fmin=32,
        ...                                      fmax=8000)

    :parameters:
      - y        : np.ndarray [shape=(n,)]
          audio time-series; only used when ``S`` is not given

      - sr       : int > 0 [scalar]
          sampling rate of ``y``

      - S        : np.ndarray [shape=(d, m)]
          pre-computed (log-power) spectrogram

      - detrend : bool [scalar]
          If true, filter the envelope to remove its DC component

      - centering : bool [scalar]
          If true, delay the envelope by ``n_fft / (2 * hop_length)``
          frames by prepending zeros.  ``n_fft`` and ``hop_length`` are
          read from ``kwargs`` and default to 2048 and 64.

      - feature : function
          Function computing the time-frequency representation,
          eg, a scaled spectrogram.
          Default: :func:`librosa.feature.melspectrogram`

      - aggregate : function
          Function used to combine the per-bin onsets of each frame;
          called as ``aggregate(x, axis=0)``.
          Default: ``np.mean``

      - *kwargs*
          Extra parameters forwarded to ``feature()`` when ``S`` is
          not provided.

    .. note:: if ``S`` is provided, then ``(y, sr)`` are optional.

    :returns:
      - onset_envelope   : np.ndarray
          vector containing the onset strength envelope.  It has ``m``
          frames, plus the prepended zero frames when ``centering`` is set.

    :raises:
      - ValueError
          if neither ``(y, sr)`` nor ``S`` are provided
    """

    # Fall back to the documented defaults for the two callables
    spec_fn = librosa.feature.melspectrogram if feature is None else feature
    reduce_fn = np.mean if aggregate is None else aggregate

    # No spectrogram supplied: derive a log-amplitude one from the audio
    if S is None:
        if y is None:
            raise ValueError('One of "S" or "y" must be provided.')

        S = librosa.core.logamplitude(np.abs(spec_fn(y=y, sr=sr, **kwargs)))

    # Frame-to-frame difference; a zero column in front keeps the
    # number of frames equal to that of S
    flux = np.pad(np.diff(S, axis=1), ([0, 0], [1, 0]), mode='constant')

    # Keep increases only, then collapse the frequency axis
    envelope = reduce_fn(np.maximum(0.0, flux), axis=0)

    # Counter-act framing effects by delaying the envelope
    if centering:
        n_fft = kwargs.get('n_fft', 2048)
        hop_length = kwargs.get('hop_length', 64)
        lag = int(n_fft / (2 * hop_length))
        envelope = np.pad(envelope, (lag, 0), mode='constant')

    # Optional DC removal
    if detrend:
        envelope = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], envelope)

    return envelope
Example #2
0
def onset_strength(y=None, sr=22050, S=None, detrend=False, centering=True,
                   feature=None, aggregate=None, **kwargs):
    """Compute a spectral flux onset strength envelope.

    Before the optional centering shift, onset strength at frame
    ``t >= 1`` is determined by:

    ``aggregate_f max(0, S[f, t] - S[f, t-1])``

    and frame 0 holds the aggregate of a zero column.

    By default, if a time series ``y`` is provided, S will be the
    log-power Mel spectrogram.

    :usage:
        >>> # Mean aggregation with Mel-scaled spectrogram
        >>> o_env = librosa.onset.onset_strength(y, sr)

        >>> # Median aggregation
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      aggregate=np.median)

        >>> # Log-frequency spectrogram instead of Mel
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      feature=librosa.feature.logfsgram)

        >>> # Or Mel spectrogram with customized options
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      n_mels=128,
        ...                                      fmin=32,
        ...                                      fmax=8000)

    :parameters:
      - y        : np.ndarray
          audio time-series

      - sr       : int
          sampling rate of ``y``

      - S        : np.ndarray
          pre-computed (log-power) spectrogram, frequency bins along
          axis 0 and frames along axis 1

      - detrend : bool
          Filter the onset strength to remove the DC component

      - centering : bool
          Shift the onset function by ``n_fft // (2 * hop_length)`` frames
          by prepending that many zeros.  ``n_fft`` and ``hop_length``
          are taken from ``kwargs`` and default to 2048 and 64.

      - feature : function
          Function for computing time-series features, eg, scaled spectrograms.
          By default, uses ``librosa.feature.melspectrogram``

      - aggregate : function
          Aggregation function to use when combining onsets
          at different frequency bins.
          Default: ``np.mean``

      - *kwargs*
          Additional parameters to ``feature()``, if ``S`` is not provided.

    .. note:: if ``S`` is provided, then ``(y, sr)`` are optional.

    :returns:
      - onset_envelope   : np.ndarray
          vector containing the onset strength envelope.  With ``m``
          frames in ``S`` it has ``m`` entries, plus
          ``n_fft // (2 * hop_length)`` leading zeros when ``centering``
          is enabled.

    :raises:
      - ValueError
          if neither ``(y, sr)`` nor ``S`` are provided
    """

    if aggregate is None:
        aggregate = np.mean

    # First, compute the (mel) spectrogram if it was not supplied
    if S is None:
        if y is None:
            raise ValueError('One of "S" or "y" must be provided.')

        # The default feature extractor is only needed on this path, so it
        # is resolved here rather than for every call.
        if feature is None:
            feature = librosa.feature.melspectrogram

        S = np.abs(feature(y=y, sr=sr, **kwargs))

        # Convert to dBs
        S = librosa.core.logamplitude(S)

    # Retrieve the n_fft and hop_length,
    # or default values for onsets if not provided
    n_fft = kwargs.get('n_fft', 2048)
    hop_length = kwargs.get('hop_length', 64)

    # Compute first difference, include padding for alignment purposes
    onset_env = np.diff(S, axis=1)
    onset_env = np.pad(onset_env, ([0, 0], [1, 0]), mode='constant')

    # Discard negatives (decreasing amplitude)
    onset_env = np.maximum(0.0, onset_env)

    # Combine the frequency bins of each frame
    onset_env = aggregate(onset_env, axis=0)

    # Counter-act framing effects. Shift the onsets by
    # n_fft // (2 * hop_length) frames.
    if centering:
        # np.pad only accepts integral pad widths.  Plain ``/`` yields a
        # float under true division, so floor-divide and cast explicitly.
        lag = int(n_fft // (2 * hop_length))
        onset_env = np.pad(onset_env, (lag, 0), mode='constant')

    # remove the DC component
    if detrend:
        onset_env = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], onset_env)

    return onset_env
Example #3
0
def onset_strength(
    y=None, sr=22050, S=None, detrend=False, feature=librosa.feature.melspectrogram, aggregate=np.mean, **kwargs
):
    """Spectral flux onset strength.

    For each pair of consecutive frames, onset strength is:

    ``aggregate_f max(0, S[f, t+1] - S[f, t])``

    When only a time series ``y`` is given, ``S`` is obtained by passing
    the magnitude of ``feature(y=y, sr=sr, **kwargs)`` through
    ``librosa.core.logamplitude``.

    :usage:
        >>> # Mean aggregation with Mel-scaled spectrogram
        >>> onsets = librosa.onset.onset_strength(y, sr)

        >>> # Median aggregation
        >>> onsets = librosa.onset.onset_strength(y, sr, aggregate=np.median)

        >>> # Log-frequency spectrogram instead of Mel
        >>> onsets = librosa.onset.onset_strength(y, sr, feature=librosa.feature.logfsgram)

        >>> # Or Mel spectrogram with customized options
        >>> onsets = librosa.onset.onset_strength(y, sr, n_mels=128, fmin=32, fmax=8000)

    :parameters:
      - y        : np.ndarray
          audio time-series; only used when ``S`` is not given

      - sr       : int
          sampling rate of ``y``

      - S        : np.ndarray
          pre-computed (log-power) spectrogram, frequency bins along
          axis 0 and frames along axis 1

      - detrend : bool
          Filter the onset strength to remove the DC component

      - feature : function
          Function for computing time-series features, eg, scaled spectrograms.
          By default, uses ``librosa.feature.melspectrogram``

      - aggregate : function
          Aggregation function used to combine onsets across frequency
          bins; called as ``aggregate(x, axis=0)``.
          Default: ``np.mean``

      - kwargs
          Parameters to ``feature()``, if ``S`` is not provided.

    .. note:: if ``S`` is provided, then ``(y, sr)`` are optional.

    :returns:
      - onsets   : np.ndarray
          vector of onset strengths, one per pair of consecutive frames
          of ``S``

    :raises:
      - ValueError
          if neither ``(y, sr)`` nor ``S`` are provided

    """

    # Without a spectrogram we need audio to build one from
    if S is None and y is None:
        raise ValueError('One of "S" or "y" must be provided.')

    if S is None:
        magnitude = np.abs(feature(y=y, sr=sr, **kwargs))

        # Convert to dBs
        S = librosa.core.logamplitude(magnitude)

    # First difference along time, keeping only increases in amplitude
    rectified = np.maximum(0.0, np.diff(S, n=1, axis=1))

    # Collapse the frequency axis
    envelope = aggregate(rectified, axis=0)

    if not detrend:
        return envelope

    # remove the DC component
    return scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], envelope)