def onset_strength(y=None, sr=22050, S=None, detrend=False, centering=True,
                   feature=None, aggregate=None, **kwargs):
    """Compute a spectral flux onset strength envelope.

    Before any centering shift, the value at frame ``t`` is

        ``aggregate_f max(0, S[f, t] - S[f, t - 1])``

    and the first frame is always zero.

    By default, if a time series ``y`` is provided, ``S`` will be the
    log-power Mel spectrogram.

    :usage:
        >>> # Mean aggregation with Mel-scaled spectrogram
        >>> o_env = librosa.onset.onset_strength(y, sr)

        >>> # Median aggregation
        >>> o_env = librosa.onset.onset_strength(y, sr, aggregate=np.median)

        >>> # Log-frequency spectrogram instead of Mel
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      feature=librosa.feature.logfsgram)

        >>> # Or Mel spectrogram with customized options
        >>> o_env = librosa.onset.onset_strength(y, sr, n_mels=128,
        ...                                      fmin=32, fmax=8000)

    :parameters:
      - y : np.ndarray [shape=(n,)]
          audio time-series

      - sr : int > 0 [scalar]
          sampling rate of ``y``

      - S : np.ndarray [shape=(d, m)]
          pre-computed (log-power) spectrogram

      - detrend : bool [scalar]
          Filter the onset strength to remove the DC component

      - centering : bool [scalar]
          Shift the onset function by ``n_fft / (2 * hop_length)`` frames
          (truncated to an integer) by prepending zeros

      - feature : function
          Function for computing time-series features, eg, scaled
          spectrograms.  It is called as ``feature(y=y, sr=sr, **kwargs)``.
          By default, uses :func:`librosa.feature.melspectrogram`

      - aggregate : function
          Aggregation function to use when combining onsets
          at different frequency bins.  It is called with ``axis=0``.
          Default: ``np.mean``

      - *kwargs*
          Additional parameters to ``feature()``, if ``S`` is not provided.
          ``n_fft`` (default 2048) and ``hop_length`` (default 64) are also
          read from here to size the centering shift.

    .. note:: if ``S`` is provided, then ``(y, sr)`` are optional.

    :returns:
      - onset_envelope : np.ndarray
          vector containing the onset strength envelope.  It has ``m``
          entries, plus the centering shift when ``centering`` is set.

    :raises:
      - ValueError
          if neither ``(y, sr)`` nor ``S`` are provided
    """
    # Resolve the pluggable callables to their documented defaults.
    feature = librosa.feature.melspectrogram if feature is None else feature
    aggregate = np.mean if aggregate is None else aggregate

    if S is None:
        if y is None:
            raise ValueError('One of "S" or "y" must be provided.')

        # Feature magnitudes, converted to a log-amplitude scale.
        S = librosa.core.logamplitude(np.abs(feature(y=y, sr=sr, **kwargs)))

    # Frame-to-frame difference.  A zero column is prepended so that the
    # envelope keeps one value per input frame.
    delta = np.pad(np.diff(S, axis=1), ((0, 0), (1, 0)), mode='constant')

    # Keep only increases in energy, then collapse the frequency axis.
    envelope = aggregate(np.maximum(0.0, delta), axis=0)

    if centering:
        # Counter-act framing effects: delay by half a window, in frames.
        n_fft = kwargs.get('n_fft', 2048)
        hop_length = kwargs.get('hop_length', 64)
        shift = int(n_fft / (2 * hop_length))
        envelope = np.pad(envelope, (shift, 0), mode='constant')

    if detrend:
        # First-order high-pass filter to remove the DC component.
        envelope = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], envelope)

    return envelope
def onset_strength(y=None, sr=22050, S=None, detrend=False, centering=True,
                   feature=None, aggregate=None, **kwargs):
    """Compute a spectral flux onset strength envelope.

    Before any centering shift, onset strength at frame ``t`` is
    determined by:

        ``mean_f max(0, S[f, t] - S[f, t - 1])``

    with the first frame set to zero.

    By default, if a time series ``y`` is provided, S will be the
    log-power Mel spectrogram.

    :usage:
        >>> # Mean aggregation with Mel-scaled spectrogram
        >>> o_env = librosa.onset.onset_strength(y, sr)

        >>> # Median aggregation
        >>> o_env = librosa.onset.onset_strength(y, sr, aggregate=np.median)

        >>> # Log-frequency spectrogram instead of Mel
        >>> o_env = librosa.onset.onset_strength(y, sr,
        ...                                      feature=librosa.feature.logfsgram)

        >>> # Or Mel spectrogram with customized options
        >>> o_env = librosa.onset.onset_strength(y, sr, n_mels=128,
        ...                                      fmin=32, fmax=8000)

    :parameters:
      - y : np.ndarray
          audio time-series

      - sr : int
          sampling rate of ``y``

      - S : np.ndarray
          pre-computed (log-power) spectrogram

      - detrend : bool
          Filter the onset strength to remove the DC component

      - centering : bool
          Shift the onset function by ``n_fft / (2 * hop_length)`` frames,
          truncated to a whole number of frames

      - feature : function
          Function for computing time-series features, eg, scaled
          spectrograms.
          By default, uses ``librosa.feature.melspectrogram``

      - aggregate : function
          Aggregation function to use when combining onsets
          at different frequency bins.
          Default: ``np.mean``

      - *kwargs*
          Additional parameters to ``feature()``, if ``S`` is not provided.
          ``n_fft`` (default 2048) and ``hop_length`` (default 64) are also
          read from here to compute the centering shift.

    .. note:: if ``S`` is provided, then ``(y, sr)`` are optional.

    :returns:
      - onset_envelope : np.ndarray
          vector containing the onset strength envelope.  One value per
          frame of ``S``, preceded by the centering shift (if enabled).

    :raises:
      - ValueError
          if neither ``(y, sr)`` nor ``S`` are provided
    """
    if feature is None:
        feature = librosa.feature.melspectrogram

    if aggregate is None:
        aggregate = np.mean

    # First, compute the feature representation (mel spectrogram by default)
    if S is None:
        if y is None:
            raise ValueError('One of "S" or "y" must be provided.')

        S = np.abs(feature(y=y, sr=sr, **kwargs))

        # Convert to dBs
        S = librosa.core.logamplitude(S)

    # Retrieve the n_fft and hop_length,
    # or default values for onsets if not provided
    # NOTE(review): these defaults are applied even when `feature` was run
    # with its own defaults -- confirm they agree with the feature function.
    n_fft = kwargs.get('n_fft', 2048)
    hop_length = kwargs.get('hop_length', 64)

    # Compute first difference, include padding for alignment purposes
    onset_env = np.diff(S, axis=1)
    onset_env = np.pad(onset_env, ([0, 0], [1, 0]), mode='constant')

    # Discard negatives (decreasing amplitude)
    onset_env = np.maximum(0.0, onset_env)

    # Combine across frequency bins (mean by default)
    onset_env = aggregate(onset_env, axis=0)

    # Counter-act framing effects.  Shift the onsets by
    # n_fft / (2 * hop_length) frames.
    if centering:
        # np.pad only accepts integral pad widths, and `/` yields a float
        # under true division, so truncate explicitly.
        shift = int(n_fft / (2 * hop_length))
        onset_env = np.pad(onset_env, (shift, 0), mode='constant')

    # remove the DC component
    if detrend:
        onset_env = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], onset_env)

    return onset_env
def onset_strength(y=None, sr=22050, S=None, detrend=False,
                   feature=librosa.feature.melspectrogram,
                   aggregate=np.mean, **kwargs):
    """Spectral flux onset strength.

    Entry ``t`` of the result is determined by:

        ``aggregate_f max(0, S[f, t+1] - S[f, t])``

    By default, if a time series is provided, S will be the
    log-power Mel spectrogram.

    :usage:
        >>> # Mean aggregation with Mel-scaled spectrogram
        >>> onsets = librosa.onset.onset_strength(y, sr)

        >>> # Median aggregation
        >>> onsets = librosa.onset.onset_strength(y, sr, aggregate=np.median)

        >>> # Log-frequency spectrogram instead of Mel
        >>> onsets = librosa.onset.onset_strength(y, sr,
        ...                                       feature=librosa.feature.logfsgram)

        >>> # Or Mel spectrogram with customized options
        >>> onsets = librosa.onset.onset_strength(y, sr, n_mels=128,
        ...                                       fmin=32, fmax=8000)

    :parameters:
      - y : np.ndarray
          audio time-series

      - sr : int
          sampling rate of ``y``

      - S : np.ndarray
          pre-computed (log-power) spectrogram

      - detrend : bool
          Filter the onset strength with a first-order high-pass filter

      - feature : function
          Function for computing time-series features, eg, scaled
          spectrograms.  It is called as ``feature(y=y, sr=sr, **kwargs)``.
          By default, uses ``librosa.feature.melspectrogram``

      - aggregate : function
          Aggregation function to use when combining onsets
          at different frequency bins.  It is called with ``axis=0``.

      - kwargs
          Parameters to ``feature()``, if ``S`` is not provided.

    .. note:: if ``S`` is provided, then ``(y, sr)`` are optional.

    :returns:
      - onsets : np.ndarray
          vector of onset strengths, one entry per pair of consecutive
          frames of ``S``

    :raises:
      - ValueError
          if neither ``(y, sr)`` nor ``S`` are provided
    """
    if S is None:
        if y is None:
            raise ValueError('One of "S" or "y" must be provided.')

        # Feature magnitudes, converted to a log-amplitude scale.
        S = librosa.core.logamplitude(np.abs(feature(y=y, sr=sr, **kwargs)))

    # Positive part of the first difference along the frame axis:
    # decreases in amplitude are discarded.
    rising = np.maximum(0.0, np.diff(S, n=1, axis=1))

    # Collapse the frequency axis into a single envelope.
    envelope = aggregate(rising, axis=0)

    if not detrend:
        return envelope

    # First-order high-pass filter applied to the envelope.
    return scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], envelope)