def spectral_centroid(stft):
    """
    Calculate the spectral centroid for a STFT time-series.

    The spectral centroid is a well known timbral feature used to describe
    the brightness of a sound. It corresponds to the center of gravity of
    the frequency components of a signal [Park2010]_.

    .. math::
       \\operatorname{SC} = \\frac{\\sum_{k=1}^{N} |X(k)| \\cdot f_k }{\\sum_{k=1}^{N} |X(k)|}

    Where `X(k)` is the result of the STFT for the `k-th` frequency bin.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series
        The object produced by ``aggregate_features`` for the per-frame
        centroid, labelled 'SpectralCentroid' with unit 'Hz'.
    """
    def _centroid_of_frame(frame):
        # The frequency axis is read from the STFT object on every call.
        return _spectral_centroid(frame, stft.frequencies)

    centroid_series = aggregate_features(stft, _centroid_of_frame)
    centroid_series.label = 'SpectralCentroid'
    centroid_series.unit = 'Hz'
    return centroid_series
def spectral_entropy(stft):
    """
    Calculate the spectral entropy for a STFT time series

    The spectral entropy is based on the concept of information entropy from
    Shannon's information theory. It measures the unpredictability of the
    given state of a spectral distribution.

    The value computed here is normalized by :math:`\\log_2 N`, where `N`
    is ``stft.nfeatures``:

    .. math::
       \\operatorname{SEpy} = - \\frac{\\sum_{k}^{N} P(k) \\cdot \\log_2 P(k)}{\\log_2 N}

    Where

    .. math::
       P(i)=\\frac{|X(i)|^2}{\\sum_{j}^{N} |X(j)|^2}

    Bins with zero energy contribute nothing to the sum (the limit of
    :math:`p \\log_2 p` for :math:`p \\to 0` is 0), and a frame with no
    energy at all yields 0.

    More info at https://www.mathworks.com/help/signal/ref/pentropy.html.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series
        The object produced by ``aggregate_features`` for the per-frame
        entropy, labelled 'Spectral Entropy' with an empty unit.
    """
    def function(X):
        N = stft.nfeatures
        energy = np.abs(X)**2
        total_energy = np.sum(energy)
        if total_energy == 0:
            # Silent frame: the distribution P is undefined (0 / 0).
            return 0.
        P = energy / total_energy
        # Drop empty bins: 0 * log2(0) evaluates to NaN in floating point
        # and would poison the whole sum. `!=` (rather than `>`) keeps NaN
        # inputs in place so they still propagate to the result.
        P = P[P != 0]
        H = -(np.sum(P * np.log2(P))) / np.log2(N)
        return H

    time_series = aggregate_features(stft, function)
    time_series.label = 'Spectral Entropy'
    time_series.unit = ''
    return time_series
def spectral_energy(stft):
    """
    Calculate the total energy of each STFT frame.

    The spectral energy of a frame is the sum of the squared magnitudes of
    its bins.

    .. math::
       \\operatorname{SE} = \\sum_{k=1}^{N} |X(k)|^2

    Where `X(k)` is the result of the STFT for the `k-th` frequency bin.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series
        The object produced by ``aggregate_features`` for the per-frame
        energy, labelled 'Spectral Energy' with an empty unit.
    """
    def _energy_of_frame(frame):
        squared_magnitudes = np.abs(frame)**2
        return np.sum(squared_magnitudes)

    energy_series = aggregate_features(stft, _energy_of_frame)
    energy_series.label = 'Spectral Energy'
    energy_series.unit = ''
    return energy_series
def harmonic_energy(harmonics_magnitude):
    """
    Calculate the energy of the harmonic partials.

    The harmonic energy of a frame is the sum of the squared values of its
    harmonic partials.

    .. math::
       \\operatorname{HE} = \\sum_{k=1}^{H} A(k)^2

    Args
    ----
    harmonics_magnitude
        Time series passed to ``aggregate_features``; each frame is squared
        element-wise and summed.

    Return
    ------
    time_series
        The object produced by ``aggregate_features``, labelled
        'Harmonic Energy' with an empty unit.
    """
    def _energy_of_partials(partials):
        squared = partials**2
        return np.sum(squared)

    energy_series = aggregate_features(harmonics_magnitude,
                                       _energy_of_partials)
    energy_series.label = 'Harmonic Energy'
    energy_series.unit = ''
    return energy_series
def spectral_spread(stft):
    """
    Calculate the spectral spread for a STFT time-series.

    The spectral spread describes how the spectrum is spread around the
    spectral centroid [Peeters2011]_, [Lerch2012]_.

    .. math::
       \\operatorname{SSp} = \\sqrt{\\frac{\\sum_{k=1}^{N} |X(k)| \\cdot (f_k - SC)^2 }{\\sum_{k=1}^{N} |X(k)|}}

    Where `X(k)` is the result of the STFT for the `k-th` frequency bin and SC
    is the spectral centroid for the frame.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series
        The object produced by ``aggregate_features`` for the per-frame
        spread, labelled 'SpectralSpread' with unit 'Hz'.
    """
    def _spread_of_frame(frame):
        # The frequency axis is read from the STFT object on every call.
        return _spectral_spread(frame, stft.frequencies)

    spread_series = aggregate_features(stft, _spread_of_frame)
    spread_series.label = 'SpectralSpread'
    spread_series.unit = 'Hz'
    return spread_series
def oer(harmonics):
    """
    Calculate the odd-to-even ratio for the harmonics time series.

    The OER represents the odd-to-even ratio among the harmonics of an audio
    signal. This value will be higher for sounds with predominantly odd
    harmonics, such as the clarinet.

    .. math::
       \\operatorname{OER}=\\frac{\\sum_{h=1}^{H / 2} A(2 h - 1)^{2}\\left(t_{m}\\right)}{\\sum_{h=1}^{H / 2} A(2 h)^{2}\\left(t_{m}\\right)}

    Where :math:`A(h)` represents the amplitude of the h-th harmonic partial.

    Frames whose even harmonics carry no energy yield 0 instead of a
    division by zero.

    Args
    ----
    harmonics
        Time series of harmonic amplitudes passed to ``aggregate_features``.
        Index 0 of each frame is treated as the 1st harmonic.

    Return
    ------
    time_series
        The object produced by ``aggregate_features``, labelled 'OER' with
        an empty unit.
    """
    def _func(A):
        # Square each partial *before* summing: the energy of a group of
        # partials is the sum of the squares, not the square of the sum.
        energy = np.abs(A)**2
        odd_energy = np.sum(energy[::2])    # harmonics 1, 3, 5, ...
        even_energy = np.sum(energy[1::2])  # harmonics 2, 4, 6, ...
        if even_energy == 0:
            return 0.
        return odd_energy / even_energy

    time_series = aggregate_features(harmonics, _func)
    time_series.label = 'OER'
    time_series.unit = ''
    return time_series
def harmonic_centroid(harmonics):
    """
    Calculate the harmonic centroid for the harmonics time series.

    The harmonic centroid represents the center of gravity of the amplitudes
    of the harmonic series, expressed as a harmonic number.

    .. math::
       \\operatorname{HC} = \\frac{\\sum_{k=1}^{H} A(k) \\cdot k }{\\sum_{k=1}^{H} A(k)}

    Where :math:`A(k)` represents the amplitude of the k-th harmonic partial.

    Frames whose amplitudes are all zero yield 0.

    Args
    ----
    harmonics
        Time series of harmonic amplitudes passed to ``aggregate_features``.
        Index 0 of each frame is treated as the 1st harmonic.

    Return
    ------
    time_series
        The object produced by ``aggregate_features``, labelled
        'HarmonicCentroid' with unit 'Harmonic Number'.
    """
    def _func(A):
        abs_A = np.abs(A)
        sum_abs_A = np.sum(abs_A)
        if sum_abs_A == 0:
            return 0
        # Harmonic numbers are 1-based (A[0] is the 1st harmonic), so the
        # weights run from 1 to H; zero-based weights would discard the
        # fundamental and shift the centroid down by one harmonic.
        harmonic_numbers = np.arange(1, len(A) + 1)
        return np.sum(abs_A * harmonic_numbers) / sum_abs_A

    time_series = aggregate_features(harmonics, _func)
    time_series.label = 'HarmonicCentroid'
    time_series.unit = 'Harmonic Number'
    return time_series
def spectral_kurtosis(stft):
    """
    Calculate the spectral kurtosis for an STFT time series

    The spectral kurtosis is a measure of the flatness of the distribution of
    the spectrum around its mean value. It will output the value 3 for
    Gaussian distributions. Values smaller than 3 represent flatter
    distributions, while values larger than 3 represent peakier distributions
    [Lerch2012]_.

    .. math::
       \\operatorname{SKu} = \\frac{2 \\cdot \\sum_{k=1}^{N} \\left( |X(k)| - \\mu_{|X|} \\right)^4 }{ N \\cdot \\sigma_{|X|}^4}

    Where :math:`\\mu_{|X|}` is the mean value of the magnitude spectrum and
    :math:`\\sigma_{|X|}` its standard deviation.

    Frames whose magnitude spectrum has zero variance (e.g. silence) yield 0,
    since the kurtosis is undefined for them.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series
        The object produced by ``aggregate_features``, labelled
        'SpectralKurtosis' with an empty unit.
    """
    def _func(X):
        # All statistics are taken on the magnitude spectrum, as in the
        # formula above, not on the (possibly complex) raw bins.
        magnitudes = np.abs(X)
        sigma = np.std(magnitudes)
        if sigma == 0:
            return 0.
        deviations = magnitudes - np.mean(magnitudes)
        # The power applies to each deviation before summing; the plain sum
        # of deviations from the mean is always ~0.
        # NOTE(review): the factor 2 follows the documented formula;
        # presumably it compensates for a one-sided spectrum -- confirm that
        # len(X) is the intended N.
        return 2 * np.sum(deviations**4) / (len(X) * sigma**4)

    time_series = aggregate_features(stft, _func)
    time_series.label = 'SpectralKurtosis'
    time_series.unit = ''
    return time_series
def spectral_skewness(stft):
    """
    Calculate the spectral skewness for an STFT time series

    The spectral skewness is a measure of the asymmetry of the distribution
    of the spectrum around its mean value, and is calculated from its third
    order moment. It will output negative values when the spectrum has more
    energy below the mean value, and positive values when it has more energy
    above the mean. Symmetric distributions will output the value zero
    [Lerch2012]_.

    .. math::
       \\operatorname{SSk} = \\frac{2 \\cdot \\sum_{k=1}^{N} \\left( |X(k)| - \\mu_{|X|} \\right)^3 }{ N \\cdot \\sigma_{|X|}^3}

    Where :math:`\\mu_{|X|}` is the mean value of the magnitude spectrum and
    :math:`\\sigma_{|X|}` its standard deviation.

    Frames whose magnitude spectrum has zero variance (e.g. silence) yield 0,
    since the skewness is undefined for them.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series
        The object produced by ``aggregate_features``, labelled
        'SpectralSkewness' with an empty unit.
    """
    def _func(X):
        # All statistics are taken on the magnitude spectrum, as in the
        # formula above, not on the (possibly complex) raw bins.
        magnitudes = np.abs(X)
        sigma = np.std(magnitudes)
        if sigma == 0:
            return 0.
        deviations = magnitudes - np.mean(magnitudes)
        # The power applies to each deviation before summing; the plain sum
        # of deviations from the mean is always ~0.
        # NOTE(review): the factor 2 follows the documented formula;
        # presumably it compensates for a one-sided spectrum -- confirm that
        # len(X) is the intended N.
        return 2 * np.sum(deviations**3) / (len(X) * sigma**3)

    time_series = aggregate_features(stft, _func)
    time_series.label = 'SpectralSkewness'
    time_series.unit = ''
    return time_series
def hfc(stft, method='energy'):
    """
    Calculate the high frequency content for a STFT time-series.

    The HFC function produces sharp peaks during attacks or transients
    [Bello2005]_ and might be a good choice for detecting onsets in percussive
    sounds. The value is normalized by the number of bins `N` of the frame.

    .. math::
       \\operatorname{HFC} = \\frac{1}{N} \\sum_{k=1}^{N} |X(k)|^2 \\cdot k

    Alternatively, you can set ``method`` = `'amplitude'` instead of
    `'energy'` (default value):

    .. math::
       \\operatorname{HFC} = \\frac{1}{N} \\sum_{k=1}^{N} |X(k)| \\cdot k

    Args
    ----
    stft : iracema.spectral.STFT
        STFT time-series.
    method : str
        Method of choice to calculate the HFC.

    Return
    ------
    time_series
        The object produced by ``aggregate_features``, labelled 'HFC' with
        an empty unit.

    Raises
    ------
    ValueError
        If ``method`` is neither 'energy' nor 'amplitude'.
    """
    # Validate once, up front, so that a bad argument fails loudly instead of
    # silently producing None for every frame.
    if method not in ('energy', 'amplitude'):
        raise ValueError(
            "the argument `method` must be 'energy' or 'amplitude'")

    def _func(X):
        N = X.shape[0]
        W = np.arange(1, N + 1)  # bin number used as weight (1-based)
        magnitudes = np.abs(X)
        if method == 'energy':
            magnitudes = magnitudes**2
        return np.sum(W * magnitudes) / N

    time_series = aggregate_features(stft, _func)
    time_series.label = 'HFC'
    time_series.unit = ''
    return time_series
def spectral_flatness(stft):
    """
    Calculate the spectral flatness for a given STFT.

    The spectral flatness gives an estimation of the noisiness /
    sinusoidality of an audio signal (for the whole spectrum or for a
    frequency range). It can be used to determine voiced / unvoiced parts of
    a signal [Park2004]_.

    It is computed here, in decibels, as the ratio between the
    `geometric mean` and the `arithmetic mean` of the magnitude spectrum:

    .. math::
       :nowrap:

       \\begin{eqnarray}
       \\operatorname{SFM} = 10 log_{10} \\left( \\frac {\\left( \\prod_{k=1}^{N} |X(k)| \\right)^\\frac{1}{N}} { \\frac{1}{N} \\sum_{k=1}^{N} |X(k)| } \\right)
       \\end{eqnarray}

    Where `X(k)` is the result of the STFT for the `k-th` frequency bin.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series
        The object produced by ``aggregate_features``, labelled
        'SpectralFlatness' with an empty unit.
    """
    def _flatness_of_frame(frame):
        magnitudes = np.abs(frame)
        mean_ratio = gmean(magnitudes) / np.mean(magnitudes)
        return 10 * np.log10(mean_ratio)

    flatness_series = aggregate_features(stft, _flatness_of_frame)
    flatness_series.label = 'SpectralFlatness'
    flatness_series.unit = ''
    return flatness_series
def hps(fft_time_series, minf0, maxf0, n_downsampling=16,
        decimation='discard'):
    """
    Extract the pitch using Harmonic Product Spectrum.

    The Harmonic Product Spectrum measures the maximum coincidence for
    harmonics [Cuadra2001]_. It is based on successive downsampling
    operations on the frequency spectrum of the signal. If the signal
    contains harmonic components, then it should contain energy in the
    frequency positions corresponding to the integer multiples of the
    fundamental frequency. So by down-sampling the spectrum by increasing
    integer factors :math:`(1,2,3,...,R)` it is possible to align the energy
    of its harmonic components with the fundamental frequency of the signal.

    Then we multiply the original spectrum and its downsampled versions. This
    operation will make a strong peak appear in a position that corresponds
    to the fundamental frequency. The HPS calculates the maximum coincidence
    for harmonics, according to the equation:

    .. math:: Y(\\omega) = \\prod_{r=1}^{R} |X(\\omega r)|

    where :math:`X(\\omega r)` represents one spectral frame and :math:`R` is
    the number of harmonics to be considered in the calculation. After this
    calculation a simple peak detection algorithm is used to obtain the
    fundamental frequency of the frame.

    This implementation modifies this approach adding an offset of 1 to the
    magnitude spectrum of the signal before combining the spectra. This makes
    the algorithm more reliable in situations where some harmonics have very
    little or no energy at all (float arithmetic is not reliable when values
    get too close to zero). Also, alternatively to the original approach, it
    is possible to choose between different interpolation methods, using the
    argument `decimation`.

    .. note::
       As implemented, the spectrum decimated by a factor `q` is weighted by
       `1/q` and the spectra are combined with a *sum* rather than with the
       product of the equation above.

    Args
    ----
    fft_time_series : iracema.spectral.FFTs
        FFT time series.
    minf0 : float
        Lower frequency limit to search for f0.
    maxf0 : float
        Upper frequency limit to search for f0.
    n_downsampling : int
        Number of downsampling operations
    decimation : 'discard', 'mean' or 'interpolation'
        Type of decimation operation to be performed.

    Return
    ------
    pitch : TimeSeries
        A pitch time series

    Raises
    ------
    ValueError
        If ``minf0`` is not smaller than ``maxf0``, or if ``decimation`` is
        not one of the supported options.

    References
    ----------
    .. [Cuadra2001] De La Cuadra, P. Efficient pitch detection techniques for
       interactive music. In ICMC, page 403–406, 2001.
    """
    if minf0 >= maxf0:
        raise ValueError('The parameter maxf0 must be greater than minf0.')
    # Reject unknown options here; otherwise the decimated spectrum would be
    # left unbound inside the per-frame loop.
    if decimation not in ('discard', 'mean', 'interpolation'):
        raise ValueError(
            "The parameter decimation must be 'discard', 'mean' or "
            "'interpolation'.")

    N = fft_time_series.data.shape[-2]
    # Builtin int() replaces the `np.int` alias, which was removed from NumPy.
    # max index to search for f0
    ix_maxf0 = int(np.ceil((N * maxf0) / fft_time_series.max_frequency))
    # min index to search for f0
    ix_minf0 = int(np.ceil((N * minf0) / fft_time_series.max_frequency))

    def calculate(X):
        """
        Calculate pitch for frame.
        """
        N = X.shape[-1]  # length of the FFT
        # matrix with the decimated FFTs: row 0 holds the original magnitude
        # spectrum, row q - 1 the spectrum decimated by q (zero padded).
        # np.float64 replaces the `np.float_` alias removed in NumPy 2.0.
        S = np.zeros((n_downsampling + 1, N), dtype=np.float64)
        X_mag = np.abs(X)
        S[0, :] = X_mag
        for q in range(2, n_downsampling + 2):
            if decimation == 'mean':
                ds = decimate_mean(X_mag, q)
            elif decimation == 'interpolation':
                ds = sig.decimate(X_mag, q)
            else:  # 'discard': keep every q-th bin
                ds = X_mag[::q]
            S[q - 1, :ds.shape[-1]] = ds / q

        S = S + 1
        # NOTE(review): the docstring describes a product of the spectra, but
        # the score is their sum -- confirm which one is intended.
        p_fzero = np.sum(S, axis=0)
        ix_f0 = ix_minf0 + np.argmax(p_fzero[ix_minf0:ix_maxf0])

        return fft_time_series.frequencies[ix_f0]

    pitch_time_series = aggregate_features(fft_time_series, calculate)
    pitch_time_series.label = 'Pitch (HPS)'
    pitch_time_series.unit = 'Hz'
    return pitch_time_series
def expan(fft_time_series, minf0=24., maxf0=4200., nharm=12, ncand=5,
          min_mag_cand=0.1, noisiness_tresh=0.99, perc_tol=0.04):
    """
    Extract the pitch using the Expan pitch detection algorithm.

    For every frame, the `ncand` highest peaks between `minf0` and `maxf0`
    are taken as f0 candidates. For each candidate that is strong enough
    (see `min_mag_cand`), its harmonics are searched around the integer
    multiples of the candidate's index, within a tolerance proportional to
    that index (see `perc_tol`). The candidate whose harmonic series has the
    highest energy wins, and its frequency is returned if the frame is not
    too noisy (see `noisiness_tresh`); otherwise the pitch of the frame is 0.

    Args
    ----
    fft_time_series : iracema.spectral.FFTs
        FFT time series.
    minf0 : float
        Lower frequency limit to search for f0.
    maxf0 : float
        Upper frequency limit to search for f0.
    nharm : int
        Number of harmonics to be considered.
    ncand : int
        Number of f0 candidate components to be used.
    min_mag_cand : float
        Minimum magnitude of the candidate to be chosen as f0, as a fraction
        of the maximum magnitude of the frame.
    noisiness_tresh : float
        Noisiness threshold. The noisiness of a frame is one minus the ratio
        between the harmonic energy of the winning candidate and the total
        energy of the frame.
    perc_tol : float
        Tolerance percentage to search for harmonics.

    Return
    ------
    pitch : TimeSeries
        A pitch time series

    Raises
    ------
    ValueError
        If ``minf0`` is not smaller than ``maxf0``, or if ``nharm`` is
        smaller than 3.
    """
    if minf0 >= maxf0:
        raise ValueError('The parameter maxf0 must be greater than minf0.')
    if nharm < 3:
        raise ValueError('The parameter nharm must be at least 3.')

    N = fft_time_series.data.shape[-2]
    # Builtin int() replaces the `np.int` alias, which was removed from NumPy.
    # max index to search for f0
    ix_maxf0 = int(np.ceil((N * maxf0) / fft_time_series.max_frequency))
    # min index to search for f0
    ix_minf0 = int(np.ceil((N * minf0) / fft_time_series.max_frequency))

    def frame_pitch(fft_frame):
        """
        Extract the pitch for a fft frame.
        """
        N = fft_frame.shape[-1]  # length of the FFT frame
        fft_frame_mag = np.abs(fft_frame)

        # searching for f0 candidates
        _, ix_cand = n_highest_peaks(fft_frame_mag[ix_minf0:ix_maxf0], ncand)
        ix_cand = ix_cand + ix_minf0  # TODO: check these indexes

        n_cand = len(ix_cand)

        # if no peaks were found, the frame has no pitch
        # NOTE(review): `.any()` is also False when every index is 0 --
        # confirm what n_highest_peaks returns when it finds no peaks.
        if not ix_cand.any():
            return 0.

        # memory allocation
        cand_mag = np.zeros((ncand, nharm))
        ix_cand_harm = np.zeros((ncand, nharm))

        # minimum magnitude for a candidate; it is the same for the whole
        # frame, so it is computed only once
        min_mag = min_mag_cand * np.max(fft_frame_mag)

        # iterate the candidates
        for i, ix_f0 in enumerate(ix_cand):
            delta = ix_f0 * perc_tol
            cand_mag[i, 0] = fft_frame_mag[ix_f0]

            # test the candidate's magnitude compared to the frame's max;
            # a rejected candidate keeps index 0, so it yields a pitch of 0
            # in the unlikely case it wins
            if cand_mag[i, 0] > min_mag:
                ix_cand_harm[i, 0] = ix_f0

                # iterate the candidate's potential harmonics
                for j in range(1, nharm):
                    ix_f_harm = ix_f0 * (j + 1)

                    # check if it is within the boundaries of the FFT
                    if ix_f_harm < N:
                        # set the tolerance margin to search for the harmonic
                        ix_tol_low = int(ix_f_harm - delta)
                        ix_tol_hi = int(ix_f_harm + delta)
                        if ix_tol_low < ix_minf0:
                            ix_tol_low = ix_minf0

                        # search for highest peak within the tolerance margin
                        val_harm, ix_harm = n_highest_peaks(
                            fft_frame_mag[ix_tol_low:ix_tol_hi], 1)
                        ix_harm = ix_harm + ix_tol_low

                        if ix_harm:
                            cand_mag[i, j] = val_harm
                            ix_cand_harm[i, j] = ix_harm
                        else:
                            # no peak within the margin: fall back to the
                            # exact multiple of the candidate's index
                            cand_mag[i, j] = fft_frame_mag[(j + 1) * ix_f0]
                            ix_cand_harm[i, j] = (j + 1) * ix_f0
                    else:
                        # stop the loop if the frequency index is out of bound
                        break

        # calculate the energy of the harmonic components for each candidate
        energy_harm = np.sum(cand_mag[:n_cand]**2., axis=1)

        # choose the candidate with the highest harmonic energy
        i = np.argmax(energy_harm)

        # one last test, the noisiness for the winner candidate must be below
        # the noisiness threshold
        h_energy = energy_harm[i]
        frame_energy = np.sum(fft_frame_mag**2.)
        frame_noisiness = 1. - (h_energy / frame_energy)

        if frame_noisiness < noisiness_tresh:
            # convert the index of the fundamental to a frequency
            return ix_cand_harm[i, 0] * fft_time_series.max_frequency / N

        return 0.

    pitch_time_series = aggregate_features(fft_time_series, frame_pitch)
    pitch_time_series.label = 'Pitch (Expan)'
    pitch_time_series.unit = 'Hz'
    return pitch_time_series