コード例 #1
0
ファイル: features.py プロジェクト: cegeme/iracema
def spectral_centroid(stft):
    """
    Compute the spectral centroid of each frame of a STFT time-series.

    The spectral centroid is a well known timbral feature used to describe
    the brightness of a sound. It represents the center of gravity of the
    frequency components of a signal [Park2010]_.

    .. math::
       \\operatorname{SC} = \\frac{\\sum_{k=1}^{N} |X(k)| \\cdot f_k }{\\sum_{k=1}^{N} |X(k)|}

    Where `X(k)` is the result of the STFT for the `k-th` frequency bin.

    The per-frame computation is delegated to ``_spectral_centroid``, which
    receives the frame together with ``stft.frequencies``.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'SpectralCentroid'``
        with unit ``'Hz'``.
    """
    def _centroid_of_frame(frame):
        frequencies = stft.frequencies
        return _spectral_centroid(frame, frequencies)

    centroid = aggregate_features(stft, _centroid_of_frame)
    centroid.label, centroid.unit = 'SpectralCentroid', 'Hz'
    return centroid
コード例 #2
0
ファイル: features.py プロジェクト: cegeme/iracema
def spectral_entropy(stft):
    """
    Calculate the normalized spectral entropy for a STFT time series.

    The spectral entropy is based on the concept of information entropy from
    Shannon's information theory. It measures the unpredictability of the given
    state of a spectral distribution.

    .. math:: \\operatorname{SEpy} = - \\frac{\\sum_{k}^{N} P(k) \\cdot \\log_2 P(k)}{\\log_2 N}

    Where

    .. math:: P(i)=\\frac{|X(i)|^2}{\\sum_{j}^{N} |X(j)|^2}

    and `N` is ``stft.nfeatures``. Bins with zero energy contribute nothing to
    the sum (the usual :math:`0 \\cdot \\log_2 0 = 0` convention), and a frame
    with no energy at all yields ``0.`` instead of ``NaN``.

    More info at https://www.mathworks.com/help/signal/ref/pentropy.html.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'Spectral Entropy'``
        with an empty unit.
    """
    def function(X):
        N = stft.nfeatures
        energy = np.abs(X)**2
        total_energy = np.sum(energy)
        if total_energy == 0:
            # silent frame: the distribution P is undefined (0 / 0)
            return 0.
        P = energy / total_energy
        # drop empty bins, otherwise 0 * log2(0) evaluates to NaN
        P = P[P > 0]
        H = -(np.sum(P * np.log2(P))) / np.log2(N)
        return H

    time_series = aggregate_features(stft, function)
    time_series.label = 'Spectral Entropy'
    time_series.unit = ''
    return time_series
コード例 #3
0
ファイル: features.py プロジェクト: cegeme/iracema
def spectral_energy(stft):
    """
    Compute the total energy of every frame of a STFT time series.

    The energy of a frame is the sum of the squared magnitudes of its bins.

    .. math:: \\operatorname{SE} = \\sum_{k=1}^{N} |X(k)|^2

    Where `X(k)` is the value passed for the `k-th` element of the frame.

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'Spectral Energy'``
        with an empty unit.
    """
    def _frame_energy(X):
        magnitude = np.abs(X)
        return np.sum(magnitude**2)

    energy = aggregate_features(stft, _frame_energy)
    energy.label, energy.unit = 'Spectral Energy', ''
    return energy
コード例 #4
0
ファイル: features.py プロジェクト: cegeme/iracema
def harmonic_energy(harmonics_magnitude):
    """
    Compute the energy of the harmonic partials, frame by frame.

    For each frame the values are squared and summed.

    .. math:: \\operatorname{HE} = \\sum_{k=1}^{H} A(k)^2

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'Harmonic Energy'``
        with an empty unit.
    """
    def _sum_of_squares(A):
        squared = A**2
        return np.sum(squared)

    energy = aggregate_features(harmonics_magnitude, _sum_of_squares)
    energy.label, energy.unit = 'Harmonic Energy', ''
    return energy
コード例 #5
0
ファイル: features.py プロジェクト: cegeme/iracema
def spectral_spread(stft):
    """
    Compute the spectral spread of each frame of a STFT time-series.

    The spectral spread represents the spread of the spectrum around the
    spectral centroid [Peeters2011]_, [Lerch2012]_.

    .. math:: \\operatorname{SSp} = \\sqrt{\\frac{\\sum_{k=1}^{N} |X(k)| \\cdot (f_k - SC)^2 }{\\sum_
       {k=1}^{N} |X (k)|}}

    Where `X(k)` is the result of the STFT for the `k-th` frequency bin and SC
    is the spectral centroid for the frame.

    The per-frame computation is delegated to ``_spectral_spread``, which
    receives the frame together with ``stft.frequencies``.

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'SpectralSpread'``
        with unit ``'Hz'``.
    """
    def _spread_of_frame(frame):
        frequencies = stft.frequencies
        return _spectral_spread(frame, frequencies)

    spread = aggregate_features(stft, _spread_of_frame)
    spread.label, spread.unit = 'SpectralSpread', 'Hz'
    return spread
コード例 #6
0
ファイル: features.py プロジェクト: cegeme/iracema
def oer(harmonics):
    """
    Calculate the odd-to-even ratio for the harmonics time series.

    The OER represents the odd-to-even ratio among the harmonics of an audio
    signal. This value will be higher for sounds with predominantly odd
    harmonics, such as the clarinet.

    .. math:: \\operatorname{OER}=\\frac{\\sum_{h=1}^{H / 2} A(2 h - 1)^{2}\\left(t_{m}\\right)}{\\sum_{h=1}^{H / 2} A(2 h)^{2}\\left(t_{m}\\right)}

    Where :math:`A(h)` represents the amplitude of the h-th harmonic partial.

    The energy of each group is the sum of the squared amplitudes (not the
    square of their sum). Frames whose even harmonics carry no energy yield
    ``0.`` to avoid a division by zero.

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'OER'`` with an
        empty unit.
    """
    def _func(A):
        # NOTE(review): assumes element 0 of the frame is the 1st harmonic, so
        # even indexes hold the odd harmonics -- confirm against the caller.
        squared = np.abs(A)**2
        odd_energy = np.sum(squared[::2])
        even_energy = np.sum(squared[1::2])
        if even_energy == 0:
            return 0.
        return odd_energy / even_energy

    time_series = aggregate_features(harmonics, _func)
    time_series.label = 'OER'
    time_series.unit = ''
    return time_series
コード例 #7
0
ファイル: features.py プロジェクト: cegeme/iracema
def harmonic_centroid(harmonics):
    """
    Calculate the harmonic centroid for the harmonics time series.

    The harmonic centroid represents the center of gravity of the amplitudes
    of the harmonic series, expressed as a (fractional) harmonic number.

    .. math::
       \\operatorname{HC} = \\frac{\\sum_{k=1}^{H} A(k) \\cdot k }{\\sum_{k=1}^{H} A(k)}

    Where :math:`A(k)` represents the amplitude of the k-th harmonic partial.

    Harmonic numbers are counted from 1, so a frame containing only the first
    partial yields 1. Frames without any amplitude yield 0.

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'HarmonicCentroid'``
        with unit ``'Harmonic Number'``.
    """
    def _func(A):
        abs_A = np.abs(A)
        sum_abs_A = np.sum(abs_A)
        if sum_abs_A == 0:
            return 0
        # 1-based weights: with 0-based weights the first partial would never
        # contribute and a pure fundamental would look like a silent frame
        harmonic_numbers = np.arange(1, len(A) + 1)
        return np.sum(abs_A * harmonic_numbers) / sum_abs_A

    time_series = aggregate_features(harmonics, _func)
    time_series.label = 'HarmonicCentroid'
    time_series.unit = 'Harmonic Number'
    return time_series
コード例 #8
0
ファイル: features.py プロジェクト: cegeme/iracema
def spectral_kurtosis(stft):
    """
    Calculate the spectral kurtosis for an STFT time series

    The spectral kurtosis is a measure of the flatness of the distribution of
    the spectrum around its mean value. It will output the value 3 for Gaussian
    distributions. Values smaller than 3 represent flatter distributions, while
    values larger than 3 represent peakier distributions [Lerch2012]_.

    .. math::
       \\operatorname{SKu} = \\frac{2 \\cdot \\sum_{k=1}^{N} \\left( |X(k)| - \\mu_{|X|} \\right)^4 }{
       N \\cdot \\sigma_{|X|}^4}

    Where :math:`\\mu_{|X|}` is the mean value of the magnitude spectrum and
    :math:`\\sigma_{|X|}` its standard deviation.

    Frames whose magnitude spectrum has zero standard deviation yield ``0.``
    to avoid a division by zero.

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'SpectralKurtosis'``
        with an empty unit.
    """
    def _func(X):
        # all statistics are taken over the magnitude spectrum
        magnitude = np.abs(X)
        sigma = np.std(magnitude)
        if sigma == 0:
            return 0.
        deviation = magnitude - np.mean(magnitude)
        # sum of the 4th powers; the sum of the raw deviations is always ~0
        return 2 * np.sum(deviation**4) / (len(X) * sigma**4)

    time_series = aggregate_features(stft, _func)
    time_series.label = 'SpectralKurtosis'
    time_series.unit = ''
    return time_series
コード例 #9
0
ファイル: features.py プロジェクト: cegeme/iracema
def spectral_skewness(stft):
    """
    Calculate the spectral skewness for an STFT time series

    The spectral skewness is a measure of the asymmetry of the distribution of
    the spectrum around its mean value, and is calculated from its third order
    moment. It will output negative values when the spectrum has more energy
    below the mean value, and positive values when it has more energy above
    the mean. Symmetric distributions will output the value zero [Lerch2012]_.

    .. math::
       \\operatorname{SSk} = \\frac{2 \\cdot \\sum_{k=1}^{N} \\left( |X(k)| - \\mu_{|X|} \\right)^3 }{
       N \\cdot \\sigma_{|X|}^3}

    Where :math:`\\mu_{|X|}` is the mean value of the magnitude spectrum and
    :math:`\\sigma_{|X|}` its standard deviation.

    Frames whose magnitude spectrum has zero standard deviation yield ``0.``
    to avoid a division by zero.

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'SpectralSkewness'``
        with an empty unit.
    """
    def _func(X):
        # all statistics are taken over the magnitude spectrum
        magnitude = np.abs(X)
        sigma = np.std(magnitude)
        if sigma == 0:
            return 0.
        deviation = magnitude - np.mean(magnitude)
        # sum of the cubes; the sum of the raw deviations is always ~0
        return 2 * np.sum(deviation**3) / (len(X) * sigma**3)

    time_series = aggregate_features(stft, _func)
    time_series.label = 'SpectralSkewness'
    time_series.unit = ''
    return time_series
コード例 #10
0
ファイル: features.py プロジェクト: cegeme/iracema
def hfc(stft, method='energy'):
    """
    Calculate the high frequency content for a STFT time-series.

    The HFC function produces sharp peaks during attacks or transients
    [Bello2005]_ and might be a good choice for detecting onsets in percussive
    sounds.

    .. math:: \\operatorname{HFC} = \\frac{1}{N} \\sum_{k=1}^{N} |X(k)|^2 \\cdot k

    Alternatively, you can set ``method`` = `'amplitude'` instead of `'energy'`
    (default value):

    .. math:: \\operatorname{HFC} = \\frac{1}{N} \\sum_{k=1}^{N} |X(k)| \\cdot k

    Args
    ----
    stft : iracema.spectral.STFT
        STFT time-series.
    method : str
        Method of choice to calculate the HFC.

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'HFC'`` with an
        empty unit.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'energy'`` nor ``'amplitude'``.
    """
    # validate once, up front, instead of silently producing None per frame
    if method not in ('energy', 'amplitude'):
        raise ValueError(
            "the argument `method` must be 'energy' or 'amplitude'")

    def _func(X):
        N = X.shape[0]
        W = np.arange(1, N + 1)  # 1-based bin index used as weight

        if method == 'energy':
            return np.sum(W * np.abs(X)**2) / N
        return np.sum(W * np.abs(X)) / N

    time_series = aggregate_features(stft, _func)
    time_series.label = 'HFC'
    time_series.unit = ''
    return time_series
コード例 #11
0
ファイル: features.py プロジェクト: cegeme/iracema
def spectral_flatness(stft):
    """
    Compute the spectral flatness of each frame of a given STFT.

    The spectral flatness gives an estimation of the noisiness / sinusoidality
    of an audio signal (for the whole spectrum or for a frequency range). It
    can be used to determine voiced / unvoiced parts of a signal [Park2004]_.

    It is computed, in decibels, from the ratio between the `geometric mean`
    and the `arithmetic mean` of the magnitude spectrum:

    .. math::
       :nowrap:

       \\begin{eqnarray}
       \\operatorname{SFM} = 10 log_{10} \\left( \\frac
         {\\left( \\prod_{k=1}^{N} |X(k)| \\right)^\\frac{1}{N}}
         { \\frac{1}{N} \\sum_{k=1}^{N} |X(k)| }
       \\right)
       \\end{eqnarray}

    Where `X(k)` is the result of the STFT for the `k-th` frequency bin.

    Args
    ----
    stft : iracema.spectral.STFT
        A STFT object

    Return
    ------
    time_series : TimeSeries
        The result of ``aggregate_features``, labelled ``'SpectralFlatness'``
        with an empty unit.
    """
    def _flatness_db(X):
        magnitudes = np.abs(X)
        geometric_mean = gmean(magnitudes)
        arithmetic_mean = np.mean(magnitudes)
        return 10 * np.log10(geometric_mean / arithmetic_mean)

    flatness = aggregate_features(stft, _flatness_db)
    flatness.label, flatness.unit = 'SpectralFlatness', ''
    return flatness
コード例 #12
0
def hps(fft_time_series,
        minf0,
        maxf0,
        n_downsampling=16,
        decimation='discard'):
    """
    Extract the pitch using Harmonic Product Spectrum.

    The Harmonic Product Spectrum measures the maximum coincidence for
    harmonics [Cuadra2001]_. It is based on successive downsampling operations
    on the frequency spectrum of the signal. If the signal contains harmonic
    components, then it should contain energy in the frequency positions
    corresponding to the integer multiples of the fundamental frequency. So by
    down-sampling the spectrum by increasing integer factors
    :math:`(1,2,3,...,R)` it is possible to align the energy of its harmonic
    components with the fundamental frequency of the signal.

    Then we multiply the original spectrum and its downsampled versions. This
    operation will make a strong peak appear in a position that corresponds to
    the fundamental frequency. The HPS calculates the maximum coincidence for
    harmonics, according to the equation:

    .. math:: Y(\\omega) = \\prod_{r=1}^{R} |X(\\omega r)|

    where :math:`X(\\omega r)` represents one spectral frame and :math:`R` is
    the number of harmonics to be considered in the calculation. After this
    calculation a simple peak detection algorithm is used to obtain the
    fundamental frequency of the frame.

    This implementation modifies this approach adding an offset of 1 to the
    magnitude spectrum of the signal before applying the product shown in the
    equation above. This makes the algorithm more reliable in situations where
    some harmonics have very little or no energy at all (float arithmetic is
    not reliable when values get too close to zero).

    Also, alternatively to the original approach, it is possible to choose
    between different interpolation methods, using the argument `decimation`.

    Args
    ----
    fft_time_series : iracema.spectral.FFTs
        FFT time series.
    minf0 : float
        Lower frequency limit to search for f0.
    maxf0 : float
        Upper frequency limit to search for f0.
    n_downsampling : int
        Number of downsampling operations
    decimation : 'discard', 'mean' or 'interpolation'
        Type of decimation operation to be performed.

    Return
    ------
    pitch : TimeSeries
        A pitch time series

    Raises
    ------
    ValueError
        If ``minf0`` is not smaller than ``maxf0``, or if ``decimation`` is
        not one of the supported values.

    References
    ----------
    .. [Cuadra2001] De La Cuadra, P. Efficient pitch detection techniques for
       interactive music. In ICMC, page 403–406, 2001.
    """
    if minf0 >= maxf0:
        raise ValueError('The parameter maxf0 must be greater than minf0.')
    # reject unknown modes here; inside the frame loop an unknown value would
    # leave `ds` unbound (or stale from a previous frame)
    if decimation not in ('discard', 'mean', 'interpolation'):
        raise ValueError(
            "The parameter decimation must be 'discard', 'mean' or "
            "'interpolation'.")

    n_bins = fft_time_series.data.shape[-2]
    # builtin int() replaces the np.int alias, which newer NumPy removed
    ix_maxf0 = int(np.ceil(
        (n_bins * maxf0) /
        fft_time_series.max_frequency))  # max index to search for f0
    ix_minf0 = int(np.ceil(
        (n_bins * minf0) /
        fft_time_series.max_frequency))  # min index to search for f0

    def calculate(X):
        """
        Calculate pitch for frame.
        """
        N = X.shape[-1]  # length of the FFT
        # matrix with the decimated FFTs, one row per downsampling factor
        S = np.zeros((n_downsampling + 1, N), dtype=np.float64)
        X_mag = np.abs(X)
        S[0, :] = X_mag
        for q in range(2, n_downsampling + 2):
            if decimation == 'mean':
                ds = decimate_mean(X_mag, q)
            elif decimation == 'interpolation':
                ds = sig.decimate(X_mag, q)
            else:  # 'discard'
                ds = X_mag[::q]
            S[q - 1, :ds.shape[-1]] = ds / q

        S = S + 1
        # NOTE(review): the rows are combined with a sum (and each decimated
        # spectrum is weighted by 1/q), whereas the docstring describes a
        # product -- confirm which one is intended before changing it.
        p_fzero = np.sum(S, axis=0)
        ix_f0 = ix_minf0 + np.argmax(p_fzero[ix_minf0:ix_maxf0])

        return fft_time_series.frequencies[ix_f0]

    pitch_time_series = aggregate_features(fft_time_series, calculate)
    pitch_time_series.label = 'Pitch (HPS)'
    pitch_time_series.unit = 'Hz'

    return pitch_time_series
コード例 #13
0
def expan(fft_time_series,
          minf0=24.,
          maxf0=4200.,
          nharm=12,
          ncand=5,
          min_mag_cand=0.1,
          noisiness_tresh=0.99,
          perc_tol=0.04):
    """
    Extract the pitch using the Expan pitch detection algorithm.

    For every frame, up to ``ncand`` of the highest peaks between ``minf0``
    and ``maxf0`` are taken as f0 candidates. For each candidate that is
    strong enough (``min_mag_cand`` times the frame maximum), the magnitudes
    found around its integer multiples are collected. The candidate with the
    highest harmonic energy wins, provided that the resulting frame noisiness
    is below ``noisiness_tresh``; otherwise the pitch of the frame is 0.

    Args
    ----
    fft_time_series : iracema.spectral.FFTs
        FFT time series.
    minf0 : float
        Lower frequency limit to search for f0.
    maxf0 : float
        Upper frequency limit to search for f0.
    nharm : int
        Number of harmonics to be considered (at least 3).
    ncand : int
        Number of f0 candidate components to be used.
    min_mag_cand : float
        Minimum magnitude of the candidate to be chosen as f0, relative to the
        maximum magnitude of the frame.
    noisiness_tresh : float
        Noisiness threshold.
    perc_tol : float
        Tolerance percentage to search for harmonics.

    Return
    ------
    pitch : TimeSeries
        A pitch time series

    Raises
    ------
    ValueError
        If ``minf0`` is not smaller than ``maxf0``, or if ``nharm`` is
        smaller than 3.
    """
    if minf0 >= maxf0:
        raise ValueError('The parameter maxf0 must be greater than minf0.')
    if nharm < 3:
        raise ValueError('The parameter nharm must be at least 3.')

    n_bins = fft_time_series.data.shape[-2]
    # builtin int() replaces the np.int alias, which newer NumPy removed
    ix_maxf0 = int(np.ceil(
        (n_bins * maxf0) /
        fft_time_series.max_frequency))  # max index to search for f0
    ix_minf0 = int(np.ceil(
        (n_bins * minf0) /
        fft_time_series.max_frequency))  # min index to search for f0

    def frame_pitch(fft_frame):
        """
        Extract the pitch for a fft frame.
        """
        N = fft_frame.shape[-1]  # length of the FFT
        fft_frame_mag = np.abs(fft_frame)

        # searching for f0 candidates
        _, ix_cand = n_highest_peaks(fft_frame_mag[ix_minf0:ix_maxf0], ncand)
        ix_cand = ix_cand + ix_minf0  # TODO: check these indexes

        # the number of candidates actually found may be smaller than ncand
        n_cand = len(ix_cand)

        # if no peaks were found, there is no pitch for this frame
        if not ix_cand.any():
            return 0

        # minimum magnitude a candidate needs (same value for all candidates)
        min_mag = min_mag_cand * np.max(fft_frame_mag)

        # memory allocation
        cand_mag = np.zeros((ncand, nharm))
        ix_cand_harm = np.zeros((ncand, nharm))

        # iterate the candidates
        for i, ix_f0 in enumerate(ix_cand):
            delta = ix_f0 * perc_tol
            cand_mag[i, 0] = fft_frame_mag[ix_f0]

            # test the candidate's magnitude compared to the frame's max
            if cand_mag[i, 0] > min_mag:
                ix_cand_harm[i, 0] = ix_f0

                # iterate the candidate's potential harmonics
                for j in range(1, nharm):
                    ix_f_harm = ix_f0 * (j + 1)

                    # stop the loop if the frequency index is out of bound
                    if ix_f_harm >= N:
                        break

                    # set the tolerance margin to search for the harmonic
                    ix_tol_low = max(int(ix_f_harm - delta), ix_minf0)
                    ix_tol_hi = int(ix_f_harm + delta)

                    # search for highest peak within the tolerance margin
                    val_harm, ix_harm = n_highest_peaks(
                        fft_frame_mag[ix_tol_low:ix_tol_hi], 1)
                    ix_harm = ix_harm + ix_tol_low

                    # NOTE(review): relies on the truthiness of the value
                    # returned by n_highest_peaks -- confirm that it behaves
                    # as intended when no peak is found.
                    if ix_harm:
                        cand_mag[i, j] = val_harm
                        ix_cand_harm[i, j] = ix_harm
                    else:
                        # no peak: fall back to the exact multiple of f0
                        cand_mag[i, j] = fft_frame_mag[ix_f_harm]
                        ix_cand_harm[i, j] = ix_f_harm

        energy_harm = np.zeros(n_cand)

        # calculate the energy of the harmonic components for each candidate
        for i in range(n_cand):
            energy_harm[i] = np.sum(cand_mag[i, :]**2.)

        # choose the candidate with the highest harmonic energy
        i = np.argmax(energy_harm)

        # one last test, the noisiness for the winner candidate must be below
        # the noisiness threshold
        h_energy = energy_harm[i]
        frame_energy = np.sum(fft_frame_mag**2.)
        frame_noisiness = 1. - (h_energy / frame_energy)

        if frame_noisiness < noisiness_tresh:
            # convert the winning bin index to a frequency
            return ix_cand_harm[i, 0] * fft_time_series.max_frequency / N
        return 0.

    pitch_time_series = aggregate_features(fft_time_series, frame_pitch)

    pitch_time_series.label = 'Pitch (Expan)'
    pitch_time_series.unit = 'Hz'

    return pitch_time_series