def hpcpgram(audio, sampleRate=44100, frameSize=4096, hopSize=2048, numBins=12, windowType='blackmanharris62', minFrequency=100, maxFrequency=4000, whitening=False, maxPeaks=100, magnitudeThreshold=1e-05, **kwargs):
    """Compute a Harmonic Pitch Class Profile (HPCP) gram over overlapping frames.

    For the full parameter list of the essentia standard-mode HPCP algorithm see
    http://essentia.upf.edu/documentation/reference/std_HPCP.html

    References:
        [1] Gómez, E. (2006). Tonal Description of Polyphonic Audio for
        Music Content Processing.

    Parameters
    ----------
    audio : array-like
        Input audio signal.
    sampleRate : real, default 44100
        Sampling rate of the audio signal [Hz].
    frameSize : int, default 4096
        Analysis frame size.
    hopSize : int, default 2048
        Hop size between consecutive frames.
    numBins : int, default 12
        Size of the output HPCP (positive nonzero multiple of 12).
    windowType : str, default 'blackmanharris62'
        Window applied before the FFT.
    minFrequency, maxFrequency : real
        Frequency range [Hz] used by both SpectralPeaks and HPCP.
    whitening : bool, default False
        If True, apply spectral whitening to the peak magnitudes before HPCP.
    maxPeaks : int, default 100
        Maximum number of spectral peaks per frame.
    magnitudeThreshold : real, default 1e-05
        Peaks below this magnitude are discarded.
    **kwargs
        Extra keyword arguments forwarded to the HPCP algorithm.

    Returns
    -------
    2D vector: one HPCP row per analysed frame.
    """
    frames = es.FrameGenerator(array(audio), frameSize=frameSize, hopSize=hopSize)
    win = es.Windowing(type=windowType)
    spec = es.Spectrum()
    # http://essentia.upf.edu/documentation/reference/std_SpectralPeaks.html
    peaks = es.SpectralPeaks(magnitudeThreshold=magnitudeThreshold,
                             maxFrequency=maxFrequency,
                             minFrequency=minFrequency,
                             maxPeaks=maxPeaks,
                             sampleRate=sampleRate)
    # http://essentia.upf.edu/documentation/reference/std_SpectralWhitening.html
    whiten = es.SpectralWhitening(maxFrequency=maxFrequency,
                                  sampleRate=sampleRate)
    # http://essentia.upf.edu/documentation/reference/std_HPCP.html
    compute_hpcp = es.HPCP(sampleRate=sampleRate,
                           maxFrequency=maxFrequency,
                           minFrequency=minFrequency,
                           size=numBins,
                           **kwargs)
    pool = Pool()
    # Accumulate one HPCP vector per frame into the pool.
    for frame in frames:
        mag_spectrum = spec(win(frame))
        freqs, mags = peaks(mag_spectrum)
        if whitening:
            mags = whiten(mag_spectrum, freqs, mags)
        pool.add('tonal.hpcp', compute_hpcp(freqs, mags))
    return pool['tonal.hpcp']
analysis_files.remove('.DS_Store') print len(analysis_files), '\nsongs in folder.\n' groundtruth_files = os.listdir(groundtruth_folder) if '.DS_Store' in groundtruth_files: groundtruth_files.remove('.DS_Store') # ANALYSIS # ======== song_chromas = [] for item in analysis_files: loader = estd.MonoLoader(filename=audio_folder + '/' + item, sampleRate=sample_rate) cut = estd.FrameCutter(frameSize=window_size, hopSize=hop_size) window = estd.Windowing(size=window_size, type=window_type) rfft = estd.Spectrum(size=window_size) sw = estd.SpectralWhitening(maxFrequency=max_frequency, sampleRate=sample_rate) speaks = estd.SpectralPeaks(magnitudeThreshold=magnitude_threshold, maxFrequency=max_frequency, minFrequency=min_frequency, maxPeaks=max_peaks, sampleRate=sample_rate) hpcp = estd.HPCP(bandPreset=band_preset, harmonics=harmonics, maxFrequency=max_frequency, minFrequency=min_frequency, nonLinear=non_linear, normalized=normalize, referenceFrequency=reference_frequency, sampleRate=sample_rate, size=hpcp_size, splitFrequency=split_frequency,
def chroma_hpcp(self, frameSize=4096, hopSize=2048, windowType='blackmanharris62', harmonicsPerPeak=8, magnitudeThreshold=1e-05, maxPeaks=1000, whitening=True, referenceFrequency=440, minFrequency=40, maxFrequency=5000, nonLinear=False, numBins=12, display=False):
    '''Compute Harmonic Pitch Class Profiles (HPCP) for the input audio
    using essentia standard mode with the default parameters of [1].

    [1] Gómez, E. (2006). Tonal Description of Polyphonic Audio for Music
    Content Processing.

    For the full parameter list of essentia standard-mode HPCP see
    http://essentia.upf.edu/documentation/reference/std_HPCP.html

    Parameters
    ----------
    harmonicsPerPeak : int, default 8
        Number of harmonics for frequency contribution (0 = fundamental only).
    magnitudeThreshold : real, default 1e-05
        Spectral peaks below this magnitude are discarded.
    maxFrequency / minFrequency : real
        Frequency range contributing to the HPCP [Hz].
    nonLinear : bool, default False
        Apply non-linear post-processing (use with normalized='unitMax').
    referenceFrequency : real, default 440
        Reference frequency for semitone index calculation [Hz].
    numBins : int, default 12
        Size of the output HPCP (positive nonzero multiple of 12).
    whitening : bool, default True
        Apply spectral whitening to the peak magnitudes before HPCP.
    display : bool, default False
        If True, plot the resulting chromagram.

    Returns
    -------
    2D vector: one HPCP row per analysed frame.
    '''
    audio = array(self.audio_vector)
    frameGenerator = estd.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
    window = estd.Windowing(type=windowType)
    spectrum = estd.Spectrum()
    # Refer http://essentia.upf.edu/documentation/reference/std_SpectralPeaks.html
    # FIX: honour the magnitudeThreshold parameter (was hard-coded to 0,
    # silently ignoring the caller's setting).
    spectralPeaks = estd.SpectralPeaks(magnitudeThreshold=magnitudeThreshold,
                                       maxFrequency=maxFrequency,
                                       minFrequency=minFrequency,
                                       maxPeaks=maxPeaks,
                                       orderBy="frequency",
                                       sampleRate=self.fs)
    # http://essentia.upf.edu/documentation/reference/std_SpectralWhitening.html
    spectralWhitening = estd.SpectralWhitening(maxFrequency=maxFrequency,
                                               sampleRate=self.fs)
    # http://essentia.upf.edu/documentation/reference/std_HPCP.html
    hpcp = estd.HPCP(sampleRate=self.fs,
                     maxFrequency=maxFrequency,
                     minFrequency=minFrequency,
                     referenceFrequency=referenceFrequency,
                     nonLinear=nonLinear,
                     harmonics=harmonicsPerPeak,
                     size=numBins)
    pool = Pool()
    # Compute an HPCP vector for each frame and add the results to the pool.
    for frame in frameGenerator:
        spectrum_mag = spectrum(window(frame))
        frequencies, magnitudes = spectralPeaks(spectrum_mag)
        if whitening:
            w_magnitudes = spectralWhitening(spectrum_mag, frequencies, magnitudes)
            hpcp_vector = hpcp(frequencies, w_magnitudes)
        else:
            hpcp_vector = hpcp(frequencies, magnitudes)
        pool.add('tonal.hpcp', hpcp_vector)
    if display:
        # FIX: the swap axes belong inside the np.swapaxes call; the original
        # `np.swapaxes(pool['tonal.hpcp']), 0, 1` raised a TypeError.
        display_chroma(np.swapaxes(pool['tonal.hpcp'], 0, 1))
    return pool['tonal.hpcp']
def key_aes(input_audio_file, output_text_file, **kwargs):
    """Estimate the overall key of an audio track, optionally with extra
    modal information, and write the result to a text file.

    :type input_audio_file: str
    :type output_text_file: str
    :param kwargs: analysis settings; falls back to KEY_SETTINGS when empty.
    :returns: (key string, correlation value), or the string 'Silence' when
        no frame produced energy.
    """
    if not kwargs:
        kwargs = KEY_SETTINGS
    # Instantiate the essentia analysis chain.
    loader = estd.MonoLoader(filename=input_audio_file,
                             sampleRate=kwargs["SAMPLE_RATE"])
    cut = estd.FrameCutter(frameSize=kwargs["WINDOW_SIZE"],
                           hopSize=kwargs["HOP_SIZE"])
    window = estd.Windowing(size=kwargs["WINDOW_SIZE"],
                            type=kwargs["WINDOW_SHAPE"])
    rfft = estd.Spectrum(size=kwargs["WINDOW_SIZE"])
    sw = estd.SpectralWhitening(maxFrequency=kwargs["MAX_HZ"],
                                sampleRate=kwargs["SAMPLE_RATE"])
    speaks = estd.SpectralPeaks(
        magnitudeThreshold=kwargs["SPECTRAL_PEAKS_THRESHOLD"],
        maxFrequency=kwargs["MAX_HZ"],
        minFrequency=kwargs["MIN_HZ"],
        maxPeaks=kwargs["SPECTRAL_PEAKS_MAX"],
        sampleRate=kwargs["SAMPLE_RATE"])
    hpcp = estd.HPCP(bandPreset=kwargs["HPCP_BAND_PRESET"],
                     splitFrequency=kwargs["HPCP_SPLIT_HZ"],
                     harmonics=kwargs["HPCP_HARMONICS"],
                     maxFrequency=kwargs["MAX_HZ"],
                     minFrequency=kwargs["MIN_HZ"],
                     nonLinear=kwargs["HPCP_NON_LINEAR"],
                     normalized=kwargs["HPCP_NORMALIZE"],
                     referenceFrequency=kwargs["HPCP_REFERENCE_HZ"],
                     sampleRate=kwargs["SAMPLE_RATE"],
                     size=kwargs["HPCP_SIZE"],
                     weightType=kwargs["HPCP_WEIGHT_TYPE"],
                     windowSize=kwargs["HPCP_WEIGHT_WINDOW_SEMITONES"],
                     maxShifted=kwargs["HPCP_SHIFT"])
    audio = loader()
    if kwargs["HIGHPASS_CUTOFF"] is not None:
        hpf = estd.HighPass(cutoffFrequency=kwargs["HIGHPASS_CUTOFF"],
                            sampleRate=kwargs["SAMPLE_RATE"])
        # Triple pass for a steeper effective roll-off.
        audio = hpf(hpf(hpf(audio)))
    if kwargs["DURATION"] is not None:
        audio = audio[(kwargs["START_TIME"] * kwargs["SAMPLE_RATE"]):(kwargs["DURATION"] * kwargs["SAMPLE_RATE"])]
    duration = len(audio)
    number_of_frames = int(duration / kwargs["HOP_SIZE"])
    chroma = []
    for bang in range(number_of_frames):
        spek = rfft(window(cut(audio)))
        p1, p2 = speaks(spek)  # p1 = frequencies; p2 = magnitudes
        if kwargs["SPECTRAL_WHITENING"]:
            p2 = sw(spek, p1, p2)
        pcp = hpcp(p1, p2)
        # Only keep frames with some spectral energy.
        if np.sum(pcp) > 0:
            if not kwargs["DETUNING_CORRECTION"] or kwargs[
                    "DETUNING_CORRECTION_SCOPE"] == 'average':
                chroma.append(pcp)
            elif kwargs["DETUNING_CORRECTION"] and kwargs[
                    "DETUNING_CORRECTION_SCOPE"] == 'frame':
                pcp = _detuning_correction(pcp, kwargs["HPCP_SIZE"])
                chroma.append(pcp)
            else:
                # FIX: corrected "musts" typo in the error message.
                raise NameError(
                    "SHIFT_SCOPE must be set to 'frame' or 'average'.")
    if not chroma:
        return 'Silence'
    chroma = np.sum(chroma, axis=0)
    chroma = norm_peak(chroma)
    if kwargs["PCP_THRESHOLD"] is not None:
        chroma = vector_threshold(chroma, kwargs["PCP_THRESHOLD"])
    if kwargs["DETUNING_CORRECTION"] and kwargs[
            "DETUNING_CORRECTION_SCOPE"] == 'average':
        chroma = _detuning_correction(chroma, kwargs["HPCP_SIZE"])
    # Adjust to essentia's HPCP calculation starting on A (pc = 9).
    chroma = np.roll(chroma, -3 * (kwargs["HPCP_SIZE"] // 12))
    estimation_1 = estimate_key(chroma,
                                kwargs["KEY_PROFILE"],
                                kwargs["PROFILE_INTERPOLATION"],
                                conf_thres=kwargs["NOKEY_THRESHOLD"],
                                vocabulary=kwargs["KEY_VOCABULARY"])
    key_1 = estimation_1[0]
    correlation_value = estimation_1[1]
    if kwargs["WITH_MODAL_DETAILS"]:
        estimation_2 = _key7(chroma, kwargs["PROFILE_INTERPOLATION"])
        key_2 = estimation_2[0] + '\t' + estimation_2[1]
        key_verbose = key_1 + '\t' + key_2
        key = key_verbose.split('\t')
        # Assign monotonic track to minor:
        if key[3] == 'monotonic' and key[0] == key[2]:
            key = '{0}\tminor'.format(key[0])
        else:
            key = key_1
    else:
        key = key_1
    # FIX: use a context manager so the file handle is always released.
    with open(output_text_file, 'w') as textfile:
        textfile.write(key)
    return key, correlation_value
def estimate_key(input_audio_file, output_text_file=None, key_profile=None):
    """Estimate the overall key of an audio track, optionally with extra
    modal information.

    :type input_audio_file: str
    :type output_text_file: str
    :param key_profile: when given, overrides the module-level KEY_PROFILE
        and disables three-profile matching and modal details.
    :returns: (key string, correlation value)
    """
    if key_profile is not None:
        # NOTE: mutates module-level configuration for the rest of the run.
        global USE_THREE_PROFILES
        global WITH_MODAL_DETAILS
        global KEY_PROFILE
        KEY_PROFILE = key_profile
        USE_THREE_PROFILES = False
        WITH_MODAL_DETAILS = False
    loader = estd.MonoLoader(filename=input_audio_file,
                             sampleRate=SAMPLE_RATE)
    cut = estd.FrameCutter(frameSize=WINDOW_SIZE, hopSize=HOP_SIZE)
    window = estd.Windowing(size=WINDOW_SIZE, type=WINDOW_SHAPE)
    rfft = estd.Spectrum(size=WINDOW_SIZE)
    sw = estd.SpectralWhitening(maxFrequency=MAX_HZ, sampleRate=SAMPLE_RATE)
    speaks = estd.SpectralPeaks(magnitudeThreshold=SPECTRAL_PEAKS_THRESHOLD,
                                maxFrequency=MAX_HZ,
                                minFrequency=MIN_HZ,
                                maxPeaks=SPECTRAL_PEAKS_MAX,
                                sampleRate=SAMPLE_RATE)
    hpcp = estd.HPCP(
        bandPreset=HPCP_BAND_PRESET,
        #bandSplitFrequency=HPCP_SPLIT_HZ,
        harmonics=HPCP_HARMONICS,
        maxFrequency=MAX_HZ,
        minFrequency=MIN_HZ,
        nonLinear=HPCP_NON_LINEAR,
        normalized=HPCP_NORMALIZE,
        referenceFrequency=HPCP_REFERENCE_HZ,
        sampleRate=SAMPLE_RATE,
        size=HPCP_SIZE,
        weightType=HPCP_WEIGHT_TYPE,
        windowSize=HPCP_WEIGHT_WINDOW_SEMITONES,
        maxShifted=HPCP_SHIFT)
    if HIGHPASS_CUTOFF is not None:
        hpf = estd.HighPass(cutoffFrequency=HIGHPASS_CUTOFF,
                            sampleRate=SAMPLE_RATE)
        # Triple pass for a steeper effective roll-off.
        audio = hpf(hpf(hpf(loader())))
    else:
        audio = loader()
    duration = len(audio)
    n_slices = 1 + (duration // HOP_SIZE)
    chroma = np.empty([n_slices, HPCP_SIZE], dtype='float64')
    for slice_n in range(n_slices):
        spek = rfft(window(cut(audio)))
        p1, p2 = speaks(spek)
        if SPECTRAL_WHITENING:
            p2 = sw(spek, p1, p2)
        pcp = hpcp(p1, p2)
        if not DETUNING_CORRECTION or DETUNING_CORRECTION_SCOPE == 'average':
            chroma[slice_n] = pcp
        elif DETUNING_CORRECTION and DETUNING_CORRECTION_SCOPE == 'frame':
            pcp = shift_pcp(pcp, HPCP_SIZE)
            chroma[slice_n] = pcp
        else:
            raise NameError("SHIFT_SCOPE must be set to 'frame' or 'average'.")
    chroma = np.sum(chroma, axis=0)
    if PCP_THRESHOLD is not None:
        chroma = normalize_pcp_peak(chroma)
        chroma = pcp_gate(chroma, PCP_THRESHOLD)
    if DETUNING_CORRECTION and DETUNING_CORRECTION_SCOPE == 'average':
        chroma = shift_pcp(chroma, HPCP_SIZE)
    chroma = np.roll(
        chroma, -3)  # Adjust to essentia's HPCP calculation starting on A...
    if USE_THREE_PROFILES:
        estimation_1 = template_matching_3(chroma, KEY_PROFILE)
    else:
        estimation_1 = template_matching_2(chroma, KEY_PROFILE)
    key_1 = estimation_1[0] + '\t' + estimation_1[1]
    correlation_value = estimation_1[2]
    if WITH_MODAL_DETAILS:
        estimation_2 = template_matching_modal(chroma)
        key_2 = estimation_2[0] + '\t' + estimation_2[1]
        key_verbose = key_1 + '\t' + key_2
        key = key_verbose.split('\t')
        # Assign monotonic tracks to minor:
        if key[3] == 'monotonic' and key[0] == key[2]:
            key = '{0}\tminor'.format(key[0])
        else:
            key = key_1
    else:
        key = key_1
    if output_text_file is not None:
        # FIX: use a context manager so the file handle is always released.
        with open(output_text_file, 'w') as textfile:
            textfile.write(key + '\t' + str(correlation_value) + '\n')
    return key, correlation_value
def key_ecir(input_audio_file, output_text_file, **kwargs):
    """Estimate the overall key of an audio track with essentia's Key
    algorithm and write the result to a text file.

    :type input_audio_file: str
    :type output_text_file: str
    :param kwargs: analysis settings; falls back to KEY_SETTINGS when empty.
    :returns: (key string, (strength, first-to-second relative strength))
    """
    if not kwargs:
        kwargs = KEY_SETTINGS
    # Instantiate the essentia analysis chain.
    loader = estd.MonoLoader(filename=input_audio_file,
                             sampleRate=kwargs["SAMPLE_RATE"])
    cut = estd.FrameCutter(frameSize=kwargs["WINDOW_SIZE"],
                           hopSize=kwargs["HOP_SIZE"])
    window = estd.Windowing(size=kwargs["WINDOW_SIZE"],
                            type=kwargs["WINDOW_SHAPE"])
    rfft = estd.Spectrum(size=kwargs["WINDOW_SIZE"])
    sw = estd.SpectralWhitening(maxFrequency=kwargs["MAX_HZ"],
                                sampleRate=kwargs["SAMPLE_RATE"])
    speaks = estd.SpectralPeaks(
        magnitudeThreshold=kwargs["SPECTRAL_PEAKS_THRESHOLD"],
        maxFrequency=kwargs["MAX_HZ"],
        minFrequency=kwargs["MIN_HZ"],
        maxPeaks=kwargs["SPECTRAL_PEAKS_MAX"],
        sampleRate=kwargs["SAMPLE_RATE"])
    hpcp = estd.HPCP(bandPreset=kwargs["HPCP_BAND_PRESET"],
                     splitFrequency=kwargs["HPCP_SPLIT_HZ"],
                     harmonics=kwargs["HPCP_HARMONICS"],
                     maxFrequency=kwargs["MAX_HZ"],
                     minFrequency=kwargs["MIN_HZ"],
                     nonLinear=kwargs["HPCP_NON_LINEAR"],
                     normalized=kwargs["HPCP_NORMALIZE"],
                     referenceFrequency=kwargs["HPCP_REFERENCE_HZ"],
                     sampleRate=kwargs["SAMPLE_RATE"],
                     size=kwargs["HPCP_SIZE"],
                     weightType=kwargs["HPCP_WEIGHT_TYPE"],
                     windowSize=kwargs["HPCP_WEIGHT_WINDOW_SEMITONES"],
                     maxShifted=kwargs["HPCP_SHIFT"])
    key = estd.Key(numHarmonics=kwargs["KEY_HARMONICS"],
                   pcpSize=kwargs["HPCP_SIZE"],
                   profileType=kwargs["KEY_PROFILE"],
                   slope=kwargs["KEY_SLOPE"],
                   usePolyphony=kwargs["KEY_POLYPHONY"],
                   useThreeChords=kwargs["KEY_USE_THREE_CHORDS"])
    audio = loader()
    if kwargs["HIGHPASS_CUTOFF"] is not None:
        hpf = estd.HighPass(cutoffFrequency=kwargs["HIGHPASS_CUTOFF"],
                            sampleRate=kwargs["SAMPLE_RATE"])
        # Triple pass for a steeper effective roll-off.
        audio = hpf(hpf(hpf(audio)))
    if kwargs["DURATION"] is not None:
        audio = audio[(kwargs["START_TIME"] * kwargs["SAMPLE_RATE"]):(kwargs["DURATION"] * kwargs["SAMPLE_RATE"])]
    duration = len(audio)
    number_of_frames = int(duration / kwargs["HOP_SIZE"])
    chroma = []
    for bang in range(number_of_frames):
        spek = rfft(window(cut(audio)))
        p1, p2 = speaks(spek)  # p1 = frequencies; p2 = magnitudes
        if kwargs["SPECTRAL_WHITENING"]:
            p2 = sw(spek, p1, p2)
        vector = hpcp(p1, p2)
        sum_vector = np.sum(vector)
        # Only keep frames with some spectral energy.
        if sum_vector > 0:
            # FIX: idiomatic truthiness test instead of `== False`.
            if not kwargs["DETUNING_CORRECTION"] or kwargs[
                    "DETUNING_CORRECTION_SCOPE"] == 'average':
                chroma.append(vector)
            elif kwargs["DETUNING_CORRECTION"] and kwargs[
                    "DETUNING_CORRECTION_SCOPE"] == 'frame':
                vector = _detuning_correction(vector, kwargs["HPCP_SIZE"])
                chroma.append(vector)
            else:
                print("SHIFT_SCOPE must be set to 'frame' or 'average'")
    chroma = np.mean(chroma, axis=0)
    if kwargs["DETUNING_CORRECTION"] and kwargs[
            "DETUNING_CORRECTION_SCOPE"] == 'average':
        chroma = _detuning_correction(chroma, kwargs["HPCP_SIZE"])
    key = key(chroma.tolist())
    confidence = (key[2], key[3])
    key = key[0] + '\t' + key[1]
    # FIX: use a context manager so the file handle is always released.
    with open(output_text_file, 'w') as textfile:
        textfile.write(key + '\n')
    return key, confidence
def hpcp(self, frameSize=4096, windowType='blackmanharris62', harmonicsPerPeak=8, magnitudeThreshold=0, maxPeaks=100, whitening=True, referenceFrequency=440, minFrequency=100, maxFrequency=3500, nonLinear=False, numBins=12, display=False):
    """Compute Harmonic Pitch Class Profiles (HPCP) for the loaded audio
    using essentia standard mode with the default parameters of [1].

    [1] Gómez, E. (2006). Tonal Description of Polyphonic Audio for Music
    Content Processing.

    For the full parameter list of essentia standard-mode HPCP see
    http://essentia.upf.edu/documentation/reference/std_HPCP.html

    Returns
    hpcp: ndarray(n_frames, 12)
        The HPCP coefficients at each time frame
    """
    signal = array(self.audio_vector)
    frames = estd.FrameGenerator(signal, frameSize=frameSize,
                                 hopSize=self.hop_length)
    win = estd.Windowing(type=windowType)
    spec = estd.Spectrum()
    # Refer http://essentia.upf.edu/documentation/reference/streaming_SpectralPeaks.html
    find_peaks = estd.SpectralPeaks(magnitudeThreshold=magnitudeThreshold,
                                    maxFrequency=maxFrequency,
                                    minFrequency=minFrequency,
                                    maxPeaks=maxPeaks,
                                    orderBy="frequency",
                                    sampleRate=self.fs)
    # http://essentia.upf.edu/documentation/reference/streaming_SpectralWhitening.html
    whiten = estd.SpectralWhitening(maxFrequency=maxFrequency,
                                    sampleRate=self.fs)
    # http://essentia.upf.edu/documentation/reference/streaming_HPCP.html
    compute_hpcp = estd.HPCP(sampleRate=self.fs,
                             maxFrequency=maxFrequency,
                             minFrequency=minFrequency,
                             referenceFrequency=referenceFrequency,
                             nonLinear=nonLinear,
                             harmonics=harmonicsPerPeak,
                             size=numBins)
    pool = Pool()
    # Accumulate one HPCP vector per frame into the pool.
    for frame in frames:
        mag_spectrum = spec(win(frame))
        freqs, mags = find_peaks(mag_spectrum)
        if whitening:
            mags = whiten(mag_spectrum, freqs, mags)
        pool.add('tonal.hpcp', compute_hpcp(freqs, mags))
    if display:
        display_chroma(pool['tonal.hpcp'].T, self.hop_length)
    return pool['tonal.hpcp']
def get_spectral_info(frame):
    """Return the magnitude spectrum of a single frame together with its
    spectral peak frequencies and whitened peak magnitudes."""
    compute_spectrum = es.Spectrum(size=samples_per_frame)
    find_peaks = es.SpectralPeaks(**peak_params)
    whiten = es.SpectralWhitening()
    spectrum = compute_spectrum(frame)
    freqs, mags = find_peaks(spectrum)
    mags = whiten(spectrum, freqs, mags)
    return spectrum, freqs, mags
def extractFeatures(audio_data):
    """Compute spectral features of a real-valued audio signal and return
    them aggregated in an essentia Pool().

    :param audio_data: 1-D numpy float array containing the audio signal.
    :returns: essentia Pool() with spectrum, spectral peaks, pitch,
        harmonic peaks, inharmonicity, contrast and whitening features.
    """
    from numpy import ndarray
    assert (type(audio_data) is ndarray)
    assert ("float" in str(audio_data.dtype))
    # Initialise the output Pool().
    output_pool = es.Pool()
    # Compute the signal spectrum.
    output_pool.set(pk_spectrum, es_mode.Spectrum()(audio_data))
    # Compute EnergyBandRatio.
    energy_band_ratio = es_mode.EnergyBandRatio()(output_pool[pk_spectrum])
    output_pool.set(pk_energy_band_ratio, energy_band_ratio)
    # Compute MaxMagFreq.
    max_mag_freq = es_mode.MaxMagFreq()(output_pool[pk_spectrum])
    output_pool.set(pk_max_mag_freq, max_mag_freq)
    # Compute SpectralCentroidTime.
    spectral_centroid_time = es_mode.SpectralCentroidTime()(audio_data)
    output_pool.set(pk_spectral_centroid_time, spectral_centroid_time)
    # Compute SpectralComplexity.
    spectral_complexity = es_mode.SpectralComplexity()(
        output_pool[pk_spectrum])
    output_pool.set(pk_spectral_complexity, spectral_complexity)
    # Compute StrongPeak.
    strong_peak = es_mode.StrongPeak()(output_pool[pk_spectrum])
    output_pool.set(pk_strong_peak, strong_peak)
    # Compute SpectralPeaks.
    sp_freq, sp_mag = es_mode.SpectralPeaks()(output_pool[pk_spectrum])
    # Drop the DC component, if present, as requested by HarmonicPeaks.
    # FIX: guard against an empty peak list before indexing element 0.
    if len(sp_freq) > 0 and sp_freq[0] == 0:
        sp_freq = sp_freq[1:]
        sp_mag = sp_mag[1:]
    output_pool.set(pk_spectral_peaks_freq, sp_freq)
    output_pool.set(pk_spectral_peaks_mag, sp_mag)
    ######################################
    #        For Inharmonicity           #
    ######################################
    # Compute PitchYinFFT.
    pitch_yin_fft, pitch_prob_yin_fft = es_mode.PitchYinFFT()(
        output_pool[pk_spectrum])
    output_pool.set(pk_pitch, pitch_yin_fft)
    output_pool.set(pk_pitch_prob, pitch_prob_yin_fft)
    # Compute HarmonicPeaks.
    hp_freq, hp_mag = es_mode.HarmonicPeaks()(output_pool[pk_spectral_peaks_freq],
                                              output_pool[pk_spectral_peaks_mag],
                                              output_pool[pk_pitch])
    output_pool.set(pk_harmonic_peaks_freq, hp_freq)
    output_pool.set(pk_harmonic_peaks_mag, hp_mag)
    # Compute Inharmonicity.
    inharmonicity = es_mode.Inharmonicity()(output_pool[pk_harmonic_peaks_freq],
                                            output_pool[pk_harmonic_peaks_mag])
    output_pool.set(pk_inharmonicity, inharmonicity)
    # End of Inharmonicity ##################################
    # Compute SpectralContrast; frame size recovered from spectrum length.
    frame_size = 2 * (output_pool[pk_spectrum].size - 1)
    spectral_contrast, spectral_valley = \
        es_mode.SpectralContrast(frameSize=frame_size)(output_pool[pk_spectrum])
    output_pool.set(pk_spectral_contrast, spectral_contrast)
    output_pool.set(pk_spectral_valley, spectral_valley)
    # Compute SpectralWhitening.
    spectral_whitening = \
        es_mode.SpectralWhitening()(output_pool[pk_spectrum],
                                    output_pool[pk_spectral_peaks_freq],
                                    output_pool[pk_spectral_peaks_mag])
    output_pool.set(pk_spectral_whitening, spectral_whitening)
    return output_pool
def key_detector():
    """Batch key-estimation experiment: analyse every matching audio file,
    compare against ground truth (MIREX scoring), and optionally write
    per-song results, a CSV of estimations+PCP, a confusion matrix and a
    summary file. All settings are read from module-level globals.
    NOTE: Python 2 code (print statements, integer division semantics)."""
    reloj()  # start the wall-clock timer
    # create directory to write the results with an unique time id:
    if results_to_file or results_to_csv:
        uniqueTime = str(int(tiempo()))
        wd = os.getcwd()
        temp_folder = wd + '/KeyDetection_' + uniqueTime
        os.mkdir(temp_folder)
    if results_to_csv:
        import csv
        csvFile = open(temp_folder + '/Estimation_&_PCP.csv', 'w')
        lineWriter = csv.writer(csvFile, delimiter=',')
    # retrieve files and filenames according to the desired settings:
    if analysis_mode == 'title':
        # Ground truth is embedded in the filename; filter by the
        # collection / genre / modality markers expected in the name.
        allfiles = os.listdir(audio_folder)
        if '.DS_Store' in allfiles:
            allfiles.remove('.DS_Store')
        # NOTE: these loops rewrite the module-level filter lists in place,
        # wrapping each entry with the separator tokens used in filenames.
        for item in collection:
            collection[collection.index(item)] = ' > ' + item + '.'
        for item in genre:
            genre[genre.index(item)] = ' < ' + item + ' > '
        for item in modality:
            modality[modality.index(item)] = ' ' + item + ' < '
        analysis_files = []
        for item in allfiles:
            if any(e1 for e1 in collection if e1 in item):
                if any(e2 for e2 in genre if e2 in item):
                    if any(e3 for e3 in modality if e3 in item):
                        analysis_files.append(item)
        song_instances = len(analysis_files)
        print song_instances, 'songs matching the selected criteria:'
        print collection, genre, modality
        if limit_analysis == 0:
            pass  # 0 means "analyse everything"
        elif limit_analysis < song_instances:
            analysis_files = sample(analysis_files, limit_analysis)
            print "taking", limit_analysis, "random samples...\n"
    else:
        # Ground truth lives in separate .txt files in groundtruth_folder.
        analysis_files = os.listdir(audio_folder)
        if '.DS_Store' in analysis_files:
            analysis_files.remove('.DS_Store')
        print len(analysis_files), '\nsongs in folder.\n'
        groundtruth_files = os.listdir(groundtruth_folder)
        if '.DS_Store' in groundtruth_files:
            groundtruth_files.remove('.DS_Store')
    # ANALYSIS
    # ========
    if verbose:
        print "ANALYSING INDIVIDUAL SONGS..."
        print "============================="
    if confusion_matrix:
        matrix = 24 * 24 * [0]  # flat 24x24 confusion matrix
    mirex_scores = []
    for item in analysis_files:
        # INSTANTIATE ESSENTIA ALGORITHMS
        # ===============================
        loader = estd.MonoLoader(filename=audio_folder + '/' + item,
                                 sampleRate=sample_rate)
        cut = estd.FrameCutter(frameSize=window_size, hopSize=hop_size)
        window = estd.Windowing(size=window_size, type=window_type)
        rfft = estd.Spectrum(size=window_size)
        sw = estd.SpectralWhitening(maxFrequency=max_frequency,
                                    sampleRate=sample_rate)
        speaks = estd.SpectralPeaks(magnitudeThreshold=magnitude_threshold,
                                    maxFrequency=max_frequency,
                                    minFrequency=min_frequency,
                                    maxPeaks=max_peaks,
                                    sampleRate=sample_rate)
        hpcp = estd.HPCP(bandPreset=band_preset,
                         harmonics=harmonics,
                         maxFrequency=max_frequency,
                         minFrequency=min_frequency,
                         nonLinear=non_linear,
                         normalized=normalize,
                         referenceFrequency=reference_frequency,
                         sampleRate=sample_rate,
                         size=hpcp_size,
                         splitFrequency=split_frequency,
                         weightType=weight_type,
                         windowSize=weight_window_size)
        key = estd.Key(numHarmonics=num_harmonics,
                       pcpSize=hpcp_size,
                       profileType=profile_type,
                       slope=slope,
                       usePolyphony=use_polyphony,
                       useThreeChords=use_three_chords)
        # ACTUAL ANALYSIS
        # ===============
        audio = loader()
        duration = len(audio)
        # Optional trimming of the analysed segment:
        if skip_first_minute and duration > (sample_rate * 60):
            audio = audio[sample_rate * 60:]
            duration = len(audio)
        if first_n_secs > 0:
            if duration > (first_n_secs * sample_rate):
                audio = audio[:first_n_secs * sample_rate]
                duration = len(audio)
        if avoid_edges > 0:
            # avoid_edges is a percentage cut from each end.
            initial_sample = (avoid_edges * duration) / 100
            final_sample = duration - initial_sample
            audio = audio[initial_sample:final_sample]
            duration = len(audio)
        number_of_frames = duration / hop_size  # Py2 integer division
        chroma = []
        for bang in range(number_of_frames):
            spek = rfft(window(cut(audio)))
            p1, p2 = speaks(spek)  # p1 are frequencies; p2 magnitudes
            if spectral_whitening:
                p2 = sw(spek, p1, p2)
            vector = hpcp(p1, p2)
            sum_vector = np.sum(vector)
            # only frames with some spectral energy contribute:
            if sum_vector > 0:
                if shift_spectrum == False or shift_scope == 'average':
                    chroma.append(vector)
                elif shift_spectrum and shift_scope == 'frame':
                    vector = shift_vector(vector, hpcp_size)
                    chroma.append(vector)
                else:
                    print "shift_scope must be set to 'frame' or 'average'"
        chroma = np.mean(chroma, axis=0)
        if shift_spectrum and shift_scope == 'average':
            chroma = shift_vector(chroma, hpcp_size)
        estimation = key(chroma.tolist())
        result = estimation[0] + ' ' + estimation[1]
        confidence = estimation[2]
        if results_to_csv:
            chroma = list(chroma)
        # MIREX EVALUATION:
        # ================
        if analysis_mode == 'title':
            # Ground-truth key is embedded between ' = ' and ' < ' markers.
            ground_truth = item[item.find(' = ') + 3:item.rfind(' < ')]
            if verbose and confidence < confidence_threshold:
                print item[:item.rfind(' = ')]
                print 'G:', ground_truth, '|| P:',
            if results_to_csv:
                title = item[:item.rfind(' = ')]
                # assumes hpcp_size == 36 (chroma[0]..chroma[35]) — TODO confirm
                lineWriter.writerow([
                    title, ground_truth, chroma[0], chroma[1], chroma[2],
                    chroma[3], chroma[4], chroma[5], chroma[6], chroma[7],
                    chroma[8], chroma[9], chroma[10], chroma[11], chroma[12],
                    chroma[13], chroma[14], chroma[15], chroma[16],
                    chroma[17], chroma[18], chroma[19], chroma[20],
                    chroma[21], chroma[22], chroma[23], chroma[24],
                    chroma[25], chroma[26], chroma[27], chroma[28],
                    chroma[29], chroma[30], chroma[31], chroma[32],
                    chroma[33], chroma[34], chroma[35], result
                ])
            ground_truth = key_to_list(ground_truth)
            estimation = key_to_list(result)
            score = mirex_score(ground_truth, estimation)
            mirex_scores.append(score)
        else:
            # Look up the matching ground-truth .txt file by basename.
            filename_to_match = item[:item.rfind('.')] + '.txt'
            print filename_to_match
            if filename_to_match in groundtruth_files:
                groundtruth_file = open(
                    groundtruth_folder + '/' + filename_to_match, 'r')
                ground_truth = groundtruth_file.readline()
                if "\t" in ground_truth:
                    ground_truth = re.sub("\t", " ", ground_truth)
                if results_to_csv:
                    # assumes hpcp_size == 36 — TODO confirm
                    lineWriter.writerow([
                        filename_to_match, chroma[0], chroma[1], chroma[2],
                        chroma[3], chroma[4], chroma[5], chroma[6],
                        chroma[7], chroma[8], chroma[9], chroma[10],
                        chroma[11], chroma[12], chroma[13], chroma[14],
                        chroma[15], chroma[16], chroma[17], chroma[18],
                        chroma[19], chroma[20], chroma[21], chroma[22],
                        chroma[23], chroma[24], chroma[25], chroma[26],
                        chroma[27], chroma[28], chroma[29], chroma[30],
                        chroma[31], chroma[32], chroma[33], chroma[34],
                        chroma[35], result
                    ])
                ground_truth = key_to_list(ground_truth)
                estimation = key_to_list(result)
                score = mirex_score(ground_truth, estimation)
                mirex_scores.append(score)
            else:
                print "FILE NOT FOUND... Skipping it from evaluation.\n"
                continue
        # CONFUSION MATRIX:
        # ================
        if confusion_matrix:
            # Index into the flat 24x24 matrix from (tonic, mode) pairs.
            xpos = (ground_truth[0] +
                    (ground_truth[0] * 24)) + (-1 *
                                               (ground_truth[1] - 1) * 24 * 12)
            ypos = ((estimation[0] - ground_truth[0]) +
                    (-1 * (estimation[1] - 1) * 12))
            # NOTE(review): '= +matrix[...] + 1' is numerically the same as
            # '+= 1' here; the '=+' looks like a typo for '+='.
            matrix[(xpos + ypos)] = +matrix[(xpos + ypos)] + 1
        if verbose and confidence < confidence_threshold:
            print result, '(%.2f)' % confidence, '|| SCORE:', score, '\n'
        # WRITE RESULTS TO FILE:
        # =====================
        if results_to_file:
            with open(temp_folder + '/' + item[:-3] + 'txt', 'w') as textfile:
                textfile.write(result)
                textfile.close()  # redundant inside 'with', kept as-is
    if results_to_csv:
        csvFile.close()
    print len(mirex_scores), "files analysed in", reloj(), "secs.\n"
    if confusion_matrix:
        matrix = np.matrix(matrix)
        matrix = matrix.reshape(24, 24)
        print matrix
        if results_to_file:
            np.savetxt(
                temp_folder + '/_confusion_matrix.csv',
                matrix,
                fmt='%i',
                delimiter=',',
                header=
                'C,C#,D,Eb,E,F,F#,G,G#,A,Bb,B,Cm,C#m,Dm,Ebm,Em,Fm,F#m,Gm,G#m,Am,Bbm,Bm'
            )
    # MIREX RESULTS
    # =============
    evaluation_results = mirex_evaluation(mirex_scores)
    # WRITE INFO TO FILE
    # ==================
    if results_to_file:
        # Dump all analysis settings alongside the evaluation summary.
        settings = "SETTINGS\n========\nAvoid edges ('%' of duration disregarded at both ends (0 = complete)) = " + str(
            avoid_edges
        ) + "\nfirst N secs = " + str(
            first_n_secs
        ) + "\nshift spectrum to fit tempered scale = " + str(
            shift_spectrum
        ) + "\nspectral whitening = " + str(
            spectral_whitening
        ) + "\nsample rate = " + str(sample_rate) + "\nwindow size = " + str(
            window_size
        ) + "\nhop size = " + str(hop_size) + "\nmagnitude threshold = " + str(
            magnitude_threshold
        ) + "\nminimum frequency = " + str(
            min_frequency
        ) + "\nmaximum frequency = " + str(
            max_frequency
        ) + "\nmaximum peaks = " + str(max_peaks) + "\nband preset = " + str(
            band_preset
        ) + "\nsplit frequency = " + str(
            split_frequency
        ) + "\nharmonics = " + str(harmonics) + "\nnon linear = " + str(
            non_linear
        ) + "\nnormalize = " + str(
            normalize
        ) + "\nreference frequency = " + str(
            reference_frequency
        ) + "\nhpcp size = " + str(
            hpcp_size
        ) + "\nweigth type = " + weight_type + "\nweight window size in semitones = " + str(
            weight_window_size
        ) + "\nharmonics key = " + str(num_harmonics) + "\nslope = " + str(
            slope) + "\nprofile = " + profile_type + "\npolyphony = " + str(
                use_polyphony) + "\nuse three chords = " + str(
                    use_three_chords)
        results_for_file = "\n\nEVALUATION RESULTS\n==================\nCorrect: " + str(
            evaluation_results[0]) + "\nFifth: " + str(
                evaluation_results[1]) + "\nRelative: " + str(
                    evaluation_results[2]) + "\nParallel: " + str(
                        evaluation_results[3]) + "\nError: " + str(
                            evaluation_results[4]) + "\nWeighted: " + str(
                                evaluation_results[5])
        write_to_file = open(temp_folder + '/_SUMMARY.txt', 'w')
        write_to_file.write(settings)
        write_to_file.write(results_for_file)
        if analysis_mode == 'title':
            corpus = "\n\nANALYSIS CORPUS\n===============\n" + str(
                collection) + '\n' + str(
                    genre) + '\n' + str(modality) + '\n\n' + str(
                        len(mirex_scores)) + " files analysed.\n"
            write_to_file.write(corpus)
        write_to_file.close()