Beispiel #1
0
    def _extract_pitch_contours(self, audio):
        """Track pitch contours from the per-frame pitch salience peaks.

        Each frame of ``audio`` is windowed with zero padding, turned into a
        spectrum and reduced to spectral peaks. A pitch salience function
        and its peaks are computed from those, the per-frame peaks are
        collected in a pool, and the collected peaks are finally handed to
        ``estd.PitchContours`` for contour tracking.

        Args:
            audio: signal handed straight to ``estd.FrameGenerator``
                (presumably mono samples at ``self.sample_rate`` -- confirm
                against the caller).

        Returns:
            tuple: ``(contours_bins, contours_start_times,
            contour_saliences, duration)``. Note that the start times are
            returned *before* the saliences, which is not the order in
            which ``PitchContours`` yields them.
        """
        # Windowing with 3x zero padding, i.e. frames are padded to 4x the
        # frame size, matching the spectrum size configured right below.
        run_windowing = estd.Windowing(  # pylint: disable-msg=E1101
            zeroPadding=3 * self.frame_size)
        run_spectrum = estd.Spectrum(  # pylint: disable-msg=E1101
            size=self.frame_size * 4)
        run_spectral_peaks = estd.SpectralPeaks(  # pylint: disable-msg=E1101
            minFrequency=self.min_frequency,
            maxFrequency=self.max_frequency,
            magnitudeThreshold=self.magnitude_threshold,
            sampleRate=self.sample_rate,
            orderBy='magnitude')

        # convert unit to cents, PitchSalienceFunction takes 55 Hz as the
        # default reference
        run_pitch_salience_function = \
            estd.PitchSalienceFunction(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution)
        run_pitch_salience_function_peaks = \
            estd.PitchSalienceFunctionPeaks(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution,
                minFrequency=self.min_frequency,
                maxFrequency=self.max_frequency)
        # PitchContours derives its time axis from hopSize and sampleRate.
        # Pass the configured rate explicitly: otherwise the algorithm falls
        # back to its own default and the contour start times / duration are
        # mis-scaled whenever self.sample_rate differs from that default.
        run_pitch_contours = estd.PitchContours(  # pylint: disable-msg=E1101
            hopSize=self.hop_size,
            sampleRate=self.sample_rate,
            binResolution=self.bin_resolution,
            peakDistributionThreshold=self.peak_distribution_threshold)

        # compute frame by frame
        pool = Pool()
        for frame in estd.FrameGenerator(  # pylint: disable-msg=E1101
                audio,
                frameSize=self.frame_size,
                hopSize=self.hop_size):
            frame = run_windowing(frame)
            spectrum = run_spectrum(frame)
            peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
            salience = run_pitch_salience_function(peak_frequencies,
                                                   peak_magnitudes)
            salience_peaks_bins, salience_peaks_contour_saliences = \
                run_pitch_salience_function_peaks(salience)

            # A frame without any salience peak is stored as a single zero
            # entry rather than as an empty array.
            if not np.size(salience_peaks_bins):
                salience_peaks_bins = np.array([0])
            if not np.size(salience_peaks_contour_saliences):
                salience_peaks_contour_saliences = np.array([0])

            pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
            pool.add('allframes_salience_peaks_contourSaliences',
                     salience_peaks_contour_saliences)

        # post-processing: contour tracking (the pooled per-frame arrays are
        # converted to plain lists before being passed on)
        contours_bins, contour_saliences, contours_start_times, duration = \
            run_pitch_contours(
                [f.tolist()
                 for f in pool['allframes_salience_peaks_bins']],
                [f.tolist()
                 for f in pool['allframes_salience_peaks_contourSaliences']])
        return contours_bins, contours_start_times, contour_saliences, duration
# ---------------------------------------------------------------------------
# Analysis settings
# ---------------------------------------------------------------------------
filename = '../../../sounds/carnatic.wav'
hopSize = 128
frameSize = 2048
sampleRate = 44100
guessUnvoiced = True

# ---------------------------------------------------------------------------
# Algorithm instances, listed in the order in which the chain applies them
# ---------------------------------------------------------------------------
# Hann window; padding with 3 * frameSize zeros brings every frame up to the
# 4 * frameSize length that the spectrum below is configured for.
run_windowing = ess.Windowing(
    type='hann',
    zeroPadding=3 * frameSize,
)
run_spectrum = ess.Spectrum(size=frameSize * 4)
run_spectral_peaks = ess.SpectralPeaks(
    minFrequency=50,
    maxFrequency=10000,
    maxPeaks=100,
    sampleRate=sampleRate,
    magnitudeThreshold=0,
    orderBy="magnitude",
)
run_pitch_salience_function = ess.PitchSalienceFunction(
    magnitudeThreshold=60,
)
run_pitch_salience_function_peaks = ess.PitchSalienceFunctionPeaks(
    minFrequency=90,
    maxFrequency=800,
)
run_pitch_contours = ess.PitchContours(
    hopSize=hopSize,
    peakFrameThreshold=0.7,
)
run_pitch_contours_melody = ess.PitchContoursMelody(
    guessUnvoiced=guessUnvoiced,
    hopSize=hopSize,
)

# Container for results; nothing is written to it in the visible part of
# this script.
pool = essentia.Pool()

# ---------------------------------------------------------------------------
# Input: load the file as mono, then run it through the equal-loudness filter
# ---------------------------------------------------------------------------
audio = ess.MonoLoader(filename=filename)()
audio = ess.EqualLoudness()(audio)

# ---------------------------------------------------------------------------
# Frame loop: window -> spectrum -> spectral peaks -> pitch salience function
# ---------------------------------------------------------------------------
for frame in ess.FrameGenerator(
        audio,
        frameSize=frameSize,
        hopSize=hopSize):
    frame = run_windowing(frame)
    spectrum = run_spectrum(frame)
    peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
    salience = run_pitch_salience_function(
        peak_frequencies,
        peak_magnitudes,
    )
# Select the 'Agg' backend before pylab is imported on the next line.
# NOTE(review): `mpl` is presumably matplotlib, imported above this excerpt
# -- confirm.
mpl.use('Agg')
# NOTE(review): both star imports dump many names into this namespace; the
# second one rebinds any name the two modules have in common. Prefer explicit
# imports if this script is ever refactored.
from pylab import *
from numpy import *

# Compute, for every frame of the input file, the largest pitch salience peak
# and collect those per-frame maxima in `totalSaliences`.

filename = '../../../sounds/orchestra.wav'
fs = 44100  # sampling rate given to the loader and to the spectral peak picker
H = 1024    # hop size between consecutive frames
M = 2048    # frame (window) size
N = 2 * M   # spectrum size; frames are zero-padded from M up to N samples
guessUnvoiced = True  # not used in the visible part of this script

window = ess.Windowing(type='hann', zeroPadding=N - M)
spectrum = ess.Spectrum(size=N)
spectralPeaks = ess.SpectralPeaks(minFrequency=50, maxFrequency=10000, maxPeaks=100, sampleRate=fs,
                                  magnitudeThreshold=0, orderBy="magnitude")
pitchSalienceFunction = ess.PitchSalienceFunction()
pitchSalienceFunctionPeaks = ess.PitchSalienceFunctionPeaks(minFrequency=100, maxFrequency=300)

# Load the file as mono at `fs`, then apply the equal-loudness filter.
x = ess.MonoLoader(filename=filename, sampleRate=fs)()
x = ess.EqualLoudness()(x)
totalSaliences = []

for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H):
    frame = window(frame)
    mX = spectrum(frame)
    peak_frequencies, peak_magnitudes = spectralPeaks(mX)
    pitchSalienceFunction_vals = pitchSalienceFunction(peak_frequencies, peak_magnitudes)
    salience_peaks_bins_vals, salience_peaks_saliences_vals = pitchSalienceFunctionPeaks(pitchSalienceFunction_vals)
    # A frame may yield no salience peaks at all, and max() raises ValueError
    # on an empty sequence. Record a salience of zero for such frames so the
    # result still holds exactly one value per frame.
    if len(salience_peaks_saliences_vals):
        totalSaliences.append(max(salience_peaks_saliences_vals))
    else:
        totalSaliences.append(0.0)

totalSaliences = np.array(totalSaliences)