def _extract_pitch_contours(self, audio):
    """Compute pitch contours from ``audio`` with the Essentia chain.

    Every frame is windowed, turned into a spectrum, reduced to spectral
    peaks and then to salience-function peaks. The per-frame peaks are
    gathered in a pool and handed to ``PitchContours`` in a single call.

    :param audio: signal passed as-is to ``estd.FrameGenerator``
    :return: tuple ``(contours_bins, contours_start_times,
        contour_saliences, duration)``; start times and saliences are
        returned in the opposite order to the one in which
        ``PitchContours`` yields them
    """
    bins_key = 'allframes_salience_peaks_bins'
    saliences_key = 'allframes_salience_peaks_contourSaliences'

    # Hann window with x4 zero padding: padding by three frame lengths gives
    # 4 * frame_size samples, which is the spectrum size requested below.
    # (The window type itself is left at the Windowing default.)
    apply_window = estd.Windowing(  # pylint: disable-msg=E1101
        zeroPadding=3 * self.frame_size)
    to_spectrum = estd.Spectrum(  # pylint: disable-msg=E1101
        size=self.frame_size * 4)
    find_spectral_peaks = estd.SpectralPeaks(  # pylint: disable-msg=E1101
        minFrequency=self.min_frequency,
        maxFrequency=self.max_frequency,
        magnitudeThreshold=self.magnitude_threshold,
        sampleRate=self.sample_rate,
        orderBy='magnitude')

    # convert unit to cents, PitchSalienceFunction takes 55 Hz as the
    # default reference
    salience_function = \
        estd.PitchSalienceFunction(  # pylint: disable-msg=E1101
            binResolution=self.bin_resolution)
    pick_salience_peaks = \
        estd.PitchSalienceFunctionPeaks(  # pylint: disable-msg=E1101
            binResolution=self.bin_resolution,
            minFrequency=self.min_frequency,
            maxFrequency=self.max_frequency)
    track_contours = estd.PitchContours(  # pylint: disable-msg=E1101
        hopSize=self.hop_size,
        binResolution=self.bin_resolution,
        peakDistributionThreshold=self.peak_distribution_threshold)

    # frame-wise analysis; one pool entry is stored per frame and per key
    frame_peaks = Pool()
    frames = estd.FrameGenerator(  # pylint: disable-msg=E1101
        audio, frameSize=self.frame_size, hopSize=self.hop_size)
    for frame in frames:
        frequencies, magnitudes = find_spectral_peaks(
            to_spectrum(apply_window(frame)))
        peak_bins, peak_saliences = pick_salience_peaks(
            salience_function(frequencies, magnitudes))

        # a frame without any salience peak is stored as a single zero so
        # that it still contributes an entry to the pool
        if np.size(peak_bins) == 0:
            peak_bins = np.array([0])
        if np.size(peak_saliences) == 0:
            peak_saliences = np.array([0])

        frame_peaks.add(bins_key, peak_bins)
        frame_peaks.add(saliences_key, peak_saliences)

    # post-processing: contour tracking on plain Python lists
    bins_per_frame = [entry.tolist() for entry in frame_peaks[bins_key]]
    saliences_per_frame = [entry.tolist()
                           for entry in frame_peaks[saliences_key]]
    (contours_bins, contour_saliences,
     contours_start_times, duration) = track_contours(bins_per_frame,
                                                      saliences_per_frame)

    return contours_bins, contours_start_times, contour_saliences, duration
# Input recording and analysis settings.
filename = '../../../sounds/carnatic.wav'
hopSize = 128
frameSize = 2048
sampleRate = 44100
guessUnvoiced = True

# Analysis algorithms.
# Hann window with x4 zero padding: three frame lengths of padding bring each
# frame to 4 * frameSize samples, the size requested from Spectrum.
run_windowing = ess.Windowing(type='hann', zeroPadding=3 * frameSize)
run_spectrum = ess.Spectrum(size=4 * frameSize)
run_spectral_peaks = ess.SpectralPeaks(
    minFrequency=50,
    maxFrequency=10000,
    maxPeaks=100,
    sampleRate=sampleRate,
    magnitudeThreshold=0,
    orderBy="magnitude",
)
run_pitch_salience_function = ess.PitchSalienceFunction(magnitudeThreshold=60)
run_pitch_salience_function_peaks = ess.PitchSalienceFunctionPeaks(
    minFrequency=90,
    maxFrequency=800,
)
run_pitch_contours = ess.PitchContours(hopSize=hopSize, peakFrameThreshold=0.7)
run_pitch_contours_melody = ess.PitchContoursMelody(
    guessUnvoiced=guessUnvoiced,
    hopSize=hopSize,
)
pool = essentia.Pool()

# Load the recording as mono and apply the equal-loudness filter.
audio = ess.EqualLoudness()(ess.MonoLoader(filename=filename)())

# Frame-wise chain: window -> spectrum -> spectral peaks -> salience function.
for frame in ess.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
    frame = run_windowing(frame)
    spectrum = run_spectrum(frame)
    peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
    salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
# Select the Agg backend before pylab is imported.
mpl.use('Agg')
from pylab import *
from numpy import *

# Input recording and analysis settings.
filename = '../../../sounds/orchestra.wav'
fs = 44100     # sample rate handed to the loader and to SpectralPeaks
H = 1024       # hop size
M = 2048       # frame (window) size
N = 2 * M      # spectrum size; frames are zero padded by N - M samples
guessUnvoiced = True

# Analysis algorithms.
window = ess.Windowing(type='hann', zeroPadding=N - M)
spectrum = ess.Spectrum(size=N)
spectralPeaks = ess.SpectralPeaks(minFrequency=50,
                                  maxFrequency=10000,
                                  maxPeaks=100,
                                  sampleRate=fs,
                                  magnitudeThreshold=0,
                                  orderBy="magnitude")
pitchSalienceFunction = ess.PitchSalienceFunction()
pitchSalienceFunctionPeaks = ess.PitchSalienceFunctionPeaks(minFrequency=100,
                                                            maxFrequency=300)

# Load the recording as mono and apply the equal-loudness filter.
x = ess.MonoLoader(filename=filename, sampleRate=fs)()
x = ess.EqualLoudness()(x)

# For every frame keep the largest salience among the detected salience peaks.
totalSaliences = []
for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H):
    frame = window(frame)
    mX = spectrum(frame)
    peak_frequencies, peak_magnitudes = spectralPeaks(mX)
    pitchSalienceFunction_vals = pitchSalienceFunction(peak_frequencies,
                                                       peak_magnitudes)
    salience_peaks_bins_vals, salience_peaks_saliences_vals = \
        pitchSalienceFunctionPeaks(pitchSalienceFunction_vals)

    # A frame may yield no salience peaks at all; max() of an empty sequence
    # raises ValueError, so such frames are recorded with zero salience.
    if np.size(salience_peaks_saliences_vals):
        totalSaliences.append(max(salience_peaks_saliences_vals))
    else:
        totalSaliences.append(0.0)

totalSaliences = np.array(totalSaliences)