def run(self, fname):
    """Extract a predominant pitch track (with salience) from an audio file.

    Loads the audio (MonoLoader resamples to 44100 Hz by default), applies
    an equal-loudness filter, extracts and selects pitch contours, converts
    the selected track from cent bins to Hz and optionally post-filters it.

    Returns a dict with a [time, pitch_hz, salience] row-per-frame matrix
    under 'pitch' and the extraction settings (including 'source') under
    'settings'.
    """
    # Load and pre-process; MonoLoader resamples the signal to 44100 Hz.
    signal = estd.EqualLoudness()(estd.MonoLoader(filename=fname)())

    (contours_bins, contours_start_times,
     contour_saliences, duration) = self._extract_pitch_contours(signal)

    # Run the simplified contour selection.
    pitch, pitch_salience = self.select_contours(
        contours_bins, contour_saliences, contours_start_times, duration)

    # Cent-bin to Hz conversion (55 Hz reference); 0 marks unvoiced frames.
    hz_track = []
    for bin_val in pitch:
        if bin_val == 0:
            hz_track.append(0.)
        else:
            hz_track.append(
                55. * 2. ** (self.bin_resolution * bin_val / 1200.))
    pitch = e_array(hz_track)
    pitch_salience = e_array(pitch_salience)

    # Optional post-filtering of the pitch track.
    if self.filter_pitch:
        pitch, pitch_salience = self._post_filter_pitch(
            pitch, pitch_salience)

    time_stamps = self._gen_time_stamps(0, len(pitch))

    # Assemble the [time, pitch, salience] matrix as nested lists.
    out = np.transpose(
        np.vstack((time_stamps, pitch.tolist(), pitch_salience.tolist())))
    out = out.tolist()

    settings = self.get_settings()
    settings.update({'source': fname})

    return {'pitch': out, 'settings': settings}
def run(self, fname):
    """Compute the predominant melody of *fname*.

    The audio is loaded (MonoLoader resamples to 44100 Hz by default) and
    equal-loudness filtered; pitch contours are extracted, selected,
    converted from cents to Hz and optionally post-filtered.

    Returns {'pitch': [[time, hz, salience], ...], 'settings': {...}} where
    the settings dict records the source file path.
    """
    loader = estd.MonoLoader(filename=fname)
    audio = estd.EqualLoudness()(loader())

    contours_bins, contours_start_times, contour_saliences, duration = (
        self._extract_pitch_contours(audio))

    # Run the simplified contour selection.
    pitch, pitch_salience = self.select_contours(
        contours_bins, contour_saliences, contours_start_times, duration)

    # Cent-to-Hz conversion with a 55 Hz reference; 0 denotes unvoiced.
    pitch = e_array([
        55. * 2. ** (self.bin_resolution * p / 1200.) if p != 0 else 0.
        for p in pitch
    ])
    pitch_salience = e_array(pitch_salience)

    # Optional pitch post-filter.
    if self.filter_pitch:
        pitch, pitch_salience = self._post_filter_pitch(
            pitch, pitch_salience)

    time_stamps = self._gen_time_stamps(0, len(pitch))

    # Stack into one [time, pitch, salience] row per frame.
    matrix = np.vstack(
        (time_stamps, pitch.tolist(), pitch_salience.tolist()))
    out = np.transpose(matrix).tolist()

    settings = self.get_settings()
    settings.update({'source': fname})
    return {'pitch': out, 'settings': settings}
def run(self, audio):
    """Extract the predominant pitch track from an already-loaded signal.

    Unlike the file-based variant, *audio* is a raw sample array; no
    loading or equal-loudness step is applied here. Pitch contours are
    extracted and selected, converted from cent bins to Hz, optionally
    post-filtered, and returned together with per-frame time stamps.

    Returns {'pitch': [[time, hz, salience], ...], 'settings': {...}}
    (no 'source' key: the caller supplies raw audio, not a file path).
    """
    contours_bins, contours_start_times, contour_saliences, duration = \
        self._extract_pitch_contours(audio)

    # NOTE(review): removed a loop that converted every contour to Hz/time
    # pairs plus a large block of commented-out matplotlib/librosa plotting
    # code — those intermediate lists were consumed only by the
    # commented-out plots, so dropping them leaves the output unchanged.

    # run the simplified contour selection
    pitch, pitch_salience = self.select_contours(
        contours_bins, contour_saliences, contours_start_times, duration)

    # cent to Hz conversion (55 Hz reference); 0 marks unvoiced frames
    pitch = [
        0. if p == 0 else 55. * 2. ** (self.bin_resolution * p / 1200.)
        for p in pitch
    ]
    pitch = e_array(pitch)
    pitch_salience = e_array(pitch_salience)

    # optional pitch filter
    if self.filter_pitch:
        pitch, pitch_salience = self._post_filter_pitch(
            pitch, pitch_salience)

    # generate time stamps
    time_stamps = self._gen_time_stamps(0, len(pitch))

    # [time pitch salience] matrix
    out = np.transpose(
        np.vstack((time_stamps, pitch.tolist(), pitch_salience.tolist())))
    out = out.tolist()

    # settings
    settings = self.get_settings()
    return {'pitch': out, 'settings': settings}
def run(self, musicbrainzid, fname):
    """Extract the predominant melody of *fname* via Essentia.

    Legacy (Python 2 era) variant: builds the full Essentia chain inline
    (windowing, spectrum, spectral peaks, pitch salience, contour
    tracking), selects contours, converts to Hz, optionally filters the
    pitch, and also serializes the result as a MATLAB .mat blob.

    NOTE(review): *musicbrainzid* is accepted but never used in this body.
    NOTE(review): uses Python-2-only names (``xrange``, ``cStringIO``) —
    this method breaks under Python 3; confirm which interpreter runs it.
    """
    citation = u"""Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X. (2014). Audio feature extraction for exploring Turkish makam music. In Proceedings of 3rd International Conference on Audio Technologies for Music and Media, Ankara, Turkey."""

    # Hann window with x4 zero padding
    run_windowing = estd.Windowing(zeroPadding=3 * self.settings.frameSize)
    run_spectrum = estd.Spectrum(size=self.settings.frameSize * 4)
    run_spectral_peaks = estd.SpectralPeaks(
        minFrequency=self.settings.minFrequency,
        maxFrequency=self.settings.maxFrequency,
        sampleRate=self.settings.sampleRate,
        magnitudeThreshold=self.settings.magnitudeThreshold,
        orderBy='magnitude')
    # converts unit to cents, 55 Hz is taken as the default reference
    run_pitch_salience_function = estd.PitchSalienceFunction(
        binResolution=self.settings.binResolution)
    run_pitch_salience_function_peaks = estd.PitchSalienceFunctionPeaks(
        binResolution=self.settings.binResolution,
        minFrequency=self.settings.minFrequency,
        maxFrequency=self.settings.maxFrequency)
    run_pitch_contours = estd.PitchContours(
        hopSize=self.settings.hopSize,
        binResolution=self.settings.binResolution,
        peakDistributionThreshold=self.settings.peakDistributionThreshold)
    run_pitch_filter = estd.PitchFilter(
        confidenceThreshold=self.settings.confidenceThreshold,
        minChunkSize=self.settings.minChunkSize)

    pool = Pool()

    # load audio and eqLoudness
    # MonoLoader resamples the audio signal to 44100 Hz by default
    audio = estd.MonoLoader(filename=fname)()
    audio = estd.EqualLoudness()(audio)

    # per-frame salience analysis, accumulated into the pool
    for frame in estd.FrameGenerator(audio,
                                     frameSize=self.settings.frameSize,
                                     hopSize=self.settings.hopSize):
        frame = run_windowing(frame)
        spectrum = run_spectrum(frame)
        peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
        salience = run_pitch_salience_function(peak_frequencies,
                                               peak_magnitudes)
        salience_peaks_bins, salience_peaks_contourSaliences = \
            run_pitch_salience_function_peaks(salience)
        # substitute empty frames with a single zero entry so the two
        # pool descriptors stay frame-aligned
        if not size(salience_peaks_bins):
            salience_peaks_bins = array([0])
        if not size(salience_peaks_contourSaliences):
            salience_peaks_contourSaliences = array([0])
        pool.add('allframes_salience_peaks_bins',
                 salience_peaks_bins)
        pool.add('allframes_salience_peaks_contourSaliences',
                 salience_peaks_contourSaliences)

    # post-processing: contour tracking
    contours_bins, contours_contourSaliences, contours_start_times, \
        duration = run_pitch_contours(
            pool['allframes_salience_peaks_bins'],
            pool['allframes_salience_peaks_contourSaliences'])

    # run the simplified contour selection
    [pitch, pitch_salience] = self.ContourSelection(
        contours_bins, contours_contourSaliences,
        contours_start_times, duration)

    # cent to Hz conversion (55 Hz reference); 0 marks unvoiced frames
    pitch = e_array([
        0. if p == 0
        else 55. * (2. ** (((self.settings.binResolution * (p))) / 1200))
        for p in pitch])
    pitch_salience = e_array(pitch_salience)

    # pitch filter
    if self.settings.filterPitch:
        pitch = run_pitch_filter(pitch, pitch_salience)

    # generate time stamps (hop index -> seconds)
    # NOTE(review): xrange is Python 2 only
    time_stamps = [
        s * self.settings.hopSize / float(self.settings.sampleRate)
        for s in xrange(0, len(pitch))]

    # [time pitch salience] matrix
    out = transpose(vstack((time_stamps, pitch.tolist(),
                            pitch_salience.tolist())))
    out = out.tolist()

    # settings
    # NOTE(review): this updates self.settings in place (no copy), so the
    # per-call keys ('source', 'citation', ...) leak into the shared
    # settings object — confirm callers expect that.
    settings = self.settings
    settings.update({'version': self._version,
                     'slug': self._slug,
                     'source': fname,
                     'essentiaVersion': essentia_version,
                     'pitchUnit': 'Hz',
                     'citation': citation})

    # matlab serialization of the same output (cStringIO is Python 2 only)
    matout = cStringIO.StringIO()
    matob = {'pitch': out}
    matob.update(settings)
    scipy.io.savemat(matout, matob)

    return {'pitch': out,
            'matlab': matout.getvalue(),
            'settings': settings}