def get_onsets(self, in_filename):
    """Detect onsets in an audio file with the RMS onset detection function.

    The sample rate reported by ``AudioLoader`` is stored on
    ``self.sampleRate``; the analysis itself runs on the signal returned by
    ``MonoLoader``, framed with ``self.frame_size`` / ``self.hop_size``.

    Args:
        in_filename: path of the audio file to analyse.

    Returns:
        The onset positions produced by the ``Onsets`` algorithm
        (presumably in seconds -- confirm against the Essentia reference).
    """
    # AudioLoader is only used to learn the sample rate of the file.
    # NOTE(review): MonoLoader is called with its default sampleRate, so
    # self.sampleRate may not match the rate of `signal` -- confirm.
    _samples, file_rate, _channels = AudioLoader(filename=in_filename)()
    signal = MonoLoader(filename=in_filename)()
    self.sampleRate = file_rate

    # 1) Onset detection function, one value per frame.
    windowing = Windowing(type='hann')
    to_complex_spectrum = FFT()
    to_polar = CartesianToPolar()
    detector = OnsetDetection(method='rms')
    features = Pool()
    frames = FrameGenerator(signal,
                            frameSize=self.frame_size,
                            hopSize=self.hop_size)
    for chunk in frames:
        magnitude, phase = to_polar(to_complex_spectrum(windowing(chunk)))
        features.add('features.rms', detector(magnitude, phase))

    # 2) Onset locations; Onsets expects a matrix plus one weight per row.
    locate = Onsets(silenceThreshold=0.14, delay=10)
    detection_matrix = array([features['features.rms']])
    onsets_rms = locate(detection_matrix, [1])

    print("Num onsets: " + str(len(onsets_rms)))
    return onsets_rms
def pca(pool, namespace=''):
    """Replace the raw spectral-contrast frames in ``pool`` by PCA statistics.

    For every frame the ``sccoeffs`` and ``scvalleys`` values are interleaved
    (coeff, valley, coeff, valley, ...), the interleaved frames are passed
    through ``standard.PCA`` and the per-dimension mean and variance of the
    result are stored under ``<prefix>spectral_contrast.mean`` / ``.var``.
    The two source descriptors are removed from ``pool`` afterwards.

    Args:
        pool: Essentia pool holding ``<prefix>sccoeffs`` and
            ``<prefix>scvalleys``; it is modified in place.
        namespace: optional prefix; when given, descriptors live under
            ``<namespace>.lowlevel.`` instead of ``lowlevel.``.
    """
    prefix = namespace + '.lowlevel.' if namespace else 'lowlevel.'

    coeff_frames = pool[prefix + 'sccoeffs']
    valley_frames = pool[prefix + 'scvalleys']

    contrast_pool = Pool()
    # One reusable buffer, sized from the first frame.
    interleaved = essentia.zeros(2 * len(coeff_frames[0]))
    for frame_idx, coeffs in enumerate(coeff_frames):
        for i, coeff in enumerate(coeffs):
            interleaved[2 * i] = coeff
            interleaved[2 * i + 1] = valley_frames[frame_idx][i]
        contrast_pool.add('contrast', interleaved)

    run_pca = standard.PCA(namespaceIn='contrast', namespaceOut='contrast')
    contrast = run_pca(contrast_pool)['contrast']

    pool.set(prefix + 'spectral_contrast.mean', mean(contrast, axis=0))
    pool.set(prefix + 'spectral_contrast.var', var(contrast, axis=0))
    pool.remove(prefix + 'sccoeffs')
    pool.remove(prefix + 'scvalleys')
def pca(pool, namespace=''):
    """Summarise spectral contrast through PCA and store mean/variance.

    Reads ``sccoeffs`` and ``scvalleys`` from ``pool`` (under ``lowlevel.``
    or ``<namespace>.lowlevel.``), interleaves them frame by frame, runs
    ``standard.PCA`` on the interleaved frames and writes the column-wise
    mean and variance back as ``spectral_contrast.mean`` / ``.var``.
    The two input descriptors are then removed from ``pool``.

    Args:
        pool: Essentia pool, modified in place.
        namespace: optional descriptor prefix.
    """
    if namespace:
        base = namespace + '.lowlevel.'
    else:
        base = 'lowlevel.'
    coeffs_key = base + 'sccoeffs'
    valleys_key = base + 'scvalleys'

    all_coeffs = pool[coeffs_key]
    all_valleys = pool[valleys_key]
    frame_count = len(all_coeffs)

    staging = Pool()
    row = essentia.zeros(2 * len(all_coeffs[0]))
    for f in range(frame_count):
        # Even slots take the coefficient, odd slots the matching valley.
        for k in range(len(all_coeffs[f])):
            row[2 * k] = all_coeffs[f][k]
            row[2 * k + 1] = all_valleys[f][k]
        staging.add('contrast', row)

    transformed = standard.PCA(namespaceIn='contrast',
                               namespaceOut='contrast')(staging)
    contrast = transformed['contrast']

    pool.set(base + 'spectral_contrast.mean', mean(contrast, axis=0))
    pool.set(base + 'spectral_contrast.var', var(contrast, axis=0))
    pool.remove(coeffs_key)
    pool.remove(valleys_key)
def get_onsets(self, _audio=None):
    """Detect onsets in ``_audio`` (or in ``self.audio`` when none is given).

    Args:
        _audio: optional signal to analyse. ``None`` or an empty sequence
            selects ``self.audio``.

    Returns:
        The onset positions returned by ``es.Onsets`` for the detection
        function configured by ``self.onsetMethod``.
    """
    # The previous `_audio != []` test is an array comparison when a NumPy
    # signal is passed, which is ambiguous (or an error) in a boolean
    # context; test for "nothing supplied" explicitly instead.
    if _audio is None or len(_audio) == 0:
        audio = self.audio
    else:
        audio = _audio

    W = es.Windowing(type=self.winType)
    c2p = es.CartesianToPolar()
    fft = es.FFT()
    onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                       sampleRate=44100)
    onsets = es.Onsets(alpha=.2)

    pool = Pool()
    for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(W(frame)))
        # NOTE(review): re-configuring on every frame looks redundant (the
        # method is already set at construction) and may reset the
        # detector's inter-frame state -- confirm before removing.
        onsetDetection.configure(method=self.onsetMethod)
        pool.add("onsetFunction", onsetDetection(mag, phase))

    # Onsets takes a matrix of detection functions and one weight per row.
    return onsets([pool["onsetFunction"]], [1])
def compute_harmonic_magnitudes(contour_f0s, fftgram, idx_start, options):
    '''
    Run the harmonic model analysis on every frame of a contour.

    For the i-th f0 of the contour, the frame ``fftgram[idx_start + i]`` is
    analysed with ``HarmonicModelAnal(nHarmonics=30)``.

    Params:
    --------------------
    contour_f0s - f0 values of the contour, one per frame
    fftgram - FFT frames of the whole audio file
    idx_start - index in fftgram of the first frame of the contour
    options - not used by this function

    Returns:
    --------------------
    (hfreqs, magns, phases) - the per-frame outputs of the analysis

    The process exits via sys.exit when the contour runs past the end of
    fftgram.
    '''
    # TODO: sanity check: times == len(fftgram) and contour_start_time_SAL in times
    analyse_harmonics = HarmonicModelAnal(nHarmonics=30)
    collected = Pool()
    last_frame_idx = len(fftgram) - 1

    for offset, f0 in enumerate(contour_f0s):
        frame_idx = idx_start + offset
        if frame_idx > last_frame_idx:
            sys.exit('idx start is {} while len ffmtgram is {}'.format(
                idx_start, len(fftgram)))

        frame_fft = fftgram[frame_idx]
        hfreq, magn, phase = analyse_harmonics(frame_fft, f0)
        collected.add('phases', phase)
        collected.add('hfreqs', hfreq)
        collected.add('magns', magn)

    return collected['hfreqs'], collected['magns'], collected['phases']
def _extract_pitch_contours(self, audio):
    """Track pitch contours in ``audio`` from its pitch salience peaks.

    Each frame is windowed (zero-padded to four times ``self.frame_size``),
    turned into a spectrum, reduced to spectral peaks, mapped to a pitch
    salience function and finally to salience peaks. The per-frame peaks
    are then handed to ``PitchContours`` as plain Python lists.

    Args:
        audio: signal to analyse.

    Returns:
        tuple: (contours_bins, contours_start_times, contour_saliences,
        duration) as produced by ``PitchContours``.
    """
    # pylint: disable-msg=E1101
    # Windowing with x4 zero padding (3 * frame_size zeros appended).
    windowing = estd.Windowing(zeroPadding=3 * self.frame_size)
    to_spectrum = estd.Spectrum(size=self.frame_size * 4)
    find_peaks = estd.SpectralPeaks(
        minFrequency=self.min_frequency,
        maxFrequency=self.max_frequency,
        magnitudeThreshold=self.magnitude_threshold,
        sampleRate=self.sample_rate,
        orderBy='magnitude')
    # Convert unit to cents; PitchSalienceFunction takes 55 Hz as the
    # default reference.
    salience_function = estd.PitchSalienceFunction(
        binResolution=self.bin_resolution)
    salience_peaks = estd.PitchSalienceFunctionPeaks(
        binResolution=self.bin_resolution,
        minFrequency=self.min_frequency,
        maxFrequency=self.max_frequency)
    track_contours = estd.PitchContours(
        hopSize=self.hop_size,
        binResolution=self.bin_resolution,
        peakDistributionThreshold=self.peak_distribution_threshold)

    bins_key = 'allframes_salience_peaks_bins'
    saliences_key = 'allframes_salience_peaks_contourSaliences'

    # Frame-by-frame analysis.
    per_frame = Pool()
    frames = estd.FrameGenerator(audio,
                                 frameSize=self.frame_size,
                                 hopSize=self.hop_size)
    for chunk in frames:
        magnitudes_spectrum = to_spectrum(windowing(chunk))
        peak_frequencies, peak_magnitudes = find_peaks(magnitudes_spectrum)
        salience = salience_function(peak_frequencies, peak_magnitudes)
        peak_bins, peak_saliences = salience_peaks(salience)

        # Frames without peaks still contribute a single zero entry.
        if not np.size(peak_bins):
            peak_bins = np.array([0])
        if not np.size(peak_saliences):
            peak_saliences = np.array([0])

        per_frame.add(bins_key, peak_bins)
        per_frame.add(saliences_key, peak_saliences)

    # Post-processing: contour tracking on list-converted frames.
    bins_as_lists = [f.tolist() for f in per_frame[bins_key]]
    saliences_as_lists = [f.tolist() for f in per_frame[saliences_key]]
    contours_bins, contour_saliences, contours_start_times, duration = \
        track_contours(bins_as_lists, saliences_as_lists)

    return contours_bins, contours_start_times, contour_saliences, duration
def compute_energy(audio):
    """Return the per-frame energy of ``audio``.

    Frames are cut with the ``frame_size`` / ``hop_size`` values defined
    outside this function, starting at sample zero.

    Args:
        audio: signal to analyse.

    Returns:
        The values stored under ``'energy'`` in the pool, one per frame.
    """
    energy = Energy()
    p = Pool()
    for frame in FrameGenerator(audio, frameSize=frame_size,
                                hopSize=hop_size, startFromZero=True):
        p.add('energy', energy(frame))
    # Previously the literal string 'True' was returned and the computed
    # energies were discarded; return them like the sibling compute_* helpers.
    return p['energy']
def compute_pitch_yin(audio):
    """Return the per-frame pitch estimated by ``PitchYin``.

    Only the first output of the algorithm is kept for each frame. Framing
    uses the ``frame_size`` / ``hop_size`` values defined outside this
    function.
    """
    results = Pool()
    estimator = PitchYin()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        outputs = estimator(chunk)
        results.add('pitch_yin', outputs[0])
    return results['pitch_yin']
def compute_zcr(audio):
    """Return the per-frame zero-crossing rate of ``audio``.

    Framing uses the ``frame_size`` / ``hop_size`` values defined outside
    this function.
    """
    results = Pool()
    zero_crossings = ZeroCrossingRate()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        rate = zero_crossings(chunk)
        results.add('zcr', rate)
    return results['zcr']
def compute_rms(audio):
    """Return the per-frame RMS value of ``audio``.

    Framing uses the ``frame_size`` / ``hop_size`` values defined outside
    this function.
    """
    results = Pool()
    root_mean_square = RMS()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        level = root_mean_square(chunk)
        results.add('rms', level)
    return results['rms']
def compute_power_spectrum(audio):
    """Return the power spectrum of every Hann-windowed frame of ``audio``.

    Framing uses the ``frame_size`` / ``hop_size`` values defined outside
    this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_power = PowerSpectrum()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        windowed = hann(chunk)
        results.add('power_spectrum', to_power(windowed))
    return results['power_spectrum']
def compute_spectral_rolloff(audio):
    """Return the per-frame ``RollOff`` of the magnitude spectrum.

    Each frame is Hann-windowed before the spectrum is taken. Framing uses
    the ``frame_size`` / ``hop_size`` values defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    roll_off = RollOff()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        results.add('spectral_rolloff', roll_off(magnitudes))
    return results['spectral_rolloff']
def compute_spectral_centroid(audio):
    """Return the per-frame ``Centroid`` of the magnitude spectrum.

    Each frame is Hann-windowed before the spectrum is taken. Framing uses
    the ``frame_size`` / ``hop_size`` values defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    spectral_centroid = Centroid()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        results.add('spectral_centroid', spectral_centroid(magnitudes))
    return results['spectral_centroid']
def compute_mel(audio):
    """Return 96 mel-band energies for every frame of ``audio``.

    Each frame is Hann-windowed before the spectrum is taken. Framing uses
    the ``frame_size`` / ``hop_size`` values defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    mel_bands = MelBands(numberBands=96)
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        results.add('mel', mel_bands(magnitudes))
    return results['mel']
def detect_essentia(arquivo_audio, selected):
    """Detect onsets in the module-level ``audio`` signal using Essentia.

    Args:
        arquivo_audio: audio file name. Kept for interface compatibility;
            the samples analysed are read from the global ``audio``.
        selected: code choosing the onset detection function:
            3 -> 'hfc', 4 -> 'complex', 5 -> 'melflux',
            6 -> 'complex_phase', 7 -> 'rms'.

    Returns:
        list of int: the non-negative onset times, each converted with
        ``SecToFrames`` and truncated to ``int``.

    Raises:
        ValueError: if ``selected`` is not one of the supported codes
            (previously this surfaced later as an unbound-variable error).
    """
    # Reads the module-level `audio`; nothing is assigned to it here.
    methods_by_code = {
        3: 'hfc',
        4: 'complex',
        5: 'melflux',
        6: 'complex_phase',
        7: 'rms',
    }
    try:
        method = methods_by_code[selected]
    except (KeyError, TypeError):
        raise ValueError(
            'unsupported onset detection code: %r (expected one of %s)'
            % (selected, sorted(methods_by_code)))

    # Phase 1: compute the onset detection function.
    od = OnsetDetection(method=method)
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into (magnitude, phase)
    pool = Pool()

    print('Computing onset detection functions...')
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(w(frame)))
        pool.add('features.method', od(mag, phase))

    # Phase 2: compute the actual onset locations.
    onsets = Onsets()
    print('Computing onset times...')
    # Onsets expects a matrix of detection functions and one weight per row.
    onsets_method = onsets(array([pool['features.method']]), [1])

    # Convert seconds to frames, dropping negative times.
    return [int(SecToFrames(x)) for x in onsets_method.tolist() if x >= 0]
def compute_pitch_yinfft(audio):
    """Return the per-frame pitch estimated by ``PitchYinFFT``.

    Each frame is Hann-windowed and converted to a spectrum; only the first
    output of the estimator is kept. Framing uses the ``frame_size`` /
    ``hop_size`` values defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    estimator = PitchYinFFT()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        outputs = estimator(to_spectrum(hann(chunk)))
        results.add('pitch_yinfft', outputs[0])
    return results['pitch_yinfft']
def compute_bark(audio):
    """Return the ``BarkBands`` output for every frame of ``audio``.

    Each frame is Hann-windowed before the spectrum is taken. Framing uses
    the ``frame_size`` / ``hop_size`` values defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    bark_bands = BarkBands()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        results.add('bark', bark_bands(magnitudes))
    return results['bark']
def compute_spectral_flatness(audio):
    """Return the per-frame ``Flatness`` of the magnitude spectrum.

    Each frame is Hann-windowed before the spectrum is taken. Framing uses
    the ``frame_size`` / ``hop_size`` values defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    spectral_flatness = Flatness()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        results.add('spectral_flatness', spectral_flatness(magnitudes))
    return results['spectral_flatness']
def compute_mfcc(audio):
    """Return the MFCC coefficients of every frame of ``audio``.

    ``MFCC`` yields two outputs per frame; only the second (the
    coefficients) is stored. Framing uses the ``frame_size`` / ``hop_size``
    values defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    extractor = MFCC()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        _bands, coefficients = extractor(magnitudes)
        results.add('mfcc', coefficients)
    return results['mfcc']
def _analyse(self, filepath):
    """Compute frame-wise onset-related features for a WAV file.

    The file is read with ``wavread``, mixed down with ``to_mono`` and cast
    to float32. For every frame (``self.frame_size`` / ``self.hop_size``)
    the pool receives:

    * ``'hfc'`` and ``'complex'``: onset detection functions,
    * ``'rms'``: RMS of the un-windowed frame,
    * ``'flux'``: flux of the magnitude spectrum.

    Args:
        filepath: path of the WAV file.

    Returns:
        tuple: ``(pool, audio)`` -- the filled pool and the float32 signal.
    """
    audio = to_mono(wavread(filepath)[0]).astype('float32')

    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into (magnitude, phase)
    hfc_detect = OnsetDetection(method='hfc')
    complex_detect = OnsetDetection(method='complex')
    rms_detect = RMS()
    spec = Spectrum()
    flux = Flux()
    pool = Pool()

    print('Computing onset detection functions...')
    for frame in FrameGenerator(audio, frameSize=self.frame_size,
                                hopSize=self.hop_size):
        # Window once and feed both the complex FFT and the magnitude
        # spectrum from it (the frame used to be windowed twice).
        windowed = w(frame)
        mag, phase = c2p(fft(windowed))
        f = flux(spec(windowed))

        pool.add('hfc', hfc_detect(mag, phase))
        pool.add('complex', complex_detect(mag, phase))
        pool.add('rms', rms_detect(frame))
        pool.add('flux', f)

    return pool, audio
def compute_spectral_shape(audio):
    """Return per-frame spread, skewness and kurtosis of the spectrum.

    Each Hann-windowed frame goes through ``Spectrum``, ``CentralMoments``
    and ``DistributionShape``. Framing uses the ``frame_size`` /
    ``hop_size`` values defined outside this function.

    Returns:
        tuple: (spectral_spread, spectral_skewness, spectral_kurtosis).
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    central_moments = CentralMoments()
    shape = DistributionShape()
    frames = FrameGenerator(audio,
                            frameSize=frame_size,
                            hopSize=hop_size,
                            startFromZero=True)
    for chunk in frames:
        moments = central_moments(to_spectrum(hann(chunk)))
        spread, skewness, kurtosis = shape(moments)
        results.add('spectral_spread', spread)
        results.add('spectral_skewness', skewness)
        results.add('spectral_kurtosis', kurtosis)
    return (results['spectral_spread'],
            results['spectral_skewness'],
            results['spectral_kurtosis'])
def __mfccs__(audio):
    """Collect MFCC bands and coefficients for every frame of ``audio``.

    Frames of 1024 samples with a hop of 512 are Hann-windowed and turned
    into a magnitude spectrum. The first MFCC coefficient of each frame is
    dropped before storing.

    Returns:
        Pool with ``'mfcc'`` (coefficients 1..) and ``'mfcc_bands'``.
    """
    # Spectrum gives the magnitude spectrum (FFT would give the complex one).
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    extractor = MFCC()
    collected = Pool()

    # NOTE (translated from the original): one second of a wav file is
    # roughly 90 frames, and the intensity is given in Hz.
    frames = FrameGenerator(audio,
                            frameSize=1024,
                            hopSize=512,
                            startFromZero=True)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        bands, coefficients = extractor(magnitudes)
        collected.add('mfcc', coefficients[1:])
        collected.add('mfcc_bands', bands)
    return collected
def compute_hpcp(audio):
    """Return the ``HPCP`` vector of every frame of ``audio``.

    Spectral peaks (up to 60, between 20 and 3500 Hz, ordered by magnitude)
    are extracted from each Hann-windowed frame and passed to ``HPCP``.
    Framing uses the ``tonal_frame_size`` / ``tonal_hop_size`` values
    defined outside this function.
    """
    results = Pool()
    hann = Windowing(type='hann')
    to_spectrum = Spectrum()
    find_peaks = SpectralPeaks(orderBy='magnitude',
                               magnitudeThreshold=0.00001,
                               minFrequency=20,
                               maxFrequency=3500,
                               maxPeaks=60)
    chroma = HPCP()
    frames = FrameGenerator(audio,
                            frameSize=tonal_frame_size,
                            hopSize=tonal_hop_size,
                            startFromZero=True)
    for chunk in frames:
        peak_outputs = find_peaks(to_spectrum(hann(chunk)))
        results.add('hpcp', chroma(*peak_outputs))
    return results['hpcp']
def _extract_pitch_contours(self, audio):
    """Track pitch contours in ``audio`` from its pitch salience peaks.

    Each frame is windowed (zero-padded to four times ``self.frame_size``),
    turned into a spectrum, reduced to spectral peaks, mapped to a pitch
    salience function and finally to salience peaks. The pooled per-frame
    peaks are then handed to ``PitchContours``.

    Args:
        audio: signal to analyse.

    Returns:
        tuple: (contours_bins, contours_start_times, contour_saliences,
        duration) as produced by ``PitchContours``.
    """
    bins_key = 'allframes_salience_peaks_bins'
    saliences_key = 'allframes_salience_peaks_contourSaliences'

    # Windowing with x4 zero padding (3 * frame_size zeros appended).
    windowing = estd.Windowing(zeroPadding=3 * self.frame_size)
    to_spectrum = estd.Spectrum(size=self.frame_size * 4)
    find_peaks = estd.SpectralPeaks(
        minFrequency=self.min_frequency,
        maxFrequency=self.max_frequency,
        magnitudeThreshold=self.magnitude_threshold,
        sampleRate=self.sample_rate,
        orderBy='magnitude')
    # Convert unit to cents; PitchSalienceFunction takes 55 Hz as the
    # default reference.
    salience_function = estd.PitchSalienceFunction(
        binResolution=self.bin_resolution)
    salience_peaks = estd.PitchSalienceFunctionPeaks(
        binResolution=self.bin_resolution,
        minFrequency=self.min_frequency,
        maxFrequency=self.max_frequency)
    track_contours = estd.PitchContours(
        hopSize=self.hop_size,
        binResolution=self.bin_resolution,
        peakDistributionThreshold=self.peak_distribution_threshold)

    # Frame-by-frame analysis.
    per_frame = Pool()
    frames = estd.FrameGenerator(audio,
                                 frameSize=self.frame_size,
                                 hopSize=self.hop_size)
    for chunk in frames:
        magnitudes_spectrum = to_spectrum(windowing(chunk))
        peak_frequencies, peak_magnitudes = find_peaks(magnitudes_spectrum)
        salience = salience_function(peak_frequencies, peak_magnitudes)
        peak_bins, peak_saliences = salience_peaks(salience)

        # Frames without peaks still contribute a single zero entry.
        if not np.size(peak_bins):
            peak_bins = np.array([0])
        if not np.size(peak_saliences):
            peak_saliences = np.array([0])

        per_frame.add(bins_key, peak_bins)
        per_frame.add(saliences_key, peak_saliences)

    # Post-processing: contour tracking.
    tracked = track_contours(per_frame[bins_key], per_frame[saliences_key])
    contours_bins, contour_saliences, contours_start_times, duration = tracked

    return contours_bins, contours_start_times, contour_saliences, duration
def computeOnsets(inFile, outFile):
    """Detect onsets in ``inFile`` and write them to ``outFile`` as text.

    The signal is loaded with ``MonoLoader``, analysed with
    ``OnsetDetectionGlobal`` and the resulting detection function is passed
    to ``Onsets``. The onset values are saved with ``np.savetxt`` using the
    ``'%f'`` format (one value per line).

    Args:
        inFile: path of the audio file to analyse.
        outFile: destination accepted by ``np.savetxt``.
    """
    print('Loading audio file...')
    signal = MonoLoader(filename=inFile)()

    results = Pool()

    detection_function = OnsetDetectionGlobal()(signal)
    results.add('features.onsetDetections', detection_function)

    # Onsets expects a matrix of detection functions and one weight per row.
    locate = Onsets()
    onset_times = locate(array([detection_function]), [1])
    results.add('features.onsets', onset_times)

    stored_onsets = results['features.onsets'][0]
    np.savetxt(outFile, stored_onsets, fmt='%f')
def extractMFCCs(audio):
    '''
    Extract MFCCs from the frames of an audio signal.

    Frame and hop sizes are derived from ``frameSize_block`` and
    ``hopSize_block`` (defined outside this function) multiplied by a
    hard-coded rate of 44100. The MFCC settings ``num_mfccs``,
    ``numberBands`` and ``highFrequencyBound`` are also read from outside
    this function.

    Params:
    audio - sequence of samples; converted with essentia.array

    Returns:
    the MFCC coefficients stored in the pool, one entry per frame
    '''
    # NOTE(review): 44100 is hard-coded; presumably the signal is expected
    # at that rate -- confirm against the caller.
    frameSizeInSamples = int(round(44100 * frameSize_block))
    hopSizeInSamples = int(round(44100 * hopSize_block))

    # Floor division keeps inputSize an integer under true division too.
    inputSpectrumSize = frameSizeInSamples // 2 + 1

    # maybe set highFrequencyBound=22100
    mfcc = MFCC(numberCoefficients=num_mfccs,
                numberBands=numberBands,
                highFrequencyBound=highFrequencyBound,
                inputSize=inputSpectrumSize)
    w = Windowing(type='hann')
    spectrum = Spectrum()

    pool = Pool()
    audio = essentia.array(audio)
    for frame in FrameGenerator(audio,
                                frameSize=frameSizeInSamples,
                                hopSize=hopSizeInSamples):
        _mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        pool.add('mfcc', mfcc_coeffs)

    return pool['mfcc']
def SliceDrums_BeatDetection(folder, audio_filename, fs):
    """Slice an audio file at its HFC onsets and write one file per slice.

    The file ``folder + audio_filename`` is loaded at ``fs``, peak
    normalised, analysed with the 'hfc' onset detection function and cut at
    each onset (minus a 1000-sample pre-roll). Every slice is written to
    ``<folder>slices/<digits><index>__blind.wav``, where ``<digits>`` is the
    number of digits of the index. A waveform plot with the onsets marked is
    drawn on a new matplotlib figure (it is neither shown nor saved here).

    Args:
        folder: directory prefix; concatenated as-is with the file name.
        audio_filename: name of the audio file inside ``folder``.
        fs: sample rate used for loading and for time/sample conversion.

    Returns:
        tuple: ``(onsets_list, duration)`` -- the detected onsets and the
        length of the signal divided by ``fs``.
    """
    pre_roll = 1000  # samples kept before each detected onset

    od_hfc = OnsetDetection(method='hfc')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into (magnitude, phase)
    onsets = Onsets()

    x = MonoLoader(filename=folder + audio_filename, sampleRate=fs)()
    duration = float(len(x)) / fs

    # Peak-normalise, leaving an all-zero signal untouched instead of
    # dividing by zero.
    peak = np.max(np.abs(x))
    if peak > 0:
        x = x / peak
    t = np.arange(len(x)) / float(fs)

    # Plotting
    f, axarr = plt.subplots(1, 1, figsize=(80, 20))

    # Essentia onset detection
    pool = Pool()
    for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(w(frame)))
        pool.add('features.hfc', od_hfc(mag, phase))
    onsets_list = onsets(array([pool['features.hfc']]), [1])

    axarr.vlines(onsets_list, -1, 1, color='k', zorder=2, linewidth=5.0)
    axarr.plot(t, x, zorder=1)
    axarr.axis('off')

    # Clamp at 0: an onset earlier than the pre-roll used to produce a
    # negative index, which slices from the END of the signal.
    starts = [max(0, int(onset * fs) - pre_roll) for onset in onsets_list]

    # NOTE(review): MonoWriter is created without a sampleRate argument, so
    # slices are presumably written at its default rate rather than at
    # `fs` -- confirm.
    for i, start in enumerate(starts):
        samplename = "{}slices/{}{}__blind.wav".format(
            folder, str(len(str(i))), str(i))
        if i + 1 < len(starts):
            end = starts[i + 1]
        else:
            end = len(x)
        MonoWriter(filename=samplename)(x[start:end])

    return onsets_list, duration
def get_cat_audio_pitch():
    """Load 'cat-01.wav' and return it with its mean pitch as a MIDI value.

    The file is loaded at 44100 Hz; pitch is estimated per frame (1024
    samples, hop 512) with ``PitchYinFFT`` on the Hann-windowed spectrum,
    averaged over all frames and converted with
    ``mir_eval.multipitch.frequencies_to_midi``.

    Returns:
        tuple: ``(cat_audio, midi_value)``.
    """
    to_spectrum = Spectrum()
    estimator = PitchYinFFT(frameSize=1024)
    pitches = Pool()
    hann = Windowing(type='hann')

    cat_audio = MonoLoader(filename='cat-01.wav', sampleRate=44100)()
    # The loudness is computed but not used further in this function.
    cat_audio_loudness = Loudness()(cat_audio)

    frames = FrameGenerator(cat_audio, frameSize=1024, hopSize=512)
    for chunk in frames:
        magnitudes = to_spectrum(hann(chunk))
        frame_pitch, _confidence = estimator(magnitudes)
        pitches.add('cat_pitch', frame_pitch)

    mean_pitch = numpy.mean(pitches['cat_pitch'])
    midi_values = mir_eval.multipitch.frequencies_to_midi([mean_pitch])
    return cat_audio, midi_values[0]
def harmonic_magnitudes_to_audio(hfreqs, magns, phases, options):
    '''
    Synthesise the audio of a contour from its harmonic partials.

    Every frame is passed through ``harmonics_to_audio`` to collect its
    spectrum; the returned audio is synthesised in one call to
    ``SM.sineModelSynth`` over all frames.

    Params:
    hfreqs - harmonic frequencies of the contour, one entry per frame
    magns - harmonic magnitudes of the contour, one entry per frame
    phases - harmonic phases of the contour, one entry per frame
    options - object providing ``Fs`` and ``windowsizeInSamples``

    return:
    out_audio_contour - audio of the harmonics for the contour
    spectrogram - the per-frame spectra collected in the pool
    '''
    pool = Pool()
    run_sine_model_synth = SineModelSynth(hopSize=512, sampleRate=options.Fs)
    run_ifft = IFFT(size=options.windowsizeInSamples)
    run_overl = OverlapAdd(frameSize=options.windowsizeInSamples,
                           hopSize=512,
                           gain=1. / options.windowsizeInSamples)

    for hfreq, hmag, hphase in zip(hfreqs, magns, phases):
        # Only the spectrum is kept. The per-frame audio used to be
        # appended to an array that was overwritten below, so that
        # accumulation was dead work and has been dropped.
        spectrum, _audio_frame = harmonics_to_audio(
            hfreq, hmag, hphase, run_sine_model_synth, run_ifft, run_overl)
        pool.add('spectrum', spectrum)

    # NOTE(review): 512 / 128 / 44100 are hard-coded here while the
    # Essentia algorithms above use options.Fs -- confirm they agree.
    out_audio_contour = SM.sineModelSynth(hfreqs, magns, phases,
                                          512, 128, 44100)

    return out_audio_contour, pool['spectrum']
def spectrogram(audio, audio_file, save_fig=True, save_fig_path=None):
    """Compute a trimmed magnitude spectrogram for a ``.wav`` file.

    Frames of 1024 samples (hop 512) are Hann-windowed and turned into
    magnitude spectra. The transposed spectrogram is passed through
    ``remove_initial_zeros`` from both ends, optionally plotted with
    ``save_plots``, and its first 200 x 200 block is returned.

    Args:
        audio: signal to analyse.
        audio_file: file name; only names ending in ``'.wav'`` are
            processed. It is also used to derive the default plot path.
        save_fig: when true, the spectrogram is passed to ``save_plots``.
        save_fig_path: destination of the plot; defaults to ``audio_file``
            with ``'.wav'`` replaced by ``'_spec.jpg'``.

    Returns:
        list: nested list with at most 200 x 200 values, or ``None`` when
        ``audio_file`` does not end in ``'.wav'``.
    """
    if not audio_file.endswith('.wav'):
        return None

    # Spectrum gives the magnitude spectrum (FFT would give the complex one).
    w = Windowing(type='hann')
    spectrum = Spectrum()
    pool = Pool()

    # NOTE (translated from the original): one second of a wav file is
    # roughly 90 frames, and the intensity is given in Hz.
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512,
                                startFromZero=True):
        pool.add('spec', spectrum(w(frame)))

    # A mean-spectrum aggregate used to be computed here but its result was
    # never used (it only fed a thresholding step that is disabled), so it
    # has been removed.
    b = np.array(pool['spec'].T)

    # Trim from the front, then reverse to trim from the back the same way.
    b = remove_initial_zeros(b)
    b = b.tolist()
    b.reverse()
    b = remove_initial_zeros(b)
    b.reverse()
    b = np.array(b)

    if save_fig:
        if not save_fig_path:
            save_fig_path = audio_file.replace('.wav', '_spec.jpg')
        save_plots(b, save_fig_path)

    return b[:200, :200].tolist()
def deteccoes(arquivo_audio):
    """Return the 'complex' onset detections of an audio file, in frames.

    Args:
        arquivo_audio: path of the audio file. The argument used to be
            ignored in favour of ``sys.argv[1]``; it is now honoured, and
            ``sys.argv[1]`` is only a fallback when it is empty or ``None``.

    Returns:
        list of int: non-negative onset times converted with
        ``SecToFrames`` and truncated to ``int``.

    Exits the process (``sys.exit``) after printing a usage line when no
    file name is available from either source.
    """
    filename = arquivo_audio
    if not filename:
        try:
            filename = sys.argv[1]
        except IndexError:
            print("usage: %s <audiofile>" % sys.argv[0])
            sys.exit()

    audio = MonoLoader(filename=filename)()

    # Phase 1: compute the onset detection function. Only the 'complex'
    # method is computed: the 'hfc' detections were calculated before but
    # never returned or used.
    od_complex = OnsetDetection(method='complex')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into (magnitude, phase)
    pool = Pool()

    print('Computing onset detection functions...')
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(w(frame)))
        pool.add('features.complex', od_complex(mag, phase))

    # Phase 2: compute the actual onset locations.
    onsets = Onsets()
    print('Computing onset times...')
    # Onsets expects a matrix, not a vector, plus one weight per row; with
    # a single detection function the weight value does not matter.
    onsets_complex = onsets(array([pool['features.complex']]), [1])

    # NOTE(review): no audio is written by this function; the message is
    # kept only so the console output stays unchanged.
    print('Writing audio files to disk with onsets marked...')

    # Convert seconds to frames, dropping negative times.
    return [int(SecToFrames(x)) for x in onsets_complex.tolist() if x >= 0]
def hpcpgram(audio,
             sampleRate=44100,
             frameSize=4096,
             hopSize=2048,
             numBins=12,
             windowType='blackmanharris62',
             minFrequency=100,
             maxFrequency=4000,
             whitening=False,
             maxPeaks=100,
             magnitudeThreshold=1e-05,
             **kwargs):
    """
    Compute Harmonic Pitch Class Profile (HPCP) vectors for the overlapping
    frames of an audio signal.

    For the additional parameters of the Essentia standard-mode HPCP see
    http://essentia.upf.edu/documentation/reference/std_HPCP.html

    References:
        [1]. Gomez, E. (2006). Tonal Description of Polyphonic Audio for
        Music Content Processing.

    Inputs
        audio : audio signal; converted with ``array`` before framing

    Parameters:
        sampleRate (default = 44100) : sampling rate of the audio signal [Hz]
        frameSize (default = 4096) : the frame size
        hopSize (default = 2048) : the hop size between frames
        numBins (default = 12) : size of the output HPCP, passed to HPCP
            as ``size``
        windowType (default = 'blackmanharris62') : the window type passed
            to Windowing
        minFrequency (default = 100) : minimum frequency used by
            SpectralPeaks and HPCP [Hz]
        maxFrequency (default = 4000) : maximum frequency used by
            SpectralPeaks, SpectralWhitening and HPCP [Hz]
        whitening (default = False) : when True, the peak magnitudes are
            replaced by the output of SpectralWhitening before HPCP
        maxPeaks (default = 100) : maximum number of peaks returned by
            SpectralPeaks
        magnitudeThreshold (default = 1e-05) : magnitude threshold passed
            to SpectralPeaks
        kwargs : additional keyword arguments forwarded to the HPCP
            algorithm

    Returns: the HPCP vectors of all frames, as stored in the pool under
        'tonal.hpcp'
    """
    frames = es.FrameGenerator(array(audio),
                               frameSize=frameSize,
                               hopSize=hopSize)
    apply_window = es.Windowing(type=windowType)
    to_spectrum = es.Spectrum()
    # http://essentia.upf.edu/documentation/reference/std_SpectralPeaks.html
    find_peaks = es.SpectralPeaks(magnitudeThreshold=magnitudeThreshold,
                                  maxFrequency=maxFrequency,
                                  minFrequency=minFrequency,
                                  maxPeaks=maxPeaks,
                                  sampleRate=sampleRate)
    # http://essentia.upf.edu/documentation/reference/std_SpectralWhitening.html
    whiten = es.SpectralWhitening(maxFrequency=maxFrequency,
                                  sampleRate=sampleRate)
    # http://essentia.upf.edu/documentation/reference/std_HPCP.html
    chroma = es.HPCP(sampleRate=sampleRate,
                     maxFrequency=maxFrequency,
                     minFrequency=minFrequency,
                     size=numBins,
                     **kwargs)

    results = Pool()
    # One HPCP vector per frame.
    for chunk in frames:
        magnitude_spectrum = to_spectrum(apply_window(chunk))
        peak_freqs, peak_mags = find_peaks(magnitude_spectrum)
        if whitening:
            peak_mags = whiten(magnitude_spectrum, peak_freqs, peak_mags)
        results.add('tonal.hpcp', chroma(peak_freqs, peak_mags))

    return results['tonal.hpcp']
# Script fragment: computes the 'hfc' and 'complex' onset detection
# functions frame by frame from `audio` (defined outside this fragment)
# and derives the onset times of the 'hfc' function.
od1 = OnsetDetection(method='hfc')
od2 = OnsetDetection(method='complex')
# let's also get the other algorithms we will need, and a pool to store the results
w = Windowing(type='hann')
fft = FFT()  # this gives us a complex FFT
c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
pool = Pool()
# let's get down to business
print 'Computing onset detection functions...'
for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
    mag, phase, = c2p(fft(w(frame)))
    pool.add('features.hfc', od1(mag, phase))
    pool.add('features.complex', od2(mag, phase))
# Phase 2: compute the actual onsets locations
onsets = Onsets()
print 'Computing onset times...'
onsets_hfc = onsets(  # this algo expects a matrix, not a vector
    array([pool['features.hfc']]),
    # you need to specify weights, but as there is only a single
    # function, it doesn't actually matter which weight you give it
    [1])
# The 'complex' detection function is pooled above, but its onset
# computation is disabled:
#onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])
print 'Onsets-hfc'
def extractFeatures(arffDir = '.', dirname = '.', fnames = '', segment_length = 'WHOLE', hopsize = 0):
    """Extract segment-wise audio features and write them as ``.sig`` files.

    For every ``.wav`` / ``.mp3`` name in ``fnames`` a directory
    ``<arffDir>/<track name without its last 4 characters>/`` is created
    (files whose directory already exists are skipped). The audio is loaded
    from ``<dirname>/<fname>``, cut into segments and, for each segment, a
    pool of frame-wise descriptors is aggregated (mean, var, skew, kurt)
    and written with ``es.YamlOutput``.

    Args:
        arffDir: root directory for the output.
        dirname: directory containing the audio files.
        fnames: iterable of file names.
        segment_length: ``'WHOLE'`` to process each file as one segment,
            otherwise the segment length (converted with ``float``;
            presumably seconds -- confirm).
        hopsize: step between segment starts; ``0`` means "use
            ``segment_length``".
    """
    # Start to process file by file from input
    for fname in fnames:
        # It only process wav or mp3 file
        if ".wav" not in fname.lower() and ".mp3" not in fname.lower():
            continue
        # Generate output dir
        trackName = fname.split('/')[-1]
        segmentArffDir = arffDir+"/"+trackName[:-4]+"/"
        if not exists(segmentArffDir):
            mkdir(segmentArffDir)
        else:
            print fname + ' exsits, pass...'
            continue
        # Read audio and some more info
        loader = es.EasyLoader(filename = dirname+"/"+fname)
        audio = loader.compute()
        sampleRate = loader.paramValue('sampleRate')
        # Whole number of seconds, with a minimum of 1
        length = int(len(audio)/sampleRate)
        if length == 0:
            length = 1
        print fname + ' length: ' + str(length)
        # NOTE(review): hopsize and segment_length are reassigned below and
        # keep their new values for the following files in fnames --
        # confirm this carry-over is intended.
        if hopsize == 0:
            hopsize = segment_length
        # Specify the length of the segment
        if segment_length == 'WHOLE':
            step = length
            end_time = length
            segment_length = length
            print 'The whole audio is being processed...'
        else:
            step = hopsize
            segment_length = float(segment_length)
        if step>length:
            continue
        # Start computing segment by segment
        for start_time in arange(0, length, step):
            end_time = start_time + segment_length
            if step != length:
                print 'the time from second ' + str(start_time) + ' is being processed...'
            # Stop at the first segment that would run past the audio
            if end_time > length:
                break;
            # NOTE(review): the slice bounds are products with sampleRate and
            # may be non-integer -- confirm they are valid indices here.
            segAudio = audio[start_time*sampleRate:end_time*sampleRate]
            pool = Pool()
            # Setup parameters
            specContrast = es.SpectralContrast(frameSize=2048, lowFrequencyBound=40, sampleRate=sampleRate)
            spectrum = es.Spectrum(size=2048) #size is frameSize
            mfcc = es.MFCC(lowFrequencyBound=40, sampleRate=sampleRate) # MFCC
            # HPCP is only set up (and used below) when step > 20
            if step > 20:
                hpcp = es.HPCP(size = 12, referenceFrequency = 440, harmonics=8, bandPreset = True, minFrequency = 40.0, maxFrequency = 5000.0, \
                               splitFrequency = 500.0, weightType = 'cosine', nonLinear = False, windowSize = 1);# HPCP
            lowLevelSpectralExtractor = \
                es.LowLevelSpectralExtractor(frameSize=2048, hopSize=1024, sampleRate=sampleRate)
            spectralPeaks = es.SpectralPeaks(sampleRate=sampleRate, minFrequency=40, maxFrequency=11000, maxPeaks=50, magnitudeThreshold=0.2)
            # Low level spectral feature analysis
            # NOTE(review): the bare except hides the reason of the failure.
            try:
                features = lowLevelSpectralExtractor(segAudio)
            except:
                print start_time, "has failed!"
                continue
            # Harmonic spectral features (TODO: Is the magnitude threshold ok?)
            harmonicPeaks = es.HarmonicPeaks()
            pitch = es.PitchDetection() # Using YIN instead of predominant pitch analysis as this frame-based analysis
            # Windowing
            window = es.Windowing(size=2048)
            for frame in es.FrameGenerator(segAudio, frameSize=2048, hopSize=1024):
                # spectral contrast
                s = spectrum(window(frame))
                contrast, valley = specContrast(s)
                pool.add('spectral_contrast', contrast)
                pool.add('spectral_valley', valley)
                # MFCC (the first coefficient is dropped)
                bands, mfccs = mfcc(s)
                pool.add('mfcc', mfccs[1:])
                freqs, mags = spectralPeaks(s)
                # HPCP
                if step > 20:
                    hpcps = hpcp(freqs, mags)
                    pool.add('HPCP', hpcps)
                # Self-compute spectral features
                if len(freqs) > 0:
                    p, conf = pitch(s)
                    # Drop a leading peak at 0 Hz before the harmonic analysis
                    if freqs[0] == 0:
                        freqs = freqs[1:]
                        mags = mags[1:]
                    freqs, mags = harmonicPeaks(freqs, mags, p)
                    # NOTE(review): when harmonicPeaks returns no peaks, none
                    # of the branches below runs and specEnvelope_i is left
                    # unset -- confirm this cannot happen.
                    _sum = 0
                    if len(freqs) == 1:
                        specEnvelope_i = [freqs[0]] #for hsd
                        _sum = freqs[0]*mags[0]
                    elif len(freqs) == 2:
                        specEnvelope_i = [(freqs[0]+freqs[1])/2.0] #for hsd
                        _sum = freqs[0]*mags[0]+freqs[1]*mags[1]
                    elif len(freqs) > 2:
                        specEnvelope_i = [(freqs[0]+freqs[1])/2.0] #for hsd
                        _sum = freqs[0]*mags[0]
                        for i in xrange(1, len(freqs)-1):
                            _sum += freqs[i]*mags[i] #for hsc_i
                            specEnvelope_i.append((freqs[i-1]+freqs[i]+freqs[i+1])/3.0)
                        # i still holds the last loop index here
                        specEnvelope_i.append((freqs[i]+freqs[i+1])/2.0)
                        _sum += freqs[i+1]*mags[i+1]
                    # Harmonic spectral centroid / deviation / spread
                    hsc_i = _sum/sum(mags)
                    pool.add('harmonic_spectral_centroid', hsc_i)
                    hsd_i = sum(abs(log10(mags)-log10(specEnvelope_i)))/sum(log10(mags))
                    pool.add('harmonic_spectral_deviation', hsd_i)
                    hss_i = sqrt(sum(square(freqs-hsc_i)*square(mags))/sum(square(mags)))/hsc_i
                    pool.add('harmonic_spectral_spread', hss_i)
                else:
                    # No spectral peaks in this frame: store zeros
                    pool.add('harmonic_spectral_centroid', 0)
                    pool.add('harmonic_spectral_deviation', 0)
                    pool.add('harmonic_spectral_spread', 0)
            # Copy selected outputs of the low-level extractor into the pool,
            # entry by entry, under the names given here
            for i in xrange(0, len(features[0])):
                # pool.add('barkbands', features[0][i])
                pool.add('hfc', features[4][i])
                pool.add('pitch', features[6][i])
                pool.add('pitch_instantaneous_confidence', features[7][i])
                pool.add('pitch_salience', features[8][i])
                pool.add('silence_rate_20dB', features[9][i])
                # pool.add('silence_rate_30dB', features[10][i])
                # pool.add('silence_rate_60dB', features[11][i])
                pool.add('spectral_complexity', features[12][i])
                pool.add('spectral_crest', features[13][i])
                pool.add('spectral_decrease', features[14][i])
                pool.add('spectral_energy', features[15][i])
                # pool.add('spectral_energyband_low', features[16][i])
                # pool.add('spectral_energyband_middle_low', features[17][i])
                # pool.add('spectral_energyband_middle_high', features[18][i])
                # pool.add('spectral_energy_high', features[19][i])
                pool.add('spectral_flatness_db', features[20][i])
                pool.add('spectral_flux', features[21][i])
                pool.add('spectral_rms', features[22][i])
                pool.add('spectral_rolloff', features[23][i])
                pool.add('spectral_strongpeak', features[24][i])
                pool.add('zero_crossing_rate', features[25][i])
                pool.add('inharmonicity', features[26][i])
                pool.add('tristimulus', features[27][i])
            onsetRate = es.OnsetRate()
            onsets, rate = onsetRate(segAudio)
            # Aggregate the frame-wise descriptors into segment statistics
            try:
                aggrPool = es.PoolAggregator(defaultStats = ['mean', 'var', 'skew', 'kurt'])(pool)
            except:
                print start_time/step, "failed"
                continue
            aggrPool.add('onset_rate', rate)
            #print start_time, segment_length, start_time/segment_length
            # One output file per segment, numbered by start_time/step
            fileout = segmentArffDir+trackName[:-4]+"_%003d%s"%(start_time/step, ".sig")
            output = es.YamlOutput(filename = fileout)
            output(aggrPool)
# Script fragment: computes the "hfc" and "complex" onset detection
# functions frame by frame from `audio` (defined outside this fragment)
# and derives the onset times of both.
od1 = OnsetDetection(method="hfc")
od2 = OnsetDetection(method="complex")
# let's also get the other algorithms we will need, and a pool to store the results
w = Windowing(type="hann")
fft = FFT()  # this gives us a complex FFT
c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
pool = Pool()
# let's get down to business
print "Computing onset detection functions..."
for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
    mag, phase, = c2p(fft(w(frame)))
    pool.add("features.hfc", od1(mag, phase))
    pool.add("features.complex", od2(mag, phase))
# Phase 2: compute the actual onsets locations
onsets = Onsets()
print "Computing onset times..."
onsets_hfc = onsets(  # this algo expects a matrix, not a vector
    array([pool["features.hfc"]]),
    # you need to specify weights, but as there is only a single
    # function, it doesn't actually matter which weight you give it
    [1], )
onsets_complex = onsets(array([pool["features.complex"]]), [1])
# Script fragment: computes the 'hfc' and 'complex' onset detection
# functions frame by frame from `audio` (defined outside this fragment)
# and derives the onset times of both.
od1 = OnsetDetection(method = 'hfc')
od2 = OnsetDetection(method = 'complex')
# let's also get the other algorithms we will need, and a pool to store the results
w = Windowing(type = 'hann')
fft = FFT() # this gives us a complex FFT
c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)
pool = Pool()
# let's get down to business
print 'Computing onset detection functions...'
for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
    mag, phase, = c2p(fft(w(frame)))
    pool.add('features.hfc', od1(mag, phase))
    pool.add('features.complex', od2(mag, phase))
# Phase 2: compute the actual onsets locations
onsets = Onsets()
print 'Computing onset times...'
onsets_hfc = onsets(# this algo expects a matrix, not a vector
    array([ pool['features.hfc'] ]),
    # you need to specify weights, but as there is only a single
    # function, it doesn't actually matter which weight you give it
    [ 1 ])
onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])
def chroma_hpcp(self,
                frameSize=4096,
                hopSize=2048,
                windowType='blackmanharris62',
                harmonicsPerPeak=8,
                magnitudeThreshold=0,
                maxPeaks=1000,
                whitening=True,
                referenceFrequency=440,
                minFrequency=40,
                maxFrequency=5000,
                nonLinear=False,
                numBins=12,
                display=False):
    '''
    Compute Harmonic Pitch Class Profiles (HPCP) of ``self.audio_vector``
    frame by frame with the essentia standard-mode algorithms.

    Please refer to the following work for a detailed explanation of the
    algorithm:
    [1]. Gómez, E. (2006). Tonal Description of Polyphonic Audio for Music
         Content Processing.

    For the full list of HPCP parameters see
    http://essentia.upf.edu/documentation/reference/std_HPCP.html

    Parameters
        frameSize : (default = 4096) frame length passed to FrameGenerator
        hopSize : (default = 2048) hop length passed to FrameGenerator
        windowType : (default = 'blackmanharris62') window type passed to
            Windowing
        harmonicsPerPeak : (integer, default = 8) number of harmonics for
            frequency contribution, 0 indicates exclusive fundamental
            frequency contribution
        magnitudeThreshold : (default = 0) magnitude threshold passed to
            SpectralPeaks. The default matches the value that was previously
            hard-coded, so default calls give unchanged results.
        maxPeaks : (default = 1000) maximum number of spectral peaks per frame
        whitening : (boolean, default = True) whether to apply spectral
            whitening to the peak magnitudes before computing the HPCP
        referenceFrequency : (default = 440) the reference frequency for
            semitone index calculation, corresponding to A3 [Hz]
        minFrequency : (default = 40) the minimum frequency that contributes
            to the HPCP [Hz]
        maxFrequency : (default = 5000) the maximum frequency that contributes
            to the HPCP [Hz]
        nonLinear : (boolean, default = False) apply non-linear
            post-processing to the output
        numBins : (integer, default = 12) the size of the output HPCP (must be
            a positive nonzero multiple of 12)
        display : (boolean, default = False) if True, pass the transposed
            HPCP matrix to ``display_chroma``

    The sampling rate handed to every algorithm is ``self.fs``.

    Returns
        The values accumulated under 'tonal.hpcp', one HPCP vector per frame.
    '''
    audio = array(self.audio_vector)

    frameGenerator = estd.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
    window = estd.Windowing(type=windowType)
    spectrum = estd.Spectrum()

    # Refer http://essentia.upf.edu/documentation/reference/std_SpectralPeaks.html
    # The threshold is taken from the argument instead of being fixed at 0.
    spectralPeaks = estd.SpectralPeaks(magnitudeThreshold=magnitudeThreshold,
                                       maxFrequency=maxFrequency,
                                       minFrequency=minFrequency,
                                       maxPeaks=maxPeaks,
                                       orderBy="frequency",
                                       sampleRate=self.fs)

    # http://essentia.upf.edu/documentation/reference/std_SpectralWhitening.html
    spectralWhitening = estd.SpectralWhitening(maxFrequency=maxFrequency,
                                               sampleRate=self.fs)

    # http://essentia.upf.edu/documentation/reference/std_HPCP.html
    hpcp = estd.HPCP(sampleRate=self.fs,
                     maxFrequency=maxFrequency,
                     minFrequency=minFrequency,
                     referenceFrequency=referenceFrequency,
                     nonLinear=nonLinear,
                     harmonics=harmonicsPerPeak,
                     size=numBins)

    pool = Pool()

    # Compute the HPCP of each frame and accumulate the results in the pool.
    for frame in frameGenerator:
        spectrum_mag = spectrum(window(frame))
        frequencies, magnitudes = spectralPeaks(spectrum_mag)
        if whitening:
            magnitudes = spectralWhitening(spectrum_mag, frequencies, magnitudes)
        pool.add('tonal.hpcp', hpcp(frequencies, magnitudes))

    if display:
        # Both axis arguments belong to swapaxes, not to display_chroma.
        display_chroma(np.swapaxes(pool['tonal.hpcp'], 0, 1))

    return pool['tonal.hpcp']
def sliceDrums_from_annotations_SDtrainset(instrument_name, segments_dir,
                                           song_dict, fs):
    """
    Slice the audio streams of one instrument according to its annotations and
    save every slice as an individual wav file under
    segments_dir/instrument_name/.

    Input:
        instrument_name: str with a key in the song_dict
        segments_dir   : str with the path where slices are saved
        song_dict      : dict whose entry for instrument_name holds the audio
                         streams ('audios') and the annotations ('annotations')
        fs             : sampling rate used to place the first onset and to
                         write the files

    For each audio stream the first detected onset (complex-domain onset
    detection) is used as an offset for the annotated positions. A slice runs
    from annotation i to annotation i + 1; the first and last annotations are
    never used as slice starts. Slices labelled 'N', shorter than 5000 samples
    or with a peak below 0.05 are skipped. Saved slices are peak-normalised and
    numbered after the count of files already present in the target folder.
    Audio streams in which no onset is detected are skipped with a message.

    Adapted to routines recorded in the studio.

    This function could be combined with the feature extraction, but having
    the slices saved allows data augmentation by combining individual samples
    to get more instances of all the combinations.
    """
    song = song_dict[instrument_name]
    annotations = song['annotations']
    n_notes = len(annotations)
    x_seg_dir = os.path.join(segments_dir, instrument_name)

    od_complex = OnsetDetection(method='complex')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into (magnitude, phase)
    onsets = Onsets()

    file_count = 0
    for audio in song['audios']:
        # Onset detection runs on a peak-normalised copy; the slices
        # themselves are cut from the untouched audio.
        x = audio / np.max(np.abs(audio))

        pool = Pool()
        for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(w(frame)))
            pool.add('features.complex', od_complex(mag, phase))
        onsets_list = onsets(array([pool['features.complex']]), [1])

        if len(onsets_list) == 0:
            # Without an onset there is no reference point for the
            # annotations; skip instead of failing on onsets_list[0].
            print(instrument_name + ": no onset detected, skipping audio")
            continue
        first_onset = int(onsets_list[0] * fs)
        print(first_onset)

        # makedirs also creates segments_dir (and its parents) when missing.
        if not os.path.exists(x_seg_dir):
            os.makedirs(x_seg_dir)

        # Annotation 0 and the last annotation are never used as slice starts.
        for i in range(1, n_notes - 1):
            if annotations[i][1] == 'N':
                continue

            start = annotations[i][2] - 3000 + first_onset
            end = annotations[i + 1][2] - 3000 + first_onset
            x_seg = audio[start:end]
            # The length test comes first so np.max never sees an empty slice.
            if len(x_seg) < 5000 or np.max(np.abs(x_seg)) < 0.05:
                continue
            x_seg = x_seg / np.max(np.abs(x_seg))

            # Number the new file after the files already in the folder.
            dir_n_files = len(next(os.walk(x_seg_dir))[2])
            filename = os.path.join(
                x_seg_dir, instrument_name + '_' + str(dir_n_files) + '.wav')
            ess.MonoWriter(filename=filename, format='wav', sampleRate=fs)(x_seg)
            file_count = file_count + 1

    print(instrument_name + ": " + str(file_count))
def run(self, fname):
    """Track pitch contours in the audio file ``fname``.

    The file is loaded with MonoLoader, filtered with EqualLoudness and then
    analysed frame by frame: windowing, spectrum, spectral peaks, pitch
    salience function and salience-function peaks. The per-frame salience
    peaks are collected in a pool and handed to PitchContours.

    Contour selection, cent-to-Hz conversion and serialisation are not
    performed here; the contour data is returned as is.

    Side effect: ``self.settings`` is updated in place with the version, slug,
    source, Essentia version, pitch unit and citation entries.

    Returns:
        (contours_bins, contours_contourSaliences, contours_start_times,
        duration) as produced by PitchContours.
    """
    citation = u""" Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X. (2014). Audio feature extraction for exploring Turkish makam music. In Proceedings of 3rd International Conference on Audio Technologies for Music and Media, Ankara, Turkey. """

    cfg = self.settings

    # Hann window with x4 zero padding; the spectrum size matches the padded
    # frame length.
    apply_window = Windowing(zeroPadding=3 * cfg.frameSize)
    to_spectrum = Spectrum(size=cfg.frameSize * 4)
    find_peaks = SpectralPeaks(minFrequency=cfg.minFrequency,
                               maxFrequency=cfg.maxFrequency,
                               maxPeaks=cfg.maxPeaks,
                               sampleRate=cfg.sampleRate,
                               magnitudeThreshold=cfg.magnitudeThreshold,
                               orderBy='magnitude')
    # Converts unit to cents; 55 Hz is taken as the default reference.
    salience_function = PitchSalienceFunction(binResolution=cfg.binResolution)
    salience_function_peaks = PitchSalienceFunctionPeaks(
        binResolution=cfg.binResolution)
    track_contours = PitchContours(
        hopSize=cfg.hopSize,
        binResolution=cfg.binResolution,
        peakDistributionThreshold=cfg.peakDistributionThreshold)
    pool = Pool()

    # MonoLoader resamples the audio signal to 44100 Hz by default.
    audio = EqualLoudness()(MonoLoader(filename=fname)())

    for frame in FrameGenerator(audio, frameSize=cfg.frameSize,
                                hopSize=cfg.hopSize):
        magnitude_spectrum = to_spectrum(apply_window(frame))
        peak_frequencies, peak_magnitudes = find_peaks(magnitude_spectrum)
        salience = salience_function(peak_frequencies, peak_magnitudes)
        peak_bins, peak_saliences = salience_function_peaks(salience)

        # Frames without salience peaks are stored as a single zero entry.
        if not size(peak_bins):
            peak_bins = array([0])
        if not size(peak_saliences):
            peak_saliences = array([0])

        pool.add('allframes_salience_peaks_bins', peak_bins)
        pool.add('allframes_salience_peaks_contourSaliences', peak_saliences)

    # Post-processing: contour tracking.
    contour_data = track_contours(
        pool['allframes_salience_peaks_bins'],
        pool['allframes_salience_peaks_contourSaliences'])
    contours_bins, contours_contourSaliences, contours_start_times, duration = contour_data

    # Record run metadata on the settings object.
    metadata = {'version': self.__version__,
                'slug': self.__slug__,
                'source': fname,
                'essentiaVersion': essentia.__version__,
                'pitchUnit': 'Hz',
                'citation': citation}
    cfg.update(metadata)

    return contours_bins, contours_contourSaliences, contours_start_times, duration
def run(self, fname):
    """Extract a pitch track from the audio file ``fname``.

    The file is loaded with MonoLoader, filtered with EqualLoudness and
    analysed frame by frame (windowing, spectrum, spectral peaks, pitch
    salience function and its peaks). The salience peaks of all frames are
    tracked into contours with PitchContours, reduced to one pitch per frame
    by ``self.ContourSelection`` and converted from cent bins to Hz with 55 Hz
    as reference.

    Side effect: ``self.settings`` is updated in place with the version, slug,
    source, Essentia version, pitch unit and citation entries.

    Returns:
        dict with three entries:
        'pitch'    - JSON string of [time, pitch, salience] rows,
        'matlab'   - content of a MATLAB file holding the pitch matrix and
                     the settings,
        'settings' - JSON string of the settings.
    """
    citation = (u'Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X. '
                '(2014). Audio feature extraction for exploring Turkish makam music. '
                'In Proceedings of 3rd International Conference on Audio Technologies '
                'for Music and Media, Ankara, Turkey.')

    # Hann window with x4 zero padding
    run_windowing = Windowing(zeroPadding=3 * self.settings.frameSize)
    run_spectrum = Spectrum(size=self.settings.frameSize * 4)
    run_spectral_peaks = SpectralPeaks(
        minFrequency=self.settings.minFrequency,
        maxFrequency=self.settings.maxFrequency,
        maxPeaks=self.settings.maxPeaks,
        sampleRate=self.settings.sampleRate,
        magnitudeThreshold=self.settings.magnitudeThreshold,
        orderBy='magnitude')
    # converts unit to cents, 55 Hz is taken as the default reference
    run_pitch_salience_function = PitchSalienceFunction(
        binResolution=self.settings.binResolution)
    run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks(
        binResolution=self.settings.binResolution)
    run_pitch_contours = PitchContours(
        hopSize=self.settings.hopSize,
        binResolution=self.settings.binResolution,
        peakDistributionThreshold=self.settings.peakDistributionThreshold)
    pool = Pool()

    # load audio and eqLoudness
    # MonoLoader resamples the audio signal to 44100 Hz by default
    audio = MonoLoader(filename=fname)()
    audio = EqualLoudness()(audio)

    for frame in FrameGenerator(audio, frameSize=self.settings.frameSize,
                                hopSize=self.settings.hopSize):
        frame = run_windowing(frame)
        spectrum = run_spectrum(frame)
        peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
        salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
        salience_peaks_bins, salience_peaks_contourSaliences = \
            run_pitch_salience_function_peaks(salience)

        # Frames without salience peaks are stored as a single zero entry.
        if not size(salience_peaks_bins):
            salience_peaks_bins = array([0])
        if not size(salience_peaks_contourSaliences):
            salience_peaks_contourSaliences = array([0])

        pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
        pool.add('allframes_salience_peaks_contourSaliences',
                 salience_peaks_contourSaliences)

    # post-processing: contour tracking
    contours_bins, contours_contourSaliences, contours_start_times, duration = \
        run_pitch_contours(pool['allframes_salience_peaks_bins'],
                           pool['allframes_salience_peaks_contourSaliences'])

    # WARNING: As of 3 April 2015, the values in "contours_start_times" lead
    # the audio by 1024 + 128 samples if the read audio is in mp3 format, as
    # explained in https://github.com/MTG/essentia/issues/246. This stems from
    # typical encoder/decoder problems. For now the values in
    # "contours_start_times" are advanced by 1152 samples; note that the shift
    # is applied to every input regardless of its format. Remove the next
    # statement once this problem is fixed.
    contours_start_times = [c + (1024 + 128) / float(self.settings.sampleRate)
                            for c in contours_start_times]

    # run the simplified contour selection
    [pitch, pitch_salience] = self.ContourSelection(
        contours_bins, contours_contourSaliences, contours_start_times, duration)

    # cent to Hz conversion; the float divisor keeps the exponent from being
    # truncated by Python 2 integer division when the bin and the bin
    # resolution are both integers
    pitch = [0. if p == 0
             else 55. * (2. ** ((self.settings.binResolution * p) / 1200.))
             for p in pitch]

    # generate time stamps
    time_stamps = [s * self.settings.hopSize / float(self.settings.sampleRate)
                   for s in xrange(0, len(pitch))]

    # [time pitch salience] matrix
    out = transpose(vstack((time_stamps, pitch, pitch_salience)))
    out = out.tolist()

    # settings
    settings = self.settings
    settings.update({'version': self.__version__,
                     'slug': self.__slug__,
                     'source': fname,
                     'essentiaVersion': essentia.__version__,
                     'pitchUnit': 'Hz',
                     'citation': citation})

    # matlab
    matout = cStringIO.StringIO()
    matob = {'pitch': out}
    matob.update(settings)
    scipy.io.savemat(matout, matob)

    return {'pitch': json.dumps(out),
            'matlab': matout.getvalue(),
            'settings': json.dumps(settings)}
# Mel-band front end: load the file, then window -> spectrum -> 40 mel bands
# covering 0 Hz up to half the sample rate.
loader = MonoLoader(filename=input_file, sampleRate=sampleRate)
w = Windowing(type='blackmanharris62')
spectrum = Spectrum()
melbands = MelBands(
    sampleRate=sampleRate,
    numberBands=40,
    lowFrequencyBound=0,
    highFrequencyBound=sampleRate / 2,
)
pool = Pool()

for frame in FrameGenerator(audio=loader(),
                            frameSize=frameSize,
                            hopSize=hopSize,
                            startFromZero=True):
    bands = melbands(spectrum(w(frame)))
    pool.add('melbands', bands)

# Report how many mel-band frames were collected and the count divided by 32.
print("%s %s" % (len(pool['melbands']), "Mel band frames"))
print("%s %s" % (len(pool['melbands']) / 32, "Rhythm transform frames"))

# Rhythm transform computed over the sequence of mel-band frames.
rhythmtransform = RhythmTransform(frameSize=rmsFrameSize, hopSize=rmsHopSize)
rt = rhythmtransform(pool['melbands'])

# Plot the transposed rhythm transform.
import matplotlib.pyplot as plt
plt.imshow(rt.T[:, :], aspect='auto')
plt.xlabel('Frames')
plt.ylabel('Rhythm Transform coefficients')
plt.show()
def computeOnsets(inFile, outFile):
    """Detect onsets in the audio file ``inFile`` and save them to ``outFile``.

    Onset detection consists of two main phases:
      1. computing an onset detection function, i.e. a function describing
         the evolution of some parameters that may indicate an onset;
      2. the actual onset detection, which decides from these functions where
         the onsets are.

    Two detection functions are computed here, 'hfc' and 'complex'. The onsets
    of each are written with np.savetxt to the same ``outFile``, HFC first,
    so the file ends up holding the complex-domain onsets.
    """
    print(outFile)

    # An algorithm can be instantiated and called on the same line.
    print('Loading audio file...')
    samples = MonoLoader(filename=inFile)()

    # Phase 1: compute the onset detection functions.
    hfc_detector = OnsetDetection(method='hfc')
    complex_detector = OnsetDetection(method='complex')

    # Remaining algorithms, and a pool to store the results.
    window = Windowing(type='hann')
    complex_fft = FFT()  # gives a complex FFT
    to_polar = CartesianToPolar()  # turns it into a (magnitude, phase) pair
    features = Pool()

    print('Computing onset detection functions...')
    for frame in FrameGenerator(samples, frameSize=1024, hopSize=512):
        magnitude, phase = to_polar(complex_fft(window(frame)))
        features.add('features.hfc', hfc_detector(magnitude, phase))
        features.add('features.complex', complex_detector(magnitude, phase))

    # Phase 2: compute the actual onset locations.
    locate_onsets = Onsets()
    print('Computing onset times...')

    # The algorithm expects a matrix, not a vector, plus one weight per
    # detection function; with a single function the weight value does not
    # matter. The complex-domain result is written last and is the one kept.
    for key in ('features.hfc', 'features.complex'):
        onset_times = locate_onsets(array([features[key]]), [1])
        np.savetxt(outFile, onset_times, fmt='%f')
# Analysis parameters.
sampleRate = 22050
frameSize = 8192
hopSize = 1024
rmsFrameSize = 256
rmsHopSize = 32

# Mel-band front end: window -> spectrum -> 40 mel bands covering 0 Hz up to
# half the sample rate.
loader = MonoLoader(filename=input_file, sampleRate=sampleRate)
w = Windowing(type='blackmanharris62')
spectrum = Spectrum()
melbands = MelBands(
    sampleRate=sampleRate,
    numberBands=40,
    lowFrequencyBound=0,
    highFrequencyBound=sampleRate / 2,
)
pool = Pool()

for frame in FrameGenerator(audio=loader(),
                            frameSize=frameSize,
                            hopSize=hopSize,
                            startFromZero=True):
    bands = melbands(spectrum(w(frame)))
    pool.add('melbands', bands)

# Rhythm transform over the mel-band frames, averaged along axis 0.
rhythmtransform = RhythmTransform(frameSize=rmsFrameSize, hopSize=rmsHopSize)
rt = rhythmtransform(pool['melbands'])
rt_mean = numpy.mean(rt, axis=0)

# Scale the index of the strongest averaged coefficient by a fixed factor.
# NOTE(review): presumably the width of one rhythm-transform bin - confirm the
# unit and how the constant was derived. The variable spelling is kept as is
# because it is a module-level name.
bin_resoluion = 5.007721656976744
print(numpy.argmax(rt_mean) * bin_resoluion)
def computeOnsets(inFile, outFile):
    """Compute onset times for ``inFile`` and write them to ``outFile``.

    The work is split in two phases: first an onset detection function is
    computed per frame (two methods are used, "hfc" and "complex"), then the
    Onsets algorithm decides from each function where the onsets are.

    Both onset lists are saved with np.savetxt to ``outFile`` one after the
    other, so the complex-domain onsets, written last, are what the file
    holds when the function returns.
    """
    print(outFile)

    print("Loading audio file...")
    load_audio = MonoLoader(filename=inFile)
    signal = load_audio()

    # Phase 1: onset detection functions, one detector per method.
    detect_hfc = OnsetDetection(method="hfc")
    detect_complex = OnsetDetection(method="complex")

    # Supporting algorithms and the pool that stores the per-frame values.
    hann = Windowing(type="hann")
    transform = FFT()  # complex FFT
    polar = CartesianToPolar()  # complex values -> (magnitude, phase)
    odf_pool = Pool()

    print("Computing onset detection functions...")
    for frame in FrameGenerator(signal, frameSize=1024, hopSize=512):
        mag, phase = polar(transform(hann(frame)))
        odf_pool.add("features.hfc", detect_hfc(mag, phase))
        odf_pool.add("features.complex", detect_complex(mag, phase))

    # Phase 2: actual onset locations.
    pick_onsets = Onsets()
    print("Computing onset times...")

    # Onsets takes a matrix (hence the extra list level) and a weight per
    # detection function; a lone function makes the weight value irrelevant.
    hfc_matrix = array([odf_pool["features.hfc"]])
    hfc_times = pick_onsets(hfc_matrix, [1])
    np.savetxt(outFile, hfc_times, fmt="%f")

    # The complex-domain onsets are taken as the example result; this write
    # replaces the HFC onsets saved just above.
    complex_matrix = array([odf_pool["features.complex"]])
    complex_times = pick_onsets(complex_matrix, [1])
    np.savetxt(outFile, complex_times, fmt="%f")