def get_bpm(file_in):
    """Estimate the tempo (BPM) of an audio file via a streaming network.

    Builds RhythmExtractor2013 -> BpmHistogramDescriptors -> Centroid,
    collects several tempo descriptors into a Pool, and returns only
    the overall BPM estimate.
    """
    pool = Pool()
    loader = streaming.MonoLoader(filename=file_in)
    rhythm = streaming.RhythmExtractor2013()
    histogram_desc = streaming.BpmHistogramDescriptors()
    # BPM histogram output size is 250
    centroid = streaming.Centroid(range=250)

    # Wire up the streaming network; unused outputs are sunk to None.
    loader.audio >> rhythm.signal
    rhythm.bpm >> (pool, 'bpm')
    rhythm.ticks >> None
    rhythm.confidence >> (pool, 'confidence')
    rhythm.estimates >> None
    rhythm.bpmIntervals >> histogram_desc.bpmIntervals
    histogram_desc.firstPeakBPM >> (pool, 'bpm_first_peak')
    histogram_desc.firstPeakWeight >> None
    histogram_desc.firstPeakSpread >> None
    histogram_desc.secondPeakBPM >> (pool, 'bpm_second_peak')
    histogram_desc.secondPeakWeight >> None
    histogram_desc.secondPeakSpread >> None
    histogram_desc.histogram >> (pool, 'bpm_histogram')
    histogram_desc.histogram >> centroid.array
    centroid.centroid >> (pool, 'bpm_centroid')

    run(loader)
    return pool['bpm']
def get_key(file_in):
    """Estimate the key and scale for an audio file.

    Runs a FrameCutter -> Windowing -> Spectrum -> SpectralPeaks -> HPCP -> Key
    streaming chain and returns a Key(...) built from the pooled results.
    """
    pool = Pool()
    loader = streaming.MonoLoader(filename=file_in)
    cutter = streaming.FrameCutter()
    window = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    peaks = streaming.SpectralPeaks(orderBy="magnitude",
                                    magnitudeThreshold=1e-05,
                                    minFrequency=40,
                                    maxFrequency=5000,
                                    maxPeaks=10000)
    hpcp = streaming.HPCP()
    key_algo = streaming.Key()

    loader.audio >> cutter.signal
    cutter.frame >> window.frame >> spectrum.frame
    spectrum.spectrum >> peaks.spectrum
    peaks.magnitudes >> hpcp.magnitudes
    peaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key_algo.pcp
    key_algo.key >> (pool, 'tonal.key_key')
    key_algo.scale >> (pool, 'tonal.key_scale')
    key_algo.strength >> (pool, 'tonal.key_strength')

    run(loader)
    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
def estimate_main_band(infile):
    """Estimate if this is a low, mid, or high track.

    Not _really_ sure if this does what I need it to, but some quick
    tests looked right.
    """
    pool = Pool()
    loader = streaming.MonoLoader(filename=infile)
    cutter = streaming.FrameCutter()
    window = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    # Three bands: 0-250 Hz (low), 250-750 Hz (mid), 750-4000 Hz (high).
    bands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])

    loader.audio >> cutter.signal
    cutter.frame >> window.frame >> spectrum.frame
    spectrum.spectrum >> bands.spectrum
    bands.bands >> (pool, 'bands')
    run(loader)

    # Sum each band's energy over all frames and report the dominant one.
    totals = np.sum(pool['bands'], axis=0)
    labels = {0: 'low', 1: 'mid', 2: 'high'}
    return labels.get(int(np.argmax(totals)))
def cutAudioFile(self, filename, frameSize, hopSize, startFromZero, expectedNumFrames):
    """Frame-cut a short generated test file and assert the frame count."""
    audio_file = join(testdata.audio_dir, 'generated', 'synthesised',
                      'shortfiles', filename)
    loader = es.MonoLoader(filename=audio_file)
    cutter = es.FrameCutter(frameSize=frameSize,
                            hopSize=hopSize,
                            startFromZero=startFromZero)
    pool = Pool()

    loader.audio >> cutter.signal
    cutter.frame >> (pool, 'audio.frames')
    run(loader)

    self.assertEqual(len(pool['audio.frames']), expectedNumFrames)
def estimate_danceability(infile):
    """Compute Essentia's danceability descriptor for an audio file."""
    pool = Pool()
    loader = streaming.MonoLoader(filename=infile)
    danceability_algo = streaming.Danceability()

    loader.audio >> danceability_algo.signal
    danceability_algo.danceability >> (pool, 'danceability')
    run(loader)

    return pool['danceability']
def load_audio_from_project(audios_path="../audio/preliminares/", audio_name=None, print_output=True):
    """Load the audio files found in ``audios_path``.

    If ``audio_name`` is given, only that file is loaded; otherwise every
    non-directory entry of ``audios_path`` is loaded.

    Args:
        audios_path: directory containing the audio files (trailing slash expected,
            since paths are built by string concatenation).
        audio_name: optional single file name to load instead of the whole folder.
        print_output: when True, print a line per loaded file.

    Returns:
        dict mapping file name -> {"data", "path", "sample_rate", "metadata"}.
    """
    def _load(name):
        """Read metadata and samples for one file under audios_path."""
        # MetadataReader output index 10 holds the sample rate (as used
        # throughout this file) — TODO confirm against the essentia version.
        metadata = es_mode.MetadataReader(filename=audios_path + name)()
        sample_rate = checkSampleRate(metadata[10])
        loader = es_mode.MonoLoader(filename=(audios_path + name),
                                    sampleRate=sample_rate)
        if print_output:
            print("Loaded", name, "with sampleRate:", sample_rate)
        return {
            "data": loader(),
            "path": audios_path + name,
            "sample_rate": sample_rate,
            "metadata": metadata,
        }

    if audio_name:
        names = [audio_name]
    else:
        names = [
            f for f in listdir(audios_path)
            if not path.isdir(path.join(audios_path, f))
        ]
    return {name: _load(name) for name in names}
def testRegressionStreaming(self):
    """Regression-check the Chromaprinter in streaming mode.

    In streaming mode the duration of the file is not known by the algorithm,
    so it works by concatenating fragments; about 30 seconds of processing are
    required to show results consistent with the standard mode.
    """
    audio_file = join(testdata.audio_dir, 'recorded', 'mozart_c_major_30sec.wav')
    loader = es.MonoLoader(filename=audio_file)
    printer = es.Chromaprinter(analysisTime=30, concatenate=True)
    pool = Pool()

    loader.audio >> printer.signal
    printer.fingerprint >> (pool, 'chromaprint')
    es.essentia.run(loader)

    self.assertEqualVector(self.expected, pool['chromaprint'][0])
def estimate_chroma(self, uid):
    """Compute a frame-wise HPCP chromagram for the track identified by ``uid``.

    Uses the instance's hop/frame sizes and tuning frequency; the resulting
    chroma is rolled so that index 0 corresponds to 'C' instead of 'A'.
    """
    loader = esstr.MonoLoader(
        filename=self.audio_path_extractor.audio_path_name(uid))
    cutter = esstr.FrameCutter(hopSize=self.hop_size,
                               frameSize=self.frame_size)
    window = esstr.Windowing(type="blackmanharris62")
    spectrum = esstr.Spectrum()
    peaks = esstr.SpectralPeaks(orderBy="magnitude",
                                magnitudeThreshold=1e-05,
                                minFrequency=40,
                                maxFrequency=5000,
                                maxPeaks=10000)
    hpcp = esstr.HPCP(size=12,
                      referenceFrequency=self.tuning_freq,
                      harmonics=8,
                      bandPreset=True,
                      minFrequency=float(40),
                      maxFrequency=float(5000),
                      bandSplitFrequency=500.0,
                      weightType="cosine",
                      nonLinear=True,
                      windowSize=1.0)
    pool = essentia.Pool()

    # Connect the streaming network.
    loader.audio >> cutter.signal
    cutter.frame >> window.frame >> spectrum.frame
    spectrum.spectrum >> peaks.spectrum
    spectrum.spectrum >> (pool, 'spectrum.magnitude')
    peaks.magnitudes >> hpcp.magnitudes
    peaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> (pool, 'chroma.hpcp')
    essentia.run(loader)

    # HPCP bins are 'A'-based; roll by -3 to make them 'C'-based.
    return np.roll(pool['chroma.hpcp'], shift=-3, axis=1)
def testStreamingRegression(self):
    """Check StartStopCut in streaming mode through a real accumulator.

    Streaming mode should also be tested to ensure it works well with the
    real accumulator.
    """
    import essentia.streaming as estr
    audio_file = join(testdata.audio_dir, 'recorded/mozart_c_major_30sec.wav')
    loader = estr.MonoLoader(filename=audio_file)
    accumulator = estr.RealAccumulator()
    cutter = estr.StartStopCut()
    pool = Pool()

    loader.audio >> accumulator.data
    accumulator.array >> cutter.audio
    cutter.startCut >> (pool, 'start')
    cutter.stopCut >> (pool, 'stop')
    essentia.run(loader)

    self.assertEqual(pool['start'], 0)
    self.assertEqual(pool['stop'], 1)
# Analysis parameters for the sinusoidal model.
params = {
    'frameSize': 2048,
    'hopSize': 128,
    'startFromZero': False,
    'sampleRate': 44100,
    'maxnSines': 100,
    'magnitudeThreshold': -74,
    'minSineDur': 0.02,
    'freqDevOffset': 10,
    'freqDevSlope': 0.001,
}

# loop over all frames
audioout = np.array(0)
counter = 0

# input and output files
import os.path
tutorial_dir = os.path.dirname(os.path.realpath(__file__))
inputFilename = os.path.join(tutorial_dir, 'singing-female.wav')
outputFilename = os.path.join(tutorial_dir, 'singing-female-out-sinesubtraction.wav')

out = np.array(0)

loader = es.MonoLoader(filename=inputFilename, sampleRate=params['sampleRate'])
pool = essentia.Pool()
fcut = es.FrameCutter(frameSize=params['frameSize'],
                      hopSize=params['hopSize'],
                      startFromZero=False)
w = es.Windowing(type="blackmanharris92")
fft = es.FFT(size=params['frameSize'])
smanal = es.SineModelAnal(sampleRate=params['sampleRate'],
                          maxnSines=params['maxnSines'],
                          magnitudeThreshold=params['magnitudeThreshold'],
                          freqDevOffset=params['freqDevOffset'],
                          freqDevSlope=params['freqDevSlope'])
# BUG FIX: use floor division — on Python 3, '/' yields a float
# (2048/4 == 512.0) while SineSubtraction's fftSize must be an integer.
subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
smsub = es.SineSubtraction(sampleRate=params['sampleRate'],
                           fftSize=subtrFFTSize,
                           hopSize=params['hopSize'])

# analysis network (continues past this chunk)
loader.audio >> fcut.signal
fcut.frame >> w.frame
w.frame >> fft.frame
fft.fft >> smanal.fft
smanal.magnitudes >> (pool, 'magnitudes')
import essentia

# Open the results database; `sqlite3` is imported elsewhere in this file.
db_conn = sqlite3.connect("data.db")
db_cursor = db_conn.cursor()

# GTZAN-style genre names; files live under genres/<name>/<name>.0000N.au
names = [
    'blues', 'classical', 'country', 'disco', 'hiphop',
    'jazz', 'metal', 'pop', 'reggae', 'rock'
]

for name in names:
    # NOTE(review): range(10) with the '0000' prefix only covers tracks
    # 00000-00009 of each genre — confirm this is intentional.
    for i in range(10):
        song_name = name + '.' + '0000' + str(i) + '.au'
        print(song_name)
        loader = ess.MonoLoader(filename="genres/" + name + "/" + song_name)
        framecutter = ess.FrameCutter(frameSize=4096, hopSize=2048,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                          magnitudeThreshold=0.00001,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)
        # Use default HPCP parameters
        hpcp = ess.HPCP()
        # Use pool to store data
        # NOTE(review): the loop body continues past this chunk.
# STFT synthesis ifftframe = ifft(outfft) out = overl(ifftframe) if counter >= (framesize/(2*hopsize)): audioout = np.append(audioout, out) counter += 1 # write audio output print audioout.shape awrite(audioout.astype(np.float32)) if mode == 'streaming': out = np.array(0) loader = es.MonoLoader(filename = inputFilename, sampleRate = 44100) pool = essentia.Pool() fcut = es.FrameCutter(frameSize = framesize, hopSize = hopsize, startFromZero = False); w = es.Windowing(type = "hann"); fft = es.FFT(size = framesize); ifft = es.IFFT(size = framesize); overl = es.OverlapAdd (frameSize = framesize, hopSize = hopsize); awrite = es.MonoWriter (filename = outputFilename, sampleRate = 44100); #gen = audio #VectorInput(audio) loader.audio >> fcut.signal fcut.frame >> w.frame w.frame >> fft.frame fft.fft >> ifft.fft ifft.frame >> overl.frame overl.signal >> awrite.audio
import essentia.streaming as ess
import essentia
import numpy as np
import librosa

# Initialize algorithms we will use
loader = ess.MonoLoader(filename='genres/blues/blues.00000.au')
framecutter = ess.FrameCutter(frameSize=4096, hopSize=2048,
                              silentFrames='noise')
windowing = ess.Windowing(type='blackmanharris62')
spectrum = ess.Spectrum()
spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                  magnitudeThreshold=0.00001,
                                  minFrequency=20,
                                  maxFrequency=3500,
                                  maxPeaks=60)

# Use default HPCP parameters for plots, however we will need higher resolution
# and custom parameters for better Key estimation
hpcp = ess.HPCP()
hpcp_key = ess.HPCP(size=36,  # we will need higher resolution for Key estimation
                    referenceFrequency=440,  # assume tuning frequency is 44100.
                    bandPreset=False,
                    minFrequency=20,
                    maxFrequency=3500,
                    weightType='cosine',
                    nonLinear=False,
                    windowSize=1.)
# NOTE(review): this statement is truncated here — its remaining keyword
# arguments continue beyond this chunk.
key = ess.Key(profileType='edma',  # Use profile for electronic music
              numHarmonics=4,
import essentia.standard as es
import essentia.streaming as ess
import matplotlib.pyplot as plt

# Compute a Chromaprint fingerprint for the first 20 s of the file.
loader = ess.MonoLoader(filename = '/home/lib/audio/work/arglaaa-mini/22.wav')
fps = ess.Chromaprinter(analysisTime=20, concatenate=True)
pool = ess.essentia.Pool()

# Connecting the algorithms
loader.audio >> fps.signal
fps.fingerprint >> (pool, 'chromaprint')

ess.essentia.run(loader)

# With concatenate=True the pool holds a single concatenated fingerprint string.
fp = pool['chromaprint'][0]
print(('fp = {0}'.format(fp)))

import acoustid as ai
# import acoustid.chromaprint
import codecs
import numpy as np

print('type(fp) = ', type(fp))
# decode_fingerprint expects bytes; it returns (fingerprint_ints, algorithm_id),
# so [0] keeps only the integer array.
fpbytes = bytes(fp, 'utf-8')
print('fpbytes = {0}'.format(fpbytes))
fp_int = ai.chromaprint.decode_fingerprint(fpbytes)[0]
import os import essentia as e import essentia.streaming as estr # CONFIGURATION # ================================================================================ # Default parameters sample_rate = 44100 window_size = 16384 hop_size = 8192 tuning_frequency = 440 # retrieve filenames from folder: soundfiles = os.listdir(audio_folder) if '.DS_Store' in soundfiles: soundfiles.remove('.DS_Store') # ANALYSIS # ================================================================================ print "\nANALYSIS..." for item in soundfiles: loader = estr.MonoLoader(filename=audio_folder+'/'+item,sampleRate=sample_rate) tuningExtractor = <estr.TuningFrequencyExtractor(frameSize=window_size,hopSize=hop_size) pool = e.Pool() loader.audio >> tuningExtractor.signal tuningExtractor.tuningFrequency >> (pool, 'tuning_reference') # run and print the results. e.run(loader) result = pool['tuning_reference'] print item[:20]+'... ', result
def compute_features(path, f_mfcc_kl, f_mfcc_euclid, f_notes, f_chroma, f_bh):
    """Extract a configurable bundle of audio features from the file at `path`.

    The f_* flags (0/1) select which feature groups are computed:
      f_mfcc_kl / f_mfcc_euclid -- MFCC statistics (mean, var, cov, cov_kl)
      f_bh                      -- BPM histogram descriptors
      f_chroma                  -- beat-aligned, L1-normalized chroma matrix
      f_notes                   -- per-beat dominant note index sequence

    Returns a 10-tuple:
      (bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl)
    Feature groups that were not requested keep their zero/empty defaults.

    NOTE(review): relies on module-level names `fs`, `es`, `ess`, `essentia`,
    `np`, `gc` defined elsewhere in this file. Indentation was reconstructed
    from a whitespace-mangled source; ambiguous spots are flagged below.
    """
    gc.enable()
    # Loading audio file
    # will resample if sampleRate is different!
    try:
        audio = es.MonoLoader(filename=path, sampleRate=fs)()
    except:
        # NOTE(review): bare except deliberately treats any load failure as "skip file".
        print("Erroneos File detected by essentia standard: skipping!")
        #return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
        return 0, [], 0, 0, [], [], [], [], [], []
    # will resample if sampleRate is different!
    try:
        loader = ess.MonoLoader(filename=path, sampleRate=44100)
    except:
        print("Erroneos File detected by essentia streaming: skipping!")
        #return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
        return 0, [], 0, 0, [], [], [], [], [], []

    # Initialize algorithms we will use
    frameSize = 4096  #512
    hopSize = 2048  #256

    #######################################
    # DO FILTERING ONLY FOR MFCC - not with essentia standard
    # below is just an example
    #HP = es.HighPass(cutoffFrequency=128)
    #LP = es.LowPass(cutoffFrequency=4096)
    #lp_f = LP(audio)
    #hp_f = HP(lp_f)
    #audio = hp_f
    #MonoWriter(filename='music/filtered.wav')(filtered_audio)

    # Band-pass the streaming path (128 Hz - 4096 Hz) before chroma extraction.
    HP = ess.HighPass(cutoffFrequency=128)
    LP = ess.LowPass(cutoffFrequency=4096)
    #loader = ess.MonoLoader(filename=path, sampleRate=44100)
    #writer = ess.MonoWriter(filename='music/filtered.wav')
    #frameCutter = FrameCutter(frameSize = 1024, hopSize = 512)
    #pool = essentia.Pool()
    # Connect streaming algorithms
    #loader.audio >> HP.signal
    #HP.signal >> LP.signal
    #LP.signal >> writer.audio
    # Run streaming network
    #essentia.run(loader)

    # Defaults returned for any feature group that is not requested.
    bpm = 0
    histogram = 0
    key = 0
    scale = 0
    notes = 0
    chroma_matrix = 0
    mean = 0
    cov = 0
    var = 0
    cov_kl = 0

    #####################################
    # extract mfcc
    #####################################
    if f_mfcc_kl == 1 or f_mfcc_euclid == 1:
        #features, features_frames = es.MusicExtractor(analysisSampleRate=44100, mfccStats=['mean', 'cov'])(path)
        #m, n = features['lowlevel.mfcc.cov'].shape
        #print m
        #iu1 = np.triu_indices(m)
        #cov = features['lowlevel.mfcc.cov'][iu1]
        #mean = features['lowlevel.mfcc.mean']
        #print(features['lowlevel.mfcc.cov'])
        hamming_window = es.Windowing(type='hamming')
        spectrum = es.Spectrum()  # we just want the magnitude spectrum
        mfcc = es.MFCC(numberCoefficients=13)
        frame_sz = 2048  #512
        hop_sz = 1024  #256
        # MFCC(...)[1] keeps the coefficients (index 0 is the band energies).
        mfccs = np.array([
            mfcc(spectrum(hamming_window(frame)))[1]
            for frame in es.FrameGenerator(audio, frameSize=frame_sz, hopSize=hop_sz)
        ])
        #Let's scale the MFCCs such that each coefficient dimension has zero mean and unit variance:
        #mfccs = sklearn.preprocessing.scale(mfccs)
        #print mfccs.shape
        mean = np.mean(mfccs.T, axis=1)
        #print(mean)
        var = np.var(mfccs.T, axis=1)
        #print(var)
        cov = np.cov(mfccs.T)
        cov_kl = cov  #.flatten()
        #get only upper triangular matrix values to shorten length
        iu1 = np.triu_indices(13)
        cov = cov[iu1]
        #plt.imshow(mfccs.T, origin='lower', aspect='auto', interpolation='nearest')
        #plt.ylabel('MFCC Coefficient Index')
        #plt.xlabel('Frame Index')
        #plt.colorbar()

    #####################################
    # extract beat features and histogram
    #####################################
    if f_bh == 1 or f_chroma == 1 or f_notes == 1:
        # Compute beat positions and BPM
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(
            audio)
        if f_bh == 1:
            peak1_bpm, peak1_weight, peak1_spread, peak2_bpm, peak2_weight, peak2_spread, histogram = es.BpmHistogramDescriptors(
            )(beats_intervals)
        # NOTE(review): reconstructed as outside `if f_bh == 1` because
        # beats_frames is needed below for f_chroma/f_notes as well — confirm.
        tempo = bpm
        times = beats
        # Convert beat times (seconds) to frame indices at the chroma hop size.
        beats_frames = (beats * fs) / hopSize
        beats_frames = beats_frames.astype(int)
        #fig, ax = plt.subplots()
        #ax.bar(range(len(histogram)), histogram, width=1)
        #ax.set_xlabel('BPM')
        #ax.set_ylabel('Frequency')
        #plt.title("BPM histogram")
        #ax.set_xticks([20 * x + 0.5 for x in range(int(len(histogram) / 20))])
        #ax.set_xticklabels([str(20 * x) for x in range(int(len(histogram) / 20))])
        #plt.show()

        #####################################
        # extract full beat aligned chroma
        #####################################
        framecutter = ess.FrameCutter(frameSize=frameSize, hopSize=hopSize,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                          magnitudeThreshold=0.00001,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)
        # Use default HPCP parameters for plots, however we will need higher resolution
        # and custom parameters for better Key estimation
        hpcp = ess.HPCP()
        hpcp_key = ess.HPCP(
            size=36,  # we will need higher resolution for Key estimation
            referenceFrequency=440,  # assume tuning frequency is 44100.
            bandPreset=False,
            minFrequency=20,
            maxFrequency=3500,
            weightType='cosine',
            nonLinear=False,
            windowSize=1.)
        key = ess.Key(
            profileType='edma',  # Use profile for electronic music
            numHarmonics=4,
            pcpSize=36,
            slope=0.6,
            usePolyphony=True,
            useThreeChords=True)
        # Use pool to store data
        pool = essentia.Pool()
        # Connect streaming algorithms
        ###################################
        # USE FILTER - comment next lines in
        loader.audio >> HP.signal
        HP.signal >> LP.signal
        LP.signal >> framecutter.signal
        ###################################
        ###################################
        # NO FILTER - comment next line in
        #loader.audio >> framecutter.signal
        ###################################
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
        spectralpeaks.magnitudes >> hpcp_key.magnitudes
        spectralpeaks.frequencies >> hpcp_key.frequencies
        hpcp_key.hpcp >> key.pcp
        hpcp.hpcp >> (pool, 'tonal.hpcp')
        key.key >> (pool, 'tonal.key_key')
        key.scale >> (pool, 'tonal.key_scale')
        key.strength >> (pool, 'tonal.key_strength')
        # Run streaming network
        essentia.run(loader)
        #print("Estimated key and scale:", pool['tonal.key_key'] + " " + pool['tonal.key_scale'])
        #print(pool['tonal.hpcp'].T)
        # `key` is rebound from the streaming algorithm to the estimated key string.
        chroma = pool['tonal.hpcp'].T
        key = pool['tonal.key_key']
        scale = pool['tonal.key_scale']

        if f_chroma == 1:
            # Plot HPCP
            #imshow(pool['tonal.hpcp'].T, aspect='auto', origin='lower', interpolation='none')
            #plt.title("HPCPs in frames (the 0-th HPCP coefficient corresponds to A)")
            #show()
            #print beats_frames.shape[0]
            # One L1-normalized average chroma vector per beat interval.
            chroma_matrix = np.zeros((beats_frames.shape[0], 12))
            prev_beat = 0
            act_beat = 0
            sum_key = np.zeros(12)
            chroma_align = chroma
            chroma_align = chroma_align.transpose()
            mat_index = 0
            for i in beats_frames:
                act_beat = i
                value = sum(
                    chroma_align[prev_beat:act_beat]) / (act_beat - prev_beat)
                chroma_align[prev_beat:act_beat] = value
                prev_beat = i
                if np.linalg.norm(value, ord=1) != 0:
                    value = value / np.linalg.norm(value, ord=1)
                chroma_matrix[mat_index] = value
                mat_index = mat_index + 1
            #chroma_align = chroma_align.transpose()
            #plt.figure(figsize=(10, 4))
            #librosa.display.specshow(chroma_align, y_axis='chroma', x_axis='time')
            #plt.vlines(times, 0, 12, alpha=0.5, color='r', linestyle='--', label='Beats')
            #plt.colorbar()
            #plt.title('Chromagram')
            #plt.tight_layout()
            #chroma_align = chroma_align.transpose()
            #print(chroma_align[24:28])

        #####################################
        # extract full chroma text
        #####################################
        if f_notes == 1:
            #print(chroma.shape)
            m, n = chroma.shape
            avg = 0
            chroma = chroma.transpose()
            m, n = chroma.shape
            # Average total frame energy; frames below half of it are zeroed.
            for j in chroma:
                avg = avg + np.sum(j)
            avg = avg / m
            threshold = avg / 2
            for i in chroma:
                if np.sum(i) > threshold:
                    ind = np.where(i == np.max(i))
                    max_val = i[ind]  #is always 1!
                    i[ind] = 0
                    ind2 = np.where(i == np.max(i))
                    i[ind] = 1
                    #if np.any(i[ind2][0] >= 0.8 * max_val):
                    #i[ind2] = i[ind2]
                    #pass
                    #low_values_flags = i < 1
                    low_values_flags = i < 0.8
                    i[low_values_flags] = 0
                else:
                    i.fill(0)
            chroma = chroma.transpose()

            # Compute beat positions and BPM
            prev_beat = 0
            act_beat = 0
            sum_key = np.zeros(12)
            chroma = chroma.transpose()
            # Collapse each beat interval to a one-hot vector on its strongest pitch class.
            for i in beats_frames:
                act_beat = i
                sum_key = sum(chroma[prev_beat:act_beat])
                #print(sum_key)
                #print(chroma[prev_beat:act_beat])
                ind = np.where(sum_key == np.max(sum_key))
                ind = ind[0]
                #print("debug")
                # NOTE(review): `j` is the last frame from the thresholding
                # loop above; len(j) is the chroma size (12) — confirm.
                fill = np.zeros(len(j))
                if (np.all(chroma[prev_beat:act_beat] == 0)):
                    fill[ind] = 0
                else:
                    fill[ind] = 1
                chroma[prev_beat:act_beat] = fill
                #print(chroma[prev_beat:act_beat])
                prev_beat = i
                #print("BEAT")
            notes = []
            for i in notes:
                del i
            prev_beat = 0
            act_beat = 0
            # Second pass: record the dominant pitch-class index per beat.
            for i in beats_frames:
                act_beat = i
                sum_key = sum(chroma[prev_beat:act_beat])
                ind = np.where(sum_key == np.max(sum_key))
                prev_beat = i
                notes.append(ind[0][0])
                prev_beat = i
            #chroma = chroma.transpose()
            #plt.figure(figsize=(10, 4))
            #librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
            #plt.vlines(times, 0, 12, alpha=0.5, color='r', linestyle='--', label='Beats')
            #plt.colorbar()
            #plt.title('Chromagram')
            #plt.tight_layout()
            #chroma = chroma.transpose()

    gc.collect()
    return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl