import librosa
import essentia.standard as ess


def gettempoessentia(f):
    # RhythmExtractor2013 expects 44100 Hz input; librosa resamples to
    # 22050 Hz by default, so request the full rate explicitly.
    x, fs = librosa.load(f, sr=44100)
    avg_bpm, ticks, confidence, estimates, bpm_intervals = \
        ess.RhythmExtractor2013(method='multifeature')(x)
    print('tempo is %i bpm - essentia\n' % int(avg_bpm))
    print(estimates)
    return avg_bpm
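For comparison, a minimal sketch of the same estimate without the librosa round-trip, using essentia's own loader (the filename here is hypothetical):

import essentia.standard as ess

audio = ess.MonoLoader(filename='track.wav', sampleRate=44100)()  # hypothetical file
bpm, ticks, confidence, estimates, intervals = \
    ess.RhythmExtractor2013(method='multifeature')(audio)
print('tempo is %i bpm - essentia' % int(bpm))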
def check_bpm(self) -> float:
    """
    Run the rhythm analyzer to determine the BPM.

    If the audio is shorter than the configured time margin, it is tiled
    (appended to itself) enough times to make up for the missing data and
    increase accuracy.
    """
    temp = self.audio
    if len(temp) < self.__margin:
        # ceil (not round) guarantees the tiled audio reaches the margin
        factor = int(np.ceil(self.__margin / len(temp)))
        temp = np.tile(temp, factor)
    rhythm_extractor = estd.RhythmExtractor2013()
    self.__bpm = rhythm_extractor(temp)[0]
    return self.__bpm
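The tiling trick above can be reproduced outside the class; a minimal standalone sketch, assuming 44100 Hz mono audio and a hypothetical 30-second margin:

import numpy as np
import essentia.standard as estd


def bpm_with_padding(audio, margin=30 * 44100):  # margin in samples (assumed)
    """Tile short audio up to `margin` samples before BPM analysis."""
    if len(audio) < margin:
        factor = int(np.ceil(margin / len(audio)))  # enough repeats to reach margin
        audio = np.tile(audio, factor)
    return estd.RhythmExtractor2013()(audio)[0]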
def algorithm_rhythm_essentia_basic(sound):
    """
    Estimates the BPM of a given audio file using Zapata14 and Degara12.

    * Zapata14: Jose R. Zapata, Matthew E. P. Davies, and Emilia Gomez.
      Multi-Feature Beat Tracking. IEEE/ACM Transactions on Audio, Speech,
      and Language Processing, 22(4):816-825, 2014.
    * Degara12: Norberto Degara, Enrique Argones Rua, Antonio Pena,
      Soledad Torres-Guijarro, Matthew E. P. Davies, and Mark D. Plumbley.
      Reliability-Informed Beat Tracking of Musical Signals. IEEE
      Transactions on Audio, Speech, and Language Processing,
      20(1):290-301, 2012.

    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
    """
    results = dict()
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=44100)

    # Method RhythmExtractor2013 - multifeature (the default)
    rhythm_extractor_2013 = estd.RhythmExtractor2013()
    bpm, ticks, confidence, _, bpm_intervals = rhythm_extractor_2013(audio)
    results['Zapata14'] = {'bpm': bpm, 'confidence': float(confidence)}

    # Method RhythmExtractor2013 - degara
    rhythm_extractor_2013 = estd.RhythmExtractor2013(method='degara')
    bpm, ticks, confidence, _, bpm_intervals = rhythm_extractor_2013(audio)
    results['Degara12'] = {'bpm': bpm}
    return results
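A hedged usage sketch for the function above; load_audio_file and SOUND_FILE_KEY come from the surrounding project, and the file name is hypothetical:

sound = {SOUND_FILE_KEY: 'loop.wav'}  # hypothetical dataset entry
results = algorithm_rhythm_essentia_basic(sound)
print('Zapata14: %.1f BPM (confidence %.2f)' % (
    results['Zapata14']['bpm'], results['Zapata14']['confidence']))
print('Degara12: %.1f BPM' % results['Degara12']['bpm'])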
def _essentia(path: str, params=None) -> float:
    """Use essentia to calculate the BPM of a given file.

    This function has been copied from the essentia examples here:
    https://essentia.upf.edu/essentia_python_examples.html
    """
    # pyo.sndinfo returns (frames, duration, sample rate, channels, ...)
    info = pyo.sndinfo(path)
    audio = es.MonoLoader(filename=path, sampleRate=info[2])()

    # Compute beat positions and BPM
    rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
    bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(audio)
    return bpm
def extract_beats(song_names, song_name2wav, bps):
    """
    Finds and returns indices of beat-like events in the signal. Generally
    this corresponds to the rhythm you feel when listening to a song, but
    can be slightly different.
    """
    s_beats = []
    for i, s_name in enumerate(song_names):
        s_wav = song_name2wav[s_name]
        s_bpm, s_bs, s_conf, s_tempo, s_beat_dur = \
            ess.RhythmExtractor2013(method='multifeature')(s_wav)
        # Keep only every bps-th beat, then convert beat times (seconds)
        # to sample indices.
        s_bs = s_bs[::bps]
        s_bs = (s_bs * TARGET_FS).astype(int)
        s_beats.append(s_bs)
    return s_beats
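The seconds-to-samples conversion above is easy to check in isolation; a small sketch with made-up beat times (TARGET_FS is a module-level constant in the original, assumed to be 44100 here):

import numpy as np

TARGET_FS = 44100  # assumed value of the original module-level constant
beat_times = np.array([0.5, 1.0, 1.5, 2.0])  # hypothetical beats in seconds
bps = 2
beat_samples = (beat_times[::bps] * TARGET_FS).astype(int)  # -> [22050, 66150]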
def annotate_song(filepath):
    audio = load_audio(filepath)
    key, scale, key_strength = es.KeyExtractor(profileType='edma')(audio)
    key_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
    # Transform a flat ("bemol") key to its sharp enharmonic, e.g. 'Ab' -> 'G#'.
    # (Not valid for 'Cb'/'Fb', which KeyExtractor does not normally emit.)
    if len(key) == 2 and key[1] == 'b':
        cur_idx = key_list.index(key[0])
        new_idx = (cur_idx - 1) % len(key_list)
        new_key = key_list[new_idx] + '#'
        key = new_key
    # For BPM, RhythmExtractor2013 has proven the most accurate here,
    # but it takes longer.
    rhythm_desc = es.RhythmExtractor2013()(audio)
    bpm = round(rhythm_desc[0])
    del audio
    return {'bpm': bpm,
            'key': key,
            'key_scale': scale,
            'key_strength': key_strength}
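The index arithmetic above maps each flat to the sharp one letter below it ('Ab' -> 'G#'). An explicit lookup table, a hypothetical alternative, makes the mapping easier to audit; 'Cb' and 'Fb' are omitted because their enharmonics are the plain 'B' and 'E':

FLAT_TO_SHARP = {'Ab': 'G#', 'Bb': 'A#', 'Db': 'C#', 'Eb': 'D#', 'Gb': 'F#'}


def to_sharp(key):
    # Return the sharp spelling for a flat key, or the key unchanged.
    return FLAT_TO_SHARP.get(key, key)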
robot.stow()

filename = "./audios/drum_60.wav"

# Alternative: MusicExtractor also reports a BPM estimate.
# features, _ = es.MusicExtractor(lowlevelStats=['mean', 'stdev'],
#                                 rhythmStats=['mean', 'stdev'],
#                                 tonalStats=['mean', 'stdev'])(filename)
# tempo = np.round(features['rhythm.bpm'])
# print(tempo)
# t = 60.0 / tempo * 2

audio = es.MonoLoader(filename=filename)()
rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
bpm, beats, _, _, _ = rhythm_extractor(audio)

# interonset = np.ediff1d(beats)
# interonset = np.add.reduceat(interonset, np.arange(0, len(interonset), 2))
# interonset = np.round(interonset, decimals=3)

tempo = np.round(bpm)
t = 60 / tempo * 2  # duration of two beats in seconds
t = np.round(t, decimals=2)

xrotate = 3.14
xtilt = 0.5
xpan = 1
xwrist = 1.5

# start
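As a worked check of the timing arithmetic: the file name suggests a nominal 60 BPM, so one beat lasts 60/60 = 1.0 s and the two-beat period t comes out as 2.0 s:

tempo = 60.0  # nominal BPM implied by drum_60.wav (assumed)
t = round(60.0 / tempo * 2, 2)  # two-beat period in seconds -> 2.0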
def __get_bpm__(self):
    e_rhythmextractor2013 = e.RhythmExtractor2013(maxTempo=120, minTempo=40)
    bpm, ticks, confidence, estimates, bpmintervals = e_rhythmextractor2013(self.signal)
    # print("bpm:", bpm)
    self.bpm = bpm
def get_beats(filepath):
    """Gets beat times (in seconds, not sample numbers) and the global BPM."""
    audio = es.MonoLoader(filename=filepath, sampleRate=sample_rate)()
    return es.RhythmExtractor2013(method='multifeature')(audio)
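A usage sketch for get_beats, converting the returned beat times to sample indices (sample_rate is a module-level global in the original; 44100 and the file name are assumptions):

import numpy as np

sample_rate = 44100  # assumed value of the module-level global
bpm, beats, confidence, estimates, intervals = get_beats('track.wav')  # hypothetical file
beat_samples = (beats * sample_rate).astype(int)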
def compute_features(path, f_mfcc_kl, f_mfcc_euclid, f_notes, f_chroma, f_bh):
    gc.enable()

    # Load the audio file (resamples if the sample rate differs).
    try:
        audio = es.MonoLoader(filename=path, sampleRate=fs)()
    except Exception:
        print("Erroneous file detected by essentia standard: skipping!")
        # return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
        return 0, [], 0, 0, [], [], [], [], [], []
    try:
        loader = ess.MonoLoader(filename=path, sampleRate=44100)
    except Exception:
        print("Erroneous file detected by essentia streaming: skipping!")
        return 0, [], 0, 0, [], [], [], [], [], []

    # Initialize algorithms we will use
    frameSize = 4096  # 512
    hopSize = 2048  # 256

    # Filtering is applied only in the streaming (chroma) network below,
    # not to the MFCC input. An equivalent standard-mode filter would be:
    #   audio = es.HighPass(cutoffFrequency=128)(es.LowPass(cutoffFrequency=4096)(audio))
    HP = ess.HighPass(cutoffFrequency=128)
    LP = ess.LowPass(cutoffFrequency=4096)

    bpm = 0
    histogram = 0
    key = 0
    scale = 0
    notes = 0
    chroma_matrix = 0
    mean = 0
    cov = 0
    var = 0
    cov_kl = 0

    #####################################
    # extract mfcc
    #####################################
    if f_mfcc_kl == 1 or f_mfcc_euclid == 1:
        # Alternative: es.MusicExtractor(analysisSampleRate=44100,
        # mfccStats=['mean', 'cov'])(path) also yields MFCC mean/covariance.
        hamming_window = es.Windowing(type='hamming')
        spectrum = es.Spectrum()  # we just want the magnitude spectrum
        mfcc = es.MFCC(numberCoefficients=13)
        frame_sz = 2048  # 512
        hop_sz = 1024  # 256
        mfccs = np.array([
            mfcc(spectrum(hamming_window(frame)))[1]
            for frame in es.FrameGenerator(audio, frameSize=frame_sz, hopSize=hop_sz)
        ])
        # Optionally scale MFCCs to zero mean and unit variance:
        # mfccs = sklearn.preprocessing.scale(mfccs)
        mean = np.mean(mfccs.T, axis=1)
        var = np.var(mfccs.T, axis=1)
        cov = np.cov(mfccs.T)
        cov_kl = cov
        # Keep only the upper-triangular values to shorten the vector.
        iu1 = np.triu_indices(13)
        cov = cov[iu1]

    #####################################
    # extract beat features and histogram
    #####################################
    if f_bh == 1 or f_chroma == 1 or f_notes == 1:
        # Compute beat positions and BPM
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(audio)
        if f_bh == 1:
            peak1_bpm, peak1_weight, peak1_spread, peak2_bpm, peak2_weight, \
                peak2_spread, histogram = es.BpmHistogramDescriptors()(beats_intervals)
        tempo = bpm
        times = beats
        # Convert beat times (seconds) to hop-sized frame indices.
        beats_frames = (beats * fs) / hopSize
        beats_frames = beats_frames.astype(int)
        # (Optional matplotlib plotting of the BPM histogram omitted.)

        #####################################
        # extract full beat-aligned chroma
        #####################################
        framecutter = ess.FrameCutter(frameSize=frameSize, hopSize=hopSize,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                          magnitudeThreshold=0.00001,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)
        # Default HPCP parameters are fine for plotting; key estimation needs
        # higher resolution and custom parameters.
        hpcp = ess.HPCP()
        hpcp_key = ess.HPCP(
            size=36,                 # higher resolution for key estimation
            referenceFrequency=440,  # assume tuning frequency is 440 Hz
            bandPreset=False,
            minFrequency=20,
            maxFrequency=3500,
            weightType='cosine',
            nonLinear=False,
            windowSize=1.)
        key = ess.Key(
            profileType='edma',  # use the profile for electronic music
            numHarmonics=4,
            pcpSize=36,
            slope=0.6,
            usePolyphony=True,
            useThreeChords=True)

        # Use a pool to store data
        pool = essentia.Pool()

        # Connect streaming algorithms.
        # With the high-/low-pass filter:
        loader.audio >> HP.signal
        HP.signal >> LP.signal
        LP.signal >> framecutter.signal
        # Without the filter, connect instead:
        # loader.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
        spectralpeaks.magnitudes >> hpcp_key.magnitudes
        spectralpeaks.frequencies >> hpcp_key.frequencies
        hpcp_key.hpcp >> key.pcp
        hpcp.hpcp >> (pool, 'tonal.hpcp')
        key.key >> (pool, 'tonal.key_key')
        key.scale >> (pool, 'tonal.key_scale')
        key.strength >> (pool, 'tonal.key_strength')

        # Run the streaming network
        essentia.run(loader)

        chroma = pool['tonal.hpcp'].T
        key = pool['tonal.key_key']
        scale = pool['tonal.key_scale']

        if f_chroma == 1:
            # Average the chroma over each beat interval and L1-normalize
            # the resulting beat-aligned vectors.
            chroma_matrix = np.zeros((beats_frames.shape[0], 12))
            prev_beat = 0
            act_beat = 0
            chroma_align = chroma.transpose()
            mat_index = 0
            for i in beats_frames:
                act_beat = i
                value = sum(chroma_align[prev_beat:act_beat]) / (act_beat - prev_beat)
                chroma_align[prev_beat:act_beat] = value
                prev_beat = i
                if np.linalg.norm(value, ord=1) != 0:
                    value = value / np.linalg.norm(value, ord=1)
                chroma_matrix[mat_index] = value
                mat_index = mat_index + 1
            # (Optional chromagram plotting with librosa.display omitted.)

        #####################################
        # extract full chroma text
        #####################################
        if f_notes == 1:
            # Threshold the chroma: keep only the strongest bin per frame
            # (plus any bin within 80 % of it); zero out quiet frames.
            m, n = chroma.shape
            avg = 0
            chroma = chroma.transpose()
            m, n = chroma.shape
            for j in chroma:
                avg = avg + np.sum(j)
            avg = avg / m
            threshold = avg / 2
            for i in chroma:
                if np.sum(i) > threshold:
                    ind = np.where(i == np.max(i))
                    max_val = i[ind]  # is always 1!
                    i[ind] = 0
                    ind2 = np.where(i == np.max(i))  # second-strongest bin
                    i[ind] = 1
                    low_values_flags = i < 0.8
                    i[low_values_flags] = 0
                else:
                    i.fill(0)
            chroma = chroma.transpose()

            # Collapse each beat interval to its single dominant pitch class.
            prev_beat = 0
            act_beat = 0
            sum_key = np.zeros(12)
            chroma = chroma.transpose()
            for i in beats_frames:
                act_beat = i
                sum_key = sum(chroma[prev_beat:act_beat])
                ind = np.where(sum_key == np.max(sum_key))
                ind = ind[0]
                fill = np.zeros(12)
                if np.all(chroma[prev_beat:act_beat] == 0):
                    fill[ind] = 0
                else:
                    fill[ind] = 1
                chroma[prev_beat:act_beat] = fill
                prev_beat = i

            # Read off one note index per beat.
            notes = []
            prev_beat = 0
            act_beat = 0
            for i in beats_frames:
                act_beat = i
                sum_key = sum(chroma[prev_beat:act_beat])
                ind = np.where(sum_key == np.max(sum_key))
                notes.append(ind[0][0])
                prev_beat = i

    gc.collect()
    return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
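A hedged call sketch for compute_features with every feature group enabled (es, ess, essentia, np, gc and the global fs are module-level imports/globals in the original; the path and fs value are assumptions):

fs = 44100  # assumed value of the module-level global
bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl = \
    compute_features('track.mp3', f_mfcc_kl=1, f_mfcc_euclid=1,
                     f_notes=1, f_chroma=1, f_bh=1)
print('%.1f BPM, key %s %s' % (bpm, key, scale))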