def get_key(file_in):
    """Estimate the musical key and scale of an audio file.

    A streaming network is assembled (loader -> frame cutter -> window ->
    spectrum -> spectral peaks -> HPCP -> key detector), executed once over
    the whole file, and the detected key and scale are wrapped in a ``Key``
    object (``Key`` is defined outside this function).

    Args:
        file_in: Path of the audio file to analyse.

    Returns:
        ``Key(key, scale)`` built from the pool entries ``tonal.key_key``
        and ``tonal.key_scale``.
    """
    results = Pool()

    # Processing stages, in signal-flow order.
    audio_source = streaming.MonoLoader(filename=file_in)
    frame_cutter = streaming.FrameCutter()
    window = streaming.Windowing(type="blackmanharris62")
    magnitude_spectrum = streaming.Spectrum()
    peak_picker = streaming.SpectralPeaks(
        orderBy="magnitude",
        magnitudeThreshold=1e-05,
        minFrequency=40,
        maxFrequency=5000,
        maxPeaks=10000,
    )
    chroma = streaming.HPCP()
    key_detector = streaming.Key()

    # Wire the stages together.
    audio_source.audio >> frame_cutter.signal
    frame_cutter.frame >> window.frame >> magnitude_spectrum.frame
    magnitude_spectrum.spectrum >> peak_picker.spectrum
    peak_picker.magnitudes >> chroma.magnitudes
    peak_picker.frequencies >> chroma.frequencies
    chroma.hpcp >> key_detector.pcp

    # Every output of the key detector is collected, although only key and
    # scale are used for the return value.
    key_detector.key >> (results, 'tonal.key_key')
    key_detector.scale >> (results, 'tonal.key_scale')
    key_detector.strength >> (results, 'tonal.key_strength')

    run(audio_source)

    return Key(results['tonal.key_key'], results['tonal.key_scale'])
def estimate_chroma(self, uid):
    """Compute the frame-wise 12-bin HPCP chromagram of one track.

    The audio file is resolved through
    ``self.audio_path_extractor.audio_path_name(uid)`` and pushed through a
    streaming network: loader -> frame cutter -> window -> spectrum ->
    spectral peaks -> HPCP.

    Args:
        uid: Identifier handed to ``self.audio_path_extractor`` to obtain
            the audio file path.

    Returns:
        The ``chroma.hpcp`` pool entry rolled by -3 along axis 1, i.e.
        re-indexed from an 'A'-based to a 'C'-based bin order.
    """
    loader = esstr.MonoLoader(
        filename=self.audio_path_extractor.audio_path_name(uid))
    framecutter = esstr.FrameCutter(
        hopSize=self.hop_size, frameSize=self.frame_size)
    windowing = esstr.Windowing(type="blackmanharris62")
    spectrum = esstr.Spectrum()
    spectralpeaks = esstr.SpectralPeaks(
        orderBy="magnitude",
        magnitudeThreshold=1e-05,
        minFrequency=40,
        maxFrequency=5000,
        maxPeaks=10000,
    )
    # An earlier configuration used bandSplitFrequency=250.0 and
    # nonLinear=False; the values below are the ones in use.
    hpcp = esstr.HPCP(
        size=12,
        referenceFrequency=self.tuning_freq,
        harmonics=8,
        bandPreset=True,
        minFrequency=40.0,
        maxFrequency=5000.0,
        bandSplitFrequency=500.0,
        weightType="cosine",
        nonLinear=True,
        windowSize=1.0,
    )
    pool = essentia.Pool()

    # Connect algorithms together.  The magnitude spectrum only feeds the
    # peak picker: it is deliberately NOT stored in the pool, because the
    # pool is local and nothing ever read that entry, so keeping every
    # frame's spectrum was pure memory overhead.
    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> (pool, 'chroma.hpcp')

    essentia.run(loader)

    # Roll from 'A'-based to 'C'-based bin order.
    return np.roll(pool['chroma.hpcp'], shift=-3, axis=1)
def tonalAnalysis(self, signal):
    """Run frame-wise tonal descriptors over an in-memory signal.

    The signal is converted with ``np.single`` and fed through a streaming
    network that cuts frames (4096 samples, hop 2048), windows them,
    computes the spectrum and its peaks, and derives dissonance,
    inharmonicity and tuning frequency from those peaks.

    Args:
        signal: Audio samples; passed to ``np.single`` before use.

    Returns:
        A 3-tuple of the pool entries ``tonal.dissonance``,
        ``tonal.inharmonicity`` and ``tonal.tuningFrequency``.
    """
    source = ess.VectorInput(np.single(signal))
    cutter = ess.FrameCutter(frameSize=4096, hopSize=2048,
                             silentFrames='noise')
    window = ess.Windowing(type='blackmanharris62')
    magnitude_spectrum = ess.Spectrum()
    peaks = ess.SpectralPeaks(
        orderBy='frequency',
        magnitudeThreshold=1e-5,
        minFrequency=20,
        maxFrequency=3500,
        maxPeaks=60,
    )
    dissonance_algo = ess.Dissonance()
    tuning_algo = ess.TuningFrequency()
    inharmonicity_algo = ess.Inharmonicity()

    # Results of the run are accumulated here.
    results = essentia.Pool()

    # Front end: samples -> frames -> windowed frames -> spectrum -> peaks.
    source.data >> cutter.signal
    cutter.frame >> window.frame >> magnitude_spectrum.frame
    magnitude_spectrum.spectrum >> peaks.spectrum

    # The same peak magnitudes/frequencies feed all three descriptors.
    peaks.magnitudes >> dissonance_algo.magnitudes
    peaks.frequencies >> dissonance_algo.frequencies
    peaks.magnitudes >> tuning_algo.magnitudes
    peaks.frequencies >> tuning_algo.frequencies
    peaks.magnitudes >> inharmonicity_algo.magnitudes
    peaks.frequencies >> inharmonicity_algo.frequencies

    # Descriptor outputs -> pool (tuningCents is stored but not returned).
    dissonance_algo.dissonance >> (results, 'tonal.dissonance')
    inharmonicity_algo.inharmonicity >> (results, 'tonal.inharmonicity')
    tuning_algo.tuningFrequency >> (results, 'tonal.tuningFrequency')
    tuning_algo.tuningCents >> (results, 'tonal.tuningCents')

    # Run streaming network.
    essentia.run(source)

    return (
        results['tonal.dissonance'],
        results['tonal.inharmonicity'],
        results['tonal.tuningFrequency'],
    )
]
# NOTE(review): the bracket above closes a list that starts outside this
# excerpt -- presumably the ``names`` list iterated below; confirm.
#
# For every entry of ``names``, build an HPCP streaming network for the ten
# files "genres/<name>/<name>.0000<i>.au" with i = 0..9.  The excerpt ends
# after the peak picker is wired to the HPCP; storing and running the
# network happen outside the visible lines.
for name in names:
    for i in range(10):
        # File name pattern: <name>.0000<i>.au
        song_name = name + '.' + '0000' + str(i) + '.au'
        print(song_name)
        loader = ess.MonoLoader(filename="genres/" + name + "/" + song_name)
        framecutter = ess.FrameCutter(frameSize=4096,
                                      hopSize=2048,
                                      silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                          magnitudeThreshold=0.00001,
                                          minFrequency=20,
                                          maxFrequency=3500,
                                          maxPeaks=60)
        # Use default HPCP parameters
        hpcp = ess.HPCP()
        # Use pool to store data
        pool = essentia.Pool()
        # Connect streaming algorithms:
        # loader -> frames -> window -> spectrum -> peaks -> HPCP
        loader.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
# List the files of ``audio_folder``, dropping the macOS ".DS_Store" entry
# if it is present.
soundfiles = os.listdir(audio_folder)
if '.DS_Store' in soundfiles:
    soundfiles.remove('.DS_Store')
# NOTE: Python 2 print statement -- this fragment does not parse on Python 3.
print "\nANALYSIS..."
# For every listed file, instantiate the streaming algorithms of an HPCP
# analysis chain.  All settings (sample_rate, window_size, hop_size, ...)
# come from names defined outside this excerpt.  The excerpt ends after the
# spectrum stage is wired; the remaining connections and the run call are
# outside the visible lines.
for item in soundfiles:
    loader = estr.MonoLoader(filename=audio_folder+'/'+item,
                             sampleRate=sample_rate)
    framecutter = estr.FrameCutter(frameSize=window_size, hopSize=hop_size)
    windowing = estr.Windowing(size=window_size, type=window_type)
    spectrum = estr.Spectrum(size=window_size)
    spectralpeaks = estr.SpectralPeaks(magnitudeThreshold=magnitude_threshold,
                                       minFrequency=min_frequency,
                                       maxFrequency=max_frequency,
                                       maxPeaks=max_peaks,
                                       sampleRate=sample_rate)
    hpcp = estr.HPCP(bandPreset=band_preset,
                     harmonics = harmonics,
                     minFrequency=min_frequency,
                     maxFrequency=max_frequency,
                     nonLinear=non_linear,
                     normalized=normalize,
                     referenceFrequency=reference_frequency,
                     sampleRate=sample_rate,
                     weightType=weight_type,
                     windowSize=weight_window_size)
    # Pool that will receive the analysis results.
    pool = e.Pool()
    # loader -> frames -> window -> spectrum
    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
def compute_features(path, f_mfcc_kl, f_mfcc_euclid, f_notes, f_chroma, f_bh):
    """Extract rhythm, tonal and timbre descriptors from one audio file.

    Every ``f_*`` argument is a flag: a feature group is computed only when
    its flag equals 1, otherwise the corresponding outputs keep the
    placeholder value 0.  Key and scale are always estimated.  The analysis
    sample rate of the standard-mode loader is the module-level ``fs``.

    Args:
        path: Path of the audio file to analyse.
        f_mfcc_kl: 1 to compute MFCC statistics (mean, variance, covariance).
        f_mfcc_euclid: 1 to compute the same MFCC statistics.
        f_notes: 1 to compute the per-beat dominant pitch-class list.
        f_chroma: 1 to compute the beat-aligned, L1-normalised chroma matrix.
        f_bh: 1 to compute the BPM histogram.

    Returns:
        The 10-tuple ``(bpm, histogram, key, scale, notes, chroma_matrix,
        mean, cov, var, cov_kl)``.  ``cov`` holds the upper triangle of the
        MFCC covariance matrix and ``cov_kl`` the full matrix.  When the file
        cannot be loaded, ``(0, [], 0, 0, [], [], [], [], [], [])`` is
        returned instead.
    """
    gc.enable()

    # Value handed back when essentia cannot open/decode the file.
    load_failed = (0, [], 0, 0, [], [], [], [], [], [])

    # Standard-mode loader: decodes the whole file (resampling to ``fs`` if
    # the file's sample rate differs).
    try:
        audio = es.MonoLoader(filename=path, sampleRate=fs)()
    except Exception:
        print("Erroneos File detected by essentia standard: skipping!")
        return load_failed

    # Streaming-mode loader feeding the chroma/key network below.
    # NOTE(review): the rate is hard-coded to 44100 while the beat frame
    # indices further down are derived from ``fs``; the two only line up
    # when fs == 44100 -- confirm.
    try:
        loader = ess.MonoLoader(filename=path, sampleRate=44100)
    except Exception:
        print("Erroneos File detected by essentia streaming: skipping!")
        return load_failed

    # Frame layout of the streaming (chroma/key) analysis.
    frameSize = 4096
    hopSize = 2048

    # Placeholders for the feature groups that may be switched off.
    bpm = 0
    histogram = 0
    notes = 0
    chroma_matrix = 0
    mean = 0
    cov = 0
    var = 0
    cov_kl = 0

    #####################################
    # extract mfcc
    #####################################
    if f_mfcc_kl == 1 or f_mfcc_euclid == 1:
        hamming_window = es.Windowing(type='hamming')
        mfcc_spectrum = es.Spectrum()  # magnitude spectrum only
        mfcc = es.MFCC(numberCoefficients=13)
        # Second output of MFCC is the coefficient vector.
        mfccs = np.array([
            mfcc(mfcc_spectrum(hamming_window(frame)))[1]
            for frame in es.FrameGenerator(audio, frameSize=2048, hopSize=1024)
        ])
        mean = np.mean(mfccs.T, axis=1)
        var = np.var(mfccs.T, axis=1)
        cov_kl = np.cov(mfccs.T)
        # Keep only the upper triangle to shorten the vector.
        cov = cov_kl[np.triu_indices(13)]

    #####################################
    # extract beat features and histogram
    #####################################
    if f_bh == 1 or f_chroma == 1 or f_notes == 1:
        # Beat positions (seconds) and BPM.
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, beats, _, _, beats_intervals = rhythm_extractor(audio)
        if f_bh == 1:
            # The histogram is the last output of BpmHistogramDescriptors.
            histogram = es.BpmHistogramDescriptors()(beats_intervals)[-1]
        # Beat positions expressed as chroma frame indices.
        beats_frames = ((beats * fs) / hopSize).astype(int)

    #####################################
    # extract chroma and key (streaming network)
    #####################################
    # Band-limit the signal to 128..4096 Hz before the tonal analysis.
    HP = ess.HighPass(cutoffFrequency=128)
    LP = ess.LowPass(cutoffFrequency=4096)
    framecutter = ess.FrameCutter(frameSize=frameSize,
                                  hopSize=hopSize,
                                  silentFrames='noise')
    windowing = ess.Windowing(type='blackmanharris62')
    spectrum = ess.Spectrum()
    spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                      magnitudeThreshold=0.00001,
                                      minFrequency=20,
                                      maxFrequency=3500,
                                      maxPeaks=60)
    # Default HPCP for the chroma features; a higher-resolution HPCP with
    # custom parameters gives better key estimation.
    hpcp = ess.HPCP()
    hpcp_key = ess.HPCP(
        size=36,  # higher resolution for key estimation
        referenceFrequency=440,  # assume the tuning frequency is 440 Hz
        bandPreset=False,
        minFrequency=20,
        maxFrequency=3500,
        weightType='cosine',
        nonLinear=False,
        windowSize=1.)
    key_estimator = ess.Key(
        profileType='edma',  # profile for electronic music
        numHarmonics=4,
        pcpSize=36,
        slope=0.6,
        usePolyphony=True,
        useThreeChords=True)
    pool = essentia.Pool()

    # Filtered path.  To bypass the filters, connect
    # ``loader.audio >> framecutter.signal`` instead of the next three lines.
    loader.audio >> HP.signal
    HP.signal >> LP.signal
    LP.signal >> framecutter.signal

    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    spectralpeaks.magnitudes >> hpcp_key.magnitudes
    spectralpeaks.frequencies >> hpcp_key.frequencies
    hpcp_key.hpcp >> key_estimator.pcp
    hpcp.hpcp >> (pool, 'tonal.hpcp')
    key_estimator.key >> (pool, 'tonal.key_key')
    key_estimator.scale >> (pool, 'tonal.key_scale')
    key_estimator.strength >> (pool, 'tonal.key_strength')

    essentia.run(loader)

    chroma = pool['tonal.hpcp'].T  # one row per pitch class
    key = pool['tonal.key_key']
    scale = pool['tonal.key_scale']

    #####################################
    # extract full beat aligned chroma
    #####################################
    if f_chroma == 1:
        # Frame-major view of ``chroma``: the beat averages written through
        # it are therefore also what the note extraction below works on.
        chroma_align = chroma.transpose()
        chroma_matrix = np.zeros((beats_frames.shape[0], 12))
        prev_beat = 0
        for row, act_beat in enumerate(beats_frames):
            segment = chroma_align[prev_beat:act_beat]
            span = act_beat - prev_beat
            prev_beat = act_beat
            if segment.shape[0] == 0:
                # No frames between two beats (e.g. a beat in frame 0):
                # averaging would divide 0 by 0, so the row stays zero.
                continue
            # Built-in sum adds the frames one after another.
            value = sum(segment) / span
            segment[:] = value
            norm = np.linalg.norm(value, ord=1)
            if norm != 0:
                value = value / norm
            chroma_matrix[row] = value

    #####################################
    # extract full chroma text
    #####################################
    if f_notes == 1:
        frames = chroma.transpose()  # frame-major view, edited in place
        frame_sums = [np.sum(frame) for frame in frames]
        # Frames below half of the average frame energy are silenced.
        threshold = (sum(frame_sums) / frames.shape[0]) / 2
        for frame, energy in zip(frames, frame_sums):
            if energy > threshold:
                # Strongest bin(s) -> 1, everything below 0.8 -> 0.
                frame[frame == np.max(frame)] = 1
                frame[frame < 0.8] = 0
            else:
                frame.fill(0)

        # One note per beat: the first pitch class with the largest summed
        # activation between the previous beat and this one.  An interval
        # that is empty or all zero yields 0.
        notes = []
        prev_beat = 0
        for act_beat in beats_frames:
            totals = sum(frames[prev_beat:act_beat])
            notes.append(np.argmax(totals))
            prev_beat = act_beat

    gc.collect()
    return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl