# Imports inferred from the calls used below (essentia, soundfile, madmom,
# timbral_models and ADTLib are assumed to be installed in the project).
import os
import ntpath

import numpy as np
import soundfile as sf
import essentia.standard as es
from essentia import Pool, array
from scipy.interpolate import interp1d
from madmom.features import onsets
import timbral_models
from ADTLib import ADT

# Module-level configuration (CHOPPED_PATH, EQ_NEW_PATH, EQ_PATH, sampleRate) is
# expected to be defined elsewhere in the project.


def filter_loops():
    """Band-pass every chopped loop that has not been processed yet."""
    loops = os.listdir(CHOPPED_PATH)
    proc_loops = os.listdir(EQ_NEW_PATH)
    lp_filter = es.LowPass(cutoffFrequency=90, sampleRate=sampleRate)
    bp_filter = es.BandPass(bandwidth=100, cutoffFrequency=280, sampleRate=sampleRate)
    hp_filter = es.HighPass(cutoffFrequency=9000, sampleRate=sampleRate)
    i = 0
    for loop in loops:
        i = i + 1
        if i % 50 == 0:
            print(str(i))  # progress indicator
        if ".wav" in loop:
            if ("bpf_" + loop) not in proc_loops:
                audio_file = es.MonoLoader(filename=CHOPPED_PATH + loop,
                                           sampleRate=sampleRate)
                # lpf_audio = lp_filter(audio_file())
                bpf_audio = bp_filter(audio_file())
                # hpf_audio = hp_filter(audio_file())
                # sf.write(EQ_PATH + "lpf_" + loop, lpf_audio, sampleRate)
                sf.write(EQ_NEW_PATH + "bpf_" + loop, bpf_audio, sampleRate)
def filter_loops_eval():
    """Apply the matching low/band/high-pass filter to each evaluation output."""
    loops_paths = [
        "icassp2021_outputs/outputs_stft_coherence/",
        "icassp2021_outputs/outputs_wavstft_coherence/"
    ]
    lp_filter = es.LowPass(cutoffFrequency=90, sampleRate=sampleRate)
    bp_filter = es.BandPass(bandwidth=100, cutoffFrequency=280, sampleRate=sampleRate)
    hp_filter = es.HighPass(cutoffFrequency=9000, sampleRate=sampleRate)
    for path in loops_paths:
        loops = os.listdir(path)
        for loop in loops:
            if ".wav" in loop:
                audio_file = es.MonoLoader(filename=path + loop, sampleRate=sampleRate)
                # The filter is chosen from the prefix already present in the file name.
                if "lpf" in loop:
                    lpf_audio = lp_filter(audio_file())
                    sf.write(path + "eq/" + loop, lpf_audio, sampleRate)
                if "bpf" in loop:
                    bpf_audio = bp_filter(audio_file())
                    sf.write(path + "eq/" + loop, bpf_audio, sampleRate)
                if "hpf" in loop:
                    hpf_audio = hp_filter(audio_file())
                    sf.write(path + "eq/" + loop, hpf_audio, sampleRate)
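# Hedged usage sketch (assumptions, not part of the original module): filter_loops()
# and filter_loops_eval() rely on module-level configuration defined elsewhere in
# the project -- CHOPPED_PATH, EQ_NEW_PATH and sampleRate. The values below are
# placeholders only, shown to make the expected setup explicit.
#
# CHOPPED_PATH = "chopped_loops/"   # directory of chopped .wav loops (placeholder)
# EQ_NEW_PATH = "eq_loops/"         # output directory for band-passed loops (placeholder)
# sampleRate = 16000                # project-wide sample rate (placeholder)
#
# if __name__ == "__main__":
#     filter_loops()        # band-pass every unprocessed loop in CHOPPED_PATH
#     filter_loops_eval()   # filter the ICASSP 2021 model outputs by file-name prefix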
def onset_detection(audio):
    """
    Onset detection using convolutional neural networks.

    The CNN onset detector was developed by Sebastian Böck and Jan Schlüter and
    is implemented in the madmom MIR Python library.

    :type audio: vector_real
    :param audio: input audio signal
    :rtype: vector_real
    :return: onset times in samples
    """
    # Band-pass the normalised signal to emphasise the percussive range
    audio_filt = es.BandPass(bandwidth=200,
                             cutoffFrequency=300)(array(audio / max(audio)))
    # Frame-wise onset activation function from the CNN detector
    onset_strength = onsets.CNNOnsetProcessor()(audio_filt)
    # Peak-pick the activation function to obtain onset frame indices
    onset_frames = onsets.peak_picking(onset_strength, threshold=0.9, smooth=6)
    # Convert frame indices to sample positions
    frame_rate = len(audio) / len(onset_strength)
    onset_times = onset_frames * frame_rate
    return onset_times
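# Hedged usage sketch (assumption, not from the original code): onset_detection()
# returns positions in samples, so dividing by the loader's sample rate gives onset
# times in seconds. The file name and sample rate below are placeholders.
#
# audio = es.MonoLoader(filename="analysis/example_loop.wav", sampleRate=sampleRate)()
# onset_samples = onset_detection(audio)
# onset_seconds = onset_samples / sampleRate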
def analysis_function(loop, sampleRate=16000):
    lp_filter = es.LowPass(cutoffFrequency=90, sampleRate=sampleRate)
    bp_filter = es.BandPass(bandwidth=20, cutoffFrequency=280, sampleRate=sampleRate)
    hp_filter = es.HighPass(cutoffFrequency=9000, sampleRate=sampleRate)

    # Kick/snare/hi-hat activation curves from automatic drum transcription (ADT)
    [_, pattern] = ADT([loop], output_act='yes', tab='no', save_dir="analysis/")
    pattern = np.array(pattern)[0]

    # Upsample the activations (160 frames) to the audio time base
    # (29538 samples at 16 kHz)
    time_audio = np.linspace(0, float(29538) / 16000, 29538)
    time_act = np.linspace(0, float(29538) / 16000, 160)
    final_pattern = np.clip(
        np.array([
            interp1d(time_act, pattern[0, :, 0])(time_audio),
            interp1d(time_act, pattern[1, :, 0])(time_audio),
            interp1d(time_act, pattern[2, :, 0])(time_audio)
        ]).T, 0.0, 1.0)
    final_pattern = final_pattern / final_pattern.max(axis=0)
    final_pattern = np.expand_dims(final_pattern, 0)

    # Split the loop into low/mid/high bands and write them to disk
    # for the timbral analysis
    audio_file = es.MonoLoader(filename=loop, sampleRate=sampleRate)
    loop_basename = ntpath.basename(loop)
    lpf_audio = lp_filter(audio_file())
    bpf_audio = bp_filter(audio_file())
    hpf_audio = hp_filter(audio_file())
    sf.write("analysis/lpf_" + loop_basename, lpf_audio, sampleRate)
    sf.write("analysis/bpf_" + loop_basename, bpf_audio, sampleRate)
    sf.write("analysis/hpf_" + loop_basename, hpf_audio, sampleRate)

    # AudioCommons timbral descriptors per band
    unordered_kick_features = timbral_models.timbral_extractor(
        "analysis/lpf_" + loop_basename, clip_output=True)
    unordered_snare_features = timbral_models.timbral_extractor(
        "analysis/bpf_" + loop_basename, clip_output=True)
    unordered_hh_features = timbral_models.timbral_extractor(
        "analysis/hpf_" + loop_basename, clip_output=True)

    # Normalise each descriptor by a fixed per-band reference maximum
    features_kick = [
        unordered_kick_features['warmth'] / 69.738235,
        unordered_kick_features['roughness'] / 71.95989,
        unordered_kick_features['brightness'] / 82.336105,
        unordered_kick_features['hardness'] / 75.53646,
        unordered_kick_features['boominess'] / 71.00043,
        unordered_kick_features['depth'] / 100.0,
        unordered_kick_features['sharpness'] / 81.7323,
    ]
    features_snare = [
        unordered_snare_features['warmth'] / 69.57681,
        unordered_snare_features['roughness'] / 67.66642,
        unordered_snare_features['brightness'] / 80.19115,
        unordered_snare_features['hardness'] / 71.689445,
        unordered_snare_features['boominess'] / 61.422714,
        unordered_snare_features['depth'] / 100.0,
        unordered_snare_features['sharpness'] / 71.406494
    ]
    features_hh = [
        unordered_hh_features['warmth'] / 32.789112,
        unordered_hh_features['roughness'] / 1.0,
        unordered_hh_features['brightness'] / 85.24432,
        unordered_hh_features['hardness'] / 67.71172,
        unordered_hh_features['boominess'] / 2.491137,
        unordered_hh_features['depth'] / 0.5797179,
        unordered_hh_features['sharpness'] / 87.83693
    ]

    # Harmonic pitch class profile of the full loop
    # (file_to_hpcp is defined elsewhere in the project)
    hpcp = file_to_hpcp(audio_file())

    return final_pattern, hpcp, features_kick, features_snare, np.clip(
        features_hh, 0, 1)
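# Hedged usage sketch (assumption): analysis_function() returns the interpolated
# kick/snare/hi-hat activation pattern, the loop's HPCP, and one normalised
# 7-dimensional timbral feature vector per band. The path below is a placeholder.
#
# pattern, hpcp, kick_feats, snare_feats, hh_feats = analysis_function(
#     "analysis/example_loop.wav", sampleRate=16000)
# print(pattern.shape)    # (1, 29538, 3): one activation curve per instrument
# print(len(kick_feats))  # 7 timbral descriptors, normalised to [0, 1]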
# Method of the project's drum-analysis class: it relies on self.audio,
# self.sampleRate, self.onsets_broad_band(), self.get_onsets() and
# self.quantize_onsets() defined elsewhere in that class.
def onsets_per_bands(self):
    '''
    Performs a band-wise onset analysis of the drums.

    :return: analysisResults: an essentia Pool containing the band-based and
        broadband analysis of the drums
    '''
    # Pool to save results
    analysisResults = Pool()
    # Save the audio in the pool
    analysisResults.add("audio", self.audio)
    drum_length = len(self.audio) / self.sampleRate

    # Create the metrical grid from the broadband onset analysis
    beats, grid, onsets = self.onsets_broad_band()
    grid = array(grid)
    grid = grid[grid <= drum_length]
    analysisResults.add("beats", array(beats))
    analysisResults.add("grid", grid)
    analysisResults.add("onsets", array(onsets))
    grid_res = grid[1] - grid[0]  # grid resolution in seconds

    # Create the filter specs (band-pass filter bank)
    # ref: http://essentia.upf.edu/documentation/reference/streaming_bandBands.html
    '''
    f0s = np.array([0.0, 50.0, 100.0, 150.0, 200.0, 300.0, 400.0, 510.0, 630.0,
                    770.0, 920.0, 1080.0, 1270.0, 1480.0, 1720.0, 2000.0, 2320.0,
                    2700.0, 3150.0, 3700.0, 4400.0, 5300.0, 6400.0, 7700.0,
                    9500.0, 12000.0, 15500.0, 20500.0])
    f1s = np.array([50.0, 100.0, 150.0, 200.0, 300.0, 400.0, 510.0, 630.0, 770.0,
                    920.0, 1080.0, 1270.0, 1480.0, 1720.0, 2000.0, 2320.0, 2700.0,
                    3150.0, 3700.0, 4400.0, 5300.0, 6400.0, 7700.0, 9500.0,
                    12000.0, 15500.0, 20500.0, 27000.0])
    '''
    # Band edges after Herrera et al.:
    # http://www.music.mcgill.ca/~ich/classes/mumt614/similarity/herrera02automatic.pdf
    f0s = np.array([40., 70., 130., 160., 300., 5000., 7000., 10000.])
    f1s = np.array([70., 110., 145., 190., 400., 7000., 10000., 15000.])
    bandwidths = f1s - f0s
    cutoffFrequencies = (f0s + f1s) / 2.

    analysisResults.add("x_time",
                        array(np.arange(len(self.audio)) / self.sampleRate))
    analysisResults.add("f0s", array(f0s))
    analysisResults.add("f1s", array(f1s))
    analysisResults.add("bandwidths", array(bandwidths))
    analysisResults.add("cutoffFrequencies", array(cutoffFrequencies))

    # Matrix of onsets: dim 1: frequency band, dim 2: onsets snapped to the grid
    # (1 where there is an onset, 0 where there is none)
    drum_onsets_quantized = []
    # Matrix of energies: dim 1: frequency band, dim 2: energies where
    # drum_onsets_quantized is 1
    drum_onset_energies_quantized = []

    # Filter the signal and find onsets band by band
    max_Energy = 0
    for ix, f0 in enumerate(f0s):
        # Create the band-pass filter and filter the signal
        print("band", str(ix), "is being calculated")
        BPF = es.BandPass(bandwidth=bandwidths[ix],
                          cutoffFrequency=cutoffFrequencies[ix],
                          sampleRate=self.sampleRate)
        signal = BPF(array(self.audio))
        onsets = self.get_onsets(_audio=signal)
        analysisResults.add("audio_band_fc_" + str(cutoffFrequencies[ix]), signal)

        # Calculate the energy of each onset
        energies = []
        EnergyEstimator = es.Energy()
        # maxEnergy = EnergyEstimator(array(np.hanning(int(grid_res/2.0*self.sampleRate)) *
        #                             np.random(int(grid_res/2.0*self.sampleRate))))  # rough estimate
        '''
        # This variant calculates the energy within a small windowed frame around the onset
        for onset_ix, onset in enumerate(onsets):
            # ix0 = int(max(((onset - grid_res/16)*self.sampleRate), 0))
            # ix1 = int(min(((onset + grid_res/5.33)*self.sampleRate), len(signal)-1))
            ix0 = int(max((onset*self.sampleRate), 0))
            ix1 = int(max(ix0 + 512, len(signal)-2))
            sig = signal[ix0:ix1]
            sig = np.append(sig[::-1], sig[1:])
            if len(sig) >= .01*44100:
                window = es.Windowing(size=int(len(sig)))
                energies.append(EnergyEstimator(window(sig)))
            else:
                onsets = np.delete(onsets, onset_ix)
        '''
        # This variant calculates the energy from each onset to half a grid step later
        for onset_ix, onset in enumerate(onsets):
            # ix0 = int(max(((onset - grid_res/16)*self.sampleRate), 0))
            # ix1 = int(min(((onset + grid_res/5.33)*self.sampleRate), len(signal)-1))
            ix0 = int(max(onset * self.sampleRate, 0))
            ix1 = int(min((onset + grid_res / 2) * self.sampleRate, len(signal) - 2))
            sig = signal[ix0:ix1]
            # Mirror the frame to avoid edge effects before estimating its energy
            sig = np.append(sig[::-1], sig[1:])
            if len(sig) >= .01 * 44100:
                # window = es.Windowing(size=int(len(sig)))
                energies.append(EnergyEstimator(sig))
            else:
                # Discard onsets whose frame is shorter than 10 ms
                onsets = np.delete(onsets, onset_ix)
        max_Energy = max(max_Energy, max(np.array(energies)))

        analysisResults.add("onsets_band_fc_" + str(cutoffFrequencies[ix]), onsets)
        analysisResults.add("energies_band_fc_" + str(cutoffFrequencies[ix]),
                            energies)

        # Snap the band onsets and their energies to the grid
        quantized_onset_array_in_band, quantized_energy_array_in_band = \
            self.quantize_onsets(onsets, energies, grid)
        drum_onsets_quantized.append(quantized_onset_array_in_band)
        drum_onset_energies_quantized.append(quantized_energy_array_in_band)

    analysisResults.add("onsets_quantized_matrix",
                        array(np.array(drum_onsets_quantized)))
    analysisResults.add("energies_quantized_matrix",
                        array(np.array(drum_onset_energies_quantized) / max_Energy))
    for ix, f0 in enumerate(f0s):
        analysisResults.add(
            "normalized_energies_band_fc_" + str(cutoffFrequencies[ix]),
            analysisResults["energies_band_fc_" + str(cutoffFrequencies[ix])][0] /
            max_Energy)
    return analysisResults
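# Hedged usage sketch (assumption): the Pool returned by onsets_per_bands() can be
# read back with the keys added above; "onsets_quantized_matrix" and
# "energies_quantized_matrix" hold one row per frequency band, aligned to the grid.
# "drums" below is a placeholder instance of the drum-analysis class.
#
# results = drums.onsets_per_bands()
# grid = results["grid"][0]                              # metrical grid in seconds
# onset_matrix = results["onsets_quantized_matrix"][0]   # bands x grid positions
# energy_matrix = results["energies_quantized_matrix"][0]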