def save_separated_audiofiles(self):
    """Write the separated percussive and harmonic signals to audio files.

    Two files are produced from ``self.x_p`` and ``self.x_h``:
    ``<directory><filename>_percussive.<format>`` and
    ``<directory><filename>_harmonic.<format>``. ``self.directory`` is
    concatenated with ``self.filename`` verbatim, so it must carry its own
    trailing separator if one is wanted.
    """
    # Make sure the output directory exists. os.makedirs creates every missing
    # level in one call; the previous level-by-level os.mkdir over
    # directory.split("/") tried to create "" for absolute paths and raised.
    if self.directory != "" and ("/" in self.directory):
        if not os.path.isdir(self.directory):
            os.makedirs(self.directory)

    # The "...IsSpecified" flags are compared against the *string* "False".
    if self.fsIsSpecified == "False":  # Notify user if sampling rate not specified
        print(
            "Sample Rate not specified for writing the audio files. Assumed Fs (Hz) is "
            + str(self.fs))
    if self.formatIsSpecified == "False":  # Notify user if format not specified
        print(
            "File format not specified for writing the audio files. Assumed format is "
            + str(self.format))

    # Percussive first, then harmonic; a fresh writer is used for each file.
    for suffix, signal in (("_percussive.", self.x_p), ("_harmonic.", self.x_h)):
        writer = es.MonoWriter(sampleRate=self.fs, format=self.format)
        writer.configure(filename=self.directory + self.filename + suffix +
                         self.format)
        writer(array(signal))
def bassline_harmonic_anal(self):
    """Split the harmonic track into sinusoidal and residual parts and save them.

    Runs ``analysis_synthesis_spr_model_standard`` on the audio held by
    ``self.harmonic_canvas``, adds the residual to the audio held by
    ``self.percussive_canvas`` and writes three wav files whose names are
    derived from the currently selected bassline / drum file names. The
    drop-down file lists are refreshed afterwards.
    """
    canvas = self.harmonic_canvas
    bassline_audio, bass_rate = canvas.get_audio()
    frame_size = int(canvas.frame_size_comboBox.currentText())
    fft_size = int(canvas.fft_size_comboBox.currentText())
    # NOTE(review): eval() runs whatever text the combo box holds; acceptable
    # only while the box is restricted to fixed entries. Confirm, or parse it.
    hop_size = int(eval(canvas.hop_size_comboBox.currentText()) * frame_size)

    params = {
        "sampleRate": bass_rate,
        "frameSize": frame_size,
        "fftSize": fft_size,
        "hopSize": hop_size,
        "maxFrequency": 20000,
        "minFrequency": 20,
        "maxnSines": 100,
        "magnitudeThreshold": -85,
        "minSineDur": .05,
        "freqDevOffset": 50,
        "freqDevSlope": .1,
    }
    # NOTE(review): the FFT size chosen in the UI is overwritten with the hop
    # size right away; kept as-is, but verify this is intended.
    params["fftSize"] = hop_size

    _, sine_audio, res_audio = self.analysis_synthesis_spr_model_standard(
        params, bassline_audio)

    # Drop the 4-character extension, then swap the stem suffix for the new one.
    bass_stem = self.bassline_files_comboBox.currentText()[:-4]
    har_sav_location = bass_stem.replace("_harmonic", "_spr_sine.wav")
    res_sav_location = bass_stem.replace("_harmonic", "_spr_res.wav")
    drum_stem = self.drum_files_comboBox.currentText()[:-4]
    per_sav_location = drum_stem.replace("_percussive", "_with_spr_res.wav")

    drums_audio, _drum_rate = self.percussive_canvas.get_audio()
    self.StatusBarSignal.emit("Separating Sinusoidal And Residual Parts")

    # The signals are aligned on their tails: the last `Length` samples of the
    # drums and of the residual are summed.
    Length = min(len(sine_audio), len(res_audio), len(drums_audio))
    drums_audio_plus_residual = drums_audio[-Length:] + res_audio[-Length:]

    self.StatusBarSignal.emit(
        "Writing Separated Files and the Enhanced Percussion Audio")
    outputs = (
        (har_sav_location, sine_audio),
        (res_sav_location, res_audio),
        (per_sav_location, drums_audio_plus_residual),
    )
    for location, signal in outputs:
        es.MonoWriter(filename=location, format="wav")(signal)

    self.find_separated_files()  # update drop down file lists
def energyThresholdAudio(soundfilesList):
    """Trim each file to the span between its first and last loud frame.

    For every path in ``soundfilesList`` the frame-wise RMS is computed
    (frames of 2048 samples, hop 1024). Frames with RMS >= 0.05 count as loud.
    Files with fewer than two loud frames are left untouched; otherwise the
    audio between the first and last loud frame is written back to the same
    path in mp3 format and the path is printed.
    """
    thresh = 0.05
    for sound in soundfilesList:
        RMS = esst.RMS()
        audio = esst.MonoLoader(filename=sound)()

        frames = esst.FrameGenerator(audio,
                                     frameSize=2048,
                                     hopSize=1024,
                                     startFromZero=True)
        rms_vals = np.array([float(RMS(frame)) for frame in frames])

        higher = np.where(rms_vals >= thresh)[0]
        if len(higher) <= 1:
            continue  # not enough loud frames to define a span

        # Frame indices are turned into sample offsets with the hop size.
        start, end = higher[0], higher[-1]
        newAudio = audio[start * 1024:end * 1024]
        esst.MonoWriter(filename=sound, format="mp3")(newAudio)
        print(sound)
def clear_end(self):
    """Process whatever is left in ``self.record`` and emit the result.

    The pending samples are written to ``onlinetemp.wav`` at 16 kHz and handed
    to ``lib.process``, which fills a 10000-byte buffer. A non-empty result is
    printed and sent to ``writeToOutput`` prefixed with the name registered
    for ``self.last``; the counter is then advanced and the record cleared.
    An empty result returns early and leaves the record as it is.
    """
    print('enter clear_end')
    if len(self.record) == 0:
        return

    oname = "onlinetemp.wav"
    writer = es.MonoWriter(filename=oname, sampleRate=16000)
    writer(self.record)

    c_pBuf = create_string_buffer('', 10000)
    lib.process(oname, c_pBuf)
    data = string_at(c_pBuf)
    if len(data) == 0:
        return

    print(data)
    # [:-4] strips a 4-character suffix from the stored name.
    speaker = self.parent.parent.nameDict[self.last][:-4]
    self.writeToOutput(speaker + ": " + data)
    self.num = self.num + 1
    self.record = []
def compute_all_features(file_struct, audio_beats=False, overwrite=False):
    """Compute and store every feature for one audio file.

    Features are computed for ``file_struct.audio_file`` and saved as JSON in
    ``file_struct.features_file``. When ``file_struct.ref_file`` exists and
    holds beat annotations, features synchronised to the annotated beats are
    stored as well. With ``audio_beats`` set, a copy of the audio with beeps
    on the estimated beats is written to ``beats.wav``. Nothing is done when
    the features file already exists and ``overwrite`` is false.
    """
    out_file = file_struct.features_file
    if not overwrite and os.path.isfile(out_file):
        return  # already computed and we are not overwriting

    audio, features = compute_features_for_audio_file(file_struct.audio_file)

    # Optional sonification of the estimated beats.
    if audio_beats:
        logging.info("Saving Beats as an audio file")
        marked_audio = ES.AudioOnsetsMarker(
            onsets=features["beats"],
            type='beep',
            sampleRate=msaf.Anal.sample_rate)(audio)
        ES.MonoWriter(filename='beats.wav',
                      sampleRate=msaf.Anal.sample_rate)(marked_audio)

    # Annotated beats, if a reference file with beat annotations exists.
    if os.path.isfile(file_struct.ref_file):
        jam = jams2.load(file_struct.ref_file)
        if jam.beats != []:
            logging.info("Reading beat annotations from JAMS")
            annot_beats = [data.time.value for data in jam.beats[0].data]
            annot_beats = essentia.array(np.unique(annot_beats).tolist())
            annot_mfcc, annot_hpcp, annot_tonnetz = compute_features(
                audio, annot_beats)

    logging.info("Saving the JSON file in %s" % out_file)
    yaml = YamlOutput(filename=out_file, format='json')
    pool = essentia.Pool()
    pool.add("beats.times", features["beats"])
    pool.add("beats.confidence", features["beats_conf"])

    # NOTE(review): "analysis.frame_rate" is filled with the frame *size*;
    # the key is part of the output format, so it is left untouched.
    analysis_settings = (
        ("analysis.sample_rate", msaf.Anal.sample_rate),
        ("analysis.frame_rate", msaf.Anal.frame_size),
        ("analysis.hop_size", msaf.Anal.hop_size),
        ("analysis.window_type", msaf.Anal.window_type),
        ("analysis.mfcc_coeff", msaf.Anal.mfcc_coeff),
    )
    for key, value in analysis_settings:
        pool.set(key, value)
    pool.set("timestamp",
             datetime.datetime.today().strftime("%Y/%m/%d %H:%M:%S"))

    save_features("framesync", pool, features["mfcc"], features["hpcp"],
                  features["tonnetz"])
    save_features("est_beatsync", pool, features["bs_mfcc"],
                  features["bs_hpcp"], features["bs_tonnetz"])
    if os.path.isfile(file_struct.ref_file) and jam.beats != []:
        save_features("ann_beatsync", pool, annot_mfcc, annot_hpcp,
                      annot_tonnetz)
    yaml(pool)
def recognize(self, audio_list):
    """Identify the speaker of each chunk and transcribe completed turns.

    Every chunk in ``audio_list`` is written to ``self._TEMP_FILE_NAME`` at
    16 kHz and passed to ``self.recognizer.identify_speaker``. While the
    speaker stays the same the chunk is appended to ``self.record``. When the
    speaker changes and something has been recorded, the record is written to
    ``onlinetemp.wav``, processed by ``lib.process``, any non-empty result is
    sent to ``writeToOutput``, and the record restarts with the current chunk.
    """
    print("len")
    print(len(audio_list))
    for audio in audio_list:
        es.MonoWriter(filename=self._TEMP_FILE_NAME, sampleRate=16000)(audio)

        # Close the temp file once the recognizer is done with it; it used to
        # be opened on every iteration and never closed.
        with open(self._TEMP_FILE_NAME, "rb") as voice:
            result = self.recognizer.identify_speaker(voice)

        data = ""
        if result != self.last and len(self.record) > 0:
            oname = "onlinetemp.wav"
            es.MonoWriter(filename=oname, sampleRate=16000)(self.record)
            c_pBuf = create_string_buffer('', 10000)
            lib.process(oname, c_pBuf)
            data = string_at(c_pBuf)
            if len(data) > 0:
                print(data)
                # NOTE(review): the text comes from the previous speaker's
                # record but is labelled with the new speaker (`result`);
                # clear_end labels with `self.last`. Confirm which is meant.
                s = self.parent.parent.nameDict[result][:-4] + ": " + data
                self.writeToOutput(s)
            self.record = []
            self.record = self.record + audio
            self.last = result
        else:
            self.record = self.record + audio
            self.last = result
        # Function-call form prints the same under Python 2 and also parses
        # under Python 3, matching the other prints in this method.
        print(result)
def compute_all_features(audio_file, audio_beats=False):
    """Return the beat-synchronous features of an audio file, with caching.

    Results are cached as ``<OUTPUT_FEATURES>/<basename>.json``; an existing
    cache file is loaded and returned without touching the audio.

    Parameters
    ----------
    audio_file : str
        Path to the audio file.
    audio_beats : bool
        When true, also write ``beats.wav`` with beeps on the estimated beats.

    Returns
    -------
    features : dict
        Output of ``list_to_array`` applied to a dictionary with the keys:
            beats : beat times, including 0 and the track duration
            mfcc : Mel Frequency Cepstral Coefficients representation
            hpcp : Harmonic Pitch Class Profiles
            tonnetz : Tonal Centroids (or Tonnetz)
    """
    # Makes sure the output features folder exists
    utils.ensure_dir(OUTPUT_FEATURES)
    cache_name = os.path.basename(audio_file) + ".json"
    features_file = os.path.join(OUTPUT_FEATURES, cache_name)

    # Cached result available: read it back and stop here.
    if os.path.exists(features_file):
        with open(features_file, "r") as cache:
            cached = json.load(cache)
        return list_to_array(cached)

    logging.info("Loading audio file %s" % os.path.basename(audio_file))
    audio = ES.MonoLoader(filename=audio_file, sampleRate=SAMPLE_RATE)()
    duration = len(audio) / float(SAMPLE_RATE)

    # Estimated beats, padded with the start and end of the track.
    ticks, conf = compute_beats(audio)
    ticks = essentia.array(np.unique(np.concatenate(([0], ticks, [duration]))))

    features = {"beats": ticks.tolist()}
    mfcc, hpcp, tonnetz = compute_beatsync_features(ticks, audio)
    features["mfcc"] = mfcc
    features["hpcp"] = hpcp
    features["tonnetz"] = tonnetz

    if audio_beats:
        logging.info("Saving Beats as an audio file")
        marked_audio = ES.AudioOnsetsMarker(onsets=ticks,
                                            type='beep',
                                            sampleRate=SAMPLE_RATE)(audio)
        ES.MonoWriter(filename='beats.wav',
                      sampleRate=SAMPLE_RATE)(marked_audio)

    with open(features_file, "w") as cache:
        json.dump(features, cache)
    return list_to_array(features)
def add_click(infile, beats, outfile):
    """Write a copy of ``infile`` with a beep on every entry of ``beats``.

    Meant as a listening aid when debugging beat alignment. The result is
    written to ``outfile``.
    """
    song = standard.MonoLoader(filename=infile)()
    clicked = standard.AudioOnsetsMarker(onsets=beats, type='beep')(song)
    writer = standard.MonoWriter(filename=outfile)
    writer(clicked)
def HPFilter(audio, cutoff):
    """High-pass filter ``audio`` at ``cutoff`` and return the result.

    As a side effect the filtered signal is also written to
    ``holst_test.wav`` in the current working directory.
    """
    filtered_audio = es.HighPass(cutoffFrequency=cutoff)(audio)
    es.MonoWriter(filename='holst_test.wav')(filtered_audio)
    return filtered_audio
def mix(audio1, audio2, sr):
    """
    Function to mix audios with a normalised loudness

    Both inputs are reduced to mono when they have more than one dimension,
    written to temporary wav files, loudness-normalised and mixed with
    ffmpeg (``loudnorm`` + ``amix``), and the mix is loaded back.

    :param audio1: Audio vector to normalize
    :param audio2: Audio vector to normalize
    :param sr: Sample rate of the final mix
    :return: Audio vector of the normalised mix
    """
    import os
    import shutil
    import tempfile

    if audio1.ndim > 1:
        audio1 = std.MonoMixer()(audio1, audio1.shape[1])
    if audio2.ndim > 1:
        audio2 = std.MonoMixer()(audio2, audio2.shape[1])

    # Intermediate files live in a private scratch directory so that
    # concurrent calls cannot clobber each other and nothing is left behind
    # in the working directory.
    work_dir = tempfile.mkdtemp(prefix='mix_')
    try:
        first_path = os.path.join(work_dir, 'temporal1.wav')
        second_path = os.path.join(work_dir, 'temporal2.wav')
        mixed_path = os.path.join(work_dir, 'temporal_o.wav')

        std.MonoWriter(filename=first_path, sampleRate=sr)(audio1)
        std.MonoWriter(filename=second_path, sampleRate=sr)(audio2)

        stream1 = ffmpeg.input(first_path).filter('loudnorm')
        stream2 = ffmpeg.input(second_path).filter('loudnorm')
        merged_audio = ffmpeg.filter([stream1, stream2], 'amix')
        ffmpeg.output(merged_audio, mixed_path).overwrite_output().run()

        # Load at the requested rate: without sampleRate the loader falls
        # back to its own default, whatever `sr` is.
        return std.MonoLoader(filename=mixed_path, sampleRate=sr)()
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
def save(self, file_path: Path = Path.cwd() / ".temp" / "audio.wav") -> None:
    """
    Export the AudioFile at the newly given path.

    The parent directory is created when missing, including any missing
    ancestors. The output format is taken from the file extension.
    ``estd.AudioWriter`` is used when ``self.audio`` has exactly two
    dimensions and ``estd.MonoWriter`` otherwise.
    """
    # parents=True: a plain mkdir() raised FileNotFoundError as soon as more
    # than the last directory level was missing.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    writer_cls = estd.AudioWriter if len(self.audio.shape) == 2 else estd.MonoWriter
    writer_cls(
        filename=file_path.as_posix(),
        format=file_path.suffix[1:],  # extension without the leading dot
        sampleRate=self.sample_rate,
    )(self.audio)
# For every wav file: cut out the stretch that starts a quarter of the way in
# and ends at the first sample of opposite sign beyond +/-0.3, which leaves an
# abrupt jump in the waveform. The jump position is stored in a .lab file next
# to the edited audio.
for f in find_files(in_folder, '.wav'):
    audio = es.MonoLoader(filename=f, sampleRate=fs)()
    original_len = len(audio)
    start_jump = original_len // 4
    tail = audio[start_jump:]

    # Want at least a gap of .5
    if audio[start_jump] > 0:
        crossings = (idx for idx, i in enumerate(tail) if i < -.3)
    else:
        crossings = (idx for idx, i in enumerate(tail) if i > .3)
    # NOTE(review): next() raises StopIteration when no sample qualifies.
    end = next(crossings)
    end_jump = start_jump + end

    audio = np.hstack([audio[:start_jump], audio[end_jump:]])
    text = ['{}\t0.0\tevent\n'.format(start_jump / float(fs))]

    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    # File name without its last extension (inner dots are dropped too).
    f_name = ''.join(os.path.basename(f).split('.')[:-1])
    lab_path = '{}/{}_prominent_jump.lab'.format(out_folder, f_name)
    wav_path = '{}/{}_prominent_jump.wav'.format(out_folder, f_name)

    with open(lab_path, 'w') as o_file:
        o_file.write(''.join(text))
    es.MonoWriter(filename=wav_path)(esarr(audio))
# Adds a faint 50 Hz sinusoid ("mains hum") to every flac file found under
# in_folder and writes the result as <name>_hum.wav into out_folder.
in_folder = '/home/pablo/data/sns-small/samples'
out_folder = '/home/pablo/reps/essentia/test/QA-audio/Hum/Songs50HzHum'
fs = 44100.

files = list(find_files(in_folder, 'flac'))
if not files:
    print('no files found!')

for f in files:
    try:
        audio = es.MonoLoader(filename=f, sampleRate=fs)()
    except Exception:
        # Unreadable file: report it and move on to the next one.
        print('{} was not loaded'.format(f))
        continue

    fs = 44100.
    freq = 50
    t = np.linspace(0, len(audio) / fs, len(audio))
    sinusoid = np.sin(2 * PI * freq * t)
    # Mostly the original signal plus a low-level hum.
    signal = np.array(.95 * audio + .005 * sinusoid, dtype=np.float32)

    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    # File name without its last extension (inner dots are dropped too).
    f_name = ''.join(os.path.basename(f).split('.')[:-1])
    hum_path = '{}/{}_hum.wav'.format(out_folder, f_name)
    es.MonoWriter(filename=hum_path)(esarr(signal))
# Inserts a silent gap of random length, starting a quarter of the way into
# each wav file, and stores the gap boundaries (in seconds) in a .lab file
# next to the edited audio.
fs = 44100.
files = list(find_files(in_folder, 'wav'))
if not files:
    print('no files found!')

for f in files:
    try:
        audio = es.MonoLoader(filename=f, sampleRate=fs)()
    except Exception:
        # Unreadable file: report it and move on to the next one.
        print('{} was not loaded'.format(f))
        continue

    original_len = len(audio)
    start_jump = original_len // 4
    # Clamp the gap to the end of the file. An unclamped end made the slice
    # shorter than the zero block assigned to it (ValueError on short files)
    # and put a label past the end of the audio.
    end_jump = min(start_jump + int(np.abs(np.random.randn()) * fs),
                   original_len)
    audio[start_jump:end_jump] = 0

    text = ['{}\t{}\tevent\n'.format(start_jump / float(fs),
                                     end_jump / float(fs))]

    if not os.path.isdir(out_folder):
        os.makedirs(out_folder)

    # File name without its last extension (inner dots are dropped too).
    f_name = ''.join(os.path.basename(f).split('.')[:-1])
    with open('{}/{}_gap.lab'.format(out_folder, f_name), 'w') as o_file:
        o_file.write(''.join(text))
    es.MonoWriter(filename='{}/{}_gap.wav'.format(out_folder, f_name))(esarr(audio))
import matplotlib.pylab as plt

# STFT analysis / resynthesis pass-through in Essentia's standard mode: each
# frame is windowed, transformed, transformed back and overlap-added.
# NOTE(review): this excerpt appears to be cut short - `counter` and
# `audioout` are not initialised or advanced here and `awrite` is never
# called within the visible code.
if mode == 'standard':
    # create an audio loader and import audio file
    loader = std.MonoLoader(filename = inputFilename, sampleRate = 44100)
    audio = loader()
    print("Duration of the audio sample [sec]:")
    print(len(audio)/44100.0)

    # Algorithms of the analysis/synthesis chain and the output writer.
    w = std.Windowing(type = "hann");
    fft = std.FFT(size = framesize);
    ifft = std.IFFT(size = framesize);
    overl = std.OverlapAdd (frameSize = framesize, hopSize = hopsize);
    awrite = std.MonoWriter (filename = outputFilename, sampleRate = 44100);

    for frame in std.FrameGenerator(audio, frameSize = framesize, hopSize = hopsize):
        # STFT analysis
        infft = fft(w(frame))
        # here we could apply spectral transformations
        outfft = infft
        # STFT synthesis
        ifftframe = ifft(outfft)
        out = overl(ifftframe)
        # Output is only collected once `counter` has reached
        # framesize / (2 * hopsize) - presumably to skip the start-up frames.
        if counter >= (framesize/(2*hopsize)):
            audioout = np.append(audioout, out)
# Builds a click track for one recording: at every annotated time a "ban" or
# "gu" sample is pasted into a silent track as long as the accompaniment, and
# the result is written to <mbid>-bangu.wav.
data = np.genfromtxt(os.path.join(folder, mbid, mbid + '-bangu.csv'),
                     delimiter=',')

ban = es.MonoLoader(filename=os.path.join(folder, 'ban.mp3'))()
gu = es.MonoLoader(filename=os.path.join(folder, 'gu.mp3'))()
acc = es.MonoLoader(
    filename=os.path.join(folder, mbid, mbid + '-acc.mp3'))()
print(acc.size / 44100)

new = np.zeros(acc.size)
for row in data:
    # Column 0 holds the time in seconds, column 2 the stroke label.
    start = int(round(row[0] * 44100))
    # The last character of the label's string form selects the sample.
    stroke = ban if str(row[2])[-1] in ('0', '1') else gu

    # Annotations outside the track cannot be rendered at all.
    if start < 0 or start >= new.size:
        continue
    # Truncate strokes that would run past the end of the track; an
    # unclipped paste raised a shape-mismatch ValueError.
    end = min(start + stroke.size, new.size)
    new[start:end] = stroke[:end - start]

newFile = os.path.join(folder, mbid, mbid + '-bangu.wav')
es.MonoWriter(filename=newFile)(essentia.array(new))
# Snare: the first 4 * num_4_4 beats follow the 4/4 groove, the next
# 5 * num_5_4 beats follow the 5/4 groove.
for i in range(4 * num_4_4):
    onset = int(beat_pos[i] * fs)
    snareAudio[onset:onset + snareArray.size] = (
        amplitude_snare[i] * snareGroove_4_4[i % 4] * snareArray)

offset_5_4 = 4 * num_4_4  # index of the first 5/4 beat
for i in range(5 * num_5_4):
    onset = int(beat_pos[i + offset_5_4] * fs)
    snareAudio[onset:onset + snareArray.size] = (
        amplitude_snare[i + offset_5_4] * snareGroove_5_4[i % 5] * snareArray)

# Hat: one hit on every beat.
for i in range(num_beats):
    onset = int(beat_pos[i] * fs)
    hatAudio[onset:onset + hatArray.size] = amplitude_hat[i] * hatArray

# Adding together into one. The file ends a random 0.80-1.00 s after the
# last beat.
x = float(randint(80, 100)) / 100
file_time = fs * (beat_pos[num_beats - 1] + x)
# Same output as the Python 2 statement `print x, file_time / fs`, but this
# form also parses under Python 3.
print("%s %s" % (x, file_time / fs))

# One vectorised sum instead of a Python loop over every sample.
n_samples = int(file_time)
kickAudio[:n_samples] = (kickAudio[:n_samples] + snareAudio[:n_samples] +
                         hatAudio[:n_samples])

# Writing to wav file
writer = ess.MonoWriter(filename=str(OUTPUT_DIRECTORY_AUDIO) + str(indexx) +
                        ".wav",
                        format="wav")
writer(kickAudio[0:n_samples].astype('single'))
# Compute beat positions and BPM
rhythm_extractor = esst.RhythmExtractor2013(method="multifeature")
bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(audio)
print("Beat estimation confidence:", beats_confidence)

# For every beat, cut the slice from 10 ms before to 150 ms after it, append
# the current `mixed_sample` to `output_array` and write the running result.
for i in range(len(beats)):
    # Index the beat directly instead of rebuilding two shifted copies of the
    # whole beat list on every iteration. The start is clamped at zero
    # because Trimmer does not accept a negative start time.
    start_time = max(0.0, beats[i] - 0.01)
    end_time = beats[i] + 0.15
    trim = esst.Trimmer(startTime=start_time, endTime=end_time)(audio)

    if len(mixed_sample):
        trim = np.resize(trim, mixed_sample.size)
        stereo_mix = esst.StereoMuxer()(mixed_sample, trim)
        # NOTE(review): the mono mix-down is computed but its result is
        # discarded, so `mixed_sample` never changes after the first beat.
        # Left as found - confirm whether an assignment was intended.
        esst.MonoMixer()(stereo_mix, 2)
    else:
        mixed_sample = trim

    output_array = np.concatenate([output_array, mixed_sample])
    esst.MonoWriter(filename=f"../samples/mix_beat{str(i)}.mp3")(output_array)
def getWaveSection(audio):
    """Describe ``audio`` in 20 consecutive blocks of equal length.

    Each block is written to ``.tmp.wav`` and analysed by the two external
    extractors, first the low-level one (output ``.l_tmp``), then the
    high-level one (output ``.h_tmp``). The temporary files are removed
    before returning.

    Returns a dict keyed by block index. Every value holds the block's sample
    bounds (``x0``, ``x1``), ``avg_mood``, ``avg_bpm``, ``strong_peak`` and an
    ``energy_band`` dict with the four spectral energy band means.
    """
    n_of_blocks = 20
    samples_per_block = int(len(audio) / n_of_blocks)
    band_names = ("low", "middle_low", "middle_high", "high")

    wave_data = {}
    for i in range(n_of_blocks):
        x0 = i * samples_per_block
        x1 = x0 + samples_per_block

        # Temporary audio file for this block.
        es.MonoWriter(filename='.tmp.wav')(audio[x0:x1])

        # Low-level descriptors.
        subprocess.run([
            "lib/essentia/streaming_extractor_music", '.tmp.wav', '.l_tmp',
            profile_low
        ])
        with open('.l_tmp', 'r', encoding='utf-8') as low_file:
            lowlevel_content = json.load(low_file)

        # High-level descriptors, computed from the low-level file.
        subprocess.run([
            "essentia_streaming_extractor_music_svm", '.l_tmp', '.h_tmp',
            profile_wavesection
        ])
        with open('.h_tmp', 'r', encoding='utf-8') as high_file:
            highlevel_content = json.load(high_file)

        lowlevel = lowlevel_content["lowlevel"]
        wave_data[i] = {
            "x0": x0,
            "x1": x1,
            "avg_mood":
            highlevel_content["highlevel"]["mood_happy"]["all"]["happy"],
            "avg_bpm": lowlevel_content["rhythm"]["bpm"],
            "strong_peak": lowlevel["spectral_strongpeak"]["mean"],
            "energy_band": {
                band: lowlevel["spectral_energyband_" + band]["mean"]
                for band in band_names
            },
        }

    # Delete temporary files
    for leftover in ('.tmp.wav', '.l_tmp', '.h_tmp'):
        os.remove(leftover)
    return wave_data
import os
import essentia.standard as ess

# Band-reject augmentation: every file below the audio folder is filtered
# (rejected band centred on 300 Hz, 1700 Hz wide) and written under the same
# file name, in wav format, to the Augmentations folder.
for root, dirs, files in os.walk(
        "/media/kushagra/529EC5229EC50009/Users/kushagra/Master_Datasets/SMC_MIREX/SMC_MIREX/SMC_MIREX_Audio"
):
    for f in files:
        source_path = root + '/' + f
        audio = ess.MonoLoader(filename=str(source_path))()

        # A new filter per file, so nothing carries over between files.
        filtof = ess.BandReject(bandwidth=1700, cutoffFrequency=300)(audio)

        target_path = (
            "/media/kushagra/529EC5229EC50009/Users/kushagra/Master_Datasets/SMC_MIREX/SMC_MIREX/Augmentations/"
            + str(f))
        writer = ess.MonoWriter(filename=target_path, format="wav")
        write = writer(filtof)
def create_excerpt(audio_path, time, name):
    """
    Given audio path and times, transcribes it and creates new midis and wav
    files for the given excerpts.

    `name` is the file name without extension and transcription number.

    Four midi files are written to `to_be_synthesized` (`orig`, `other`,
    `proposed`, `magenta`) and the audio excerpt delimited by `time[0]` is
    written to `audio/<name>target.<FORMAT>`.
    """
    full_audio = esst.EasyLoader(filename=audio_path, sampleRate=SR)()
    start_audio, _ = find_start_stop(full_audio, sample_rate=SR, seconds=True)
    original = midipath2mat(audio_path[:-4] + '.mid')

    # compute score path
    score_path = './my_scores/' + os.path.basename(audio_path)[:-8] + '.mid'
    score = midipath2mat(score_path)

    # transcribe
    # The template file is closed as soon as it is read; it used to be left
    # open. NOTE(review): pickle.load must only be used on trusted files.
    with open(TEMPLATE_PATH, 'rb') as template_file:
        data = pickle.load(template_file)
    transcription_0, _, _, _ = proposed.transcribe(full_audio,
                                                   data,
                                                   score=score)
    transcription_1 = magenta_transcription.transcribe(full_audio, SR)

    # chose another interpretation: performance '01' is paired with '02' and
    # every other performance with '01'
    performance = '02' if audio_path[-6:-4] == '01' else '01'
    other = midipath2mat(audio_path[:-6] + performance + '.mid')

    # segment all the scores and audios
    seg_start, seg_end = time[0][0], time[0][1]
    full_audio = esst.EasyLoader(filename=audio_path, sampleRate=OUT_SR)()
    original_audio = full_audio[round(seg_start * OUT_SR):round(seg_end *
                                                               OUT_SR)]
    other_time = remap_original_in_other(original, other, time[0])
    original = segment_mat(original, seg_start, seg_end, start_audio)
    other = segment_mat(other, other_time[0], other_time[1], start_audio)
    transcription_0 = segment_mat(transcription_0, seg_start, seg_end,
                                  start_audio)
    transcription_1 = segment_mat(transcription_1, seg_start, seg_end,
                                  start_audio)

    # write scores to `to_be_synthesized` and audios to `audio`
    os.makedirs('to_be_synthesized', exist_ok=True)
    midi_path = os.path.join('to_be_synthesized', name)
    mat2midipath(original, midi_path + 'orig.mid')
    mat2midipath(other, midi_path + 'other.mid')
    mat2midipath(transcription_0, midi_path + 'proposed.mid')
    mat2midipath(transcription_1, midi_path + 'magenta.mid')

    os.makedirs('audio', exist_ok=True)
    audio_path = os.path.join('audio', name) + 'target.' + FORMAT

    # write audio, replacing any file left over from a previous run
    if os.path.exists(audio_path):
        os.remove(audio_path)
    esst.MonoWriter(filename=audio_path,
                    sampleRate=OUT_SR,
                    format=FORMAT,
                    bitrate=256)(original_audio)
def sliceDrums_from_annotations_SDtrainset(instrument_name, segments_dir,
                                           song_dict, fs):
    """
    Slice the audio of one instrument according to its annotations and save
    every slice as an individual wav file in segments_dir/instrument_name/.

    Input:
        instrument_name: str with a key in the song_dict
        segments_dir   : str with path where slices are saved
        song_dict      : dict containing audio stream and annotations
        fs             : sampling rate to properly save the files

    Adapted to routines recorded in the studio. Slice boundaries are the
    annotated positions shifted by the first detected onset minus 3000
    samples. Slices shorter than 5000 samples, quieter than 0.05 peak, or
    annotated 'N' are skipped; the rest are peak-normalised before writing.

    This function could be combined with the feature extraction, but having
    the slices saved allows data augmentation by combining individual
    samples to get more instances of all the combinations.
    """
    song = song_dict[instrument_name]
    x_seg_dir = os.path.join(segments_dir, instrument_name)

    od_complex = OnsetDetection(method='complex')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
    onsets = Onsets()

    annotations = song['annotations']
    n_notes = len(annotations)
    file_count = 0

    for audio in song['audios']:
        # Onset detection runs on a peak-normalised copy; the slices below
        # are cut from the untouched audio.
        x = audio / np.max(np.abs(audio))
        pool = Pool()
        for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(w(frame)))
            pool.add('features.complex', od_complex(mag, phase))
        onsets_list = onsets(array([pool['features.complex']]), [1])
        first_onset = int(onsets_list[0] * fs)
        print(first_onset)

        # One call creates segments_dir and the instrument folder, and also
        # copes with missing parent directories.
        if not os.path.isdir(x_seg_dir):
            os.makedirs(x_seg_dir)

        # The first and last annotations are not sliced: each slice runs from
        # annotation i to annotation i + 1.
        for i in range(1, n_notes - 1):
            if annotations[i][1] == 'N':
                continue

            # Clamp at zero: a negative start would index from the end of the
            # array and return the wrong audio.
            seg_start = max(0, annotations[i][2] - 3000 + first_onset)
            seg_stop = max(0, annotations[i + 1][2] - 3000 + first_onset)
            x_seg = audio[seg_start:seg_stop]
            if len(x_seg) < 5000 or np.max(np.abs(x_seg)) < 0.05:
                continue
            x_seg = x_seg / np.max(np.abs(x_seg))

            # Number the file after the count of files already in the folder.
            dir_n_files = len(next(os.walk(x_seg_dir))[2])
            filename = os.path.join(
                x_seg_dir, instrument_name + '_' + str(dir_n_files) + '.wav')
            ess.MonoWriter(filename=filename, format='wav',
                           sampleRate=fs)(x_seg)
            file_count = file_count + 1

    print(instrument_name + ": " + str(file_count))
# Median-filtering harmonic/percussive separation of the first nSeconds of
# mixFile; both parts are written as mp3 files next to each other.
x = es.MonoLoader(filename=mixFile, sampleRate=44100)()[:nSeconds * 44100]

_stft = stft(x,
             n_fft=fftSize,
             hop_length=hopSize,
             win_length=frameSize,
             window=winType)
X_H, X_P = hpss(_stft, kernel_size=150)  # harmonic and percussive stfts

# Back to the time domain.
x_h = istft(X_H, hop_length=hopSize, win_length=frameSize)
x_p = istft(X_P, hop_length=hopSize, win_length=frameSize)

# Percussive part first, then the harmonic part.
for suffix, part in (("_median_percussive.mp3", x_p),
                     ("_median_harmonic.mp3", x_h)):
    part_writer = es.MonoWriter(sampleRate=44100, format="mp3")
    part_writer.configure(filename=saveFolderLoc + filename + suffix)
    part_writer(array(part))

print("DONE")
raga = metadata['raaga'][0]['common_name'] except IndexError: continue raga_dir = os.path.join(output_dir, raga) os.makedirs(raga_dir, exist_ok=True) try: length = len(phrases) except TypeError: length = 1 phrases = np.array([phrases]) print(phrases) if len(phrases) > 0: collated_phrases = commons.collate_phrases(phrases) for phrase_annotation, phrases in collated_phrases.items(): for i, indices in enumerate(phrases): melodic_segment_dir = os.path.join(raga_dir, phrase_annotation.decode()) os.makedirs(melodic_segment_dir, exist_ok=True) melodic_segment_path = os.path.join( melodic_segment_dir, '{}-{:02}.mp3'.format(filename, i)) ess.MonoWriter(filename=melodic_segment_path, format='mp3')(audio[indices[0]:indices[1]]) total_phrases += len(phrases) print(filename, len(phrases)) print(total_phrases)
def separate_and_analyze_function(self):
    """Separate the loaded mix into harmonic and percussive parts.

    Frame, hop and FFT sizes are read from the combo boxes of
    ``self.mixed_canvas``. Depending on the check boxes the mix is separated
    with the Smoothness/Sparseness NMF algorithm (``SMSP_HPSS``, 100
    iterations) and/or with median filtering. Results end up in the
    ``harmonic`` and ``percussive`` sub-folders of the mix's directory, named
    ``<algo>_<fftSize>_<frameSize>_<hopSize>_<part>.wav``.
    ``FindSeparatedFilesSignal`` is emitted at the end in every case.
    """
    canvas = self.mixed_canvas
    frameSize = int(canvas.frame_size_comboBox.currentText())
    # NOTE(review): eval() runs whatever text the combo box holds; acceptable
    # only while the box is restricted to fixed entries. Confirm, or parse it.
    hopSize = int(eval(canvas.hop_size_comboBox.currentText()) * frameSize)
    fftSize = int(canvas.fft_size_comboBox.currentText())

    x, sampleRate = canvas.get_audio()
    file_directory = os.path.dirname(canvas.get_filename())
    harmonic_dir = os.path.join(file_directory, "harmonic")
    percussive_dir = os.path.join(file_directory, "percussive")

    # File name format Algo_FFTSize_frameSize_hopSize
    size_tag = "_".join(str(size) for size in (fftSize, frameSize, hopSize))
    SMSP_filename_prefix = "SMSP_" + size_tag
    median_filename_prefix = "median_" + size_tag

    # `not x == []` is ill-defined when x is a NumPy array (element-wise
    # comparison against an empty list); test the length instead.
    has_audio = x is not None and len(x) > 0
    if has_audio:
        # The moves and writers below need their target folders to exist.
        for target_dir in (harmonic_dir, percussive_dir):
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)

        if self.SMSP_checkbox.checkState():
            self.StatusBarSignal.emit(
                "Separating Using Smoothness/Sparseness NMF Algorithm")
            # separate using Smoothness/Sparseness
            hpss = SMSP_HPSS(
                np.array(x),
                directory=file_directory,
                filename=SMSP_filename_prefix,
                format="wav",
                beta=1.5,
                frameSize=frameSize,
                hopSize=hopSize,
                fftSize=fftSize,
                Rp=150,
                Rh=150,
                K_SSM=.2,  # Percussive Spectral Smoothness
                K_TSP=.1,  # Percussive Temporal Smoothness
                K_SSP=.1,  # Harmonic Spectral Smoothness
                K_TSM=.2,  # Harmonic Temporal Smoothness
            )
            maxIter = 100
            for i in range(int(maxIter)):
                self.StatusBarSignal.emit("Iteration %i out of %i" %
                                          (i + 1, maxIter))
                hpss.next_iteration()
            hpss.create_masks()
            hpss.spectral_to_temporal_using_masks()
            hpss.save_separated_audiofiles()

            # Sort the two result files into their sub-folders.
            for part, target_dir in (("_harmonic.wav", harmonic_dir),
                                     ("_percussive.wav", percussive_dir)):
                shutil.move(
                    os.path.join(file_directory, SMSP_filename_prefix + part),
                    os.path.join(target_dir, SMSP_filename_prefix + part))

        if self.median_checkbox.checkState():
            self.StatusBarSignal.emit(
                "Separating Using Median Filtering Algorithm")
            # Separation using median filtering
            _stft = stft(x,
                         n_fft=fftSize,
                         hop_length=hopSize,
                         win_length=frameSize,
                         window="hann")
            # Get harmonic and percussive stfts
            X_H, X_P = librosa.decompose.hpss(_stft, kernel_size=150)
            # Convert stfts to time domain signals
            x_h = istft(X_H, hop_length=hopSize, win_length=frameSize)
            x_p = istft(X_P, hop_length=hopSize, win_length=frameSize)

            # NOTE(review): files are written at a fixed 44100 Hz although
            # get_audio() also returned `sampleRate`; confirm they agree.
            for part, target_dir, signal in (
                ("_percussive.wav", percussive_dir, x_p),
                ("_harmonic.wav", harmonic_dir, x_h),
            ):
                writer = es.MonoWriter(sampleRate=44100, format="wav")
                writer.configure(filename=os.path.join(
                    target_dir, median_filename_prefix + part))
                writer(array(signal))

    self.FindSeparatedFilesSignal.emit()
    return