def time_stretch_hpss(audio, f):
    """Time-stretch ``audio`` by ``f``, handling harmonic and percussive parts separately.

    The STFT of ``audio`` is split with ``decompose.hpss``. The percussive
    component is resynthesised, fixed to the input length and stretched with
    ``time_stretch_sola``; the harmonic component is resynthesised and
    stretched with ``time_stretch_sola(..., wsola=True)``, then fixed to the
    length of the stretched percussive signal so the two can be summed.

    Parameters
    ----------
    audio : array-like
        Input signal. It must expose ``dtype`` and ``len()`` when ``f != 1.0``.
    f : float
        Stretch factor forwarded unchanged to ``time_stretch_sola``.

    Returns
    -------
    The stretched signal (harmonic + percussive), or ``audio`` itself,
    untouched, when ``f == 1.0``.
    """
    if f == 1.0:
        # Nothing to stretch: hand the input back as-is.
        return audio

    stft = core.stft(audio)

    # Perform HPSS (original kernel size 31).
    stft_harm, stft_perc = decompose.hpss(stft, kernel_size=31)

    # OLA the percussive part.
    # `size` is passed by keyword: it keeps the call valid on librosa
    # releases where that argument is keyword-only.
    y_perc = librosa.util.fix_length(
        core.istft(stft_perc, dtype=audio.dtype), size=len(audio))
    y_perc = time_stretch_sola(y_perc, f)

    # WSOLA the harmonic part. (A phase-vocoder variant for this component
    # was tried earlier and is disabled.) The previous intermediate
    # fixed-length inverse STFT was overwritten before use, so it is no
    # longer computed.
    y_harm = librosa.util.fix_length(
        time_stretch_sola(
            core.istft(stft_harm, dtype=audio.dtype), f, wsola=True),
        size=len(y_perc))

    # Add them together
    return y_harm + y_perc
def get_mean_percussive_ratio_dbs(feat_files, margin=3.0):
    """Compute the mean percussive-to-total RMS ratio (in dB) per feature file.

    Each file is loaded with ``np.load``; files that fail to load are
    reported on stdout and skipped. For every loaded file the spectrogram
    built from ``features['linspec_mag']`` is split with ``hpss`` and the
    frame-wise ratio of percussive RMS to total RMS is averaged and passed
    through ``amplitude_to_db``.

    Parameters
    ----------
    feat_files : iterable
        Paths (or file objects) accepted by ``np.load``.
    margin : float
        Forwarded to ``hpss``.

    Returns
    -------
    (numpy.ndarray, list[int])
        The per-file mean ratios in dB, and the positions within
        ``feat_files`` of the files that were loaded successfully.
    """
    ratios_db = []
    loaded_positions = []

    for position, feat_file in enumerate(tqdm.tqdm(feat_files)):
        try:
            features = np.load(feat_file)
        except Exception as e:
            print('Skipping {}. {}'.format(feat_file, e))
            continue
        loaded_positions.append(position)

        # NOTE(review): the phase term reuses 'linspec_mag' as the angle;
        # this looks like it may have been meant to use a stored phase
        # array. Confirm against the feature writer before changing it.
        D = features['linspec_mag'] * np.exp(1.j * features['linspec_mag'])

        _, P = hpss(D, margin=margin)
        percussive_mag, _ = magphase(P)
        total_mag, _ = magphase(D)

        frame_ratio = rmse(S=percussive_mag) / rmse(S=total_mag)
        # amplitude_to_db is fed a one-element array, hence the [0].
        mean_ratio = np.array([np.mean(frame_ratio)])
        ratios_db.append(amplitude_to_db(mean_ratio)[0])

    return np.array(ratios_db), loaded_positions
def preprocUnlabeledDataset(UNLABELED_DATA_DIR, METADATA_DIR):
    """Compute and cache magnitude STFTs and their percussive parts.

    For every ``(path, genre)`` entry in the first element of the metadata
    loaded from ``METADATA_DIR``, the song is loaded, a segment is taken
    with ``getSegmentFromSong``, and two ``.npy`` files are written: the
    STFT magnitude under ``./stft/<genre>/`` and the percussive component
    from ``hpss`` under ``./stft_p/<genre>/``. Songs for which both files
    already exist are skipped.

    Parameters
    ----------
    UNLABELED_DATA_DIR : str
        Root directory of the audio; it is concatenated directly with
        ``genre + '/' + file_name``, so it is expected to end with a
        path separator.
    METADATA_DIR : str
        Path passed to ``np.load``; element 0 is the list of clean
        ``(path, genre)`` pairs and element 1 the problem list (not used
        here).

    Returns
    -------
    tuple
        Always the empty tuple.
    """
    # Return value is not used here; the call is kept as in the original.
    getGenres(UNLABELED_DATA_DIR)

    print('checking stft save repositories...')
    stft_root = checkSaveRepos('./stft/')
    stft_p_root = checkSaveRepos('./stft_p/')

    sample_rate = 44100
    window_size = 2048
    hop_size = 512

    metadata = np.load(METADATA_DIR)
    clean_list, _problem_list = metadata[0], metadata[1]

    for path, genre in clean_list:
        print('checking genre repositories... %s' % genre)
        genre_dir = checkSaveRepos(stft_root + genre + '/')
        genre_p_dir = checkSaveRepos(stft_p_root + genre + '/')

        # The file name is taken from the fourth '/'-separated component.
        file_name = path.split('/')[3]
        song_name = file_name.split('.')[0]
        song_path = UNLABELED_DATA_DIR + genre + '/' + file_name
        stft_file_path = genre_dir + song_name + '.npy'
        stft_p_file_path = genre_p_dir + song_name + '.npy'

        if isfile(stft_file_path) and isfile(stft_p_file_path):
            print('file already exist! go to next song')
            continue

        print('processing song: %s' % song_name)
        # load() takes care of both the sample rate and the mono mix-down.
        y, sample_rate = load(song_path, sr=sample_rate)
        y_segment = getSegmentFromSong(y, sample_rate, True, 'middle', 29)

        print('computing STFT and HPSS')
        Y = stft(y_segment, n_fft=window_size, hop_length=hop_size,
                 window='hann')
        Y_mag = abs(Y)
        _, P = hpss(Y_mag, margin=1.0)

        print('saving results...')
        np.save(stft_file_path, Y_mag)
        np.save(stft_p_file_path, P)

    return ()
def convert_lower_threshold(y, input_filename, output_filename, sr, parameters):
    """Process harmonic and percussive parts of ``y`` separately and remix them.

    ``y`` is split with ``decompose.hpss``; both components are written
    next to ``input_filename``, each is run through
    ``harmonic(...).run_harmonic`` via ``WavReader``/``WavWriter``, and the
    two processed files are reloaded, padded to a common length with
    ``padding`` and summed into ``output_filename``.

    Parameters
    ----------
    y : array-like
        Time-domain signal passed to ``spectrum.stft``.
    input_filename : str
        Used only to derive the names of the intermediate files by
        replacing ``'.wav'``.
    output_filename : str
        Destination of the final mix.
    sr : int
        Sample rate used for every write and reload.
    parameters : dict
        Keyword arguments forwarded to ``harmonic``.

    Side effects
    ------------
    The ``*harmonic_.wav`` and ``*percussive_.wav`` intermediates are left
    on disk; the ``*out_harmonic_.wav`` / ``*out_percussive_.wav`` files
    are removed at the end.
    """
    # NOTE(review): str.replace returns input_filename unchanged when it
    # contains no '.wav', so all four paths would then alias the input.
    harmonic_path = input_filename.replace('.wav', 'harmonic_.wav')
    out_harmonic_path = input_filename.replace('.wav', 'out_harmonic_.wav')
    percussive_path = input_filename.replace('.wav', 'percussive_.wav')
    out_percussive_path = input_filename.replace('.wav', 'out_percussive_.wav')

    # Separate, then return to the time domain with matching lengths.
    spec = spectrum.stft(y)
    spec_h, spec_p = decompose.hpss(spec, power=2.0)
    y_harmonic, y_percussive = padding(spectrum.istft(spec_h),
                                       spectrum.istft(spec_p))
    sf.write(harmonic_path, y_harmonic, sr)
    sf.write(percussive_path, y_percussive, sr)

    # Same processing for both components; percussive first, as before.
    stages = (
        (percussive_path, out_percussive_path),
        (harmonic_path, out_harmonic_path),
    )
    for src_path, dst_path in stages:
        with WavReader(src_path) as reader:
            with WavWriter(dst_path, reader.channels,
                           reader.samplerate) as writer:
                tsm = harmonic(reader.channels, **parameters)
                tsm.run_harmonic(reader, writer)

    y1, _ = core.load(out_harmonic_path, sr=sr)
    y2, _ = core.load(out_percussive_path, sr=sr)

    # STFT -> ISTFT round trip on each processed signal before padding.
    D1, D2 = spectrum.stft(y1), spectrum.stft(y2)
    mix_h, mix_p = padding(spectrum.istft(D1), spectrum.istft(D2))
    sf.write(output_filename, (mix_h + mix_p), sr)

    # Removal of harmonic_path / percussive_path was disabled in the
    # original, so only the processed intermediates are deleted.
    for leftover in (out_harmonic_path, out_percussive_path):
        if os.path.isfile(leftover):
            os.remove(leftover)
def percussive_ratio(y=None, y_frames=None, n_fft=2048, hop_size=512, margin=1.0):
    """Compute the ratio of percussive to total RMS, in dB.

    The spectrogram is computed from ``y`` with ``stft`` or, if ``y`` is
    ``None``, from ``y_frames`` with ``frames_stft``. It is split with
    ``hpss`` and the RMS of the percussive magnitude is divided by the RMS
    of the full magnitude.

    Parameters
    ----------
    y : array-like, optional
        Time-domain signal. Takes precedence over ``y_frames``.
    y_frames : array-like, optional
        Pre-framed signal, used only when ``y`` is ``None``.
    n_fft : int
        FFT size. The defaults are the original author's choice for
        22050 Hz audio.
    hop_size : int
        Hop length passed to the STFT routine.
    margin : float
        Forwarded to ``hpss``.

    Returns
    -------
    tuple
        ``(ratio_db, P_rms, S_rms)``: the ratio passed through
        ``amplitude_to_db``, the percussive RMS and the total RMS.

    Raises
    ------
    ValueError
        If neither ``y`` nor ``y_frames`` is provided.
    """
    if y is not None:
        D = stft(y, n_fft=n_fft, hop_length=hop_size)
    elif y_frames is not None:
        D = frames_stft(y_frames, n_fft=n_fft, hop_length=hop_size)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on D; fail early with a clear message instead.
        raise ValueError('percussive_ratio requires either `y` or `y_frames`')

    _, P = hpss(D, margin=margin)
    Pm, _ = magphase(P)
    S, _ = magphase(D)
    P_rms = rms(S=Pm)
    S_rms = rms(S=S)
    return amplitude_to_db(P_rms / S_rms), P_rms, S_rms
i + 1) + saveFolder # filename of the percussive and harmonic mixture if not os.path.isdir(saveFolderLoc): os.makedirs(saveFolderLoc) monoLoader = es.MonoLoader(filename=mixFile, sampleRate=44100) x = monoLoader()[:nSeconds * 44100] _stft = stft(x, n_fft=fftSize, hop_length=hopSize, win_length=frameSize, window=winType) X_H, X_P = hpss(_stft, kernel_size=150) # Get harmonic and percussive stfts x_h = istft( X_H, hop_length=hopSize, win_length=frameSize) # Convert stfts to time domain signals x_p = istft(X_P, hop_length=hopSize, win_length=frameSize) MonoWriter = es.MonoWriter(sampleRate=44100, format="mp3") # Write to file MonoWriter.configure(filename=saveFolderLoc + filename + "_median_percussive.mp3") MonoWriter(array(x_p)) MonoWriter = es.MonoWriter(sampleRate=44100, format="mp3") # Write to file MonoWriter.configure(filename=saveFolderLoc + filename +