def test_file_loading():
    """Track loading honours subset and id filters on the DSD100 subset."""
    db = dsdtools.DB(root_dir="data/DSD100subset")

    # no filter: all four tracks, each with non-empty audio
    all_tracks = db.load_dsd_tracks()
    assert len(all_tracks) == 4
    for t in all_tracks:
        assert t.audio.shape[1] > 0

    # restricting to the Dev subset halves the count
    assert len(db.load_dsd_tracks(subsets='Dev')) == 2

    # listing both subsets explicitly is equivalent to no filter
    assert len(db.load_dsd_tracks(subsets=['Dev', 'Test'])) == 4

    # a single id yields exactly one track
    assert len(db.load_dsd_tracks(ids=55)) == 1
def test_evaluate(method):
    """Run the evaluation entry point on one track without saving results."""
    db = dsdtools.DB(root_dir="data/DSD100subset", evaluation=method)
    # evaluate only track 55; the call returns a truthy value on success
    result = db.evaluate(user_function=user_function1, ids=55)
    assert result
def __init__(self, root_dir=CONSTS.DB_PATH, targets=CONSTS.TARGETS):
    """Load the DSD dev/test splits and build one compiled model per target.

    :param root_dir: path to the DSD database root
    :param targets: iterable of separation targets to build models for
    """
    self.DB = dsdtools.DB(root_dir=root_dir)
    self.dev_set = self.DB.load_dsd_tracks(subsets='Dev')
    self.test_set = self.DB.load_dsd_tracks(subsets='Test')
    self.targets = targets
    # one compiled model per separation target, kept in target order
    self.models = []
    for tgt in self.targets:
        self.models.append(model_constructor.make_compile_model(tgt))
    # network input features are computed from the dev tracks
    self.models_input = processing.make_input(self.dev_set)
def test_file_loading_valid():
    """valid_ids moves the listed tracks out of Dev into a Valid subset."""
    db = dsdtools.DB(root_dir="data/DSD100subset", valid_ids=55)

    # Dev held two tracks; with id 55 moved to Valid only id 81 remains
    dev_tracks = db.load_dsd_tracks(subsets='Dev')
    assert len(dev_tracks) == 1
    assert dev_tracks[0].id == 81

    # the Valid subset now contains exactly the moved track
    valid_tracks = db.load_dsd_tracks(subsets='Valid')
    assert len(valid_tracks) == 1
    assert valid_tracks[0].id == 55

    # mixing Valid with another subset in one request is rejected
    with pytest.raises(ValueError):
        db.load_dsd_tracks(subsets=['Dev', 'Valid'])
def musdb2stft(save_dir='./data/', target='wo_vocals'):
    """
    create stft dataset from MUSDB18 (or DSD100, per the module-level opts)

    Each track is downmixed to mono, resampled to 20480 Hz, split into
    ~0.8 s chunks at three different start offsets (data augmentation),
    and each chunk's magnitude STFT is saved as an 8-bit grayscale PNG
    under ``save_dir/target``.

    :param save_dir: the path for the wanted data directory
    :param target: music target. for the original setup in the paper run twice, \
        once for 'wo_vocals' and once for 'mixture'
    """
    # choose the dataset from the command-line options (module-level `opts`)
    if opts.dataset == 'MUSDB18':
        mus = musdb.DB(root_dir='./dataset/musdb18')
        tracks = mus.load_mus_tracks(subsets='train')
    elif opts.dataset == 'DSD100':
        dsd = dsdtools.DB(root_dir='./dataset/DSD100')
        tracks = dsd.load_dsd_tracks(subsets='Dev')
    else:
        sys.exit("Only support DSD100|MUSDB18")
    save_dir = os.path.join(save_dir, target)
    if not os.path.exists(save_dir):
        print("Creating directory: {}".format(save_dir))
        os.makedirs(save_dir)
    for track in tqdm(tracks):
        # switch_music picks the audio array for the requested target
        # (helper defined elsewhere in this module)
        music = switch_music(target, track)
        # resample audio file to 20480 Hz and create mono audio array from both channels
        music = music.mean(axis=1)
        samplerate = 20480
        music = librosa.resample(music, track.rate, samplerate)
        # parameters setting, resulting images of 257*256
        # (the top frequency bin is dropped below, giving 256 rows)
        fft_size = 512
        hopsamp = fft_size // 8
        # chunk duration covering 255 hops -> 256 STFT frames per chunk
        part_duration = 255 / (samplerate / hopsamp)
        length_song = music.shape[0]
        batch_size = round(samplerate * part_duration)
        counter = 1
        # data augmentation, taking 0.8 sec of audio from to create stft
        # with different start time (offsets of 0 / 0.3 / 0.6 seconds)
        for shift_duration in tqdm([0, 0.3, 0.6]):
            shift_len = round(shift_duration * samplerate)
            number_of_parts = int(
                np.floor((length_song - shift_len) / batch_size))
            # trim to a whole number of chunks, then reshape into
            # (number_of_parts, chunk_length) non-overlapping rows
            data = music[shift_len:number_of_parts * batch_size + shift_len]
            data2 = data.reshape(number_of_parts,
                                 int(data.size / number_of_parts))
            for row in data2:
                stft_full = librosa.core.stft(row,
                                              n_fft=fft_size,
                                              hop_length=hopsamp,
                                              win_length=fft_size)
                # drop the last frequency bin: 257 -> 256 rows
                stft_full = stft_full[0:-1, :]
                stft_mag = abs(stft_full)
                # rescale magnitudes to [0, 1]
                # NOTE(review): degenerate when min == max (silent chunk);
                # such all-zero images are filtered by the sum() check below
                stft_mag = np.interp(stft_mag,
                                     (stft_mag.min(), stft_mag.max()),
                                     (-0, +1))
                # gamma-compress (exponent 0.3) and quantize to 8-bit
                stft_mag = (stft_mag**0.3) * 255
                stft_mag = stft_mag.astype('uint8')
                if opts.dataset == 'MUSDB18':
                    songname = track.name + '_' + str(counter)
                else:
                    songname = track.filename + '_' + str(counter)
                songname = os.path.join(save_dir, songname)
                # skip silent chunks (all-zero image)
                if stft_mag.sum() != 0:
                    imageio.imwrite(songname + '.png', stft_mag)
                counter += 1
# multiply mask Yj = np.multiply(X, Mask) # inverte to time domain and set same length as original mixture target_estimate = istft(Yj)[1].T[:N,:] # set this as the source estimate estimates[target_name] = target_estimate # accumulate to the accompaniment if this is not vocals if target_name != 'vocals': accompaniment_source += target_estimate # set accompaniment source estimates['accompaniment']=accompaniment_source return estimates # initiate dsdtools dsd = dsdtools.DB() #default parameters alpha = 1 #exponent for the ratio Mask theta = 0.5 # threshold dsd.run( functools.partial(IBM, alpha=alpha,theta=theta), estimates_dir='IBM', parallel=True, cpus=4 )
Voice.y = rs.resample(Voice.y, fs / 2, fs) Voice.y = Voice.y[0:numSamples] mS = np.multiply(SongMask, Mix.mX) Song.y = SM.istft(mS, Mix.pX, Parm) Song.y = rs.resample(Song.y, fs / 2, fs) Song.y = Song.y[0:numSamples] voc_array[:, 1] = Voice.y acc_array[:, 1] = Song.y # return any number of targets estimates = { 'vocals': voc_array, 'accompaniment': acc_array, } return estimates # initiate dsdtools dsd = dsdtools.DB(root_dir="./Wavfile") # verify if my_function works correctly if dsd.test(my_function): print("my_function is valid") dsd.run( my_function, estimates_dir='./Audio/CNN_015', )
import dsdtools
import gc
import keras
from preprocessing import *
from consts import *
from models import *

# Module-level setup: load the DSD100 dev/test splits and build one
# audio-prediction model per separation target.
dsd = dsdtools.DB(root_dir=DB_PATH)
dev_set = dsd.load_dsd_tracks(subsets='Dev')
test_set = dsd.load_dsd_tracks(subsets='Test')
targets = TARGETS  # presumably defined in consts — verify
models = list()
for t in targets:
    models.append(APModel(t))
# network input features computed once from the dev tracks (preprocessing)
models_input = processInput(dev_set)


def trainModel(target, epochs=EPOCHS, batch_size=BATCH_SIZE,
               save_epochs=False):
    # Train the model associated with *target* on the dev set.
    # NOTE(review): body appears truncated in this view — `checkpointer`
    # is created but no fit() call is visible; confirm against the full file.
    model = models[targets.index(target)]
    model_target = processTarget(dev_set, target)
    # checkpoint the full model after every epoch (save_best_only=False)
    checkpointer = keras.callbacks.ModelCheckpoint(model.name + '_DNN_model',
                                                   monitor='val_loss',
                                                   verbose=0,
                                                   save_best_only=False,
                                                   save_weights_only=False,
                                                   mode='auto',
                                                   period=1)
def test_stft_dsd(config, checkpoint_dir, output_folder='./outputs/',
                  method_name='abl_1', target='vocals', is_test=False):
    """
    Testing/evaluating the net. For given generator, produces sdr, sir and
    sar for DSD100 dataset.

    :param config: path to config file
    :param checkpoint_dir: path to generator's saved parameters. In case of
        evaluating during training, path to checkpoints directory.
    :param output_folder: desired output path
    :param method_name: name of method
    :param target: desired target (vocals/drums/bass)
    :param is_test: flag, when running during training should be False.
        True for total DSD100 evaluation for a given checkpoint.
    :return: stat parameters for the net, added to tensorboard.
    """
    dsd = dsdtools.DB(root_dir='../../data/datasets/music/DSD100')
    tracks = dsd.load_dsd_tracks(subsets='Test')
    config = get_config(config)
    trainer = MUSIC_Trainer(config)
    enhance = 9  # mask-enhancement factor for GenerateMusic (default was 8)
    # BUGFIX: original read `if ~is_test:`. `~` is bitwise NOT, so
    # ~False == -1 and ~True == -2 are BOTH truthy — the 5-track subsampling
    # always ran and is_test=True never triggered the full evaluation.
    if not is_test:
        # quick evaluation during training: fixed 5-track subset
        tracks = [tracks[i] for i in [17, 41, 6, 23, 31]]
    # load the latest generator checkpoint and switch to eval mode on GPU
    last_gen_name = get_model_list(checkpoint_dir, "gen")
    state_dict = torch.load(last_gen_name)
    trainer.gen.load_state_dict(state_dict['gen'])
    trainer.cuda()
    trainer.eval()
    encode, decode = trainer.gen.encode, trainer.gen.decode
    recon_list = []
    sdr_list, sir_list, sar_list = [], [], []
    sdr_list_inter, sir_list_inter, sar_list_inter = [], [], []
    print(method_name)
    for track in tqdm(tracks):
        sample_rate = 20480
        # mixture plus target / interference references for this track
        music_array, music_array_ref, music_array_inter = music_track(
            track, target)
        music_array_samp = librosa.resample(music_array.transpose(),
                                            track.rate, sample_rate)
        # run the generator on the left and right channels independently
        masker_l = GenerateMusic(music_array_samp[0, :], encode, decode,
                                 enhance=enhance)
        recon_vocals_l, recon_inter_l = masker_l.forward()
        masker_r = GenerateMusic(music_array_samp[1, :], encode, decode,
                                 enhance=enhance)
        recon_vocals_r, recon_inter_r = masker_r.forward()
        recon_vocals = np.vstack((recon_vocals_l, recon_vocals_r))
        recon_inter = np.vstack((recon_inter_l, recon_inter_r))
        # back to the track's native sample rate, channels-last layout
        recon_vocals = librosa.resample(recon_vocals, sample_rate, track.rate)
        recon_inter = librosa.resample(recon_inter, sample_rate, track.rate)
        recon_vocals = recon_vocals.transpose()
        recon_inter = recon_inter.transpose()
        recon_list.append(recon_vocals)
        # pad (by repeating the tail) or trim the estimates so their length
        # matches the reference before BSS evaluation
        if len(music_array_ref) > len(recon_vocals):
            len_diff = len(music_array_ref) - len(recon_vocals)
            recon_vocals = np.concatenate(
                (recon_vocals, recon_vocals[-len_diff:, :]))
            recon_inter = np.concatenate(
                (recon_inter, recon_inter[-len_diff:, :]))
        elif len(music_array_ref) < len(recon_vocals):
            recon_vocals = recon_vocals[0:len(music_array_ref), :]
            recon_inter = recon_inter[0:len(music_array_ref), :]
        reference_music = np.array([music_array_ref, music_array_inter])
        estimates_music = np.array([recon_vocals, recon_inter])
        # framewise BSS-eval; NaN frames (e.g. silent windows) dropped below
        sdr_b, _, sir_b, sar_b, _ = museval.metrics.bss_eval_images_framewise(
            reference_music, estimates_music, window=1323000, hop=661500)
        sdr, sir, sar = sdr_b[0], sir_b[0], sar_b[0]
        sdr_inter, sir_inter, sar_inter = sdr_b[1], sir_b[1], sar_b[1]
        sdr = np.mean(sdr[~np.isnan(sdr)])
        sir = np.mean(sir[~np.isnan(sir)])
        sar = np.mean(sar[~np.isnan(sar)])
        sdr_inter = np.mean(sdr_inter[~np.isnan(sdr_inter)])
        sir_inter = np.mean(sir_inter[~np.isnan(sir_inter)])
        sar_inter = np.mean(sar_inter[~np.isnan(sar_inter)])
        sdr_list.append(sdr)
        sir_list.append(sir)
        sar_list.append(sar)
        sdr_list_inter.append(sdr_inter)
        sir_list_inter.append(sir_inter)
        sar_list_inter.append(sar_inter)
    # locate the best-scoring tracks; their reconstructions are written out
    sdr_max = max(sdr_list)
    sdr_max_loc = sdr_list.index(sdr_max)
    sir_max = max(sir_list)
    sir_max_loc = sir_list.index(sir_max)
    sdr_median, sir_median, sar_median = (np.median(sdr_list),
                                          np.median(sir_list),
                                          np.median(sar_list))
    sdr_median_inter, sir_median_inter, sar_median_inter = (
        np.median(sdr_list_inter), np.median(sir_list_inter),
        np.median(sar_list_inter))
    output_folder = output_folder + method_name
    if not os.path.exists(output_folder):
        print("Creating directory: {}".format(output_folder))
        os.makedirs(output_folder)
    stats = [sdr_median, sir_median, sar_median, sdr_median_inter,
             sir_median_inter, sar_median_inter]
    stats_name = ['sdr_median', 'sir_median', 'sar_median',
                  'sdr_median_inter', 'sir_median_inter', 'sar_median_inter']
    # write a human-readable summary: alternating name / value lines
    with open(os.path.join(output_folder, 'stats_final_test.txt'), 'w') as f:
        for stat_name, stat in zip(stats_name, stats):
            f.write("%s\n" % stat_name)
            f.write("%s\n" % stat)
            print(stat_name + ': ' + str(stat))
    # full per-track lists, pickled for later analysis
    stats_dic = {
        'sdr': sdr_list,
        'sir': sir_list,
        'sar': sar_list,
        'sdr_inter': sdr_list_inter,
        'sir_inter': sir_list_inter,
        'sar_inter': sar_list_inter
    }
    outfile = os.path.join(output_folder, 'final_results')
    save_obj(stats_dic, outfile)
    # export the reconstructions with the best SDR and the best SIR
    music_2_write = recon_list[sdr_max_loc]
    music_2_write_sec = recon_list[sir_max_loc]
    sf.write(
        os.path.join(output_folder,
                     'best_sdr_iter_' + tracks[sdr_max_loc].filename + '.wav'),
        music_2_write, track.rate)
    sf.write(
        os.path.join(output_folder,
                     'best_sir_iter_' + tracks[sir_max_loc].filename + '.wav'),
        music_2_write_sec, track.rate)
    return sdr_median, sir_median, sar_median, sdr_max, sir_max
def dsd(request):
    """Pytest fixture: a dsdtools database rooted at the parametrized path."""
    root = request.param
    return dsdtools.DB(root_dir=root)
elif instrument == 'drums': test_X.append(banks) test_y.append(1) elif instrument == 'bass': test_X.append(banks) test_y.append(2) i += SAMPLE_LENGTH if __name__ == '__main__': parser = ArgumentParser() parser.add_argument('-f', dest='PATH') args = parser.parse_args() dsd = dsdtools.DB(root_dir=args.PATH) dsd.run(process_train_track, subsets='Dev') print("Processing training data") train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=.2, random_state=6) train_X = np.array(train_X) val_X = np.array(val_X) np.save('data/train_X.npy', train_X) np.save('data/train_y.npy', train_y) np.save('data/val_X.npy', val_X) np.save('data/val_y.npy', val_y) print("Saved training data")
def test_env(path):
    """DB() picks up its root from the DSD_PATH environment variable."""
    if path is not None:
        os.environ["DSD_PATH"] = path
    # construction must succeed (and yield a truthy object) without root_dir
    db = dsdtools.DB()
    assert db