def make_dataset_1(output_dir):
    """
    Synthesise dataset 1: roughly 6.6 hours of noisy speech built by mixing
    clean Switchboard utterances with annotated noise-only segments from the
    lre17_dev set, sampled with replacement.  Each sample is 5 seconds long
    and 10 % of the material is reserved for the validation split.

    Writes paired files under ``output_dir``:
    ``{train,val}/x/sample_{i}.wav`` and ``{train,val}/y/sample_{i}.wav``.
    """
    n_train = int(6.6 * .9 * 3600 / 5)  # one pass over the training noise
    n_val = int(6.6 * .1 * 3600 / 5)    # one pass over the held-out noise

    # Build (x-path template, y-path template, loader) per split.
    splits = []
    for split_name, split_len, is_val in (('train', n_train, False),
                                          ('val', n_val, True)):
        kwargs = {'length': split_len}
        if is_val:
            # only the validation set passes test=True, as before
            kwargs['test'] = True
        loader = DataLoader(Data_synthesis_1(**kwargs),
                            batch_size=1, num_workers=2)
        template_x = os.path.join(output_dir, split_name, 'x', 'sample_{}.wav')
        template_y = os.path.join(output_dir, split_name, 'y', 'sample_{}.wav')
        splits.append((template_x, template_y, loader))

    for template_x, template_y, loader in splits:
        makedir(os.path.dirname(template_x))
        makedir(os.path.dirname(template_y))
        for idx, (sig_x, sig_y) in enumerate(loader):
            sig_x, sig_y = sig_x.numpy()[0], sig_y.numpy()[0]
            save_audio_(sig_x, template_x.format(idx))
            save_audio_(sig_y, template_y.format(idx))
def enhance_soundfiles(model, y_paths, output_paths, batch_size, cuda=False):
    """
    Enhance a batch of sound files with ``model`` and write the results.

    Parameters
    ----------
    model : enhancement model exposing ``transform`` (and, via
        ``clean_sample_``, ``output_transform`` / ``inverse_transform``).
    y_paths : paths of the noisy input files.
    output_paths : indexable collection mapping dataset index -> output path.
    batch_size : number of files per mini-batch.
    cuda : move the model to GPU and pin loader memory when True.

    Files whose mask estimation fails are reported and skipped.
    """
    if cuda:
        model.cuda()
    model.eval()  # inference mode
    test_set = Datafolder_soundfiles(y_paths=y_paths, transform=model.transform)
    test_data_loader = DataLoader(test_set, batch_size=batch_size,
                                  num_workers=2, pin_memory=cuda)
    for indexs, (data, Y_m, Y_a, length) in test_data_loader:
        try:
            mask = estimate_mask(model, data, cuda=cuda)
        # was a bare `except:`; keep the best-effort skip but stop
        # swallowing KeyboardInterrupt/SystemExit
        except Exception:
            for index in indexs:
                print(index, output_paths[index], 'failed')
            continue
        mask, Y_m, Y_a, length = (k.numpy() for k in (mask, Y_m, Y_a, length))
        for i, index in enumerate(indexs):
            # clean_sample_ reconstructs the waveform and saves it itself;
            # the previous extra `save_audio_(y, save_path)` call used an
            # undefined `save_path` and clean_sample_'s None return value.
            clean_sample_(model, mask[i], Y_m[i], Y_a[i], length[i],
                          output_paths[index])
    # previous message referenced an undefined `input_dir` (NameError)
    print('Finished cleaning')
def make_dataset_5(output_dir):
    """
    Synthesise dataset 5: noisy speech roughly the size of the lre17_train
    set, sampled with replacement.  Clean speech comes from the 'lre_train'
    speech list and noise from version 2 of the annotated noise-only
    segments.  Each sample is 5 seconds long.  Only a training split is
    written (no validation files).

    Writes ``train/x/sample_{i}.wav`` and ``train/y/sample_{i}.wav`` under
    ``output_dir``.

    (The old docstring said "Dataset 1" and "6*5 hours"; the actual length
    is derived from the lre17_train size below.)
    """
    # lre17_train is 119.2 GB of 2-byte samples at 8 kHz, i.e.
    # 119.2e9 / 2 / 8000 seconds of audio (~70x the 6.6 h noise set);
    # synthesise one 5-second sample per available slot.
    train_len = int(119.2 * 10**9 / 2 / 8000 / 5)
    train_set = Data_synthesis_1(length=train_len, speech_list='lre_train',
                                 noise_version=2)
    training_data_loader = DataLoader(train_set, batch_size=1, num_workers=2)
    t_path_str_x = os.path.join(output_dir, 'train', 'x', 'sample_{}.wav')
    t_path_str_y = os.path.join(output_dir, 'train', 'y', 'sample_{}.wav')
    makedir(os.path.dirname(t_path_str_x))
    makedir(os.path.dirname(t_path_str_y))
    for i, (x, y) in enumerate(training_data_loader):
        x, y = x.numpy()[0], y.numpy()[0]
        save_audio_(x, t_path_str_x.format(i))
        save_audio_(y, t_path_str_y.format(i))
def clean_sample_(model, mask, Y_m, Y_a, length, save_path):
    """
    Reconstruct an enhanced waveform from an estimated mask and save it.

    Applies the model's output transform to combine ``mask`` with ``Y_m``,
    inverts the result together with ``Y_a`` to a time signal of
    ``length`` samples, and writes it to ``save_path``.
    NOTE(review): Y_m/Y_a are presumably magnitude and angle components of
    the mixture spectrogram — confirm against ``model.transform``.
    """
    estimated_spec = model.output_transform(mask, Y_m)
    waveform = model.inverse_transform(estimated_spec, Y_a, new_length=length)
    save_audio_(waveform, save_path)