import os

from torch.utils.data import DataLoader

# Project helpers such as makedir, save_audio_, estimate_mask, Data_synthesis_1
# and Datafolder_soundfiles are assumed to be defined elsewhere in the project.


def make_dataset_1(output_dir):
    """
    Dataset 1 consists of 6 hours of synthetic noisy speech. About the same length
    as the noise, however it has been sampled with It has been sampled with
    replacemnt. The Clean files are from switchboard and the noise is anotated
    noise only segments from the lre17_dev set. Each sample is 5 seconds long.
    SNR
        10 % is reserved for the validation set
    """

    train_len = int(6.6 * .9 * 3600 / 5)  # synthesise 1x the train noise length
    test_len = int(6.6 * .1 * 3600 / 5)  # synthesise 1x the test noise length

    train_set = Data_synthesis_1(length=train_len)
    training_data_loader = DataLoader(train_set, batch_size=1, num_workers=2)
    t_path_str_x = os.path.join(output_dir, 'train', 'x', 'sample_{}.wav')
    t_path_str_y = os.path.join(output_dir, 'train', 'y', 'sample_{}.wav')

    validation_set = Data_synthesis_1(length=test_len, test=True)
    validation_data_loader = DataLoader(validation_set, batch_size=1, num_workers=2)
    v_path_str_x = os.path.join(output_dir, 'val', 'x', 'sample_{}.wav')
    v_path_str_y = os.path.join(output_dir, 'val', 'y', 'sample_{}.wav')

    list_ = ((t_path_str_x, t_path_str_y, training_data_loader),
             (v_path_str_x, v_path_str_y, validation_data_loader)
             )

    for path_str_x, path_str_y, data_loader in list_:
        makedir(os.path.dirname(path_str_x))
        makedir(os.path.dirname(path_str_y))
        for i, (x, y) in enumerate(data_loader):
            x, y = x.numpy()[0], y.numpy()[0]
            save_audio_(x, path_str_x.format(i))
            save_audio_(y, path_str_y.format(i))
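

# Usage sketch (illustrative assumption, not part of the original code): build
# dataset 1 under a hypothetical output directory; make_dataset_1 then writes
# train/{x,y}/sample_*.wav and val/{x,y}/sample_*.wav as shown above.
def _example_make_dataset_1():
    output_dir = os.path.join('data', 'processed', 'dataset_1')
    make_dataset_1(output_dir)
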
def enhance_soundfiles(model, y_paths, output_paths, batch_size, cuda=False):
    if cuda:
        model.cuda()
    model.eval()

    test_set = Datafolder_soundfiles(y_paths=y_paths,
                                     transform=model.transform)
    test_data_loader = DataLoader(test_set,
                                  batch_size=batch_size,
                                  num_workers=2,
                                  pin_memory=cuda)

    for (indexs, (data, Y_m, Y_a, length)) in test_data_loader:
        try:
            mask = estimate_mask(model, data, cuda=cuda)
        except Exception:
            for index in indexs:
                print(index, output_paths[index], 'failed')
            continue

        mask, Y_m, Y_a, length = (k.numpy() for k in (mask, Y_m, Y_a, length))

        for I, index in enumerate(indexs):
            # clean_sample_ reconstructs the enhanced waveform and writes it to
            # the corresponding output path.
            clean_sample_(model, mask[I], Y_m[I], Y_a[I], length[I],
                          output_paths[index])

    print('Finished cleaning', len(output_paths), 'files')
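

# Usage sketch (illustrative assumption, not part of the original code): the
# directory names and batch size are hypothetical placeholders; makedir is the
# project's helper used above.
def _example_enhance_soundfiles(model, noisy_dir='noisy', enhanced_dir='enhanced'):
    import glob

    import torch

    y_paths = sorted(glob.glob(os.path.join(noisy_dir, '*.wav')))
    output_paths = [os.path.join(enhanced_dir, os.path.basename(p)) for p in y_paths]
    makedir(enhanced_dir)
    enhance_soundfiles(model, y_paths, output_paths, batch_size=8,
                       cuda=torch.cuda.is_available())
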
def make_dataset_5(output_dir):
    """
    Dataset 1 consists of 6*5 hours of synthetic noisy speech. About the same length
    as the noise, however it has been sampled with It has been sampled with
    replacemnt. The Clean files are from switchboard and the noise is anotated
    noise only segments from the lre17_dev set. Each sample is 5 seconds long.
    SNR
        10 % is reserved for the validation set
    """
    # int(119.2 * 10**9 / 2 / 8000 / 5) / int(6.6 * .9 * 3600 / 5 * 5)
    # output_dir = Path('.') / 'data' / 'processed' / 'dataset_1'
    # lre17_train is 119.2 GB, which is 119.2 * 10**9 / 2 / 8000 seconds
    # (2 bytes per sample at 8 kHz), almost 70 times larger than the noise.
    # train_len = int(6.6 * .9 * 3600 / 5 * 5)  # synthesise 5 times the train noise
    train_len = int(119.2 * 10**9 / 2 / 8000 / 5)  # synthesise as much as lre17_train
    # test_len = int(6.6 * .1 * 3600 / 5 * 5)  # synthesise 5 times the test noise

    train_set = Data_synthesis_1(length=train_len,
                                 speech_list='lre_train',
                                 noise_version=2)
    training_data_loader = DataLoader(train_set, batch_size=1, num_workers=2)
    t_path_str_x = os.path.join(output_dir, 'train', 'x', 'sample_{}.wav')
    t_path_str_y = os.path.join(output_dir, 'train', 'y', 'sample_{}.wav')

    list_ = ((t_path_str_x, t_path_str_y, training_data_loader), )

    for path_str_x, path_str_y, data_loader in list_:
        makedir(os.path.dirname(path_str_x))
        makedir(os.path.dirname(path_str_y))
        for i, (x, y) in enumerate(data_loader):
            x, y = x.numpy()[0], y.numpy()[0]
            save_audio_(x, path_str_x.format(i))
            save_audio_(y, path_str_y.format(i))
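

# Back-of-the-envelope check (assuming 16-bit samples at 8 kHz) of the train_len
# computation above: roughly 1.49 million five-second clips, about 2070 hours.
def _example_dataset_5_size():
    bytes_total = 119.2 * 10**9       # size of lre17_train on disk
    seconds = bytes_total / 2 / 8000  # 2 bytes per sample, 8000 samples per second
    clips = int(seconds / 5)          # number of 5-second clips
    return clips, seconds / 3600      # (~1.49e6 clips, ~2070 hours)
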
def clean_sample_(model, mask, Y_m, Y_a, length, save_path):
    # Apply the estimated mask to the noisy spectrogram magnitude (Y_m), invert
    # the transform using Y_a (presumably the phase) and the original length,
    # then write the enhanced waveform to disk.
    Xh_m = model.output_transform(mask, Y_m)
    y = model.inverse_transform(Xh_m, Y_a, new_length=length)
    save_audio_(y, save_path)
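

# Hedged sketch (an assumption, not the project's actual code) of the
# estimate_mask helper called in enhance_soundfiles above: a forward pass of the
# mask-estimation network without gradient tracking, moved to GPU if requested.
def _example_estimate_mask(model, data, cuda=False):
    import torch

    with torch.no_grad():
        if cuda:
            data = data.cuda()
        mask = model(data)
    return mask.cpu()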