Esempio n. 1
0
def prepare_training_data(chime_data_dir, dest_dir, suffix_id):
    """Export training features for the 'tr' and 'dt' stages.

    For each stage the file list is obtained from ``gen_flist_simu``. For
    every entry the '_clean' and '_noise' audio is loaded, transformed with
    ``stft`` and handed to ``estimate_IBM``. The two returned masks and the
    magnitude of the summed spectra are pickled to
    ``dest_dir/<stage>/<entry name>``, and the relative paths of all
    exported files are written to ``dest_dir/flist_<stage>_<suffix_id>.json``.

    Args:
        chime_data_dir: Passed through to ``gen_flist_simu``.
        dest_dir: Directory that receives the stage folders and JSON lists.
        suffix_id: Passed to ``gen_flist_simu`` and used in the JSON name.
    """
    for stage in ('tr', 'dt'):
        fpath, flist = gen_flist_simu(chime_data_dir, stage, suffix_id)
        stage_dir = os.path.join(dest_dir, stage)
        mkdir_p(stage_dir)

        exported = []
        progress = tqdm.tqdm(flist,
                             desc='Generating data for {}'.format(fpath))
        for entry in progress:
            speech = get_audio_data(entry, '_clean')
            noise = get_audio_data(entry, '_noise')

            # Swap the first two axes of both spectrograms.
            speech_spec = stft(speech, time_dim=1).transpose((1, 0, 2))
            noise_spec = stft(noise, time_dim=1).transpose((1, 0, 2))

            speech_mask, noise_mask = estimate_IBM(speech_spec, noise_spec)
            mixture_mag = np.abs(speech_spec + noise_spec)

            payload = {
                'IBM_X': speech_mask.astype(np.float32),
                'IBM_N': noise_mask.astype(np.float32),
                'Y_abs': mixture_mag.astype(np.float32)
            }

            # Only the last path component names the exported file.
            base_name = entry.split('/')[-1]
            with open(os.path.join(stage_dir, base_name), 'wb') as fid:
                pickle.dump(payload, fid)
            exported.append(os.path.join(stage, base_name))

        list_name = 'flist_{}_{}.json'.format(stage, suffix_id)
        with open(os.path.join(dest_dir, list_name), 'w') as fid:
            json.dump(exported, fid, indent=4)
Esempio n. 2
0
def test(args):
    """Build the 'dt' data loader and model, then run ``ModelTest.test``.

    Args:
        args: Options object. This function reads ``batch_size``,
            ``num_workers``, ``model_type`` and ``data_dir`` from it and
            also forwards it to ``Chime_Dataset`` and ``ModelTest``.

    Raises:
        ValueError: If ``args.model_type`` is neither "BLSTM" nor "FW".
    """
    from test_utils import ModelTest

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prepare Data
    test_dataset = Chime_Dataset('dt', args)
    # The collate callable is passed directly instead of being wrapped in a
    # lambda: the wrapper added nothing, and a lambda cannot be pickled,
    # which breaks num_workers > 0 wherever worker processes are spawned.
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             pin_memory=True,
                             collate_fn=Chime_Collate,
                             num_workers=args.num_workers)

    # Prepare model
    if args.model_type == 'BLSTM':
        model = BLSTMMaskEstimator()
        model_save_dir = os.path.join(args.data_dir, 'BLSTM_model')
        mkdir_p(model_save_dir)
    elif args.model_type == 'FW':
        model = SimpleFWMaskEstimator()
        model_save_dir = os.path.join(args.data_dir, 'FW_model')
        mkdir_p(model_save_dir)
    else:
        raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"')

    criterion = torch.nn.BCELoss()

    tester = ModelTest(model, test_loader, criterion, args, device)
    tester.test()
Esempio n. 3
0
def train(args):
    """Build the 'tr' data loader, model and optimizer, then train.

    Args:
        args: Options object. This function reads ``batch_size``,
            ``num_workers``, ``model_type``, ``data_dir``,
            ``learning_rate`` and ``num_epochs`` from it and also forwards
            it to ``Chime_Dataset`` and ``ModelTrainer``.

    Raises:
        ValueError: If ``args.model_type`` is neither "BLSTM" nor "FW".
    """
    from train_utils import ModelTrainer

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prepare Data
    train_dataset = Chime_Dataset('tr', args)
    # The collate callable is passed directly instead of being wrapped in a
    # lambda: the wrapper added nothing, and a lambda cannot be pickled,
    # which breaks num_workers > 0 wherever worker processes are spawned.
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              collate_fn=Chime_Collate,
                              num_workers=args.num_workers)

    # Prepare model
    if args.model_type == 'BLSTM':
        model = BLSTMMaskEstimator()
        model_save_dir = os.path.join(args.data_dir, 'BLSTM_model')
        mkdir_p(model_save_dir)
    elif args.model_type == 'FW':
        model = SimpleFWMaskEstimator()
        model_save_dir = os.path.join(args.data_dir, 'FW_model')
        mkdir_p(model_save_dir)
    else:
        raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"')

    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-4)

    trainer = ModelTrainer(model, train_loader, criterion, optimizer, args,
                           device)
    trainer.train(args.num_epochs)
Esempio n. 4
0
def prepare_clean_training_data(
        chime_data_dir, dest_dir,
        clean_wav_path='/media/hipo/Mega Store/Dataset/single file/'
                       'Chinese_tai_clean.wav'):
    """Export training features that pair CHiME noise with external speech.

    For each stage ('tr', 'dt') and each entry of the simulated file list,
    the '.Noise' audio is loaded and a segment of equal length (measured on
    the channel-1 noise file) is cut from one long clean recording. Segments
    are taken consecutively; when the recording is exhausted the read
    position wraps back to its beginning. Both signals go through ``stft``
    and ``estimate_IBM``. The resulting masks and mixture magnitude are
    pickled to ``dest_dir/<stage>/<entry name>``, and the exported relative
    paths are written to ``dest_dir/flist_<stage>.json``.

    Args:
        chime_data_dir: Passed through to ``gen_flist_simu``.
        dest_dir: Directory that receives the stage folders and JSON lists.
        clean_wav_path: Path of the long clean recording. Defaults to the
            location that was previously hard-coded.
    """
    # The recording is stage-independent, so read it once up front.
    clean_data = audioread(clean_wav_path)
    start = 0
    for stage in ['tr', 'dt']:
        reset_counter = 0
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        print("clean_data size:", clean_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            noise_audio = get_audio_data(f, '.Noise')
            # Channel 1 of the noise recording supplies the segment length.
            chime_size = audioread('{}.CH{}{}.Noise.wav'.format(f, 1, ''))
            n_samples = chime_size.shape[0]

            end = start + n_samples
            if end > clean_data.shape[0]:
                # Not enough samples left: wrap around and count the wrap
                # (the counter used to stay at 0 and always printed 1).
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = n_samples

            # A single leading axis is added, exactly as before.
            # NOTE(review): the original looped over range(1, 7) but only
            # re-assigned the same slice and appended once after the loop;
            # confirm whether six identical channels were intended.
            # NOTE(review): assumes the clean recording is at least as long
            # as every noise file; otherwise the slice comes out short.
            clean_audio = clean_data[start:end][None, :].astype(np.float32)
            start = end

            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))

            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            base_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, base_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, base_name))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)
Esempio n. 5
0
def prepare_training_data(chime_data_dir, dest_dir):
    """Export training features for the 'tr' and 'dt' stages.

    Every entry of the list returned by ``gen_flist_simu`` has its '.Clean'
    and '.Noise' audio loaded and transformed with ``stft``. The masks from
    ``estimate_IBM`` and the magnitude of the summed spectra are pickled to
    ``dest_dir/<stage>/<entry name>``. The relative paths of the exported
    files are stored in ``dest_dir/flist_<stage>.json``.

    Args:
        chime_data_dir: Passed through to ``gen_flist_simu``.
        dest_dir: Directory that receives the stage folders and JSON lists.
    """
    for stage in ('tr', 'dt'):
        entries = gen_flist_simu(chime_data_dir, stage, ext=True)
        out_dir = os.path.join(dest_dir, stage)
        mkdir_p(out_dir)

        written = []
        description = 'Generating data for {}'.format(stage)
        for entry in tqdm.tqdm(entries, desc=description):
            speech = get_audio_data(entry, '.Clean')
            noise = get_audio_data(entry, '.Noise')

            # Swap the first two axes of both spectrograms.
            speech_spec = stft(speech, time_dim=1).transpose((1, 0, 2))
            noise_spec = stft(noise, time_dim=1).transpose((1, 0, 2))

            speech_mask, noise_mask = estimate_IBM(speech_spec, noise_spec)
            mixture_mag = np.abs(speech_spec + noise_spec)

            features = {
                'IBM_X': speech_mask.astype(np.float32),
                'IBM_N': noise_mask.astype(np.float32),
                'Y_abs': mixture_mag.astype(np.float32)
            }

            # The exported file is named after the last path component.
            file_name = entry.split('/')[-1]
            with open(os.path.join(out_dir, file_name), 'wb') as fid:
                pickle.dump(features, fid)
            written.append(os.path.join(stage, file_name))

        json_path = os.path.join(dest_dir, 'flist_{}.json'.format(stage))
        with open(json_path, 'w') as fid:
            json.dump(written, fid, indent=4)
Esempio n. 6
0
# The file-list name encodes both values: its first two characters are the
# stage and the text after its last underscore is the scenario.
stage = args.flist[:2]
scenario = args.flist.split('_')[-1]

# CHiME data handling: pick the file-list generator matching the scenario;
# anything other than 'simu' or 'real' is rejected.
if scenario == 'simu':
    flist = gen_flist_simu(args.chime_dir, stage)
elif scenario == 'real':
    flist = gen_flist_real(args.chime_dir, stage)
else:
    raise ValueError('Unknown flist {}'.format(args.flist))

# Create one output directory per beamformer and environment, named
# '<stage>05_<env>_<scenario>'.
for env in ['caf', 'bus', 'str', 'ped']:
    for beamformer in beamformers:
        mkdir_p(os.path.join(args.output_dir, beamformer,'{}05_{}_{}'.format(
                stage, env, scenario
        )))

# Timing accumulators. Only t_io is updated in this excerpt; t_net and
# t_beamform are presumably updated further down the loop body.
t_io = 0
t_net = 0
t_beamform = 0
# Beamform loop
for cur_line in tqdm(flist):
    # Time the audio loading for this entry.
    with Timer() as t:
        if scenario == 'simu':
            # Simulated entries are loaded directly, with no context samples.
            audio_data = get_audio_data(cur_line)
            context_samples = 0
        elif scenario == 'real':
            # Real entries are three-element sequences whose items are
            # passed on to get_audio_data_with_context.
            audio_data, context_samples = get_audio_data_with_context(
                    cur_line[0], cur_line[1], cur_line[2])
    t_io += t.msecs
Esempio n. 7
0
    log.info('Preparing training data and storing it in {}'.format(
        args.data_dir))
    prepare_training_data(args.chime_dir, args.data_dir)

# Load the exported file list of each stage from
# <data_dir>/flist_<stage>.json.
flists = dict()
for stage in ['tr', 'dt']:
    with open(os.path.join(args.data_dir,
                           'flist_{}.json'.format(stage))) as fid:
        flists[stage] = json.load(fid)
log.debug('Loaded file lists')

# Prepare model: instantiate the requested estimator and create the
# directory named after it inside data_dir.
if args.model_type == 'BLSTM':
    model = BLSTMMaskEstimator()
    model_save_dir = os.path.join(args.data_dir, 'BLSTM_model')
    mkdir_p(model_save_dir)
elif args.model_type == 'FW':
    model = SimpleFWMaskEstimator()
    model_save_dir = os.path.join(args.data_dir, 'FW_model')
    mkdir_p(model_save_dir)
else:
    raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"')

# A non-negative args.gpu selects that device and moves the model to it; a
# negative value keeps everything on the CPU.
# NOTE(review): the cuda/to_gpu calls look like the Chainer API - confirm.
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
# Array module matching the chosen device: numpy on CPU, cupy on GPU.
xp = np if args.gpu < 0 else cuda.cupy
log.debug('Prepared model')

# Setup optimizer (Adam, constructed with no arguments)
optimizer = optimizers.Adam()
Esempio n. 8
0
def prepare_other_training_data(
        train_dir, dest_dir,
        noise_wav_path='/media/hipo/lento/Dataset/single file/noise_files/'
                       'all_noise.wav'):
    """Export training features that pair speech files with external noise.

    For each stage ('tr', 'dt') every regular file in the stage directory is
    loaded as the clean signal, and a segment of equal length is cut from
    one long noise recording. Segments are taken consecutively; when the
    recording is exhausted the read position wraps back to its beginning.
    Both signals go through ``stft`` and ``estimate_IBM``. The resulting
    masks and mixture magnitude are pickled to
    ``dest_dir/<stage>/<file name>``, and the exported relative paths are
    written to ``dest_dir/flist_<stage>.json``.

    Args:
        train_dir: Parent directory of the 'tr' and 'dt' folders. Its last
            character is dropped before joining.
        dest_dir: Directory that receives the stage folders and JSON lists.
        noise_wav_path: Path of the long noise recording. Defaults to the
            location that was previously hard-coded.
    """
    # The recording is stage-independent, so read it once up front.
    noise_data = audioread(noise_wav_path)
    start = 0

    for stage in ['tr', 'dt']:
        # The stage folder is derived with string equality. The original
        # compared with `is`, which tests object identity and only works
        # by accident of string interning.
        # NOTE(review): train_dir[:-1] assumes a one-character suffix
        # (presumably a trailing slash) - confirm against callers.
        chime_data_dir = os.path.join(train_dir[:-1], stage)
        print(chime_data_dir)
        reset_counter = 0
        flist = [f for f in listdir(chime_data_dir)
                 if isfile(join(chime_data_dir, f))]
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        print("noise_data size:", noise_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            path = os.path.join(chime_data_dir, f)
            clean_audio = get_audio_single(path)
            # The raw file is read again only to obtain its sample count.
            chime_size = audioread(path)
            n_samples = chime_size.shape[0]

            end = start + n_samples
            if end > noise_data.shape[0]:
                # Not enough samples left: wrap around and count the wrap
                # (the counter used to stay at 0 and always printed 1).
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = n_samples

            # Single noise channel with a leading axis. This replaces a
            # one-iteration loop followed by a concatenate of one array.
            # NOTE(review): assumes the noise recording is at least as long
            # as every speech file; otherwise the slice comes out short.
            noise_audio = noise_data[start:end][None, :].astype(np.float32)
            start = end

            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))

            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            base_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, base_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, base_name))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)
Esempio n. 9
0
# Array module matching the chosen device: numpy when args.gpu is negative,
# cupy otherwise.
xp = np if args.gpu < 0 else cuda.cupy

# The file-list name encodes both values: its first two characters are the
# stage and the text after its last underscore is the scenario.
stage = args.flist[:2]
scenario = args.flist.split('_')[-1]

# CHiME data handling: pick the file-list generator matching the scenario;
# anything other than 'simu' or 'real' is rejected.
if scenario == 'simu':
    flist = gen_flist_simu(args.chime_dir, stage)
elif scenario == 'real':
    flist = gen_flist_real(args.chime_dir, stage)
else:
    raise ValueError('Unknown flist {}'.format(args.flist))

# Create one output directory per environment, named
# '<stage>05_<env>_<scenario>'.
for env in ['caf', 'bus', 'str', 'ped']:
    mkdir_p(os.path.join(args.output_dir, '{}05_{}_{}'.format(
            stage, env, scenario
    )))

# Timing accumulators. Only t_io is updated in this excerpt; t_net and
# t_beamform are presumably updated further down the loop body.
t_io = 0
t_net = 0
t_beamform = 0
# Beamform loop
for cur_line in tqdm(flist):
    # Time the audio loading for this entry.
    with Timer() as t:
        if scenario == 'simu':
            # Simulated entries are loaded directly, with no context samples.
            audio_data = get_audio_data(cur_line)
            context_samples = 0
        elif scenario == 'real':
            # Real entries are three-element sequences whose items are
            # passed on to get_audio_data_with_context.
            audio_data, context_samples = get_audio_data_with_context(
                    cur_line[0], cur_line[1], cur_line[2])
    t_io += t.msecs
Esempio n. 10
0
            'Preparing training data and storing it in {}'.format(
                    args.data_dir))
    prepare_training_data(args.chime_dir, args.data_dir)

# Load the exported file list of each stage from
# <data_dir>/flist_<stage>.json.
flists = dict()
for stage in ['tr', 'dt']:
    with open(
            os.path.join(args.data_dir, 'flist_{}.json'.format(stage))) as fid:
        flists[stage] = json.load(fid)
log.debug('Loaded file lists')

# Prepare model: instantiate the requested estimator and create the
# directory named after it inside data_dir.
if args.model_type == 'BLSTM':
    model = BLSTMMaskEstimator()
    model_save_dir = os.path.join(args.data_dir, 'BLSTM_model')
    mkdir_p(model_save_dir)
elif args.model_type == 'FW':
    model = SimpleFWMaskEstimator()
    model_save_dir = os.path.join(args.data_dir, 'FW_model')
    mkdir_p(model_save_dir)
else:
    raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"')

# A non-negative args.gpu selects that device and moves the model to it; a
# negative value keeps everything on the CPU.
# NOTE(review): the cuda/to_gpu calls look like the Chainer API - confirm.
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
# Array module matching the chosen device: numpy on CPU, cupy on GPU.
xp = np if args.gpu < 0 else cuda.cupy
log.debug('Prepared model')

# Setup optimizer (Adam, constructed with no arguments)
optimizer = optimizers.Adam()