def prepare_training_data(chime_data_dir, dest_dir, suffix_id):
    """Export IBM training targets for the simulated CHiME data.

    For each stage ('tr' and 'dt') every utterance's clean and noise
    multi-channel audio is transformed with an STFT, an ideal binary
    mask pair is estimated, and the result is pickled into
    ``dest_dir/<stage>/``. A JSON file list per stage is written last.

    :param chime_data_dir: root of the simulated CHiME data
    :param dest_dir: output directory for pickled features and file lists
    :param suffix_id: identifier forwarded to ``gen_flist_simu`` and used
        in the exported file-list name
    """
    for stage in ['tr', 'dt']:
        fpath, utt_list = gen_flist_simu(chime_data_dir, stage, suffix_id)
        exported = []
        mkdir_p(os.path.join(dest_dir, stage))
        for utt in tqdm.tqdm(utt_list,
                             desc='Generating data for {}'.format(fpath)):
            clean_audio = get_audio_data(utt, '_clean')
            noise_audio = get_audio_data(utt, '_noise')
            # STFT yields (channel, frame, bin); reorder to (frame, channel, bin).
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            payload = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            utt_name = utt.split('/')[-1]
            with open(os.path.join(dest_dir, stage, utt_name), 'wb') as fid:
                pickle.dump(payload, fid)
            exported.append(os.path.join(stage, utt_name))
        flist_path = os.path.join(
            dest_dir, 'flist_{}_{}.json'.format(stage, suffix_id))
        with open(flist_path, 'w') as fid:
            json.dump(exported, fid, indent=4)
def test(args):
    """Evaluate a trained mask estimator on the 'dt' set.

    Builds the evaluation DataLoader, instantiates the model selected by
    ``args.model_type`` ('BLSTM' or 'FW'), and delegates the evaluation
    loop to ``ModelTest``.

    :param args: parsed CLI namespace (batch_size, num_workers,
        model_type, data_dir, ...)
    :raises ValueError: if ``args.model_type`` is neither 'BLSTM' nor 'FW'
    """
    from test_utils import ModelTest
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Prepare Data
    test_dataset = Chime_Dataset('dt', args)
    # Bug fix: the evaluation loader used shuffle=True. Evaluation should
    # be deterministic and order-independent; shuffling only makes
    # per-batch logs irreproducible.
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             pin_memory=True,
                             collate_fn=lambda x: Chime_Collate(x),
                             num_workers=args.num_workers)
    # Prepare model (the save dir is still created for parity with train())
    if args.model_type == 'BLSTM':
        model = BLSTMMaskEstimator()
        model_save_dir = os.path.join(args.data_dir, 'BLSTM_model')
        mkdir_p(model_save_dir)
    elif args.model_type == 'FW':
        model = SimpleFWMaskEstimator()
        model_save_dir = os.path.join(args.data_dir, 'FW_model')
        mkdir_p(model_save_dir)
    else:
        raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"')
    criterion = torch.nn.BCELoss()
    tester = ModelTest(model, test_loader, criterion, args, device)
    tester.test()
def train(args):
    """Train a mask estimator on the 'tr' set.

    Builds the training DataLoader, constructs the model chosen by
    ``args.model_type``, and runs ``ModelTrainer`` for
    ``args.num_epochs`` epochs with Adam (weight decay 1e-4).

    :param args: parsed CLI namespace (batch_size, num_workers,
        model_type, data_dir, learning_rate, num_epochs, ...)
    :raises ValueError: if ``args.model_type`` is neither 'BLSTM' nor 'FW'
    """
    from train_utils import ModelTrainer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Prepare Data
    train_dataset = Chime_Dataset('tr', args)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              collate_fn=lambda x: Chime_Collate(x),
                              num_workers=args.num_workers)
    # Prepare model via a small dispatch table instead of an if/elif chain.
    model_table = {
        'BLSTM': (BLSTMMaskEstimator, 'BLSTM_model'),
        'FW': (SimpleFWMaskEstimator, 'FW_model'),
    }
    if args.model_type not in model_table:
        raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"')
    model_cls, subdir = model_table[args.model_type]
    model = model_cls()
    model_save_dir = os.path.join(args.data_dir, subdir)
    mkdir_p(model_save_dir)
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-4)
    trainer = ModelTrainer(model, train_loader, criterion, optimizer, args,
                           device)
    trainer.train(args.num_epochs)
def prepare_clean_training_data(chime_data_dir, dest_dir):
    """Build IBM training targets using one long external clean recording.

    CHiME noise audio is paired with 6-channel "clean" audio fabricated by
    slicing consecutive segments out of a single long WAV file, then the
    usual STFT / IBM / pickle export is performed per utterance.

    Bug fix: the original per-channel loop did ``y = clean_data[start:end];
    start = end`` without ever advancing ``end``, so channels 2-6 received
    empty slices and ``np.concatenate`` raised on mismatched shapes. The
    loop now advances by the segment length on every channel.
    It also incremented nothing while printing ``reset_counter + 1``; the
    counter is now actually incremented.

    :param chime_data_dir: root of the simulated CHiME data
    :param dest_dir: output directory for pickled features and file lists
    """
    start = 0
    for stage in ['tr', 'dt']:
        reset_counter = 0
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        # NOTE(review): hard-coded machine-local path — consider making it
        # a parameter.
        clean_data = audioread(
            '/media/hipo/Mega Store/Dataset/single file/Chinese_tai_clean.wav')
        print("clean_data size:", clean_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            noise_audio = get_audio_data(f, '.Noise')
            # Use channel 1's noise file only to learn the utterance length.
            chime_size = audioread('{}.CH{}{}.Noise.wav'.format(f, 1, ''))
            seg_len = chime_size.shape[0]
            clean_files = list()
            # Six consecutive segments, one per channel (assumes the intent
            # was distinct material per channel — TODO confirm).
            for i in range(1, 7):
                if start + seg_len > clean_data.shape[0]:
                    # Source recording exhausted: wrap around to the start.
                    reset_counter += 1
                    print("reset counter: ", reset_counter)
                    start = 0
                y = clean_data[start:start + seg_len]
                start += seg_len
                clean_files.append(y[None, :])
            clean_files = np.concatenate(clean_files, axis=0)
            clean_files = clean_files.astype(np.float32)
            clean_audio = clean_files
            # STFT yields (channel, frame, bin); reorder to (frame, channel, bin).
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            export_name = os.path.join(dest_dir, stage, f.split('/')[-1])
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, f.split('/')[-1]))
        with open(os.path.join(dest_dir,
                               'flist_{}.json'.format(stage)), 'w') as fid:
            json.dump(export_flist, fid, indent=4)
def prepare_training_data(chime_data_dir, dest_dir):
    """Export IBM training targets for the simulated CHiME data (ext lists).

    NOTE(review): this file also defines ``prepare_training_data`` with a
    ``suffix_id`` parameter; at import time the later definition shadows
    the earlier one — confirm which is intended.

    :param chime_data_dir: root of the simulated CHiME data
    :param dest_dir: output directory for pickled features and file lists
    """
    for stage in ['tr', 'dt']:
        utterances = gen_flist_simu(chime_data_dir, stage, ext=True)
        exported = []
        mkdir_p(os.path.join(dest_dir, stage))
        for utt in tqdm.tqdm(utterances,
                             desc='Generating data for {}'.format(stage)):
            clean_audio = get_audio_data(utt, '.Clean')
            noise_audio = get_audio_data(utt, '.Noise')
            # STFT yields (channel, frame, bin); reorder to (frame, channel, bin).
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            payload = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            utt_name = utt.split('/')[-1]
            with open(os.path.join(dest_dir, stage, utt_name), 'wb') as fid:
                pickle.dump(payload, fid)
            exported.append(os.path.join(stage, utt_name))
        with open(os.path.join(dest_dir,
                               'flist_{}.json'.format(stage)), 'w') as fid:
            json.dump(exported, fid, indent=4)
# Script fragment: derive stage/scenario from the CLI file-list name,
# build the matching CHiME file list, create per-beamformer output
# directories, then time the audio-loading half of the beamform loop.
# NOTE(review): `args`, `beamformers`, `Timer`, and `tqdm` (imported as a
# bare name here, unlike the `tqdm.tqdm` usage elsewhere in this file)
# are defined outside this fragment.
stage = args.flist[:2]  # e.g. 'tr'/'dt'/'et' prefix of names like 'tr05_simu'
scenario = args.flist.split('_')[-1]

# CHiME data handling
if scenario == 'simu':
    flist = gen_flist_simu(args.chime_dir, stage)
elif scenario == 'real':
    flist = gen_flist_real(args.chime_dir, stage)
else:
    raise ValueError('Unknown flist {}'.format(args.flist))

# One output directory per (beamformer, environment) pair.
for env in ['caf', 'bus', 'str', 'ped']:
    for beamformer in beamformers:
        mkdir_p(os.path.join(args.output_dir, beamformer, '{}05_{}_{}'.format(
            stage, env, scenario
        )))

# Accumulated timings in milliseconds (t.msecs below).
t_io = 0
t_net = 0
t_beamform = 0

# Beamform loop
for cur_line in tqdm(flist):
    with Timer() as t:
        if scenario == 'simu':
            audio_data = get_audio_data(cur_line)
            context_samples = 0
        elif scenario == 'real':
            # Real entries are (session, start, end)-style triples —
            # presumably; verify against gen_flist_real.
            audio_data, context_samples = get_audio_data_with_context(
                cur_line[0], cur_line[1], cur_line[2])
    t_io += t.msecs
# Script fragment (Chainer-era training driver): prepare pickled training
# data, load the per-stage file lists, build the requested model, move it
# to GPU if asked, and set up the Adam optimizer.
# NOTE(review): `log`, `args`, `cuda`, and `optimizers` (Chainer) are
# defined outside this fragment; the optimizer is created but not yet
# attached to the model here.
log.info('Preparing training data and storing it in {}'.format(
    args.data_dir))
prepare_training_data(args.chime_dir, args.data_dir)

flists = dict()
for stage in ['tr', 'dt']:
    with open(os.path.join(args.data_dir,
                           'flist_{}.json'.format(stage))) as fid:
        flists[stage] = json.load(fid)
log.debug('Loaded file lists')

# Prepare model
if args.model_type == 'BLSTM':
    model = BLSTMMaskEstimator()
    model_save_dir = os.path.join(args.data_dir, 'BLSTM_model')
    mkdir_p(model_save_dir)
elif args.model_type == 'FW':
    model = SimpleFWMaskEstimator()
    model_save_dir = os.path.join(args.data_dir, 'FW_model')
    mkdir_p(model_save_dir)
else:
    raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"')

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
# xp is the array module: numpy on CPU, cupy on GPU.
xp = np if args.gpu < 0 else cuda.cupy
log.debug('Prepared model')

# Setup optimizer
optimizer = optimizers.Adam()
def prepare_other_training_data(train_dir, dest_dir):
    """Mix standalone clean utterances with slices of one long noise WAV.

    For each stage ('tr'/'dt') every file under ``train_dir``'s stage
    subdirectory is loaded as clean audio, paired with an equally long
    consecutive slice of a single external noise recording, and exported
    as pickled STFT/IBM features plus a per-stage JSON file list.

    Bug fixes:
    - ``if stage is 'dt'`` compared string *identity*, which is an
      implementation detail of CPython interning (and a SyntaxWarning on
      modern Pythons); changed to ``==``.
    - ``reset_counter`` was printed as ``reset_counter + 1`` but never
      incremented; it is now incremented.

    :param train_dir: directory whose 'tr'/'dt' subdirectories hold the
        clean WAV files (note the ``[:-1]`` trailing-separator strip)
    :param dest_dir: output directory for pickled features and file lists
    """
    start = 0
    chime_data_dir = os.path.join(train_dir[:-1], 'tr')
    print(chime_data_dir)
    for stage in ['tr', 'dt']:
        if stage == 'dt':
            chime_data_dir = os.path.join(train_dir[:-1], 'dt')
            print(chime_data_dir)
        reset_counter = 0
        flist = [f for f in listdir(chime_data_dir)
                 if isfile(join(chime_data_dir, f))]
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        # NOTE(review): hard-coded machine-local path — consider making it
        # a parameter.
        noise_data = audioread(
            '/media/hipo/lento/Dataset/single file/noise_files/all_noise.wav')
        print("noise_data size:", noise_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            path = os.path.join(chime_data_dir, f)
            clean_audio = get_audio_single(path)
            # Re-read only to learn this utterance's sample count.
            chime_size = audioread(path)
            noise_files = list()
            end = chime_size.shape[0] + start
            if end > noise_data.shape[0]:
                # Noise recording exhausted: wrap around to the start.
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = chime_size.shape[0] + start
            # Single channel (range(1, 2) runs once), kept for symmetry
            # with the multi-channel variants in this file.
            for i in range(1, 2):
                y = noise_data[start:end]
                start = end
                noise_files.append(y[None, :])
            noise_files = np.concatenate(noise_files, axis=0)
            noise_files = noise_files.astype(np.float32)
            noise_audio = noise_files
            # STFT yields (channel, frame, bin); reorder to (frame, channel, bin).
            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            export_name = os.path.join(dest_dir, stage, f.split('/')[-1])
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, f.split('/')[-1]))
        with open(os.path.join(dest_dir,
                               'flist_{}.json'.format(stage)), 'w') as fid:
            json.dump(export_flist, fid, indent=4)
# Script fragment: near-duplicate of the beamformer setup elsewhere in
# this file, but with a single output directory per environment (no
# per-beamformer subdirectory) and an explicit numpy/cupy selection.
# NOTE(review): `args`, `cuda`, `Timer`, and `tqdm` (bare-name import)
# are defined outside this fragment.
# xp is the array module: numpy on CPU, cupy on GPU.
xp = np if args.gpu < 0 else cuda.cupy
stage = args.flist[:2]  # e.g. 'tr'/'dt'/'et' prefix of names like 'tr05_simu'
scenario = args.flist.split('_')[-1]

# CHiME data handling
if scenario == 'simu':
    flist = gen_flist_simu(args.chime_dir, stage)
elif scenario == 'real':
    flist = gen_flist_real(args.chime_dir, stage)
else:
    raise ValueError('Unknown flist {}'.format(args.flist))

for env in ['caf', 'bus', 'str', 'ped']:
    mkdir_p(os.path.join(args.output_dir, '{}05_{}_{}'.format(
        stage, env, scenario
    )))

# Accumulated timings in milliseconds (t.msecs below).
t_io = 0
t_net = 0
t_beamform = 0

# Beamform loop
for cur_line in tqdm(flist):
    with Timer() as t:
        if scenario == 'simu':
            audio_data = get_audio_data(cur_line)
            context_samples = 0
        elif scenario == 'real':
            # Real entries are (session, start, end)-style triples —
            # presumably; verify against gen_flist_real.
            audio_data, context_samples = get_audio_data_with_context(
                cur_line[0], cur_line[1], cur_line[2])
    t_io += t.msecs
'Preparing training data and storing it in {}'.format( args.data_dir)) prepare_training_data(args.chime_dir, args.data_dir) flists = dict() for stage in ['tr', 'dt']: with open( os.path.join(args.data_dir, 'flist_{}.json'.format(stage))) as fid: flists[stage] = json.load(fid) log.debug('Loaded file lists') # Prepare model if args.model_type == 'BLSTM': model = BLSTMMaskEstimator() model_save_dir = os.path.join(args.data_dir, 'BLSTM_model') mkdir_p(model_save_dir) elif args.model_type == 'FW': model = SimpleFWMaskEstimator() model_save_dir = os.path.join(args.data_dir, 'FW_model') mkdir_p(model_save_dir) else: raise ValueError('Unknown model type. Possible are "BLSTM" and "FW"') if args.gpu >= 0: cuda.get_device(args.gpu).use() model.to_gpu() xp = np if args.gpu < 0 else cuda.cupy log.debug('Prepared model') # Setup optimizer optimizer = optimizers.Adam()