def load_data(args, pars):
    """Construct train/valid/test datasets and their DataLoaders.

    All three splits share the same path, chunk length, stride, sampling
    rate, trimming and energy-threshold settings; only the split name and
    the temporal-jitter flag differ.  Returns the six objects in the order
    (dataset_train, loader_train, dataset_valid, loader_valid,
    dataset_test, loader_test).
    """
    print('Data')

    def _make_split(split, temp_jitter):
        # One place for the shared DataSet arguments.
        return datain.DataSet(args.path_data, pars.lchunk, pars.stride,
                              split=split, sampling_rate=pars.sr,
                              trim=pars.trim,
                              frame_energy_thres=pars.frame_energy_thres,
                              temp_jitter=temp_jitter, seed=args.seed)

    # Temporal jitter is a data augmentation: enabled for training only.
    dataset_train = _make_split('train', pars.augment > 0)
    dataset_valid = _make_split('valid', False)
    dataset_test = _make_split('test', False)

    loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.sbatch, shuffle=True, drop_last=True,
        num_workers=pars.nworkers)
    loader_valid = torch.utils.data.DataLoader(
        dataset_valid, batch_size=args.sbatch, shuffle=False,
        num_workers=pars.nworkers)
    # Test chunks are processed one at a time (batch_size=1).
    loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False,
        num_workers=pars.nworkers)

    print('-' * 100)
    return (dataset_train, loader_train, dataset_valid, loader_valid,
            dataset_test, loader_test)
def compute_speaker_averages(speakers, trim=5 * 60):
    """Compute the per-speaker mean of the model's first output z.

    Runs the model over the 'train+valid' split (up to ``trim`` seconds of
    audio per file — default 5 minutes) and averages z over every chunk
    belonging to each speaker.

    NOTE(review): relies on module-level globals ``args``, ``pars``,
    ``model`` and ``datain`` rather than taking them as parameters.

    Args:
        speakers: mapping from speaker filename key to speaker id; the
            returned dict is keyed by these ids.
        trim: maximum seconds of audio to load per file.

    Returns:
        dict mapping speaker id -> averaged z tensor.
    """
    print('(' * 100)
    print('[Averages]')
    # Optionally restrict the dataset to just the forced source/target pair.
    select_speaker = None
    if args.force_source_speaker is not None and args.force_target_speaker is not None:
        select_speaker = args.force_source_speaker + ',' + args.force_target_speaker
    # stride == lchunk, i.e. non-overlapping chunks over the audio.
    dataset = datain.DataSet(args.path_data, args.lchunk, args.lchunk,
                             sampling_rate=pars.sr, split='train+valid',
                             trim=trim, select_speaker=select_speaker,
                             seed=pars.seed)
    loader = torch.utils.data.DataLoader(dataset, batch_size=args.sbatch,
                                         shuffle=False, num_workers=0)
    averages = {}  # speaker id -> running sum of z (mean after the loop)
    count = {}     # speaker id -> number of chunks summed
    with torch.no_grad():
        for b, (x, idx) in enumerate(loader):
            x = x.to(args.device)
            # Column 3 of idx is the speaker index fed to the model.
            s = idx[:, 3].to(args.device)
            z = model.forward(x, s)[0]
            for n in range(len(idx)):
                i, j, last, ispk, ichap = idx[n]
                # Resolve the speaker id from the chunk's source filename.
                spk, _ = dataset.filename_split(dataset.filenames[i])
                spk = speakers[spk]
                if spk not in averages:
                    averages[spk] = torch.zeros_like(z[n])
                    count[spk] = 0
                averages[spk] += z[n]
                count[spk] += 1
            # In-place progress line (carriage return, no newline).
            print('\r---> Speaker(s) average: {:5.1f}%'.format(
                100 * (b * args.sbatch + x.size(0)) / len(dataset)), end='')
    print()
    # Turn the running sums into means.
    for spk in averages.keys():
        averages[spk] = averages[spk] / count[spk]
    print(')' * 100)
    return averages
end='') print() for spk in averages.keys(): averages[spk] = averages[spk] / count[spk] print(')' * 100) return averages ######################################################################################################################## # Data print('Load metadata') dataset = datain.DataSet(args.path_data, pars.lchunk, pars.stride, sampling_rate=pars.sr, split='train+valid', seed=pars.seed, do_audio_load=False) speakers = deepcopy(dataset.speakers) lspeakers = list(speakers.keys()) if args.zavg: averages = compute_speaker_averages(speakers) # Input data print('Load', args.split, 'audio') dataset = datain.DataSet(args.path_data, args.lchunk, args.stride, sampling_rate=pars.sr, split=args.split,
) window = torch.hann_window(args.lchunk) args.synth_nonorm = False window = window.view(1, -1) print('-' * 100) ######################################################################################################################## # Data print('Load VCTK_22kHz_adapt test split speakers') adapt_data_path = os.path.join(args.path_data_root, 'VCTK_22kHz_10') dataset = datain.DataSet(adapt_data_path, pars.lchunk, pars.stride, sampling_rate=pars.sr, split='test', seed=pars.seed, do_audio_load=False) speakers = deepcopy(dataset.speakers) lspeakers = list(speakers.keys()) print('Adapting to these speakers:') print(lspeakers) # Input data print('Load VCTK_22kHz_train test split audio') train_data_path = os.path.join(args.path_data_root, 'VCTK_22kHz_98') dataset = datain.DataSet(train_data_path, args.lchunk, args.stride, sampling_rate=pars.sr,
# Carry the current run's overrides into the loaded parameter object, then
# adopt it as the effective argument namespace for the rest of the script.
pars.load_existing = args.load_existing
pars.base_fn_out = args.base_fn_out
pars.multigpu = args.multigpu
pars.sbatch = args.sbatch
pars.optim = args.optim
pars.lr = args.lr
args = pars  # from here on, args carries the merged configuration
print('New arguments')
utils.print_arguments(args)
########################################################################################################################
# Data sets and data loaders
print('Data')
# Temporal jitter augmentation is applied to the training split only.
dataset_train = datain.DataSet(args.path_data, args.lchunk, args.stride, split='train', sampling_rate=args.sr,
                               trim=args.trim, frame_energy_thres=args.frame_energy_thres,
                               temp_jitter=args.augment > 0,
                               seed=args.seed)
dataset_valid = datain.DataSet(args.path_data, args.lchunk, args.stride, split='valid', sampling_rate=args.sr,
                               trim=args.trim, frame_energy_thres=args.frame_energy_thres,
                               temp_jitter=False,
                               seed=args.seed)
dataset_test = datain.DataSet(args.path_data, args.lchunk, args.stride, split='test', sampling_rate=args.sr,
                              trim=args.trim, frame_energy_thres=args.frame_energy_thres,
                              temp_jitter=False,
                              seed=args.seed)
loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=args.sbatch, shuffle=True, drop_last=True, num_workers=args.nworkers)
loader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=args.sbatch, shuffle=False, num_workers=args.nworkers)
# NOTE(review): the test loader uses batch_size=args.sbatch here, whereas the
# load_data() helper elsewhere in this codebase uses batch_size=1 — confirm
# this difference is intentional for this script.
loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=args.sbatch, shuffle=False, num_workers=args.nworkers)
print('-' * 100)
########################################################################################################################
utils.print_arguments(args) # Seed np.random.seed(args.seed) torch.manual_seed(args.seed) if args.device == 'cuda': torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False torch.cuda.manual_seed(args.seed) # Data sets and data loaders print('Data') dataset_train = datain.DataSet(args.path_data, args.lchunk, args.stride, # select_speaker='p285,p303', # split = 'train' or 'train-overfit' split=args.train_split, sampling_rate=args.sr, trim=args.trim, frame_energy_thres=args.frame_energy_thres, temp_jitter=args.augment > 0, seed=args.seed) dataset_valid = datain.DataSet(args.path_data, args.lchunk, args.stride, # select_speaker='p285,p303', split='valid', sampling_rate=args.sr, trim=args.trim, frame_energy_thres=args.frame_energy_thres, temp_jitter=False, seed=args.seed) dataset_test = datain.DataSet(args.path_data, args.lchunk, args.stride, # select_speaker='p285,p303', split='test', sampling_rate=args.sr, trim=args.trim, frame_energy_thres=args.frame_energy_thres,