Example no. 1
0
def load_data(args, pars):
    """Build the train/valid/test datasets and their data loaders.

    Args:
        args: runtime arguments; reads path_data, seed, sbatch.
        pars: data/model hyper-parameters; reads lchunk, stride, sr, trim,
            frame_energy_thres, augment, nworkers.

    Returns:
        Tuple (dataset_train, loader_train, dataset_valid, loader_valid,
        dataset_test, loader_test).
    """
    print('Data')

    def _make_dataset(split, jitter):
        # All three splits share the same construction; only the split
        # name and the temporal-jitter flag differ.
        return datain.DataSet(args.path_data,
                              pars.lchunk,
                              pars.stride,
                              split=split,
                              sampling_rate=pars.sr,
                              trim=pars.trim,
                              frame_energy_thres=pars.frame_energy_thres,
                              temp_jitter=jitter,
                              seed=args.seed)

    # Temporal jitter is an augmentation, so it is enabled for training only.
    dataset_train = _make_dataset('train', pars.augment > 0)
    dataset_valid = _make_dataset('valid', False)
    dataset_test = _make_dataset('test', False)

    loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.sbatch, shuffle=True,
        drop_last=True, num_workers=pars.nworkers)
    loader_valid = torch.utils.data.DataLoader(
        dataset_valid, batch_size=args.sbatch, shuffle=False,
        num_workers=pars.nworkers)
    # Test loader processes one chunk at a time (batch_size=1, no shuffle).
    loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False,
        num_workers=pars.nworkers)

    print('-' * 100)
    return dataset_train, loader_train, dataset_valid, loader_valid, dataset_test, loader_test
Example no. 2
0
def compute_speaker_averages(speakers, trim=5 * 60):
    """Compute the per-speaker mean of the model's first forward output.

    Runs the model over the train+valid split and averages the latent
    codes belonging to each speaker.

    Args:
        speakers: mapping from speaker name to speaker id.
        trim: seconds of audio kept per file (default 5 minutes).

    Returns:
        Dict mapping speaker id to its mean latent tensor.

    NOTE(review): relies on module-level globals (args, pars, model,
    datain) being defined before this is called.
    """
    print('(' * 100)
    print('[Averages]')
    # When both a source and a target speaker are forced, restrict the
    # dataset to exactly those two speakers.
    select_speaker = None
    if args.force_source_speaker is not None and args.force_target_speaker is not None:
        select_speaker = args.force_source_speaker + ',' + args.force_target_speaker
    dataset = datain.DataSet(args.path_data,
                             args.lchunk,
                             args.lchunk,
                             sampling_rate=pars.sr,
                             split='train+valid',
                             trim=trim,
                             select_speaker=select_speaker,
                             seed=pars.seed)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.sbatch,
                                         shuffle=False,
                                         num_workers=0)
    sums = {}
    counts = {}
    with torch.no_grad():
        for batch_i, (audio, info) in enumerate(loader):
            audio = audio.to(args.device)
            spk_ids = info[:, 3].to(args.device)
            latents = model.forward(audio, spk_ids)[0]
            for row in range(len(info)):
                # First field of an info row is the file index; the
                # remaining fields are not needed here.
                file_i = info[row][0]
                name, _ = dataset.filename_split(dataset.filenames[file_i])
                spk = speakers[name]
                if spk in sums:
                    sums[spk] = sums[spk] + latents[row]
                    counts[spk] += 1
                else:
                    sums[spk] = latents[row].clone()
                    counts[spk] = 1
            done = batch_i * args.sbatch + audio.size(0)
            print('\r---> Speaker(s) average: {:5.1f}%'.format(
                100 * done / len(dataset)),
                  end='')
        print()
    # Mean = accumulated sum divided by the number of chunks seen.
    averages = {spk: total / counts[spk] for spk, total in sums.items()}
    print(')' * 100)
    return averages
Example no. 3
0
                  end='')
        print()
    for spk in averages.keys():
        averages[spk] = averages[spk] / count[spk]
    print(')' * 100)
    return averages


########################################################################################################################

# Data: build the dataset with metadata only
# (do_audio_load=False — presumably skips reading the waveforms; confirm in datain)
print('Load metadata')
dataset = datain.DataSet(args.path_data,
                         pars.lchunk,
                         pars.stride,
                         sampling_rate=pars.sr,
                         split='train+valid',
                         seed=pars.seed,
                         do_audio_load=False)
# Snapshot the speaker table before `dataset` is rebound below.
speakers = deepcopy(dataset.speakers)
lspeakers = list(speakers.keys())
# Optionally precompute per-speaker latent averages.
if args.zavg:
    averages = compute_speaker_averages(speakers)

# Input data
print('Load', args.split, 'audio')
dataset = datain.DataSet(args.path_data,
                         args.lchunk,
                         args.stride,
                         sampling_rate=pars.sr,
                         split=args.split,
Example no. 4
0
    )
    window = torch.hann_window(args.lchunk)
    args.synth_nonorm = False
window = window.view(1, -1)  # reshape to (1, lchunk); presumably broadcast over a batch — TODO confirm

print('-' * 100)

########################################################################################################################

# Data: speaker metadata from the VCTK_22kHz_10 adaptation set
print('Load VCTK_22kHz_adapt test split speakers')
adapt_data_path = os.path.join(args.path_data_root, 'VCTK_22kHz_10')
dataset = datain.DataSet(adapt_data_path,
                         pars.lchunk,
                         pars.stride,
                         sampling_rate=pars.sr,
                         split='test',
                         seed=pars.seed,
                         do_audio_load=False)  # metadata only — presumably skips waveform loading; confirm in datain
# Snapshot the speaker table before `dataset` is rebound below.
speakers = deepcopy(dataset.speakers)
lspeakers = list(speakers.keys())
print('Adapting to these speakers:')
print(lspeakers)

# Input data: audio from the VCTK_22kHz_98 training set
print('Load VCTK_22kHz_train test split audio')
train_data_path = os.path.join(args.path_data_root, 'VCTK_22kHz_98')
dataset = datain.DataSet(train_data_path,
                         args.lchunk,
                         args.stride,
                         sampling_rate=pars.sr,
Example no. 5
0
    pars.load_existing=args.load_existing
    pars.base_fn_out=args.base_fn_out
    pars.multigpu=args.multigpu
    pars.sbatch=args.sbatch
    pars.optim=args.optim
    pars.lr=args.lr
    args=pars
    print('New arguments')
    utils.print_arguments(args)

########################################################################################################################

# Data sets and data loaders: one DataSet + DataLoader per split.
print('Data')
dataset_train = datain.DataSet(args.path_data, args.lchunk, args.stride,
                               split='train', sampling_rate=args.sr,
                               trim=args.trim,
                               frame_energy_thres=args.frame_energy_thres,
                               # Temporal jitter only when augmenting.
                               temp_jitter=args.augment > 0,
                               seed=args.seed)
dataset_valid = datain.DataSet(args.path_data, args.lchunk, args.stride,
                               split='valid', sampling_rate=args.sr,
                               trim=args.trim,
                               frame_energy_thres=args.frame_energy_thres,
                               temp_jitter=False,
                               seed=args.seed)
dataset_test = datain.DataSet(args.path_data, args.lchunk, args.stride,
                              split='test', sampling_rate=args.sr,
                              trim=args.trim,
                              frame_energy_thres=args.frame_energy_thres,
                              temp_jitter=False,
                              seed=args.seed)
# Only the training loader shuffles and drops the last partial batch.
loader_train = torch.utils.data.DataLoader(dataset_train,
                                           batch_size=args.sbatch,
                                           shuffle=True, drop_last=True,
                                           num_workers=args.nworkers)
loader_valid = torch.utils.data.DataLoader(dataset_valid,
                                           batch_size=args.sbatch,
                                           shuffle=False,
                                           num_workers=args.nworkers)
loader_test = torch.utils.data.DataLoader(dataset_test,
                                          batch_size=args.sbatch,
                                          shuffle=False,
                                          num_workers=args.nworkers)
print('-' * 100)

########################################################################################################################
Example no. 6
0
utils.print_arguments(args)

# Seed all RNGs for reproducibility; forcing cuDNN determinism may be slower.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.device == 'cuda':
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.cuda.manual_seed(args.seed)

# Data sets and data loaders (training split name comes from args.train_split)
print('Data')
dataset_train = datain.DataSet(args.path_data, args.lchunk, args.stride,
                               # select_speaker='p285,p303',
                               # split = 'train' or 'train-overfit'
                               split=args.train_split, sampling_rate=args.sr,
                               trim=args.trim,
                               frame_energy_thres=args.frame_energy_thres,
                               # Temporal jitter only when augmenting.
                               temp_jitter=args.augment > 0,
                               seed=args.seed)
dataset_valid = datain.DataSet(args.path_data, args.lchunk, args.stride,
                               # select_speaker='p285,p303',
                               split='valid', sampling_rate=args.sr,
                               trim=args.trim,
                               frame_energy_thres=args.frame_energy_thres,
                               temp_jitter=False,
                               seed=args.seed)
dataset_test = datain.DataSet(args.path_data, args.lchunk, args.stride,
                              # select_speaker='p285,p303',
                              split='test', sampling_rate=args.sr,
                              trim=args.trim,
                              frame_energy_thres=args.frame_energy_thres,