コード例 #1
0
ファイル: train_encoder.py プロジェクト: y-kamiya/TTS
def main(args):  # pylint: disable=redefined-outer-name
    # pylint: disable=global-variable-undefined
    global meta_data_train
    global meta_data_eval

    ap = AudioProcessor(**c.audio)
    model = SpeakerEncoder(
        input_dim=c.model["input_dim"],
        proj_dim=c.model["proj_dim"],
        lstm_dim=c.model["lstm_dim"],
        num_lstm_layers=c.model["num_lstm_layers"],
    )
    optimizer = RAdam(model.parameters(), lr=c.lr)

    if c.loss == "ge2e":
        criterion = GE2ELoss(loss_method="softmax")
    elif c.loss == "angleproto":
        criterion = AngleProtoLoss()
    else:
        raise Exception("The %s  not is a loss supported" % c.loss)

    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        try:
            # TODO: fix optimizer init, model.cuda() needs to be called before
            # optimizer restore
            # optimizer.load_state_dict(checkpoint['optimizer'])
            if c.reinit_layers:
                raise RuntimeError
            model.load_state_dict(checkpoint["model"])
        except KeyError:
            print(" > Partial model initialization.")
            model_dict = model.state_dict()
            model_dict = set_init_dict(model_dict, checkpoint, c)
            model.load_state_dict(model_dict)
            del model_dict
        for group in optimizer.param_groups:
            group["lr"] = c.lr
        print(" > Model restored from step %d" % checkpoint["step"], flush=True)
        args.restore_step = checkpoint["step"]
    else:
        args.restore_step = 0

    if use_cuda:
        model = model.cuda()
        criterion.cuda()

    if c.lr_decay:
        scheduler = NoamLR(optimizer, warmup_steps=c.warmup_steps, last_epoch=args.restore_step - 1)
    else:
        scheduler = None

    num_params = count_parameters(model)
    print("\n > Model has {} parameters".format(num_params), flush=True)

    # pylint: disable=redefined-outer-name
    meta_data_train, meta_data_eval = load_meta_data(c.datasets)

    global_step = args.restore_step
    _, global_step = train(model, criterion, optimizer, scheduler, ap, global_step)
コード例 #2
0
            #print(f'wav_file: {wav_file}')
            if os.path.exists(wav_file):
                wav_files.append(wav_file)
    print(f'Count of wavs imported: {len(wav_files)}')
else:
    # Parse all wav files in data_path
    wav_path = data_path
    wav_files = glob.glob(data_path + '/**/*.wav', recursive=True)

output_files = [
    wav_file.replace(wav_path, args.output_path).replace('.wav', '.npy')
    for wav_file in wav_files
]

for output_file in output_files:
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

model = SpeakerEncoder(**c.model)
model.load_state_dict(torch.load(args.model_path)['model'])
model.eval()
if args.use_cuda:
    model.cuda()

for idx, wav_file in enumerate(tqdm(wav_files)):
    mel_spec = ap.melspectrogram(ap.load_wav(wav_file)).T
    mel_spec = torch.FloatTensor(mel_spec[None, :, :])
    if args.use_cuda:
        mel_spec = mel_spec.cuda()
    embedd = model.compute_embedding(mel_spec)
    np.save(output_files[idx], embedd.detach().cpu().numpy())