コード例 #1
0
def main(args):
    """Run the training loop for the AlignTTS stage given by ``args.stage``.

    Stage 0 starts from a freshly constructed model. Any other stage is
    initialised from the checkpoint of the preceding stage whose step number
    is ``hparams.train_steps[args.stage - 1]``. Gradients are accumulated
    over ``hparams.accumulation`` batches per optimizer step, and the loop
    ends once the batch counter equals
    ``hparams.train_steps[args.stage] * hparams.accumulation``.

    :param args: Parsed command-line arguments; only ``args.stage`` is read.
    """
    train_loader, val_loader, collate_fn = prepare_dataloaders(hparams, stage=args.stage)

    if args.stage == 0:
        model = nn.DataParallel(Model(hparams)).cuda()
    else:
        checkpoint_path = f"training_log/aligntts/stage{args.stage-1}/checkpoint_{hparams.train_steps[args.stage-1]}"
        saved_weights = torch.load(checkpoint_path)['state_dict']
        # Every key loses its first 7 characters -- presumably the "module."
        # prefix written by nn.DataParallel; confirm against save_checkpoint.
        state_dict = {name[7:]: weight for name, weight in saved_weights.items()}

        model = Model(hparams).cuda()
        model.load_state_dict(state_dict)
        model = nn.DataParallel(model).cuda()

    criterion = MDNLoss()
    writer = get_writer(hparams.output_directory, f'{hparams.log_directory}/stage{args.stage}')
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=hparams.lr,
        betas=(0.9, 0.98),
        eps=1e-09,
    )
    # micro_step counts processed batches; running_loss sums the scaled
    # losses of the batches in the current accumulation window.
    micro_step, running_loss = 0, 0
    model.train()

    print(f'Stage{args.stage} Start!!! ({str(datetime.now())})')
    while True:
        for batch in train_loader:
            tensors = [reorder_batch(item, hparams.n_gpus).cuda() for item in batch]
            if args.stage == 0:
                # Stage-0 batches carry no alignment tensor.
                text_padded, mel_padded, text_lengths, mel_lengths = tensors
                align_padded = None
            else:
                text_padded, mel_padded, align_padded, text_lengths, mel_lengths = tensors

            sub_loss = model(
                text_padded,
                mel_padded,
                align_padded,
                text_lengths,
                mel_lengths,
                criterion,
                stage=args.stage,
            )
            # Scale so that the accumulated gradient matches one large batch.
            sub_loss = sub_loss.mean()/hparams.accumulation
            sub_loss.backward()
            running_loss += sub_loss.item()
            micro_step += 1

            if micro_step % hparams.accumulation == 0:
                optimizer_step = micro_step // hparams.accumulation
                lr_scheduling(optimizer, optimizer_step)
                nn.utils.clip_grad_norm_(model.parameters(), hparams.grad_clip_thresh)
                optimizer.step()
                model.zero_grad()
                writer.add_scalar('Train loss', running_loss, optimizer_step)
                running_loss = 0

            if micro_step % (hparams.iters_per_validation*hparams.accumulation) == 0:
                validate(model, criterion, val_loader, micro_step, writer, args.stage)

            if micro_step % (hparams.iters_per_checkpoint*hparams.accumulation) == 0:
                save_checkpoint(
                    model,
                    optimizer,
                    hparams.lr,
                    micro_step // hparams.accumulation,
                    filepath=f'{hparams.output_directory}/{hparams.log_directory}/stage{args.stage}',
                )

            if micro_step == hparams.train_steps[args.stage]*hparams.accumulation:
                break

        # Re-check after the epoch loop so the outer loop also terminates.
        if micro_step == hparams.train_steps[args.stage]*hparams.accumulation:
            break

    print(f'Stage{args.stage} End!!! ({str(datetime.now())})')
コード例 #2
0
def main():
    """Extract per-token alignments with the stage-0 model and save them.

    Loads the stage-0 checkpoint, reads the metadata file in batches of 64
    entries, runs the model's lower FFT stack and MDN loss to obtain a
    log-probability matrix, decodes it with ``model.viterbi`` and writes, for
    every utterance, the alignment summed over the mel axis to
    ``<preprocessed>/alignments/<file_name>_alignment.npy``.

    Each non-blank metadata line must contain exactly three ``|``-separated
    fields, the first of which is the audio file name.

    :raises ValueError: If a metadata line does not have exactly three fields.
    """
    # Local import keeps the block self-contained: the final message now
    # really evaluates datetime.now() (it was a plain string before).
    from datetime import datetime

    data_type = 'phone'
    checkpoint_path = f"training_log/aligntts/stage0/checkpoint_{hparams.train_steps[0]}"
    # Drop the first 7 characters of every key -- presumably the "module."
    # prefix written by nn.DataParallel; confirm against save_checkpoint.
    state_dict = {
        k[7:]: v
        for k, v in torch.load(checkpoint_path)['state_dict'].items()
    }

    model = Model(hparams).cuda()
    model.load_state_dict(state_dict)
    model.eval()
    criterion = MDNLoss()

    #datasets = ['train', 'val', 'test']
    datasets = ['train']
    batch_size = 64

    # Loop-invariant locations, computed once instead of once per utterance.
    metadata_path = '/hd0/speech-aligner/metadata/metadata.csv'
    preprocessed_dir = '/hd0/speech-aligner/preprocessed/VCTK20_engspks'
    seq_dir = os.path.join(preprocessed_dir, f'{data_type}_seq')
    mel_dir = os.path.join(preprocessed_dir, 'melspectrogram')
    align_dir = f'{preprocessed_dir}/alignments'

    for dataset in datasets:
        # NOTE(review): `dataset` is not used to pick the file; every entry
        # of `datasets` reads the same metadata file.
        with open(metadata_path, 'r') as f:
            lines_raw = [
                line.split('|') for line in f.read().splitlines()
                if line.strip()  # a blank line would break the 3-way unpack
            ]

        # Stepping by batch_size never yields an empty batch; the previous
        # `len // batch_size + 1` form did whenever the line count was a
        # multiple of batch_size, which made `.max()` fail on an empty tensor.
        lines_list = [
            lines_raw[start:start + batch_size]
            for start in range(0, len(lines_raw), batch_size)
        ]

        for batch in tqdm(lines_list):
            file_list, text_list, mel_list = [], [], []

            for file_name, _, _ in batch:
                file_name = os.path.splitext(file_name)[0]
                file_list.append(file_name)
                text_list.append(
                    torch.from_numpy(
                        np.load(f'{seq_dir}/{file_name}_sequence.npy')))
                mel_list.append(
                    torch.from_numpy(
                        np.load(f'{mel_dir}/{file_name}_melspectrogram.npy')))

            # Sequence length is axis 0 of the text array and axis 1 of the
            # mel array (axis 0 of the mel is the channel axis).
            text_lengths = torch.LongTensor([seq.size(0) for seq in text_list])
            mel_lengths = torch.LongTensor([mel.size(1) for mel in mel_list])
            text_padded = torch.zeros(len(batch),
                                      text_lengths.max().item(),
                                      dtype=torch.long)
            mel_padded = torch.zeros(len(batch), hparams.n_mel_channels,
                                     mel_lengths.max().item())

            for j, (seq, mel) in enumerate(zip(text_list, mel_list)):
                text_padded[j, :seq.size(0)] = seq
                mel_padded[j, :, :mel.size(1)] = mel

            text_padded = text_padded.cuda()
            mel_padded = mel_padded.cuda()
            # Clamp to [min_db, max_db] and rescale to [0, 1].
            mel_padded = (
                torch.clamp(mel_padded, hparams.min_db, hparams.max_db) -
                hparams.min_db) / (hparams.max_db - hparams.min_db)
            text_lengths = text_lengths.cuda()
            mel_lengths = mel_lengths.cuda()

            with torch.no_grad():
                encoder_input = model.Prenet(text_padded)
                hidden_states, _ = model.FFT_lower(encoder_input, text_lengths)
                mu_sigma = model.get_mu_sigma(hidden_states)
                _, log_prob_matrix = criterion(mu_sigma, mel_padded,
                                               text_lengths, mel_lengths)

                align = model.viterbi(log_prob_matrix, text_lengths,
                                      mel_lengths).to(torch.long)

            for j, (text_len, mel_len) in enumerate(
                    zip(text_lengths.tolist(), mel_lengths.tolist())):
                # Crop the padding, then sum over the mel axis: one value per
                # text token (presumably its duration in frames, if the
                # alignment is 0/1 -- confirm against model.viterbi).
                durations = align[j, :text_len, :mel_len].sum(dim=-1)
                os.makedirs(
                    "{}/{}".format(align_dir, file_list[j].split('/')[0]),
                    exist_ok=True)
                np.save(f'{align_dir}/{file_list[j]}_alignment.npy',
                        durations.detach().cpu().numpy())

    print(f"Alignments Extraction End!!! ({datetime.now()})")
コード例 #3
0
def training_process(device, nb_class_labels, model_path, result_dir, patience,
                     epochs, do_pre_train, tr_feat_path, tr_labels_path,
                     val_feat_path, val_labels_path, tr_batch_size,
                     val_batch_size, adapt_patience, adapt_epochs, d_lr,
                     tgt_lr, update_cnt, factor):
    """Run the whole AUDASC training pipeline.

    The pipeline has two phases. First a non-adapted feature extractor and a
    label classifier are obtained, either by pre-training them here on the
    ``'A'`` entries of the data or by loading previously saved weights.
    Then a copy of the feature extractor is adapted adversarially against a
    two-class discriminator.

    :param device: The device to run on.
    :type device: str
    :param nb_class_labels: Number of labels for label classification.
    :type nb_class_labels: int
    :param model_path: Directory of a previously saved model; read only
                       when ``do_pre_train`` is false.
    :type model_path: str
    :param result_dir: Directory where results are saved; created if absent.
    :type result_dir: str
    :param patience: Patience for the pre-training step.
    :type patience: int
    :param epochs: Epochs for the pre-training step.
    :type epochs: int
    :param do_pre_train: Whether to run pre-training instead of loading.
    :type do_pre_train: bool
    :param tr_feat_path: Path of the pickled training features.
    :type tr_feat_path: str
    :param tr_labels_path: Path of the pickled training labels.
    :type tr_labels_path: str
    :param val_feat_path: Path of the pickled validation features.
    :type val_feat_path: str
    :param val_labels_path: Path of the pickled validation labels.
    :type val_labels_path: str
    :param tr_batch_size: Batch size used for pre-training.
    :type tr_batch_size: int
    :param val_batch_size: Batch size used for validation.
    :type val_batch_size: int
    :param adapt_patience: Patience for the domain adaptation step.
    :type adapt_patience: int
    :param adapt_epochs: Epochs for the domain adaptation step.
    :type adapt_epochs: int
    :param d_lr: Learning rate of the discriminator.
    :type d_lr: float
    :param tgt_lr: Learning rate of the adapted model.
    :type tgt_lr: float
    :param update_cnt: Update controller for the adversarial loss.
    :type update_cnt: int
    :param factor: Coefficient multiplied with the classification loss.
    :type factor: int
    :raises ValueError: If the directory holding the pre-trained weights
                        does not exist.
    """

    def _load_on_device(pickle_path):
        # Unpickle one data file and move its contents to `device`.
        return device_exchange(file_io.load_pickled_features(pickle_path),
                               device=device)

    tr_feat = _load_on_device(tr_feat_path)
    tr_labels = _load_on_device(tr_labels_path)
    val_feat = _load_on_device(val_feat_path)
    val_labels = _load_on_device(val_labels_path)

    # The same criterion serves every loss slot of both training phases.
    loss_func = functional.cross_entropy

    non_adapted_cnn = Model().to(device)
    label_classifier = LabelClassifier(nb_class_labels).to(device)

    if not path.exists(result_dir):
        makedirs(result_dir)

    if not do_pre_train:
        printing.info_msg('Loading a pre-trained non-adapted model')
        state_dict_path = model_path
    else:
        state_dict_path = result_dir

        printing.info_msg('Pre-training step')

        # A single optimizer updates the extractor and classifier jointly.
        optimizer_source = torch.optim.Adam(
            [*non_adapted_cnn.parameters(), *label_classifier.parameters()],
            lr=1e-4)

        pre_training.pre_training(
            model=non_adapted_cnn,
            label_classifier=label_classifier,
            optimizer=optimizer_source,
            tr_batch_size=tr_batch_size,
            val_batch_size=val_batch_size,
            tr_feat=tr_feat['A'],
            tr_labels=tr_labels['A'],
            val_feat=val_feat['A'],
            val_labels=val_labels['A'],
            epochs=epochs,
            criterion=loss_func,
            patience=patience,
            result_dir=state_dict_path,
        )

        del optimizer_source

    if not path.exists(state_dict_path):
        raise ValueError(
            'The path for loading the pre trained model does not exist!')

    # Both networks are (re)loaded from disk, also right after pre-training.
    saved_weights = (
        (non_adapted_cnn, 'non_adapted_cnn.pytorch'),
        (label_classifier, 'label_classifier.pytorch'),
    )
    for network, weights_file in saved_weights:
        network.load_state_dict(
            torch.load(path.join(state_dict_path, weights_file)))

    printing.info_msg('Training the Adversarial Adaptation Model')

    # The adapted model starts as an exact copy of the non-adapted one.
    target_cnn = Model().to(device)
    target_cnn.load_state_dict(non_adapted_cnn.state_dict())
    discriminator = Discriminator(2).to(device)

    target_model_opt = torch.optim.Adam(target_cnn.parameters(), lr=tgt_lr)
    discriminator_opt = torch.optim.Adam(discriminator.parameters(), lr=d_lr)

    domain_adaptation.domain_adaptation(
        non_adapted_cnn,
        target_cnn,
        label_classifier,
        discriminator,
        target_model_opt,
        discriminator_opt,
        loss_func,
        loss_func,
        loss_func,
        tr_feat,
        tr_labels,
        val_feat,
        val_labels,
        adapt_epochs,
        update_cnt,
        result_dir,
        adapt_patience,
        device,
        factor,
    )
コード例 #4
0
ファイル: train.py プロジェクト: rendchevi/BVAE-TTS
def main(args):
    """Train the model until ``hp.train_steps`` iterations have been run.

    Optionally restores model weights from ``args.checkpoint_path`` first;
    the iteration counter always starts at 0. Every iteration performs one
    optimizer step, logs the four loss terms, and validates / checkpoints at
    the intervals configured in ``hp``.

    :param args: Parsed command-line arguments; ``args.logdir`` and
                 ``args.checkpoint_path`` are read.
    """
    train_loader, val_loader, collate_fn = prepare_dataloaders(hp)
    model = Model(hp).cuda()
    optimizer = torch.optim.Adamax(model.parameters(), lr=hp.lr)
    writer = get_writer(hp.output_directory, args.logdir)

    # Restore trained weights when a checkpoint path was supplied.
    if args.checkpoint_path != '':
        saved = torch.load(args.checkpoint_path)
        model.load_state_dict(saved['state_dict'])
        banner = '#####################'
        print(banner)
        print('CHECKPOINT LOADED.')
        print(banner)

    iteration = 0
    model.train()
    print(f"Training Start!!! ({args.logdir})")
    while iteration < (hp.train_steps):
        for batch in train_loader:
            text_padded, text_lengths, mel_padded, mel_lengths = [
                tensor.cuda() for tensor in batch
            ]
            recon_loss, kl_loss, duration_loss, align_loss = model(
                text_padded, mel_padded, text_lengths, mel_lengths)

            # Ramp the KL weight linearly from 0 to 1 over the warm-up steps.
            alpha = min(1, iteration / hp.kl_warmup_steps)
            weighted_kl = alpha * kl_loss
            scaled_loss = recon_loss + weighted_kl + duration_loss + align_loss
            scaled_loss.backward()

            iteration += 1
            lr_scheduling(optimizer, iteration)
            nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh)
            optimizer.step()
            model.zero_grad()

            # The summary writer receives the unweighted KL term.
            scalars = (
                ('train_recon_loss', recon_loss),
                ('train_kl_loss', kl_loss),
                ('train_duration_loss', duration_loss),
                ('train_align_loss', align_loss),
            )
            for tag, value in scalars:
                writer.add_scalar(tag, value, global_step=iteration)

            # The console line shows the weighted KL term instead.
            progress = '\r[Iteration] {}/{} [recon_loss] {} [kl_loss] {} [duration_loss] {} [align_loss] {}'.format(
                iteration, hp.train_steps, recon_loss, weighted_kl,
                duration_loss, align_loss)
            sys.stdout.write(progress)

            if iteration % (hp.iters_per_validation) == 0:
                validate(model, val_loader, iteration, writer)

            if iteration % (hp.iters_per_checkpoint) == 0:
                save_checkpoint(
                    model,
                    optimizer,
                    hp.lr,
                    iteration,
                    filepath=f'{hp.output_directory}/{args.logdir}',
                )

            if iteration == (hp.train_steps):
                break
コード例 #5
0
# Script prologue: load the stage-0 checkpoint into a single-GPU model,
# put it in eval mode and start a timer before the per-dataset loop below.
data_type = 'phone'  # not read within the lines shown here
# NOTE(review): f-string without placeholders; the step number is hard-coded.
checkpoint_path = f"training_log/aligntts/stage0/checkpoint_40000"

from glob import glob

# checkpoint_path = sorted(glob("training_log/aligntts/stage0/checkpoint_*"))[0]
# NOTE(review): this re-assigns the same path that was already set above.
checkpoint_path = "training_log/aligntts/stage0/checkpoint_40000"

print(checkpoint_path)

# Re-key the saved weights, dropping the first 7 characters of every name --
# presumably the "module." prefix written by nn.DataParallel; TODO confirm.
state_dict = {}
for k, v in torch.load(checkpoint_path)['state_dict'].items():
    state_dict[k[7:]] = v

model = Model(hparams).cuda()
model.load_state_dict(state_dict)
_ = model.cuda().eval()  # result bound to `_` only to discard it
criterion = MDNLoss()

import time

datasets = ['train', 'val', 'test']
batch_size = 64
# NOTE(review): the value 64 above is overwritten immediately; the
# effective batch size is 1.
batch_size = 1

# Reference point for timing; where it is read is outside this excerpt.
start = time.perf_counter()

for dataset in datasets:

    with open(f'filelists/ljs_audio_text_{dataset}_filelist.txt',
              'r',
コード例 #6
0
ファイル: train.py プロジェクト: ChenX17/aligntts
def _checkpoint_step(checkpoint_path):
    """Return the numeric suffix of a ``checkpoint_<step>`` path, or -1."""
    suffix = checkpoint_path.rsplit('_', 1)[-1]
    return int(suffix) if suffix.isdigit() else -1


def _strip_key_prefix(state_dict, prefix_len=7):
    """Return a copy of *state_dict* with *prefix_len* chars cut off each key."""
    return {key[prefix_len:]: value for key, value in state_dict.items()}


def main(args):
    """Run the training loop for the AlignTTS stage given by ``args.stage``.

    Model initialisation:

    * stage > 0: weights come from the previous stage's final checkpoint,
      falling back to the checkpoint with the highest step number in that
      stage's directory when the expected file is missing;
    * stage 0 with ``args.pre_trained_model`` set: weights and the starting
      iteration come from that file;
    * otherwise a fresh model is created.

    Gradients are accumulated over ``hparams.accumulation`` batches per
    optimizer step. Training stops as soon as the batch counter reaches
    ``hparams.train_steps[args.stage] * hparams.accumulation``.

    :param args: Parsed command-line arguments; reads ``stage``,
                 ``pre_trained_model``, ``log_viterbi`` and ``cpu_viterbi``.
    :raises FileNotFoundError: If no checkpoint of the previous stage exists,
                               or ``args.pre_trained_model`` is not a file.
    """
    train_loader, val_loader, collate_fn = prepare_dataloaders(
        hparams, stage=args.stage)
    initial_iteration = None
    checkpoint = None

    if args.stage != 0:
        stage_dir = f"training_log/aligntts/stage{args.stage-1}"
        checkpoint_path = f"{stage_dir}/checkpoint_{hparams.train_steps[args.stage-1]}"

        if not os.path.isfile(checkpoint_path):
            print(f'{checkpoint_path} does not exist')
            candidates = glob(f"{stage_dir}/checkpoint_*")
            if not candidates:
                raise FileNotFoundError(
                    f'No checkpoint found matching {stage_dir}/checkpoint_*')
            # Pick the highest step numerically: a plain string sort would
            # rank checkpoint_9000 after checkpoint_10000.
            checkpoint_path = max(
                candidates, key=lambda p: (_checkpoint_step(p), p))
            print(f'Loading {checkpoint_path} instead')

        checkpoint = torch.load(checkpoint_path)
    elif args.pre_trained_model != '':
        if not os.path.isfile(args.pre_trained_model):
            print(f'{args.pre_trained_model} does not exist')
            # Fail here with a clear message; torch.load would fail anyway.
            raise FileNotFoundError(args.pre_trained_model)

        # Deserialize once and read both the weights and the counter from it.
        checkpoint = torch.load(args.pre_trained_model)
        initial_iteration = checkpoint['iteration']

    if checkpoint is None:
        model = nn.DataParallel(Model(hparams)).cuda()
    else:
        # Keys lose their first 7 characters -- presumably the "module."
        # prefix written by nn.DataParallel; confirm against save_checkpoint.
        model = Model(hparams).cuda()
        model.load_state_dict(_strip_key_prefix(checkpoint['state_dict']))
        model = nn.DataParallel(model).cuda()
        # Release the rest of the checkpoint; it is not needed any more.
        del checkpoint

    criterion = MDNLoss()
    writer = get_writer(hparams.output_directory,
                        f'{hparams.log_directory}/stage{args.stage}')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams.lr,
                                 betas=(0.9, 0.98),
                                 eps=1e-09)
    iteration, loss = 0, 0
    if initial_iteration is not None:
        # NOTE(review): save_checkpoint below is given
        # `iteration // hparams.accumulation`, whereas `iteration` here counts
        # individual batches. If the stored value is that quotient, resuming
        # with accumulation > 1 restarts too early -- confirm and rescale.
        iteration = initial_iteration
    model.train()

    total_iterations = hparams.train_steps[args.stage] * hparams.accumulation

    print(f'Stage{args.stage} Start!!! ({str(datetime.now())})')
    # `<` / `>=` instead of `==`: a resumed counter that is already at or past
    # the target must end training rather than loop forever.
    while iteration < total_iterations:
        for batch in train_loader:
            tensors = [reorder_batch(x, hparams.n_gpus).cuda() for x in batch]
            if args.stage == 0:
                # Stage-0 batches carry no alignment tensor.
                text_padded, mel_padded, text_lengths, mel_lengths = tensors
                align_padded = None
            else:
                (text_padded, mel_padded, align_padded, text_lengths,
                 mel_lengths) = tensors

            sub_loss = model(text_padded,
                             mel_padded,
                             align_padded,
                             text_lengths,
                             mel_lengths,
                             criterion,
                             stage=args.stage,
                             log_viterbi=args.log_viterbi,
                             cpu_viterbi=args.cpu_viterbi)
            # Scale so that the accumulated gradient matches one large batch.
            sub_loss = sub_loss.mean() / hparams.accumulation
            sub_loss.backward()
            loss = loss + sub_loss.item()
            iteration += 1
            if iteration % 100 == 0:
                print(
                    f'[{str(datetime.now())}] Stage {args.stage} Iter {iteration:<6d} Loss {loss:<8.6f}'
                )

            if iteration % hparams.accumulation == 0:
                step = iteration // hparams.accumulation
                lr_scheduling(optimizer, step)
                nn.utils.clip_grad_norm_(model.parameters(),
                                         hparams.grad_clip_thresh)
                optimizer.step()
                model.zero_grad()
                writer.add_scalar('Train loss', loss, step)
                writer.add_scalar('Learning rate', get_lr(optimizer), step)
                loss = 0

            if iteration % (hparams.iters_per_validation *
                            hparams.accumulation) == 0:
                validate(model, criterion, val_loader, iteration, writer,
                         args.stage)

            if iteration % (hparams.iters_per_checkpoint *
                            hparams.accumulation) == 0:
                save_checkpoint(
                    model,
                    optimizer,
                    hparams.lr,
                    iteration // hparams.accumulation,
                    filepath=
                    f'{hparams.output_directory}/{hparams.log_directory}/stage{args.stage}'
                )

            if iteration >= total_iterations:
                break

    print(f'Stage{args.stage} End!!! ({str(datetime.now())})')
コード例 #7
0
def testing(non_adapted_model_dir, adapted_model_dir, classifier_dir,
            nb_clss_labels, feat_path, labels_path, device, src_batch_size,
            trgt_batch_size):
    """Run the complete AUDASC test procedure and print the accuracies.

    The non-adapted and adapted models, sharing one label classifier, are
    evaluated on the ``'A'`` data (source) and on the concatenated ``'B'``
    and ``'C'`` data (target). Per-group accuracies and a weighted overall
    accuracy are printed for both models.

    :param non_adapted_model_dir: Directory of the non-adapted model.
    :param adapted_model_dir: Directory of the adapted model.
    :param classifier_dir: Directory of the label classifier.
    :param nb_clss_labels: Number of acoustic scene classes.
    :param feat_path: Path of the pickled test features.
    :param labels_path: Path of the pickled test labels.
    :param device: The device to run on.
    :param src_batch_size: Batch size for the source data.
    :param trgt_batch_size: Batch size for the target data.
    """

    def _restore(network, directory, weights_file):
        # Load saved weights into `network` and hand the network back.
        network.load_state_dict(torch.load(path.join(directory, weights_file)))
        return network

    non_adapted_cnn = _restore(Model().to(device), non_adapted_model_dir,
                               'non_adapted_cnn.pytorch')
    adapted_cnn = _restore(Model().to(device), adapted_model_dir,
                           'target_cnn.pytorch')
    label_classifier = _restore(
        LabelClassifier(nb_clss_labels).to(device), classifier_dir,
        'label_classifier.pytorch')

    # Switch all three networks out of training mode.
    for network in (non_adapted_cnn, adapted_cnn, label_classifier):
        network.train(False)

    feat = file_io.load_pickled_features(feat_path)
    labels = file_io.load_pickled_features(labels_path)

    non_adapted_acc = {}
    adapted_acc = {}

    # The three bare strings below are no-op expression statements that
    # serve as a section banner.
    '********************************************'
    '** testing for all data, device A, B, & C **'
    '********************************************'

    # Source data: device A.
    source_feat = feat['A'].to(device)
    source_labels = labels['A'].to(device)
    src_batch_feat, src_batch_labels = test_step.test_data_mini_batch(
        source_feat, source_labels, batch_size=src_batch_size)
    non_adapted_src_correct, adapted_src_correct, src_temp = \
        test_step.test_function(non_adapted_cnn, adapted_cnn,
                                label_classifier, src_batch_feat,
                                src_batch_labels)

    # The third return value times the batch size is used as the sample
    # count (presumably it is the number of batches -- TODO confirm).
    non_adapted_src_len = src_temp * src_batch_size
    adapted_src_len = src_temp * src_batch_size

    # Target data: devices B and C stacked along the first axis.
    target_feat = torch.cat([feat['B'], feat['C']], dim=0).to(device)
    target_labels = torch.cat([labels['B'], labels['C']], dim=0).to(device)

    trgt_batch_feat, trgt_batch_labels = test_step.test_data_mini_batch(
        target_feat, target_labels, batch_size=trgt_batch_size)
    non_adapted_tgt_correct, adapted_tgt_correct, trgt_temp = \
        test_step.test_function(non_adapted_cnn, adapted_cnn,
                                label_classifier, trgt_batch_feat,
                                trgt_batch_labels)

    non_adapted_tgt_len = trgt_temp * trgt_batch_size
    adapted_tgt_len = trgt_temp * trgt_batch_size

    # Accuracy of both models on data from device A.
    non_adapted_acc['A'] = math_funcs.to_percentage(
        non_adapted_src_correct, non_adapted_src_len)
    adapted_acc['A'] = math_funcs.to_percentage(
        adapted_src_correct, adapted_src_len)

    # Accuracy of both models on data from devices B & C.
    non_adapted_acc['BC'] = math_funcs.to_percentage(
        non_adapted_tgt_correct, non_adapted_tgt_len)
    adapted_acc['BC'] = math_funcs.to_percentage(
        adapted_tgt_correct, adapted_tgt_len)

    # Accuracy over all devices: weighted sum of the two group accuracies.
    non_adapted_beta, non_adapted_alpha = math_funcs.weighting_factors(
        non_adapted_src_len, non_adapted_tgt_len)
    adapted_beta, adapted_alpha = math_funcs.weighting_factors(
        adapted_src_len, adapted_tgt_len)

    non_adapted_acc['all'] = (
        (non_adapted_beta * non_adapted_acc['A']) +
        (non_adapted_alpha * non_adapted_acc['BC']))
    adapted_acc['all'] = (
        (adapted_beta * adapted_acc['A']) +
        (adapted_alpha * adapted_acc['BC']))

    printing.testing_result_msg(non_adapted_acc,
                                adapted_acc,
                                ending='\n',
                                flushing=True)