def main(args):
    train_loader, val_loader, collate_fn = prepare_dataloaders(hparams, stage=args.stage)

    if args.stage != 0:
        checkpoint_path = f"training_log/aligntts/stage{args.stage-1}/checkpoint_{hparams.train_steps[args.stage-1]}"
        state_dict = {}
        for k, v in torch.load(checkpoint_path)['state_dict'].items():
            state_dict[k[7:]] = v

        model = Model(hparams).cuda()
        model.load_state_dict(state_dict)
        model = nn.DataParallel(model).cuda()
    else:
        model = nn.DataParallel(Model(hparams)).cuda()

    criterion = MDNLoss()
    writer = get_writer(hparams.output_directory, f'{hparams.log_directory}/stage{args.stage}')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams.lr,
                                 betas=(0.9, 0.98),
                                 eps=1e-09)
    iteration, loss = 0, 0
    model.train()

    print(f'Stage{args.stage} Start!!! ({str(datetime.now())})')
    while True:
        for i, batch in enumerate(train_loader):
            if args.stage == 0:
                text_padded, mel_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]
                align_padded = None
            else:
                text_padded, mel_padded, align_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]

            sub_loss = model(text_padded,
                             mel_padded,
                             align_padded,
                             text_lengths,
                             mel_lengths,
                             criterion,
                             stage=args.stage)
            sub_loss = sub_loss.mean() / hparams.accumulation
            sub_loss.backward()
            loss = loss + sub_loss.item()
            iteration += 1

            if iteration % hparams.accumulation == 0:
                lr_scheduling(optimizer, iteration // hparams.accumulation)
                nn.utils.clip_grad_norm_(model.parameters(), hparams.grad_clip_thresh)
                optimizer.step()
                model.zero_grad()
                writer.add_scalar('Train loss', loss, iteration // hparams.accumulation)
                loss = 0

            if iteration % (hparams.iters_per_validation * hparams.accumulation) == 0:
                validate(model, criterion, val_loader, iteration, writer, args.stage)

            if iteration % (hparams.iters_per_checkpoint * hparams.accumulation) == 0:
                save_checkpoint(model, optimizer, hparams.lr,
                                iteration // hparams.accumulation,
                                filepath=f'{hparams.output_directory}/{hparams.log_directory}/stage{args.stage}')

            if iteration == (hparams.train_steps[args.stage] * hparams.accumulation):
                break

        if iteration == (hparams.train_steps[args.stage] * hparams.accumulation):
            break

    print(f'Stage{args.stage} End!!! ({str(datetime.now())})')
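# A minimal entry-point sketch for the stage-wise trainer above. The flag
# name and default are assumptions inferred from how `args.stage` is used in
# main(); the repository's actual CLI may differ.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--stage', type=int, default=0,
                        help='index into hparams.train_steps (0 = MDN alignment stage)')
    args = parser.parse_args()
    main(args)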
def main():
    data_type = 'phone'
    checkpoint_path = f"training_log/aligntts/stage0/checkpoint_{hparams.train_steps[0]}"

    state_dict = {}
    for k, v in torch.load(checkpoint_path)['state_dict'].items():
        # Strip the 'module.' prefix added by nn.DataParallel during training.
        state_dict[k[7:]] = v

    model = Model(hparams).cuda()
    model.load_state_dict(state_dict)
    _ = model.cuda().eval()
    criterion = MDNLoss()

    #datasets = ['train', 'val', 'test']
    datasets = ['train']
    batch_size = 64

    for dataset in datasets:
        #with open(f'filelists/ljs_audio_text_{dataset}_filelist.txt', 'r') as f:
        with open('/hd0/speech-aligner/metadata/metadata.csv', 'r') as f:
            lines_raw = [line.split('|') for line in f.read().splitlines()]

        # Ceiling division keeps the last (shorter) batch without producing an
        # empty trailing batch when the length divides evenly.
        lines_list = [
            lines_raw[batch_size * i:batch_size * (i + 1)]
            for i in range((len(lines_raw) + batch_size - 1) // batch_size)
        ]

        for batch in tqdm(lines_list):
            file_list, text_list, mel_list = [], [], []
            text_lengths, mel_lengths = [], []

            for i in range(len(batch)):
                file_name, _, text = batch[i]
                file_name = os.path.splitext(file_name)[0]
                file_list.append(file_name)

                seq_dir = os.path.join('/hd0/speech-aligner/preprocessed/VCTK20_engspks',
                                       f'{data_type}_seq')
                mel_dir = os.path.join('/hd0/speech-aligner/preprocessed/VCTK20_engspks',
                                       'melspectrogram')
                seq = torch.from_numpy(np.load(f'{seq_dir}/{file_name}_sequence.npy'))
                mel = torch.from_numpy(np.load(f'{mel_dir}/{file_name}_melspectrogram.npy'))

                text_list.append(seq)
                mel_list.append(mel)
                text_lengths.append(seq.size(0))
                mel_lengths.append(mel.size(1))

            text_lengths = torch.LongTensor(text_lengths)
            mel_lengths = torch.LongTensor(mel_lengths)
            text_padded = torch.zeros(len(batch), text_lengths.max().item(), dtype=torch.long)
            mel_padded = torch.zeros(len(batch), hparams.n_mel_channels, mel_lengths.max().item())

            for j in range(len(batch)):
                text_padded[j, :text_list[j].size(0)] = text_list[j]
                mel_padded[j, :, :mel_list[j].size(1)] = mel_list[j]

            text_padded = text_padded.cuda()
            mel_padded = mel_padded.cuda()
            # Normalize the mel spectrogram to [0, 1] over the dB range.
            mel_padded = (torch.clamp(mel_padded, hparams.min_db, hparams.max_db)
                          - hparams.min_db) / (hparams.max_db - hparams.min_db)
            text_lengths = text_lengths.cuda()
            mel_lengths = mel_lengths.cuda()

            with torch.no_grad():
                encoder_input = model.Prenet(text_padded)
                hidden_states, _ = model.FFT_lower(encoder_input, text_lengths)
                mu_sigma = model.get_mu_sigma(hidden_states)
                _, log_prob_matrix = criterion(mu_sigma, mel_padded, text_lengths, mel_lengths)

                align = model.viterbi(log_prob_matrix, text_lengths, mel_lengths).to(torch.long)
                alignments = list(torch.split(align, 1))

            for j, (l, t) in enumerate(zip(text_lengths, mel_lengths)):
                # Collapse the (text, mel) alignment matrix into per-token durations.
                alignments[j] = alignments[j][0, :l.item(), :t.item()].sum(dim=-1)
                os.makedirs(
                    "/hd0/speech-aligner/preprocessed/VCTK20_engspks/alignments/{}".format(
                        file_list[j].split('/')[0]),
                    exist_ok=True)
                np.save(
                    f'/hd0/speech-aligner/preprocessed/VCTK20_engspks/alignments/{file_list[j]}_alignment.npy',
                    alignments[j].detach().cpu().numpy())
                # plt.imshow(align[j].detach().cpu().numpy())
                # plt.gca().invert_yaxis()
                # plt.savefig(f"/hd0/speech-aligner/preprocessed/VCTK20_engspks/alignments/{file_list[j]}_alignment.png", format='png')

    print(f"Alignments Extraction End!!! ({datetime.now()})")
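# A quick sanity check on one extracted file (a sketch; the utterance name is
# hypothetical). Each saved alignment entry is the number of mel frames the
# Viterbi path assigns to one input token, so the durations must sum to the
# utterance's mel length.
dur = np.load('/hd0/speech-aligner/preprocessed/VCTK20_engspks/alignments/'
              'p225/p225_001_alignment.npy')
mel = np.load('/hd0/speech-aligner/preprocessed/VCTK20_engspks/melspectrogram/'
              'p225/p225_001_melspectrogram.npy')
assert dur.sum() == mel.shape[1], 'durations must cover every mel frame'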
def training_process(device, nb_class_labels, model_path, result_dir, patience,
                     epochs, do_pre_train, tr_feat_path, tr_labels_path,
                     val_feat_path, val_labels_path, tr_batch_size,
                     val_batch_size, adapt_patience, adapt_epochs, d_lr,
                     tgt_lr, update_cnt, factor):
    """Implements the complete training process of the AUDASC method.

    :param device: The device that we will use.
    :type device: str
    :param nb_class_labels: The number of labels for label classification.
    :type nb_class_labels: int
    :param model_path: The path of a previously saved model (if any).
    :type model_path: str
    :param result_dir: The directory to save the newly pre-trained model.
    :type result_dir: str
    :param patience: The patience for the pre-training step.
    :type patience: int
    :param epochs: The epochs for the pre-training step.
    :type epochs: int
    :param do_pre_train: Flag to indicate if we do pre-training.
    :type do_pre_train: bool
    :param tr_feat_path: The path for loading the training features.
    :type tr_feat_path: str
    :param tr_labels_path: The path for loading the training labels.
    :type tr_labels_path: str
    :param val_feat_path: The path for loading the validation features.
    :type val_feat_path: str
    :param val_labels_path: The path for loading the validation labels.
    :type val_labels_path: str
    :param tr_batch_size: The batch size used for pre-training.
    :type tr_batch_size: int
    :param val_batch_size: The batch size used for validation.
    :type val_batch_size: int
    :param adapt_patience: The patience for the domain adaptation step.
    :type adapt_patience: int
    :param adapt_epochs: The epochs for the domain adaptation step.
    :type adapt_epochs: int
    :param d_lr: The learning rate for the discriminator.
    :type d_lr: float
    :param tgt_lr: The learning rate for the adapted model.
    :type tgt_lr: float
    :param update_cnt: An update controller for the adversarial loss.
    :type update_cnt: int
    :param factor: The coefficient multiplied by the classification loss.
    :type factor: int
    """
    tr_feat = device_exchange(file_io.load_pickled_features(tr_feat_path), device=device)
    tr_labels = device_exchange(file_io.load_pickled_features(tr_labels_path), device=device)
    val_feat = device_exchange(file_io.load_pickled_features(val_feat_path), device=device)
    val_labels = device_exchange(file_io.load_pickled_features(val_labels_path), device=device)

    loss_func = functional.cross_entropy

    non_adapted_cnn = Model().to(device)
    label_classifier = LabelClassifier(nb_class_labels).to(device)

    if not path.exists(result_dir):
        makedirs(result_dir)

    if do_pre_train:
        state_dict_path = result_dir
        printing.info_msg('Pre-training step')

        optimizer_source = torch.optim.Adam(
            list(non_adapted_cnn.parameters()) + list(label_classifier.parameters()),
            lr=1e-4)

        pre_training.pre_training(model=non_adapted_cnn,
                                  label_classifier=label_classifier,
                                  optimizer=optimizer_source,
                                  tr_batch_size=tr_batch_size,
                                  val_batch_size=val_batch_size,
                                  tr_feat=tr_feat['A'],
                                  tr_labels=tr_labels['A'],
                                  val_feat=val_feat['A'],
                                  val_labels=val_labels['A'],
                                  epochs=epochs,
                                  criterion=loss_func,
                                  patience=patience,
                                  result_dir=state_dict_path)
        del optimizer_source
    else:
        printing.info_msg('Loading a pre-trained non-adapted model')
        state_dict_path = model_path

        if not path.exists(state_dict_path):
            raise ValueError('The path for loading the pre-trained model does not exist!')

        non_adapted_cnn.load_state_dict(
            torch.load(path.join(state_dict_path, 'non_adapted_cnn.pytorch')))
        label_classifier.load_state_dict(
            torch.load(path.join(state_dict_path, 'label_classifier.pytorch')))

    printing.info_msg('Training the Adversarial Adaptation Model')

    # The adapted model starts from the pre-trained weights.
    target_cnn = Model().to(device)
    target_cnn.load_state_dict(non_adapted_cnn.state_dict())
    discriminator = Discriminator(2).to(device)

    target_model_opt = torch.optim.Adam(target_cnn.parameters(), lr=tgt_lr)
    discriminator_opt = torch.optim.Adam(discriminator.parameters(), lr=d_lr)

    domain_adaptation.domain_adaptation(
        non_adapted_cnn, target_cnn, label_classifier, discriminator,
        target_model_opt, discriminator_opt, loss_func, loss_func, loss_func,
        tr_feat, tr_labels, val_feat, val_labels, adapt_epochs, update_cnt,
        result_dir, adapt_patience, device, factor)
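# The `device_exchange` helper used above is not shown in this section. A
# minimal sketch, under the assumption that features/labels are pickled as a
# dict keyed by recording device ('A', 'B', 'C'), which matches the
# tr_feat['A'] / feat['B'] indexing in the surrounding code:
def device_exchange(tensor_dict, device):
    """Move every tensor in the per-device dict onto the given device."""
    return {dev: tensor.to(device) for dev, tensor in tensor_dict.items()}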
def main(args):
    train_loader, val_loader, collate_fn = prepare_dataloaders(hp)
    model = Model(hp).cuda()
    optimizer = torch.optim.Adamax(model.parameters(), lr=hp.lr)
    writer = get_writer(hp.output_directory, args.logdir)
    #model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    ### Load trained checkpoint ###
    if args.checkpoint_path != '':
        model.load_state_dict(torch.load(args.checkpoint_path)['state_dict'])
        print('#####################')
        print('CHECKPOINT LOADED.')
        print('#####################')

    iteration = 0
    model.train()
    print(f"Training Start!!! ({args.logdir})")
    while iteration < hp.train_steps:
        for i, batch in enumerate(train_loader):
            text_padded, text_lengths, mel_padded, mel_lengths = [
                x.cuda() for x in batch
            ]
            recon_loss, kl_loss, duration_loss, align_loss = model(
                text_padded, mel_padded, text_lengths, mel_lengths)

            alpha = min(1, iteration / hp.kl_warmup_steps)
            scaled_loss = recon_loss + alpha * kl_loss + duration_loss + align_loss
            scaled_loss.backward()
            #with amp.scale_loss((recon_loss + alpha*kl_loss + duration_loss + align_loss), optimizer) as scaled_loss:
            #    scaled_loss.backward()
            iteration += 1

            lr_scheduling(optimizer, iteration)
            nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh)
            optimizer.step()
            model.zero_grad()

            writer.add_scalar('train_recon_loss', recon_loss, global_step=iteration)
            writer.add_scalar('train_kl_loss', kl_loss, global_step=iteration)
            writer.add_scalar('train_duration_loss', duration_loss, global_step=iteration)
            writer.add_scalar('train_align_loss', align_loss, global_step=iteration)
            sys.stdout.write(
                '\r[Iteration] {}/{} [recon_loss] {} [kl_loss] {} [duration_loss] {} [align_loss] {}'
                .format(iteration, hp.train_steps, recon_loss,
                        alpha * kl_loss, duration_loss, align_loss))

            if iteration % hp.iters_per_validation == 0:
                validate(model, val_loader, iteration, writer)

            if iteration % hp.iters_per_checkpoint == 0:
                save_checkpoint(model, optimizer, hp.lr, iteration,
                                filepath=f'{hp.output_directory}/{args.logdir}')

            if iteration == hp.train_steps:
                break
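# `lr_scheduling` is called above but not defined in this section. A plausible
# Noam-style warm-up schedule is sketched below, suggested by the
# betas=(0.9, 0.98), eps=1e-9 Adam settings used elsewhere in these scripts;
# hp.hidden_dim and the warm-up length are assumptions, not confirmed values.
def lr_scheduling(optimizer, step, warmup_steps=4000):
    # lr ~ d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
    lr = hp.hidden_dim ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr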
import time
from glob import glob

data_type = 'phone'
# checkpoint_path = sorted(glob("training_log/aligntts/stage0/checkpoint_*"))[0]
checkpoint_path = "training_log/aligntts/stage0/checkpoint_40000"
print(checkpoint_path)

state_dict = {}
for k, v in torch.load(checkpoint_path)['state_dict'].items():
    state_dict[k[7:]] = v

model = Model(hparams).cuda()
model.load_state_dict(state_dict)
_ = model.cuda().eval()
criterion = MDNLoss()

datasets = ['train', 'val', 'test']
#batch_size = 64
batch_size = 1

start = time.perf_counter()
for dataset in datasets:
    with open(f'filelists/ljs_audio_text_{dataset}_filelist.txt', 'r') as f:
        lines_raw = [line.split('|') for line in f.read().splitlines()]
def main(args):
    train_loader, val_loader, collate_fn = prepare_dataloaders(hparams, stage=args.stage)
    initial_iteration = None

    if args.stage != 0:
        checkpoint_path = f"training_log/aligntts/stage{args.stage-1}/checkpoint_{hparams.train_steps[args.stage-1]}"
        if not os.path.isfile(checkpoint_path):
            print(f'{checkpoint_path} does not exist')
            checkpoint_path = sorted(
                glob(f"training_log/aligntts/stage{args.stage-1}/checkpoint_*"))[-1]
            print(f'Loading {checkpoint_path} instead')

        state_dict = {}
        for k, v in torch.load(checkpoint_path)['state_dict'].items():
            # Strip the 'module.' prefix added by nn.DataParallel.
            state_dict[k[7:]] = v

        model = Model(hparams).cuda()
        model.load_state_dict(state_dict)
        model = nn.DataParallel(model).cuda()
    else:
        if args.pre_trained_model != '':
            if not os.path.isfile(args.pre_trained_model):
                print(f'{args.pre_trained_model} does not exist')

            # Load the checkpoint once and reuse it for both the weights and
            # the iteration counter.
            checkpoint = torch.load(args.pre_trained_model)
            state_dict = {}
            for k, v in checkpoint['state_dict'].items():
                state_dict[k[7:]] = v
            initial_iteration = checkpoint['iteration']

            model = Model(hparams).cuda()
            model.load_state_dict(state_dict)
            model = nn.DataParallel(model).cuda()
        else:
            model = nn.DataParallel(Model(hparams)).cuda()

    criterion = MDNLoss()
    writer = get_writer(hparams.output_directory,
                        f'{hparams.log_directory}/stage{args.stage}')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams.lr,
                                 betas=(0.9, 0.98),
                                 eps=1e-09)

    iteration, loss = 0, 0
    if initial_iteration is not None:
        iteration = initial_iteration
    model.train()

    print(f'Stage{args.stage} Start!!! ({str(datetime.now())})')
    while True:
        for i, batch in enumerate(train_loader):
            if args.stage == 0:
                text_padded, mel_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]
                align_padded = None
            else:
                text_padded, mel_padded, align_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]

            sub_loss = model(text_padded,
                             mel_padded,
                             align_padded,
                             text_lengths,
                             mel_lengths,
                             criterion,
                             stage=args.stage,
                             log_viterbi=args.log_viterbi,
                             cpu_viterbi=args.cpu_viterbi)
            sub_loss = sub_loss.mean() / hparams.accumulation
            sub_loss.backward()
            loss = loss + sub_loss.item()
            iteration += 1

            if iteration % 100 == 0:
                print(f'[{str(datetime.now())}] Stage {args.stage} '
                      f'Iter {iteration:<6d} Loss {loss:<8.6f}')

            if iteration % hparams.accumulation == 0:
                lr_scheduling(optimizer, iteration // hparams.accumulation)
                nn.utils.clip_grad_norm_(model.parameters(), hparams.grad_clip_thresh)
                optimizer.step()
                model.zero_grad()
                writer.add_scalar('Train loss', loss, iteration // hparams.accumulation)
                writer.add_scalar('Learning rate', get_lr(optimizer),
                                  iteration // hparams.accumulation)
                loss = 0

            if iteration % (hparams.iters_per_validation * hparams.accumulation) == 0:
                validate(model, criterion, val_loader, iteration, writer, args.stage)

            if iteration % (hparams.iters_per_checkpoint * hparams.accumulation) == 0:
                save_checkpoint(model, optimizer, hparams.lr,
                                iteration // hparams.accumulation,
                                filepath=f'{hparams.output_directory}/{hparams.log_directory}/stage{args.stage}')

            if iteration == (hparams.train_steps[args.stage] * hparams.accumulation):
                break

        if iteration == (hparams.train_steps[args.stage] * hparams.accumulation):
            break

    print(f'Stage{args.stage} End!!! ({str(datetime.now())})')
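# `get_lr` is used above for TensorBoard logging but not defined in this
# section; a minimal sketch, assuming a single parameter group (which matches
# how the Adam optimizer is constructed above):
def get_lr(optimizer):
    return optimizer.param_groups[0]['lr']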
def testing(non_adapted_model_dir, adapted_model_dir, classifier_dir,
            nb_clss_labels, feat_path, labels_path, device, src_batch_size,
            trgt_batch_size):
    """Implements the complete test process of the AUDASC method.

    :param non_adapted_model_dir: directory of the non-adapted model
    :param adapted_model_dir: directory of the adapted model
    :param classifier_dir: directory of the classifier
    :param nb_clss_labels: number of acoustic scene classes
    :param feat_path: directory of the test features
    :param labels_path: directory of the test labels
    :param device: the device that will be used
    :param src_batch_size: source batch size
    :param trgt_batch_size: target batch size
    """
    non_adapted_cnn = Model().to(device)
    non_adapted_cnn.load_state_dict(
        torch.load(path.join(non_adapted_model_dir, 'non_adapted_cnn.pytorch')))

    adapted_cnn = Model().to(device)
    adapted_cnn.load_state_dict(
        torch.load(path.join(adapted_model_dir, 'target_cnn.pytorch')))

    label_classifier = LabelClassifier(nb_clss_labels).to(device)
    label_classifier.load_state_dict(
        torch.load(path.join(classifier_dir, 'label_classifier.pytorch')))

    non_adapted_cnn.train(False)
    adapted_cnn.train(False)
    label_classifier.train(False)

    feat = file_io.load_pickled_features(feat_path)
    labels = file_io.load_pickled_features(labels_path)

    non_adapted_acc = {}
    adapted_acc = {}

    # Testing for all data: devices A, B, & C.

    # Testing on source data.
    src_batch_feat, src_batch_labels = test_step.test_data_mini_batch(
        feat['A'].to(device), labels['A'].to(device), batch_size=src_batch_size)
    non_adapted_src_correct, adapted_src_correct, src_temp = test_step.test_function(
        non_adapted_cnn, adapted_cnn, label_classifier,
        src_batch_feat, src_batch_labels)

    non_adapted_src_len = src_temp * src_batch_size
    adapted_src_len = src_temp * src_batch_size

    # Testing on target data.
    target_feat = torch.cat([feat['B'], feat['C']], dim=0).to(device)
    target_labels = torch.cat([labels['B'], labels['C']], dim=0).to(device)
    trgt_batch_feat, trgt_batch_labels = test_step.test_data_mini_batch(
        target_feat, target_labels, batch_size=trgt_batch_size)
    non_adapted_tgt_correct, adapted_tgt_correct, trgt_temp = test_step.test_function(
        non_adapted_cnn, adapted_cnn, label_classifier,
        trgt_batch_feat, trgt_batch_labels)

    non_adapted_tgt_len = trgt_temp * trgt_batch_size
    adapted_tgt_len = trgt_temp * trgt_batch_size

    # Calculating the accuracy of both models on data from device A.
    non_adapted_acc['A'] = math_funcs.to_percentage(non_adapted_src_correct,
                                                    non_adapted_src_len)
    adapted_acc['A'] = math_funcs.to_percentage(adapted_src_correct,
                                                adapted_src_len)

    # Calculating the accuracy of both models on data from devices B & C.
    non_adapted_acc['BC'] = math_funcs.to_percentage(non_adapted_tgt_correct,
                                                     non_adapted_tgt_len)
    adapted_acc['BC'] = math_funcs.to_percentage(adapted_tgt_correct,
                                                 adapted_tgt_len)

    # Calculating the weighted accuracy of both models on data from all devices.
    non_adapted_beta, non_adapted_alpha = math_funcs.weighting_factors(
        non_adapted_src_len, non_adapted_tgt_len)
    adapted_beta, adapted_alpha = math_funcs.weighting_factors(
        adapted_src_len, adapted_tgt_len)

    non_adapted_acc['all'] = (non_adapted_beta * non_adapted_acc['A']) + \
                             (non_adapted_alpha * non_adapted_acc['BC'])
    adapted_acc['all'] = (adapted_beta * adapted_acc['A']) + \
                         (adapted_alpha * adapted_acc['BC'])

    printing.testing_result_msg(non_adapted_acc, adapted_acc,
                                ending='\n', flushing=True)
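# `math_funcs.weighting_factors` is not shown in this section. A minimal
# sketch of one consistent choice: weight each accuracy by its share of the
# test samples, so the 'all' score is a sample-weighted average. The actual
# helper may compute the factors differently.
def weighting_factors(src_len, tgt_len):
    total = src_len + tgt_len
    return src_len / total, tgt_len / total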