def tts_train_loop(model, optimizer, train_set, lr, total_steps):
    for p in optimizer.param_groups:
        p['lr'] = lr

    total_iters = len(train_set)
    # convert the remaining step budget into a whole number of epochs
    epochs = (total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):
        start = time.time()
        running_loss = 0

        for i, (x, m, _, _) in enumerate(train_set, 1):
            optimizer.zero_grad()

            x, m = x.cuda(), m.cuda()

            m1_hat, m2_hat, attention = model(x, m)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)
            loss = m1_loss + m2_loss

            running_loss += loss.item()

            loss.backward()
            if hp.tts_clip_grad_norm:
                torch.nn.utils.clip_grad_norm_(model.parameters(), hp.tts_clip_grad_norm)
            optimizer.step()

            step = model.get_step()
            k = step // 1000
            speed = i / (time.time() - start)
            avg_loss = running_loss / i

            if step % hp.tts_checkpoint_every == 0:
                model.checkpoint(paths.tts_checkpoints)

            if step % hp.tts_plot_every == 0:
                save_attention(attention[0], f'{paths.tts_attention}{k}k')

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:#.4} ' \
                  f'| {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        model.save(paths.tts_latest_weights)
        model.log(paths.tts_log, msg)
        print(' ')
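# Hedged usage sketch for the loop above, not part of the original source.
# Tacotron, optim, get_tts_datasets, hp and paths are assumed to come from the
# surrounding repo; hyperparameter values here are illustrative only.
#
#   model = Tacotron(...).cuda()
#   optimizer = optim.Adam(model.parameters())
#   train_set = get_tts_datasets(paths.data, batch_size=32, r=model.r)
#   # e.g. with get_step()=10_000, total_steps=180_000 and 500 batches/epoch:
#   # epochs = (180_000 - 10_000) // 500 + 1 = 341
#   tts_train_loop(model, optimizer, train_set, lr=1e-4, total_steps=180_000)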
def voc_train_loop(model, loss_func, optimiser, train_set, test_set, lr, total_steps):
    for p in optimiser.param_groups:
        p['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):
        start = time.time()
        running_loss = 0.

        for i, (x, y, m, s_e) in enumerate(train_set, 1):
            x, m, y, spk_embd = x.cuda(), m.cuda(), y.cuda(), s_e.cuda()

            y_hat = model(x, m, spk_embd)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            running_loss += loss.item()

            speed = i / (time.time() - start)
            avg_loss = running_loss / i

            step = model.get_step()
            k = step // 1000

            if step % hp.voc_checkpoint_every == 0:
                gen_testset(model, test_set, hp.voc_gen_at_checkpoint, hp.voc_gen_batched,
                            hp.voc_target, hp.voc_overlap, paths.voc_output)
                model.checkpoint(paths.voc_checkpoints)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} ' \
                  f'| {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        model.save(paths.voc_latest_weights)
        model.log(paths.voc_log, msg)
        print(' ')
def train_loop(model, optimiser, train_set, test_set, lr):
    for p in optimiser.param_groups:
        p['lr'] = lr

    total_iters = len(train_set)
    epochs = (hp.total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):
        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.cuda(), m.cuda(), y.cuda()

            y_hat = model(x, m)
            y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            y = y.unsqueeze(-1)

            loss = F.cross_entropy(y_hat, y)

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            running_loss += loss.item()

            speed = i / (time.time() - start)
            avg_loss = running_loss / i

            step = model.get_step()
            k = step // 1000

            if step % hp.checkpoint_every == 0:
                gen_testset(model, test_set, hp.test_samples, hp.batched,
                            hp.target, hp.overlap, paths.output)
                model.checkpoint(paths.checkpoints)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:#.4} ' \
                  f'| {speed:#.2} steps/s | Step: {k}k | '
            stream(msg)

        model.save(paths.latest_weights)
        model.log(paths.log, msg)
        print(' ')
def evaluate(self, model, val_set, msg) -> float:
    model.tacotron.eval()
    val_loss = 0
    device = next(model.tacotron.parameters()).device
    for i, batch in enumerate(val_set, 1):
        stream(msg + f'| Evaluating {i}/{len(val_set)}')
        seqs, mels, stops, ids, lens = batch
        seqs, mels, stops, lens = \
            seqs.to(device), mels.to(device), stops.to(device), lens.to(device)
        with torch.no_grad():
            pred = model.tacotron(seqs, mels)
            lin_mels, post_mels, att = pred
            lin_loss = F.l1_loss(lin_mels, mels)
            post_loss = F.l1_loss(post_mels, mels)
            val_loss += lin_loss + post_loss
        if i == 1:
            self.generate_samples(model, batch, pred)
    val_loss /= len(val_set)
    return float(val_loss)
def create_gta_features(model, train_set, save_path):
    iters = len(train_set)

    for i, (x, mels, ids, mel_lens) in enumerate(train_set, 1):
        x, mels = x.cuda(), mels.cuda()

        with torch.no_grad():
            _, gta, _ = model(x, mels)

        gta = gta.cpu().numpy()

        for j in range(len(ids)):
            mel = gta[j][:, :mel_lens[j]]
            mel = (mel + 4) / 8
            item_id = ids[j]  # renamed from `id` to avoid shadowing the builtin
            np.save(f'{save_path}{item_id}.npy', mel, allow_pickle=False)

        bar = progbar(i, iters)
        msg = f'{bar} {i}/{iters} Batches '
        stream(msg)
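# Hedged usage sketch, not part of the original source: GTA ("ground truth
# aligned") mels are the Tacotron outputs produced while teacher-forcing on
# real mels, typically saved so a vocoder can later be fine-tuned on them.
# The dataset helper and save path below are assumptions based on the
# surrounding code.
#
#   train_set, _ = get_tts_datasets(paths.data, batch_size=8, r=model.r)
#   create_gta_features(model, train_set, save_path=f'{paths.data}gta/')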
def train_session(self, model: ModelPackage, session: Session):
    model.r = session.r
    cfg = self.cfg
    tacotron, gan = model.tacotron, model.gan
    taco_opti, gen_opti, disc_opti = \
        model.taco_opti, model.gen_opti, model.disc_opti
    device = next(tacotron.parameters()).device
    display_params([('Session', session.index), ('Reduction', session.r),
                    ('Max Step', session.max_step), ('Learning Rate', session.lr),
                    ('Batch Size', session.bs),
                    ('Steps per Epoch', len(session.train_set))])

    for g in taco_opti.param_groups:
        g['lr'] = session.lr

    loss_avg = Averager()
    duration_avg = Averager()

    while tacotron.get_step() <= session.max_step:
        for i, (seqs, mels, stops, ids, lens) in enumerate(session.train_set):
            seqs, mels, stops, lens = \
                seqs.to(device), mels.to(device), stops.to(device), lens.to(device)
            t_start = time.time()
            block_step = tacotron.get_step() % cfg.steps_to_eval + 1

            tacotron.train()
            lin_mels, post_mels, att = tacotron(seqs, mels)

            lin_loss = self.criterion(lin_mels, mels, lens)
            post_loss = self.criterion(post_mels, mels, lens)
            loss = lin_loss + post_loss
            loss_avg.add(loss)

            taco_opti.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(tacotron.parameters(), 1.0)
            taco_opti.step()

            duration_avg.add(time.time() - t_start)
            steps_per_s = 1. / duration_avg.get()
            self.writer.add_scalar('Loss/train', loss, tacotron.get_step())
            self.writer.add_scalar('Params/reduction_factor', session.r, tacotron.get_step())
            self.writer.add_scalar('Params/batch_size', session.bs, tacotron.get_step())
            self.writer.add_scalar('Params/learning_rate', session.lr, tacotron.get_step())

            msg = f'{block_step}/{cfg.steps_to_eval} | Step: {tacotron.get_step()} ' \
                  f'| {steps_per_s:#.2} steps/s | Avg. Loss: {loss_avg.get():#.4} '
            stream(msg)

            if tacotron.get_step() % cfg.steps_to_checkpoint == 0:
                self.save_model(model, step=tacotron.get_step())

            if tacotron.get_step() % self.cfg.steps_to_eval == 0:
                val_loss = self.evaluate(model, session.val_set, msg)
                self.writer.add_scalar('Loss/val', val_loss, tacotron.get_step())
                self.save_model(model)
                stream(msg + f'| Val Loss: {float(val_loss):#0.4} \n')
                loss_avg.reset()
                duration_avg.reset()

            if tacotron.get_step() > session.max_step:
                return
def dual_transform(self, model_tts, model_asr, optimizer_tts, optimizer_asr,
                   asr_test_set, m_loss_avg, dur_loss_avg, device, asr_current_step,
                   e, epochs, duration_avg, total_iters, tts_s_loss, asr_s_loss,
                   tts_lr, tts_dt_path):
    print('\n\nStarting DualTransformation loop...\n')
    # exit()
    tmp_dir = './checkpoints/sme_speech_tts.asr_forward/dual_transform_tmp'
    os.makedirs(tmp_dir, exist_ok=True)

    # generate tmp ASR training data
    asr_train_data = []
    input_set = get_unpaired_txt(35)
    # print(input_set)
    text = [clean_text(v) for v in input_set]
    inputs = [text_to_sequence(t) for t in text]

    # generate unpaired data for ASR from TTS
    for i, x in enumerate(inputs, 1):
        _, m, dur = model_tts.generate(x, alpha=1.)
        wav = reconstruct_waveform(m, n_iter=32)
        wav_path = os.path.join(tmp_dir, f'{i}.wav')
        save_wav(wav, wav_path)
        asr_train_data.append((wav_path, text[i - 1]))
    # print(asr_train_data)

    dt_asr_data = load_dt_data(asr_train_data)
    # reinit trainer with only tmp train data
    asr_trainer_dt = init_trainer(dt_asr_data, None)
    dt_train = asr_trainer_dt.get_train_dataloader()

    # unsupervised train loop for ASR
    for step, inputs in enumerate(dt_train, 1):
        # model_asr.cpu()
        model_asr.train()
        model_asr.to(device)
        for k, v in inputs.items():
            if isinstance(v, torch.Tensor):
                inputs[k] = v.to(device)
        # model_asr.train()
        outputs = model_asr(**inputs)
        asr_u_loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]
        # asr_u_loss.detach()
        # asr_u_loss = asr_s_loss.mean()
        # model_name = step + asr_current_step
        msg_asr = f'| ASR MODEL (unsupervised training) : ' \
                  f'| Epoch: {e}/{epochs} ({step}/{len(dt_train)}) | Loss ASR: {asr_u_loss:#.4} ' \
                  f' ||||||||||||||||||||||||||||||||||||||||||||||||'
        stream(msg_asr)

    # for f in os.listdir(tmp_dir):
    #     file_path = os.path.join(tmp_dir, f)
    #     if f.endswith('.wav'):
    #         os.unlink(file_path)

    # generate tmp TTS data from ASR
    # model_asr.to(device)
    asr_predict_for_dt(model_asr)
    subprocess.check_output('python preprocess.py -p "./data/speech-sme-tts" -d=True',
                            shell=True, stderr=subprocess.STDOUT)
    print('Finished preprocessing for tmp data!')
    tmp_tts_train = get_tts_datasets(tts_dt_path, batch_size=2, r=1,
                                     model_type='forward_dt')
    print("Loaded tmp dataset!")

    # unsupervised TTS training
    for i, (x, m, ids, x_lens, mel_lens, dur) in enumerate(tmp_tts_train, 1):
        start = time.time()
        model_tts.to(device)
        model_tts.train()
        # optimizer_tts.zero_grad()
        x, m, dur, x_lens, mel_lens = x.to(device), m.to(device), dur.to(device), \
                                      x_lens.to(device), mel_lens.to(device)

        m1_hat, m2_hat, dur_hat = model_tts(x, m, dur, mel_lens)

        m1_loss = self.l1_loss(m1_hat, m, mel_lens)
        m2_loss = self.l1_loss(m2_hat, m, mel_lens)
        dur_loss = self.l1_loss(dur_hat.unsqueeze(1), dur.unsqueeze(1), x_lens)

        tts_u_loss = m1_loss + m2_loss + 0.1 * dur_loss
        # optimizer_tts.zero_grad()
        # tts_u_loss.backward()
        torch.nn.utils.clip_grad_norm_(model_tts.parameters(), hp.tts_clip_grad_norm)
        # optimizer_tts.step()

        m_loss_avg.add(m1_loss.item() + m2_loss.item())
        dur_loss_avg.add(dur_loss.item())

        step = model_tts.get_step()
        k = step // 1000

        duration_avg.add(time.time() - start)
        # pitch_loss_avg.add(pitch_loss.item())
        speed = 1. / duration_avg.get()

        msg_tts = f'| TTS MODEL (unsupervised training): ' \
                  f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Mel Loss: {m_loss_avg.get():#.4} ' \
                  f'| Dur Loss: {dur_loss_avg.get():#.4} ' \
                  f'| {speed:#.2} steps/s | Step: {k}k | '
        stream(msg_tts)

    # m_val_loss, dur_val_loss = self.evaluate(model_tts, tts_session.val_set)

    # TODO: combine L and update
    # asr_s_loss = torch.tensor(asr_s_loss).to(device)
    combined_loss = 0.5 * (tts_s_loss + asr_s_loss) + (tts_u_loss + asr_u_loss)
    # backwards
    combined_loss.to(device)
    # print(combined_loss)
    combined_loss.backward()
    optimizer_tts.step()
    for state in optimizer_asr.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)
    optimizer_asr.step()

    m_loss_avg.reset()
    duration_avg.reset()
    # pitch_loss_avg.reset()

    dt_msg = f'\n\nFinished DT loop in epoch {e}!\n'
    stream(dt_msg)
    print(' ')
    return tts_u_loss, asr_u_loss
def train_session(self, model_tts: ForwardTacotron, model_asr: Wav2Vec2ForCTC,
                  optimizer_tts: Optimizer, tts_session: ForwardSession,
                  asr_session: ASRSession, asr_trainer, optimizer_asr) -> None:
    # print(tts_session.path)
    # exit()
    asr_trainer_state = {'logs': []}
    current_step = model_tts.get_step()
    tts_training_steps = tts_session.max_step - current_step
    try:
        _, asr_current_step = get_last_checkpoint(
            './checkpoints/sme_speech_tts.asr_forward/', 'model_at')
        asr_training_steps = tts_session.max_step - asr_current_step
    except Exception:
        asr_current_step = 0
        asr_training_steps = tts_training_steps

    total_iters = len(tts_session.train_set)
    epochs = tts_training_steps // total_iters + 1
    simple_table([
        ('TTS Steps', str(tts_training_steps // 1000) + 'k Steps'),
        ('ASR Steps', str(asr_training_steps // 1000) + 'k Steps'),
        ('Batch Size TTS', tts_session.bs),
        ('Learning Rate', tts_session.lr)
    ])

    for g in optimizer_tts.param_groups:
        g['lr'] = tts_session.lr

    m_loss_avg = Averager()
    dur_loss_avg = Averager()
    duration_avg = Averager()
    device = next(model_tts.parameters()).device  # use same device as model parameters
    warnings.filterwarnings('ignore', category=UserWarning)

    for e in range(1, epochs + 1):
        # TTS train loop for epoch
        for i, (x, m, ids, x_lens, mel_lens, dur) in enumerate(tts_session.train_set, 1):
            start = time.time()
            model_tts.train()
            x, m, dur, x_lens, mel_lens = x.to(device), m.to(device), dur.to(device), \
                                          x_lens.to(device), mel_lens.to(device)

            m1_hat, m2_hat, dur_hat = model_tts(x, m, dur, mel_lens)

            m1_loss = self.l1_loss(m1_hat, m, mel_lens)
            m2_loss = self.l1_loss(m2_hat, m, mel_lens)
            dur_loss = self.l1_loss(dur_hat.unsqueeze(1), dur.unsqueeze(1), x_lens)

            tts_s_loss = m1_loss + m2_loss + 0.1 * dur_loss
            optimizer_tts.zero_grad()
            # tts_s_loss.backward()  (backward happens on the combined DT loss later)
            torch.nn.utils.clip_grad_norm_(model_tts.parameters(), hp.tts_clip_grad_norm)
            # optimizer_tts.step()

            m_loss_avg.add(m1_loss.item() + m2_loss.item())
            dur_loss_avg.add(dur_loss.item())

            step = model_tts.get_step()
            k = step // 1000

            duration_avg.add(time.time() - start)
            # pitch_loss_avg.add(pitch_loss.item())
            speed = 1. / duration_avg.get()

            msg_tts = f'| TTS MODEL (supervised training): ' \
                      f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Mel Loss: {m_loss_avg.get():#.4} ' \
                      f'| Dur Loss: {dur_loss_avg.get():#.4} ' \
                      f'| {speed:#.2} steps/s | Step: {k}k | '

            if step % hp.forward_checkpoint_every == 0:
                ckpt_name = f'forward_step{k}K'
                save_checkpoint('forward', self.paths, model_tts, optimizer_tts,
                                name=ckpt_name, is_silent=True)

            if step % hp.forward_plot_every == 0:
                self.generate_plots(model_tts, tts_session)

            self.writer.add_scalar('Mel_Loss/train', m1_loss + m2_loss, model_tts.get_step())
            self.writer.add_scalar('Duration_Loss/train', dur_loss, model_tts.get_step())
            self.writer.add_scalar('Params/batch_size', tts_session.bs, model_tts.get_step())
            self.writer.add_scalar('Params/learning_rate', tts_session.lr, model_tts.get_step())

            stream(msg_tts)
            # print(msg_tts)
            # print(torch.cuda.memory_allocated(device=device))

        # model_tts = model_tts.to('cpu')
        for step, inputs in enumerate(asr_session.train_set):
            optimizer_asr.zero_grad()
            model_asr.to(device)
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(device)
            model_asr.train()
            outputs = model_asr(**inputs)
            asr_s_loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]
            # asr_s_loss = asr_s_loss.mean()
            msg_asr = f'| ASR MODEL (supervised training) : ' \
                      f'| Epoch: {e}/{epochs} ({step}/{len(asr_session.train_set)}) | Loss ASR: {asr_s_loss:#.4} ' \
                      f' ||||||||||||||||||||||'
            stream(msg_asr)
        # model_asr.to('cuda')

        m_val_loss, dur_val_loss = self.evaluate(model_tts, tts_session.val_set)
        eval_tts_msg = f'| TTS MODEL (supervised eval): ' \
                       f'| Epoch: {e}/{epochs} | Val Loss: {m_val_loss:#.4} ' \
                       f'| Dur Val Loss: {dur_val_loss:#.4} '
        stream(eval_tts_msg)
        tts_eval_loss = m_val_loss + dur_val_loss
        # print(eval_tts_msg)

        # ASR eval supervised
        print('\nEvaluating ASR model ...')
        # model_asr.to('cpu')
        asr_eval_loss = 0
        eval_wer = 0
        for step, inputs in enumerate(asr_session.test_set):
            asr_eval_loss_i, logits_a, labels_a = asr_trainer.prediction_step(
                model_asr, inputs, False)
            asr_eval_loss += asr_eval_loss_i
            logits_a.to('cpu')
            eval_wer_i = asr_trainer.compute_metrics(
                EvalPrediction(predictions=logits_a, label_ids=labels_a))
            eval_wer += eval_wer_i['wer']
            # print(eval_wer)
        # average over the full test set (enumerate starts at 0, so dividing
        # by the last index would drop one batch)
        eval_wer = eval_wer / len(asr_session.test_set)
        asr_eval_loss = asr_eval_loss / len(asr_session.test_set)
        msg_asr_eval = f'| ASR MODEL (supervised eval) : Epoch {e}/{epochs} ' \
                       f'| Loss ASR: {asr_eval_loss:#.4} | WER: {eval_wer} ' \
                       f'|||||||||||||||||||||||||||||||||||||||||||||||||||||'
        stream(msg_asr_eval)

        # dual transformation loop
        # tts_s_loss = 3
        # asr_s_loss = 1
        tts_u_loss, asr_u_loss = self.dual_transform(
            model_tts, model_asr, optimizer_tts, optimizer_asr, asr_session.test_set,
            m_loss_avg, dur_loss_avg, device, asr_current_step, e, epochs,
            duration_avg, total_iters, tts_s_loss, asr_s_loss,
            tts_session.lr, tts_session.path)

        step += 1
        asr_path = 'checkpoint-27364'
        modelasr_folder = './checkpoints/sme_speech_tts.asr_forward/'
        new_check = modelasr_folder + asr_path
        os.makedirs(new_check, exist_ok=True)
        # asr_path, asr_step = get_last_checkpoint(modelasr_folder, modelasr_name)

        save_checkpoint('forward', self.paths, model_tts, optimizer_tts, is_silent=True)
        # asr_u_loss = 2
        if "logs" not in asr_trainer_state:
            asr_trainer_state['logs'] = []
        asr_trainer_state['logs'].append({
            'step': step,
            'epoch': e,
            'asr_s_loss': float(asr_s_loss),
            'asr_u_loss': float(asr_u_loss),
            'tts_s_loss': float(tts_s_loss),
            'tts_u_loss': float(tts_u_loss),
            'tts_eval_loss': float(tts_eval_loss),
            'asr_eval_loss': float(asr_eval_loss),
            'eval_wer': eval_wer
        })
        with open(f'{modelasr_folder + asr_path}/dt_trainer_state.json', 'w') as f:
            json.dump(asr_trainer_state, f)
        model_asr.save_pretrained(f'{new_check}')
        torch.save(optimizer_asr.state_dict(), f'{new_check}/optimizer.pt')
        print("Exiting due to cuda OOM!")
        exit(11)
def train_session(self, model: Tacotron, optimizer: Optimizer,
                  session: TTSSession) -> None:
    current_step = model.get_step()
    training_steps = session.max_step - current_step
    total_iters = len(session.train_set)
    epochs = training_steps // total_iters + 1
    model.r = session.r
    simple_table([(f'Steps with r={session.r}', str(training_steps // 1000) + 'k Steps'),
                  ('Batch Size', session.bs),
                  ('Learning Rate', session.lr),
                  ('Outputs/Step (r)', model.r)])

    for g in optimizer.param_groups:
        g['lr'] = session.lr

    loss_avg = Averager()
    duration_avg = Averager()
    device = next(model.parameters()).device  # use same device as model parameters

    for e in range(1, epochs + 1):
        for i, (x, m, ids, x_lens, mel_lens) in enumerate(session.train_set, 1):
            start = time.time()
            model.train()
            x, m = x.to(device), m.to(device)

            m1_hat, m2_hat, attention = model(x, m)

            m1_loss = F.l1_loss(m1_hat, m)
            m2_loss = F.l1_loss(m2_hat, m)
            loss = m1_loss + m2_loss

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), hp.tts_clip_grad_norm)
            optimizer.step()
            loss_avg.add(loss.item())

            step = model.get_step()
            k = step // 1000

            duration_avg.add(time.time() - start)
            speed = 1. / duration_avg.get()

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {loss_avg.get():#.4} ' \
                  f'| {speed:#.2} steps/s | Step: {k}k | '

            if step % hp.tts_checkpoint_every == 0:
                ckpt_name = f'taco_step{k}K'
                save_checkpoint('tts', self.paths, model, optimizer,
                                name=ckpt_name, is_silent=True)

            if step % hp.tts_plot_every == 0:
                self.generate_plots(model, session)

            _, att_score = attention_score(attention, mel_lens)
            att_score = torch.mean(att_score)
            self.writer.add_scalar('Attention_Score/train', att_score, model.get_step())
            self.writer.add_scalar('Loss/train', loss, model.get_step())
            self.writer.add_scalar('Params/reduction_factor', session.r, model.get_step())
            self.writer.add_scalar('Params/batch_size', session.bs, model.get_step())
            self.writer.add_scalar('Params/learning_rate', session.lr, model.get_step())

            stream(msg)

        val_loss, val_att_score = self.evaluate(model, session.val_set)
        self.writer.add_scalar('Loss/val', val_loss, model.get_step())
        self.writer.add_scalar('Attention_Score/val', val_att_score, model.get_step())
        save_checkpoint('tts', self.paths, model, optimizer, is_silent=True)

        loss_avg.reset()
        duration_avg.reset()
        print(' ')
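# Hedged usage sketch, not part of the original source: a session object
# bundles one training phase (reduction factor r, lr, step budget, batch size
# and data). The exact TTSSession constructor below is an assumption inferred
# from the fields the loop reads.
#
#   session = TTSSession(index=1, r=2, lr=1e-4, max_step=100_000, bs=32,
#                        train_set=train_set, val_set=val_set)
#   trainer.train_session(model, optimizer, session)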
def voc_train_loop(model, loss_func, optimiser, train_set, eval_set, test_set,
                   lr, total_steps, device, hp):
    for p in optimiser.param_groups:
        p['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    trg = None
    patience = hp.patience
    min_val_loss = np.inf

    for e in range(1, epochs + 1):
        start = time.time()
        running_loss = 0.
        running_pase_reg_loss = 0.
        running_nll_loss = 0.
        pase_reg_loss = None

        for i, (x, y, xm) in enumerate(train_set, 1):
            if len(xm) == 2:
                # expand short and long term m
                xm, xlm = xm
                xm, xlm = xm.to(device), xlm.to(device)
                xm = xm.unsqueeze(1)
                xlm = xlm.unsqueeze(1)
            else:
                xm = xm.to(device).unsqueeze(1)
                xlm = None
            x, y = x.to(device), y.to(device)

            if hp.pase_ft:
                m = hp.pase(xm, xlm)
            else:
                with torch.no_grad():
                    m = hp.pase(xm, xlm)

            if hp.pase_lambda > 0:
                raise NotImplementedError
                # use an MSE loss weighted with pase_lambda
                # that ties the distorted PASE output
                # to the clean PASE soft-labels (loaded in m)
                pase_reg_loss = hp.pase_lambda * F.mse_loss(m, m_clean)

            y_hat = model(x, m)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)
            running_nll_loss += loss.item()
            optimiser.zero_grad()
            if pase_reg_loss is not None:
                total_loss = loss + pase_reg_loss
                running_pase_reg_loss += pase_reg_loss.item()
                pase_reg_avg_loss = running_pase_reg_loss / i
            else:
                total_loss = loss
            total_loss.backward()
            optimiser.step()
            running_loss += total_loss.item()

            speed = i / (time.time() - start)
            nll_avg_loss = running_nll_loss / i
            avg_loss = running_loss / i

            step = model.get_step()
            k = step // 1000

            if step % hp.voc_write_every == 0:
                hp.writer.add_scalar('train/nll', avg_loss, step)
                if pase_reg_loss is not None:
                    hp.writer.add_scalar('train/pase_reg_loss', pase_reg_avg_loss, step)

            if step % hp.voc_checkpoint_every == 0:
                if eval_set is not None:
                    print('Validating')
                    # validate the model
                    val_loss = voc_eval_loop(model, loss_func, eval_set, device)
                    if val_loss <= min_val_loss:
                        patience = hp.patience
                        print('Val loss improved: {:.4f} -> '
                              '{:.4f}'.format(min_val_loss, val_loss))
                        min_val_loss = val_loss
                    else:
                        patience -= 1
                        print('Val loss did not improve. Patience '
                              '{}/{}'.format(patience, hp.patience))
                        if patience == 0:
                            print('Out of patience. Breaking the loop')
                            break  # NOTE: this only exits the inner batch loop
                    # set to train mode again
                    model.train()
                # generate some test samples
                gen_genh_testset(model, test_set, hp.voc_gen_at_checkpoint,
                                 hp.voc_gen_batched, hp.voc_target, hp.voc_overlap,
                                 paths.voc_output, hp=hp, device=device)
                model.checkpoint(paths.voc_checkpoints)
                if hp.pase_ft:
                    hp.pase.train()
                    hp.pase.save(paths.voc_checkpoints, step)

            if pase_reg_loss is None:
                msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | NLLoss: {avg_loss:.4f} ' \
                      f'| {speed:.1f} steps/s | Step: {k}k | '
            else:
                msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Total Loss: {avg_loss:.4f} ' \
                      f'| NLLoss: {nll_avg_loss:.4f} | PASE reg loss: {pase_reg_avg_loss:.4f} ' \
                      f'| {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        model.save(paths.voc_latest_weights)
        model.log(paths.voc_log, msg)
        print(' ')
def voc_eval_loop(model, loss_func, eval_set, device):
    total_iters = len(eval_set)
    trg = None
    model.eval()
    with torch.no_grad():
        start = time.time()
        running_loss = 0.
        running_pase_reg_loss = 0.
        running_nll_loss = 0.
        pase_reg_loss = None

        for i, (x, y, xm) in enumerate(eval_set, 1):
            if len(xm) == 2:
                # expand short and long term m
                xm, xlm = xm
                xm, xlm = xm.to(device), xlm.to(device)
                xm = xm.unsqueeze(1)
                xlm = xlm.unsqueeze(1)
            else:
                xm = xm.to(device).unsqueeze(1)
                xlm = None
            x, y = x.to(device), y.to(device)

            m = hp.pase(xm, xlm)

            if hp.pase_lambda > 0:
                raise NotImplementedError
                # use an MSE loss weighted with pase_lambda
                # that ties the distorted PASE output
                # to the clean PASE soft-labels (loaded in m)
                pase_reg_loss = hp.pase_lambda * F.mse_loss(m, m_clean)

            y_hat = model(x, m)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)
            running_nll_loss += loss.item()
            if pase_reg_loss is not None:
                total_loss = loss + pase_reg_loss
                running_pase_reg_loss += pase_reg_loss.item()
                pase_reg_avg_loss = running_pase_reg_loss / i
            else:
                total_loss = loss
            running_loss += total_loss.item()

            speed = i / (time.time() - start)
            nll_avg_loss = running_nll_loss / i
            avg_loss = running_loss / i

            step = model.get_step()
            k = step // 1000

            if pase_reg_loss is None:
                msg = f'| EVAL {i}/{total_iters} | NLLoss: {avg_loss:.4f} ' \
                      f'| {speed:.1f} steps/s | Step: {k}k | '
            else:
                msg = f'| EVAL {i}/{total_iters} | Total Loss: {avg_loss:.4f} ' \
                      f'| NLLoss: {nll_avg_loss:.4f} | PASE reg loss: {pase_reg_avg_loss:.4f} ' \
                      f'| {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        hp.writer.add_scalar('eval/nll', nll_avg_loss, step)
        if pase_reg_loss is not None:
            hp.writer.add_scalar('eval/pase_reg_loss', pase_reg_avg_loss, step)
        print(' ')
        return avg_loss
def train_session(self, model: ForwardTacotron, optimizer: Optimizer,
                  session: TTSSession) -> None:
    current_step = model.get_step()
    training_steps = session.max_step - current_step
    total_iters = len(session.train_set)
    epochs = training_steps // total_iters + 1
    simple_table([('Steps', str(training_steps // 1000) + 'k Steps'),
                  ('Batch Size', session.bs),
                  ('Learning Rate', session.lr)])

    for g in optimizer.param_groups:
        g['lr'] = session.lr

    m_loss_avg = Averager()
    dur_loss_avg = Averager()
    duration_avg = Averager()
    pitch_loss_avg = Averager()
    device = next(model.parameters()).device  # use same device as model parameters

    for e in range(1, epochs + 1):
        for i, batch in enumerate(session.train_set, 1):
            batch = to_device(batch, device=device)
            start = time.time()
            model.train()

            pitch_zoneout_mask = torch.rand(batch['x'].size()) > self.train_cfg['pitch_zoneout']
            energy_zoneout_mask = torch.rand(batch['x'].size()) > self.train_cfg['energy_zoneout']
            pitch_target = batch['pitch'].detach().clone()
            energy_target = batch['energy'].detach().clone()
            batch['pitch'] = batch['pitch'] * pitch_zoneout_mask.to(device).float()
            batch['energy'] = batch['energy'] * energy_zoneout_mask.to(device).float()

            pred = model(batch)

            m1_loss = self.l1_loss(pred['mel'], batch['mel'], batch['mel_len'])
            m2_loss = self.l1_loss(pred['mel_post'], batch['mel'], batch['mel_len'])
            dur_loss = self.l1_loss(pred['dur'].unsqueeze(1), batch['dur'].unsqueeze(1), batch['x_len'])
            pitch_loss = self.l1_loss(pred['pitch'], pitch_target.unsqueeze(1), batch['x_len'])
            energy_loss = self.l1_loss(pred['energy'], energy_target.unsqueeze(1), batch['x_len'])

            loss = m1_loss + m2_loss \
                   + self.train_cfg['dur_loss_factor'] * dur_loss \
                   + self.train_cfg['pitch_loss_factor'] * pitch_loss \
                   + self.train_cfg['energy_loss_factor'] * energy_loss

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), self.train_cfg['clip_grad_norm'])
            optimizer.step()

            m_loss_avg.add(m1_loss.item() + m2_loss.item())
            dur_loss_avg.add(dur_loss.item())

            step = model.get_step()
            k = step // 1000

            duration_avg.add(time.time() - start)
            pitch_loss_avg.add(pitch_loss.item())
            speed = 1. / duration_avg.get()

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Mel Loss: {m_loss_avg.get():#.4} ' \
                  f'| Dur Loss: {dur_loss_avg.get():#.4} | Pitch Loss: {pitch_loss_avg.get():#.4} ' \
                  f'| {speed:#.2} steps/s | Step: {k}k | '

            if step % self.train_cfg['checkpoint_every'] == 0:
                save_checkpoint(model=model, optim=optimizer, config=self.config,
                                path=self.paths.forward_checkpoints / f'forward_step{k}k.pt')

            if step % self.train_cfg['plot_every'] == 0:
                self.generate_plots(model, session)

            self.writer.add_scalar('Mel_Loss/train', m1_loss + m2_loss, model.get_step())
            self.writer.add_scalar('Pitch_Loss/train', pitch_loss, model.get_step())
            self.writer.add_scalar('Energy_Loss/train', energy_loss, model.get_step())
            self.writer.add_scalar('Duration_Loss/train', dur_loss, model.get_step())
            self.writer.add_scalar('Params/batch_size', session.bs, model.get_step())
            self.writer.add_scalar('Params/learning_rate', session.lr, model.get_step())

            stream(msg)

        val_out = self.evaluate(model, session.val_set)
        self.writer.add_scalar('Mel_Loss/val', val_out['mel_loss'], model.get_step())
        self.writer.add_scalar('Duration_Loss/val', val_out['dur_loss'], model.get_step())
        self.writer.add_scalar('Pitch_Loss/val', val_out['pitch_loss'], model.get_step())
        self.writer.add_scalar('Energy_Loss/val', val_out['energy_loss'], model.get_step())
        save_checkpoint(model=model, optim=optimizer, config=self.config,
                        path=self.paths.forward_checkpoints / 'latest_model.pt')

        m_loss_avg.reset()
        duration_avg.reset()
        pitch_loss_avg.reset()
        print(' ')
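# Hedged note, not part of the original source: the "zoneout" masks above
# randomly zero a fraction of the ground-truth pitch/energy values fed to the
# model (with probability train_cfg['pitch_zoneout'] / ['energy_zoneout']),
# while the loss still targets the intact values. A minimal standalone sketch
# of the same masking idea:
#
#   pitch = torch.randn(4, 100)              # (batch, tokens)
#   mask = torch.rand(pitch.size()) > 0.1    # keep ~90% of values
#   pitch_in = pitch * mask.float()          # masked model input
#   pitch_target = pitch                     # loss target stays intact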
def voc_train_loop(model, loss_func, optimizer, train_set, test_set,
                   init_lr, final_lr, total_steps):
    total_iters = len(train_set)
    epochs = int((total_steps - model.get_step()) // total_iters + 1)

    if hp.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    torch.backends.cudnn.benchmark = True

    for e in range(1, epochs + 1):
        adjust_learning_rate(optimizer, e, epochs, init_lr, final_lr)
        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.cuda(), m.cuda(), y.cuda()

            y_hat = model(x, m)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)

            optimizer.zero_grad()
            if hp.amp:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                grad_norm = torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
            else:
                loss.backward()
                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()

            running_loss += loss.item()
            speed = i / (time.time() - start)
            avg_loss = running_loss / i

            step = model.get_step()
            k = step // 1000

            if step % hp.voc_checkpoint_every == 0:
                model.eval()
                gen_testset(model, test_set, hp.voc_gen_at_checkpoint, hp.voc_gen_batched,
                            hp.voc_target, hp.voc_overlap, paths.voc_output)
                model.checkpoint(paths.voc_checkpoints)
                model.train()

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} ' \
                  f'| {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        model.save(paths.voc_latest_weights)
        model.log(paths.voc_log, msg)
def voc_train_loop(paths: Paths, model: WaveRNN, loss_func, optimizer,
                   train_set, test_set, init_lr, final_lr, total_steps):
    # Use same device as model parameters
    device = next(model.parameters()).device

    # for g in optimizer.param_groups: g['lr'] = lr
    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):
        adjust_learning_rate(optimizer, e, epochs, init_lr, final_lr)  # initial and final learning rates - Begee
        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.to(device), m.to(device), y.to(device)  # x/y: (Batch, sub_bands, T)

            ######################### MultiBand-WaveRNN #########################
            if hp.voc_multiband:
                y0 = y[:, 0, :].squeeze(0).unsqueeze(-1)  # y0/y1/y2/y3: (Batch, T, 1)
                y1 = y[:, 1, :].squeeze(0).unsqueeze(-1)
                y2 = y[:, 2, :].squeeze(0).unsqueeze(-1)
                y3 = y[:, 3, :].squeeze(0).unsqueeze(-1)

                y_hat = model(x, m)  # (Batch, T, num_classes, sub_bands)

                if model.mode == 'RAW':
                    y_hat0 = y_hat[:, :, :, 0].transpose(1, 2).unsqueeze(-1)  # (Batch, num_classes, T, 1)
                    y_hat1 = y_hat[:, :, :, 1].transpose(1, 2).unsqueeze(-1)
                    y_hat2 = y_hat[:, :, :, 2].transpose(1, 2).unsqueeze(-1)
                    y_hat3 = y_hat[:, :, :, 3].transpose(1, 2).unsqueeze(-1)
                elif model.mode == 'MOL':
                    # NOTE: the original source never defined the per-band
                    # predictions on this branch; slicing the last axis per band
                    # is an assumption to keep the loss below well-defined.
                    y_hat0 = y_hat[:, :, :, 0]
                    y_hat1 = y_hat[:, :, :, 1]
                    y_hat2 = y_hat[:, :, :, 2]
                    y_hat3 = y_hat[:, :, :, 3]
                    y0 = y0.float()
                    y1 = y1.float()
                    y2 = y2.float()
                    y3 = y3.float()

                loss = loss_func(y_hat0, y0) + loss_func(y_hat1, y1) \
                       + loss_func(y_hat2, y2) + loss_func(y_hat3, y3)
            ######################### MultiBand-WaveRNN #########################

            optimizer.zero_grad()
            loss.backward()

            if hp.voc_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.voc_clip_grad_norm).cpu()
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')

            optimizer.step()
            running_loss += loss.item()
            avg_loss = running_loss / i

            speed = i / (time.time() - start)
            step = model.get_step()
            k = step // 1000

            if step % hp.voc_checkpoint_every == 0:
                gen_testset(model, test_set, hp.voc_gen_at_checkpoint, hp.voc_gen_batched,
                            hp.voc_target, hp.voc_overlap, paths.voc_output)
                ckpt_name = f'wave_step{k}K'
                save_checkpoint('voc', paths, model, optimizer,
                                name=ckpt_name, is_silent=True)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} ' \
                  f'| {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('voc', paths, model, optimizer, is_silent=True)
        model.log(paths.voc_log, msg)
        print(' ')
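# Hedged note, not part of the original source: in the multi-band setup above
# the waveform is split into 4 sub-band signals (hence y[:, 0..3, :]) that the
# model predicts jointly, with one loss term per band; at synthesis time the
# bands would be recombined (e.g. by a PQMF synthesis filter, as in common
# MultiBand-WaveRNN implementations). Minimal shape sketch under those
# assumptions:
#
#   y = torch.randint(0, 512, (8, 4, 1375))   # (batch, sub_bands, T)
#   y0 = y[:, 0, :].unsqueeze(-1)             # (batch, T, 1) target for band 0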
def train_session(self, model: WaveRNN, optimizer: Optimizer,
                  session: VocSession, train_gta: bool) -> None:
    current_step = model.get_step()
    training_steps = session.max_step - current_step
    total_iters = len(session.train_set)
    epochs = training_steps // total_iters + 1
    simple_table([('Steps', str(training_steps // 1000) + 'k'),
                  ('Batch Size', session.bs),
                  ('Learning Rate', session.lr),
                  ('Sequence Length', self.train_cfg['seq_len']),
                  ('GTA Training', train_gta)])

    for g in optimizer.param_groups:
        g['lr'] = session.lr

    loss_avg = Averager()
    duration_avg = Averager()
    device = next(model.parameters()).device  # use same device as model parameters

    for e in range(1, epochs + 1):
        for i, batch in enumerate(session.train_set, 1):
            start = time.time()
            model.train()
            batch = to_device(batch, device=device)
            x, y = batch['x'], batch['y']

            y_hat = model(x, batch['mel'])

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = batch['y'].float()

            y = y.unsqueeze(-1)

            loss = self.loss_func(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), self.train_cfg['clip_grad_norm'])
            optimizer.step()
            loss_avg.add(loss.item())

            step = model.get_step()
            k = step // 1000

            duration_avg.add(time.time() - start)
            speed = 1. / duration_avg.get()

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {loss_avg.get():#.4} ' \
                  f'| {speed:#.2} steps/s | Step: {k}k | '

            if step % self.train_cfg['gen_samples_every'] == 0:
                stream(msg + 'generating samples...')
                gen_result = self.generate_samples(model, session)
                if gen_result is not None:
                    mel_loss, gen_wav = gen_result
                    self.writer.add_scalar('Loss/generated_mel_l1', mel_loss, model.get_step())
                    self.track_top_models(mel_loss, gen_wav, model)

            if step % self.train_cfg['checkpoint_every'] == 0:
                save_checkpoint(model=model, optim=optimizer, config=self.config,
                                path=self.paths.voc_checkpoints / f'wavernn_step{k}k.pt')

            self.writer.add_scalar('Loss/train', loss, model.get_step())
            self.writer.add_scalar('Params/batch_size', session.bs, model.get_step())
            self.writer.add_scalar('Params/learning_rate', session.lr, model.get_step())

            stream(msg)

        val_loss = self.evaluate(model, session.val_set)
        self.writer.add_scalar('Loss/val', val_loss, model.get_step())
        save_checkpoint(model=model, optim=optimizer, config=self.config,
                        path=self.paths.voc_checkpoints / 'latest_model.pt')

        loss_avg.reset()
        duration_avg.reset()
        print(' ')
def train_session(self, model: ForwardTacotron, optimizer: Optimizer,
                  session: TTSSession) -> None:
    current_step = model.get_step()
    training_steps = session.max_step - current_step
    total_iters = len(session.train_set)
    epochs = training_steps // total_iters + 1
    simple_table([('Steps', str(training_steps // 1000) + 'k Steps'),
                  ('Batch Size', session.bs),
                  ('Learning Rate', session.lr)])

    for g in optimizer.param_groups:
        g['lr'] = session.lr

    m_loss_avg = Averager()
    dur_loss_avg = Averager()
    duration_avg = Averager()
    pitch_loss_avg = Averager()
    device = next(model.parameters()).device  # use same device as model parameters

    for e in range(1, epochs + 1):
        for i, (x, m, ids, x_lens, mel_lens, dur, pitch, puncts) in enumerate(
                session.train_set, 1):
            start = time.time()
            model.train()
            x, m, dur, x_lens, mel_lens, pitch, puncts = (
                x.to(device), m.to(device), dur.to(device), x_lens.to(device),
                mel_lens.to(device), pitch.to(device), puncts.to(device),
            )
            # print("*" * 20)
            # print(x)
            # print("*" * 20)

            m1_hat, m2_hat, dur_hat, pitch_hat = model(x, m, dur, mel_lens, pitch, puncts)

            m1_loss = self.l1_loss(m1_hat, m, mel_lens)
            m2_loss = self.l1_loss(m2_hat, m, mel_lens)
            dur_loss = self.l1_loss(dur_hat.unsqueeze(1), dur.unsqueeze(1), x_lens)
            pitch_loss = self.l1_loss(pitch_hat, pitch.unsqueeze(1), x_lens)

            loss = m1_loss + m2_loss + 0.3 * dur_loss + 0.1 * pitch_loss

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), hp.tts_clip_grad_norm)
            optimizer.step()

            m_loss_avg.add(m1_loss.item() + m2_loss.item())
            dur_loss_avg.add(dur_loss.item())

            step = model.get_step()
            k = step // 1000

            duration_avg.add(time.time() - start)
            pitch_loss_avg.add(pitch_loss.item())
            speed = 1. / duration_avg.get()

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Mel Loss: {m_loss_avg.get():#.4} ' \
                  f'| Dur Loss: {dur_loss_avg.get():#.4} | Pitch Loss: {pitch_loss_avg.get():#.4} ' \
                  f'| {speed:#.2} steps/s | Step: {k}k | '

            if step % hp.forward_checkpoint_every == 0:
                ckpt_name = f'forward_step{k}K'
                save_checkpoint('forward', self.paths, model, optimizer,
                                name=ckpt_name, is_silent=True)

            if step % hp.forward_plot_every == 0:
                self.generate_plots(model, session)

            self.writer.add_scalar('Mel_Loss/train', m1_loss + m2_loss, model.get_step())
            self.writer.add_scalar('Pitch_Loss/train', pitch_loss, model.get_step())
            self.writer.add_scalar('Duration_Loss/train', dur_loss, model.get_step())
            self.writer.add_scalar('Params/batch_size', session.bs, model.get_step())
            self.writer.add_scalar('Params/learning_rate', session.lr, model.get_step())

            stream(msg)

        m_val_loss, dur_val_loss, pitch_val_loss = self.evaluate(model, session.val_set)
        self.writer.add_scalar('Mel_Loss/val', m_val_loss, model.get_step())
        self.writer.add_scalar('Duration_Loss/val', dur_val_loss, model.get_step())
        self.writer.add_scalar('Pitch_Loss/val', pitch_val_loss, model.get_step())
        save_checkpoint('forward', self.paths, model, optimizer, is_silent=True)

        m_loss_avg.reset()
        duration_avg.reset()
        pitch_loss_avg.reset()
        print(' ')
def voc_train_loop(paths: Paths, model: WaveRNN, loss_func, optimizer,
                   train_set, test_set, lr, total_steps):
    # Use same device as model parameters
    device = next(model.parameters()).device

    # set learning rate
    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1
    total_number_of_batches = len(train_set)

    # EPOCH and model_name_prefix are module-level settings in this script
    writer = SummaryWriter("runs/{0}-{1}".format(
        model_name_prefix, datetime.now().strftime("%Y%m%d-%H%M%S")))
    scheduler = StepLR(optimizer, step_size=1, gamma=0.983)

    for e in range(EPOCH, epochs + 1):
        start = time.time()
        running_loss = 0.
        avg_loss = 0

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.to(device), m.to(device), y.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                y_hat = data_parallel_workaround(model, x, m)
            else:
                y_hat = model(x, m)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)

            optimizer.zero_grad()
            loss.backward()
            if hp.voc_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.voc_clip_grad_norm)
            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i
            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            # Write to tensorboard per batch
            writer.add_scalar('Epoch loss', loss.item(), e * total_number_of_batches + i)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} ' \
                  f'| {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        """
        ####################### Testing ############################
        torch.cuda.empty_cache()
        loss_test = 0
        for _, (x_test, y_test, m_test) in enumerate(test_set, 1):
            x_test, m_test, y_test = x_test.to(device), m_test.to(device), y_test.to(device)
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                raise RuntimeError("Unsupported")
            else:
                y_test_hat = model(x_test, m_test)
            if model.mode == 'RAW':
                y_test_hat = y_test_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y_test = y_test.float()
            y_test = y_test.unsqueeze(-1)
            loss_test += loss_func(y_test_hat, y_test).item()
        avg_loss_test = loss_test / len(test_set)
        msg = f'| Epoch: {e}/{epochs} | Test-Loss: {loss_test:.4f} | Test-AvgLoss: {avg_loss_test:.4f} | '
        stream("\n")
        stream(msg)
        writer.add_scalar('Test loss', loss_test, e)
        writer.add_scalar('Average test loss', avg_loss_test, e)
        ############################################################
        """

        # Write to tensorboard per epoch
        writer.add_scalar('Running loss', running_loss, e)
        writer.add_scalar('Average loss', avg_loss, e)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('voc', paths, model, optimizer,
                        name="{0}-epoch-{1}-loss-{2}".format(model_name_prefix, e, avg_loss),
                        is_silent=True)
        model.log(paths.voc_log, msg)
        print(' ')

        scheduler.step()
        print('Epoch:', e, 'LR:', scheduler.get_last_lr())
def voc_train_loop(paths: Paths, model: WaveRNN, loss_func, optimizer,
                   train_set, test_set, lr, total_steps):
    # Use same device as model parameters
    device = next(model.parameters()).device

    for g in optimizer.param_groups:
        g['lr'] = lr

    total_iters = len(train_set)
    epochs = (total_steps - model.get_step()) // total_iters + 1

    for e in range(1, epochs + 1):
        start = time.time()
        running_loss = 0.

        for i, (x, y, m) in enumerate(train_set, 1):
            x, m, y = x.to(device), m.to(device), y.to(device)

            # Parallelize model onto GPUS using workaround due to python bug
            if device.type == 'cuda' and torch.cuda.device_count() > 1:
                y_hat = data_parallel_workaround(model, x, m)
            else:
                y_hat = model(x, m)

            if model.mode == 'RAW':
                y_hat = y_hat.transpose(1, 2).unsqueeze(-1)
            elif model.mode == 'MOL':
                y = y.float()

            y = y.unsqueeze(-1)

            loss = loss_func(y_hat, y)

            optimizer.zero_grad()
            loss.backward()
            if hp.voc_clip_grad_norm is not None:
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), hp.voc_clip_grad_norm)
                if np.isnan(grad_norm):
                    print('grad_norm was NaN!')
            optimizer.step()

            running_loss += loss.item()
            avg_loss = running_loss / i
            speed = i / (time.time() - start)

            step = model.get_step()
            k = step // 1000

            if step % hp.voc_checkpoint_every == 0:
                gen_testset(model, test_set, hp.voc_gen_at_checkpoint, hp.voc_gen_batched,
                            hp.voc_target, hp.voc_overlap, paths.voc_output)
                ckpt_name = f'wave_step{k}K'
                save_checkpoint('voc', paths, model, optimizer,
                                name=ckpt_name, is_silent=True)

            msg = f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Loss: {avg_loss:.4f} ' \
                  f'| {speed:.1f} steps/s | Step: {k}k | '
            stream(msg)

        # Must save latest optimizer state to ensure that resuming training
        # doesn't produce artifacts
        save_checkpoint('voc', paths, model, optimizer, is_silent=True)
        model.log(paths.voc_log, msg)
        print(' ')
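# Hedged usage sketch for the loop above, not part of the original source;
# names mirror WaveRNN-style repos and hyperparameter values are illustrative
# only.
#
#   voc_model = WaveRNN(...).to(device)
#   optimizer = optim.Adam(voc_model.parameters())
#   train_set, test_set = get_vocoder_datasets(paths.data, batch_size=32, train_gta=False)
#   loss_func = F.cross_entropy if voc_model.mode == 'RAW' else discretized_mix_logistic_loss
#   voc_train_loop(paths, voc_model, loss_func, optimizer, train_set, test_set,
#                  lr=1e-4, total_steps=1_000_000)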
    return basename


if __name__ == '__main__':
    wav_paths = list(Path(hp.wav_dir).rglob('*.wav'))
    print(f'\n{len(wav_paths)} wav files found in "{hp.wav_dir}"\n')

    if len(wav_paths) == 0:
        print('Please point wav_dir in hparams.py to your dataset.')
    else:
        os.makedirs(hp.data_dir, exist_ok=True)
        os.makedirs(os.path.join(hp.data_dir, 'mel'), exist_ok=True)
        os.makedirs(os.path.join(hp.data_dir, 'quant'), exist_ok=True)

        pool = Pool(processes=cpu_count() - 1)
        basenames = []

        for i, basename in enumerate(pool.imap_unordered(process_wav, wav_paths), 1):
            basenames.append(basename)
            bar = progbar(i, len(wav_paths))
            message = f'{bar} {i}/{len(wav_paths)} '
            stream(message)

        with open(hp.data_dir + '/basenames.pkl', 'wb') as f:
            pickle.dump(basenames, f)

        print('\n\nCompleted.\n')