def log_validation(self, reduced_loss, model, y, y_pred, iteration):
    """Log validation loss and diagnostic plots to wandb, and save the
    same plots as PNG files under ``self.outdir``.

    Fix: the original built every plot twice — once for the PNG on disk
    and once for the wandb image. Each plot is now built once and reused.

    Args:
        reduced_loss: scalar validation loss (already reduced).
        model: the model (unused here, kept for interface compatibility).
        y: (mel_targets, gate_targets) ground-truth tuple.
        y_pred: model output tuple; positions 1-3 are mel outputs, gate
            outputs and attention alignments.
        iteration: global step used for file names and the wandb step.
    """
    log_dict = {"loss/val": reduced_loss}
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # plot alignment, mel target and predicted, gate target and predicted
    # for one randomly chosen batch element
    idx = random.randint(0, alignments.size(0) - 1)
    align = Image.fromarray(
        plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T))
    align.save(os.path.join(self.outdir, f'align_{iteration:08}.png'))
    target = Image.fromarray(
        plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()))
    target.save(os.path.join(self.outdir, f'target_{iteration:08}.png'))
    output = Image.fromarray(
        plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()))
    output.save(os.path.join(self.outdir, f'output_{iteration:08}.png'))
    gate = Image.fromarray(
        plot_gate_outputs_to_numpy(
            gate_targets[idx].data.cpu().numpy(),
            torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()))
    gate.save(os.path.join(self.outdir, f'gate_{iteration:08}.png'))
    # Reuse the PIL images already built above for the wandb mirror.
    log_dict.update({
        "alignment": wandb.Image(align, caption='att'),
        "mel_target": wandb.Image(target, caption='att'),
        "mel_predicted": wandb.Image(output, caption='att'),
        "gate": wandb.Image(gate, caption='att'),
    })
    wandb.log(log_dict, step=iteration)
def log_teacher_forced_validation(self, reduced_loss, model, y, y_pred,
                                  iteration, val_teacher_force_till,
                                  val_p_teacher_forcing, diagonality,
                                  avg_prob):
    """Log teacher-forced validation scalars, parameter histograms and
    per-file diagnostic plots to TensorBoard.

    Fix: the plotting code was duplicated verbatim for idx 0 and idx 1;
    it is now a single loop over (index, tag-suffix) pairs that emits
    the exact same tags in the exact same order.
    """
    self.add_scalar("teacher_forced_validation.loss", reduced_loss,
                    iteration)
    self.add_scalar(
        "teacher_forced_validation.attention_alignment_diagonality",
        diagonality, iteration)
    self.add_scalar(
        "teacher_forced_validation.average_max_attention_weight",
        avg_prob, iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets, *_ = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # Plot alignment, mel target/predicted and gate for the longest
    # (idx 0) and 2nd-longest (idx 1) audio files.
    for idx, suffix in ((0, ''), (1, '2')):
        self.add_image(
            f"teacher_forced_alignment{suffix}",
            plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T),
            iteration, dataformats='HWC')
        self.add_image(
            f"mel_target{suffix}",
            plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()),
            iteration, dataformats='HWC')
        self.add_image(
            f"mel_predicted{suffix}",
            plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()),
            iteration, dataformats='HWC')
        self.add_image(
            f"gate{suffix}",
            plot_gate_outputs_to_numpy(
                gate_targets[idx].data.cpu().numpy(),
                torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
            iteration, dataformats='HWC')
def log_validation(self, reduced_loss, model, y, y_pred, iteration):
    """Write validation loss, parameter histograms, and alignment / mel /
    gate plots for one random batch element to TensorBoard."""
    self.add_scalar("validation.loss", reduced_loss, iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets, alignment_targets = y
    # Histogram of every model parameter.
    for name, param in model.named_parameters():
        self.add_histogram(name.replace('.', '/'),
                           param.data.cpu().numpy(), iteration)
    # Pick one random element of the batch to visualise.
    sample = random.randint(0, alignments.size(0) - 1)
    attn = alignments[sample].data.cpu().numpy().T
    target_mel = mel_targets[sample].data.cpu().numpy()
    predicted_mel = mel_outputs[sample].data.cpu().numpy()
    gate_t = gate_targets[sample].data.cpu().numpy()
    gate_p = torch.sigmoid(gate_outputs[sample]).data.cpu().numpy()
    self.add_image("alignment", plot_alignment_to_numpy(attn),
                   iteration, dataformats='HWC')
    self.add_image("mel_target", plot_spectrogram_to_numpy(target_mel),
                   iteration, dataformats='HWC')
    self.add_image("mel_predicted",
                   plot_spectrogram_to_numpy(predicted_mel),
                   iteration, dataformats='HWC')
    self.add_image("gate", plot_gate_outputs_to_numpy(gate_t, gate_p),
                   iteration, dataformats='HWC')
def log_validation(self, reduced_loss, model, y, y_pred, iteration):
    """Log validation loss and diagnostic plots to wandb.

    Fix: the ``iteration`` argument was accepted but never used, so wandb
    fell back to its own auto-incremented step counter; the log call is
    now pinned to ``step=iteration`` so validation entries line up with
    training iterations. (Previously-disabled TensorBoard scalar and
    histogram logging was dead commented-out code and has been removed.)
    """
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # plot alignment, mel target and predicted, gate target and predicted
    idx = random.randint(0, alignments.size(0) - 1)
    align = plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T)
    spec = plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy())
    mel = plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy())
    gate = plot_gate_outputs_to_numpy(
        gate_targets[idx].data.cpu().numpy(),
        torch.sigmoid(gate_outputs[idx]).data.cpu().numpy())
    wandb = self.wandb
    wandb.log({
        "validation loss": reduced_loss,
        "alignment": wandb.Image(align),
        "spectrogram": wandb.Image(spec),
        "mel_spec": wandb.Image(mel),
        "gate": wandb.Image(gate),
    }, step=iteration)
def log_validation(self, reduced_loss, model, y, y_pred, gst_scores,
                   iteration):
    """Log validation loss, parameter histograms, GST score plot, and
    alignment / mel / gate plots to TensorBoard.

    Fix: ``F.sigmoid`` is deprecated in PyTorch — replaced with
    ``torch.sigmoid`` (same result), matching the rest of the file.
    """
    self.add_scalar("validation.loss", reduced_loss, iteration)
    _, mel_outputs, gate_outputs, alignments, _ = y_pred
    mel_targets, gate_targets = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # plot alignment, GST scores, mel target and predicted, gate
    idx = random.randint(0, alignments.size(0) - 1)
    align_idx = alignments[idx].data.cpu().numpy().T
    gst_scores = gst_scores.data.cpu().numpy().T
    meltarg_idx = mel_targets[idx].data.cpu().numpy()
    melout_idx = mel_outputs[idx].data.cpu().numpy()
    self.add_image("alignment", plot_alignment_to_numpy(align_idx),
                   iteration)
    self.add_image("gst_scores", plot_gst_scores_to_numpy(gst_scores),
                   iteration)
    self.add_image("mel_target", plot_spectrogram_to_numpy(meltarg_idx),
                   iteration)
    self.add_image("mel_predicted", plot_spectrogram_to_numpy(melout_idx),
                   iteration)
    self.add_image(
        "gate",
        plot_gate_outputs_to_numpy(
            gate_targets[idx].data.cpu().numpy(),
            # torch.sigmoid replaces the deprecated F.sigmoid
            torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
        iteration)
def log_infer(self, reduced_loss, model, y, y_pred, iteration,
              val_teacher_force_till, val_p_teacher_forcing, diagonality,
              avg_prob):
    """Log inference-mode validation scalars and per-file plots.

    Fix: the plotting code was duplicated verbatim for idx 0 and idx 1;
    it is now a single loop over (index, tag-suffix) pairs that emits
    the exact same tags in the exact same order.
    """
    self.add_scalar("infer.loss", reduced_loss, iteration)
    self.add_scalar("infer.attention_alignment_diagonality", diagonality,
                    iteration)
    self.add_scalar("infer.average_max_attention_weight", avg_prob,
                    iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets, *_ = y
    # Plot alignment, mel target/predicted and gate for the longest
    # (idx 0) and 2nd-longest (idx 1) audio files.
    for idx, suffix in ((0, ''), (1, '2')):
        self.add_image(
            f"infer_alignment{suffix}",
            plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T),
            iteration, dataformats='HWC')
        self.add_image(
            f"infer_mel_target{suffix}",
            plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()),
            iteration, dataformats='HWC')
        self.add_image(
            f"infer_mel_predicted{suffix}",
            plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()),
            iteration, dataformats='HWC')
        self.add_image(
            f"infer_gate{suffix}",
            plot_gate_outputs_to_numpy(
                gate_targets[idx].data.cpu().numpy(),
                torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
            iteration, dataformats='HWC')
def log_validation(self, reduced_loss, model, y, y_pred, iteration,
                   model_name="", log_embedding=False):
    """TensorBoard validation logging. Plot images are converted from
    HWC to CHW (the add_image default) before logging; optionally the
    speaker-embedding matrix is logged as an embedding projector entry."""
    self.add_scalar("validation.loss", reduced_loss, iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # Histogram of every model parameter.
    for name, param in model.named_parameters():
        self.add_histogram(name.replace('.', '/'),
                           param.data.cpu().numpy(), iteration)
    # Visualise one random batch element.
    sample = random.randint(0, alignments.size(0) - 1)

    def chw(img):
        # The plot helpers return HWC arrays; add_image expects CHW here.
        return np.moveaxis(img, 2, 0)

    self.add_image(
        "alignment",
        chw(plot_alignment_to_numpy(
            alignments[sample].data.cpu().numpy().T)),
        iteration)
    self.add_image(
        "mel_target",
        chw(plot_spectrogram_to_numpy(
            mel_targets[sample].data.cpu().numpy())),
        iteration)
    self.add_image(
        "mel_predicted",
        chw(plot_spectrogram_to_numpy(
            mel_outputs[sample].data.cpu().numpy())),
        iteration)
    self.add_image(
        "gate",
        chw(plot_gate_outputs_to_numpy(
            gate_targets[sample].data.cpu().numpy(),
            torch.sigmoid(gate_outputs[sample]).data.cpu().numpy())),
        iteration)
    if log_embedding:
        labels = [str(i)
                  for i in range(model.speaker_embedding.num_embeddings)]
        self.add_embedding(
            model.speaker_embedding.weight.detach().cpu().numpy(),
            labels, global_step=iteration,
            tag='emb_{}'.format(model_name))
def log_infer(self, reduced_loss, model, y, y_pred, iteration,
              val_teacher_force_till, val_p_teacher_forcing, diagonality,
              avg_prob):
    """Log inference scalars and, for the 5 longest files, alignment,
    target/predicted mel (plus optional GAN mel) and gate plots."""
    self.add_scalar("infer.loss", reduced_loss, iteration)
    self.add_scalar("infer.attention_alignment_diagonality", diagonality,
                    iteration)
    self.add_scalar("infer.average_max_attention_weight", avg_prob,
                    iteration)
    mel_outputs, mel_outputs_postnet, gate_outputs, alignments, *_ = y_pred
    if mel_outputs_postnet is not None:
        # Prefer the postnet output when available.
        mel_outputs = mel_outputs_postnet
    mel_outputs_GAN = y_pred[8][0]
    mel_targets, gate_targets, *_ = y
    # Trim predictions to the target length along the time axis.
    mel_outputs = mel_outputs[:, :mel_targets.shape[1], :]
    plot_n_files = 5
    # plot infer alignment, mel target and predicted, gate predicted
    for idx in range(plot_n_files):  # plot longest x audio files
        suffix = '' if idx == 0 else idx
        self.add_image(f"infer_alignment{suffix}",
                       plot_alignment_to_numpy(
                           alignments[idx].data.cpu().numpy().T),
                       iteration, dataformats='HWC')
        self.add_image(f"infer_mel_target{suffix}",
                       plot_spectrogram_to_numpy(
                           mel_targets[idx].data.cpu().numpy()),
                       iteration, dataformats='HWC')
        self.add_image(f"infer_mel_predicted{suffix}",
                       plot_spectrogram_to_numpy(
                           mel_outputs[idx].data.cpu().numpy()),
                       iteration, dataformats='HWC')
        if mel_outputs_GAN is not None:
            self.add_image(f"mel_predicted_GAN{suffix}",
                           plot_spectrogram_to_numpy(
                               mel_outputs_GAN[idx].data.cpu().numpy()),
                           iteration, dataformats='HWC')
        self.add_image(f"infer_gate{suffix}",
                       plot_gate_outputs_to_numpy(
                           gate_targets[idx].data.cpu().numpy(),
                           torch.sigmoid(
                               gate_outputs[idx]).data.cpu().numpy()),
                       iteration, dataformats='HWC')
def log_validation(self, reduced_loss, model, y, y_pred, iteration,
                   speaker_acc=0, augment_acc=0):
    """Log validation loss, classifier accuracies, parameter histograms
    and alignment / mel / gate plots (converted to CHW via permute).

    Fix: ``F.sigmoid`` is deprecated in PyTorch — replaced with
    ``torch.sigmoid`` (same result), matching the rest of the file.
    """
    self.add_scalar("validation.loss", reduced_loss, iteration)
    self.add_scalar("Speaker_classifier_ACC", speaker_acc, iteration)
    self.add_scalar("Augment_classifier_ACC", augment_acc, iteration)
    (_, mel_outputs, gate_outputs, alignments, speaker_output,
     augmentation_output, _, _) = y_pred
    mel_targets, gate_targets, speaker_id, labels = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # plot alignment, mel target and predicted, gate target and predicted
    idx = random.randint(0, alignments.size(0) - 1)
    self.add_image(
        "alignment",
        torch.from_numpy(
            plot_alignment_to_numpy(
                alignments[idx].data.cpu().numpy().T)).permute(2, 0, 1),
        iteration)
    self.add_image(
        "mel_target",
        torch.from_numpy(
            plot_spectrogram_to_numpy(
                mel_targets[idx].data.cpu().numpy())).permute(2, 0, 1),
        iteration)
    self.add_image(
        "mel_predicted",
        torch.from_numpy(
            plot_spectrogram_to_numpy(
                mel_outputs[idx].data.cpu().numpy())).permute(2, 0, 1),
        iteration)
    self.add_image(
        "gate",
        torch.from_numpy(
            plot_gate_outputs_to_numpy(
                gate_targets[idx].data.cpu().numpy(),
                # torch.sigmoid replaces the deprecated F.sigmoid
                torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()
            )).permute(2, 0, 1),
        iteration)
def log_validation(self, reduced_loss, model, y, y_pred, iteration):
    """TensorBoard validation logging plus vocoded audio: the first
    predicted mel in the batch is run through ``self.melgan`` and the
    resulting waveform is logged."""
    self.add_scalar("validation.loss", reduced_loss, iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # Histogram of every model parameter.
    for name, param in model.named_parameters():
        self.add_histogram(name.replace('.', '/'),
                           param.data.cpu().numpy(), iteration)
    # Visualise one random batch element.
    sample = random.randint(0, alignments.size(0) - 1)
    images = {
        "alignment": plot_alignment_to_numpy(
            alignments[sample].data.cpu().numpy().T),
        "mel_target": plot_spectrogram_to_numpy(
            mel_targets[sample].data.cpu().numpy()),
        "mel_predicted": plot_spectrogram_to_numpy(
            mel_outputs[sample].data.cpu().numpy()),
        "gate": plot_gate_outputs_to_numpy(
            gate_targets[sample].data.cpu().numpy(),
            torch.sigmoid(gate_outputs[sample]).data.cpu().numpy()),
    }
    for tag, img in images.items():
        self.add_image(tag, img, iteration, dataformats='HWC')
    # Vocode the first predicted mel and log the waveform.
    mel = mel_outputs.cpu()[0]
    if len(mel.shape) == 2:
        mel = mel.unsqueeze(0)  # add the batch dim the vocoder expects
    audio = self.melgan.inference(mel)
    self.add_audio('audio', audio, global_step=iteration,
                   sample_rate=self.sampling_rate, walltime=None)
def log_validation(self, reduced_loss, model, y, y_pred, iteration):
    """Mirror validation logging to both TensorBoard and wandb.

    Fix: the gate plot was written to TensorBoard only while every other
    plot was mirrored to wandb — an apparent copy-paste omission; it is
    now mirrored as well.
    """
    self.add_scalar("validation.loss", reduced_loss, iteration)
    wandb.log({'validation.loss': reduced_loss}, step=iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # plot alignment, mel target and predicted, gate target and predicted
    idx = random.randint(0, alignments.size(0) - 1)
    alignment_arr = plot_alignment_to_numpy(
        alignments[idx].data.cpu().numpy().T)
    self.add_image("alignment", alignment_arr, iteration)
    wandb.log(
        {"alignment": [wandb.Image(alignment_arr, caption="Alignment")]},
        step=iteration)
    mel_target = plot_spectrogram_to_numpy(
        mel_targets[idx].data.cpu().numpy())
    self.add_image("mel_target", mel_target, iteration)
    wandb.log(
        {"mel_target": [wandb.Image(mel_target, caption="Mel target")]},
        step=iteration)
    mel_predicted = plot_spectrogram_to_numpy(
        mel_outputs[idx].data.cpu().numpy())
    self.add_image("mel_predicted", mel_predicted, iteration)
    wandb.log(
        {
            "mel_predicted":
            [wandb.Image(mel_predicted, caption="Mel predicted")]
        },
        step=iteration)
    gate = plot_gate_outputs_to_numpy(
        gate_targets[idx].data.cpu().numpy(),
        torch.sigmoid(gate_outputs[idx]).data.cpu().numpy())
    self.add_image("gate", gate, iteration)
    wandb.log({"gate": [wandb.Image(gate, caption="Gate")]},
              step=iteration)
def log_validation(self, reduced_loss, model, x, y, y_pred, iteration,
                   hparams):
    """Log validation loss, histograms, plots, Griffin-Lim audio from
    both target and predicted mels, and the decoded input text.

    Fix: ``F.sigmoid`` is deprecated in PyTorch — replaced with
    ``torch.sigmoid`` (same result), matching the rest of the file.
    """
    self.add_scalar("validation.loss", reduced_loss, iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # plot alignment, mel target and predicted, gate target and predicted
    idx = random.randint(0, alignments.size(0) - 1)
    self.add_image(
        "alignment",
        plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T),
        iteration)
    self.add_image(
        "mel_target",
        plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()),
        iteration)
    self.add_image(
        "mel_predicted",
        plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()),
        iteration)
    self.add_image(
        "gate",
        plot_gate_outputs_to_numpy(
            gate_targets[idx].data.cpu().numpy(),
            # torch.sigmoid replaces the deprecated F.sigmoid
            torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
        iteration)
    self.add_audio(
        "audio_from_target",
        synthesis_griffin_lim(mel_targets[idx].unsqueeze(0), hparams),
        iteration, hparams.sampling_rate)
    self.add_audio(
        "audio_from_predicted",
        synthesis_griffin_lim(mel_outputs[idx].unsqueeze(0), hparams),
        iteration, hparams.sampling_rate)
    # Decode the sampled element's input symbol ids back to text.
    self.add_text(
        "text",
        ''.join([
            _id_to_symbol[symbol_id]
            for symbol_id in x[0][idx].data.cpu().numpy()
        ]), iteration)
def log_validation(self, reduced_loss, model, y, y_pred, iteration):
    """TensorBoard validation logging. When ``self.use_vae`` is set the
    model output additionally carries latent means and emotion labels,
    and scatter / t-SNE plots of the latent space are logged too."""
    self.add_scalar("validation.loss", reduced_loss, iteration)
    if self.use_vae:
        _, mel_outputs, gate_outputs, alignments, mus, _, _, emotions = \
            y_pred
    else:
        _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # Histogram of every model parameter.
    for name, param in model.named_parameters():
        self.add_histogram(name.replace('.', '/'),
                           param.data.cpu().numpy(), iteration)
    # Visualise one random batch element.
    sample = random.randint(0, alignments.size(0) - 1)
    plots = (
        ("alignment", plot_alignment_to_numpy(
            alignments[sample].data.cpu().numpy().T)),
        ("mel_target", plot_spectrogram_to_numpy(
            mel_targets[sample].data.cpu().numpy())),
        ("mel_predicted", plot_spectrogram_to_numpy(
            mel_outputs[sample].data.cpu().numpy())),
        ("gate", plot_gate_outputs_to_numpy(
            gate_targets[sample].data.cpu().numpy(),
            torch.sigmoid(gate_outputs[sample]).data.cpu().numpy())),
    )
    for tag, img in plots:
        self.add_image(tag, img, iteration, dataformats=self.dataformat)
    if self.use_vae:
        self.add_image("latent_dim (regular)",
                       plot_scatter(mus, emotions),
                       iteration, dataformats=self.dataformat)
        self.add_image("latent_dim (t-sne)",
                       plot_tsne(mus, emotions),
                       iteration, dataformats=self.dataformat)
def log_validation(self, reduced_loss, model, y, y_pred, iteration, stft):
    """Log validation loss and histograms, save a combined spectrogram
    figure and reconstructed audio for batch element 0, and log mel /
    gate plots for a random element.

    Fix: ``F.sigmoid`` is deprecated in PyTorch — replaced with
    ``torch.sigmoid`` (same result).
    """
    self.add_scalar("validation.loss", reduced_loss, iteration)
    decoder_outputs, mel_outputs, gate_outputs, alignment = y_pred
    mel_targets, gate_targets = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # NOTE(review): `idx` is random but the combined figure and the saved
    # audio below always use element 0 — looks intentional, but confirm.
    idx = random.randint(0, mel_outputs.size(0) - 1)
    index = 0
    plot_spectrogram(mel_targets[index].data.cpu().numpy(),
                     decoder_outputs[index].data.cpu().numpy(),
                     mel_outputs[index].data.cpu().numpy(),
                     alignment[index].data.cpu().numpy(),
                     self.logdir, iteration, append="eval")
    save_audio(mel_outputs[index], self.logdir, iteration, stft, False)
    self.add_image(
        "mel_target",
        plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()),
        iteration)
    self.add_image(
        "mel_predicted",
        plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()),
        iteration)
    self.add_image(
        "gate",
        plot_gate_outputs_to_numpy(
            gate_targets[idx].data.cpu().numpy(),
            # torch.sigmoid replaces the deprecated F.sigmoid
            torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
        iteration)
def log_teacher_forced_validation(self, reduced_loss, model, y, y_pred,
                                  iteration, val_teacher_force_till,
                                  val_p_teacher_forcing, diagonality,
                                  avg_prob):
    """Teacher-forced validation logging: scalars, parameter histograms,
    and per-file plots (alignment, target/predicted mel, optional GAN
    mel, squared-error map, gate) for the 5 longest files."""
    self.add_scalar("teacher_forced_validation.loss", reduced_loss,
                    iteration)
    self.add_scalar(
        "teacher_forced_validation.attention_alignment_diagonality",
        diagonality, iteration)
    self.add_scalar(
        "teacher_forced_validation.average_max_attention_weight",
        avg_prob, iteration)
    mel_outputs, mel_outputs_postnet, gate_outputs, alignments, *_ = y_pred
    if mel_outputs_postnet is not None:
        mel_outputs = mel_outputs_postnet  # prefer the postnet output
    mel_outputs_GAN = y_pred[8][0]
    mel_targets, gate_targets, *_ = y
    # Trim predictions to the target length, then build an element-wise
    # squared-error map between prediction and target.
    mel_outputs = mel_outputs[:, :mel_targets.shape[1], :]
    mel_MSE_map = torch.nn.MSELoss(reduction='none')(mel_outputs,
                                                     mel_targets)
    # because otherwise the color map scale is crap
    mel_MSE_map[:, -1, -1] = 20.0
    # Histogram of every model parameter.
    for name, param in model.named_parameters():
        self.add_histogram(name.replace('.', '/'),
                           param.data.cpu().numpy(), iteration)
    plot_n_files = 5
    # plot alignment, mel target and predicted, gate target and predicted
    for idx in range(plot_n_files):  # plot longest x audio files
        suffix = '' if idx == 0 else idx
        self.add_image(f"teacher_forced_alignment{suffix}",
                       plot_alignment_to_numpy(
                           alignments[idx].data.cpu().numpy().T),
                       iteration, dataformats='HWC')
        self.add_image(f"mel_target{suffix}",
                       plot_spectrogram_to_numpy(
                           mel_targets[idx].data.cpu().numpy()),
                       iteration, dataformats='HWC')
        self.add_image(f"mel_predicted{suffix}",
                       plot_spectrogram_to_numpy(
                           mel_outputs[idx].data.cpu().numpy()),
                       iteration, dataformats='HWC')
        if mel_outputs_GAN is not None:
            self.add_image(f"mel_predicted_GAN{suffix}",
                           plot_spectrogram_to_numpy(
                               mel_outputs_GAN[idx].data.cpu().numpy()),
                           iteration, dataformats='HWC')
        self.add_image(f"mel_squared_error{suffix}",
                       plot_spectrogram_to_numpy(
                           mel_MSE_map[idx].data.cpu().numpy()),
                       iteration, dataformats='HWC')
        self.add_image(f"gate{suffix}",
                       plot_gate_outputs_to_numpy(
                           gate_targets[idx].data.cpu().numpy(),
                           torch.sigmoid(
                               gate_outputs[idx]).data.cpu().numpy()),
                       iteration, dataformats='HWC')
def log_validation(self, reduced_loss, reduced_losses, reduced_acces,
                   model, y, y_pred, iteration, task):
    """Log per-task validation losses/accuracies and plots for a
    multi-task (text/audio seq2seq + speaker) model.

    Fixes: the nine scalar-loss and three accuracy log calls are folded
    into loops (identical tags, identical order); the deprecated
    ``F.sigmoid`` is replaced with ``torch.sigmoid``; a large block of
    commented-out alternate unpacking code was removed as dead code.
    """
    self.add_scalar('validation.loss.%s' % task, reduced_loss, iteration)
    # Component losses, in the fixed order they arrive in reduced_losses.
    loss_names = ('recon', 'recon_post', 'stop', 'contr', 'consi',
                  'spenc', 'spcla', 'texcl', 'spadv')
    for name, value in zip(loss_names, reduced_losses):
        self.add_scalar("validation.loss.%s.%s" % (task, name), value,
                        iteration)
    acc_names = ('spenc', 'spcla', 'texcl')
    for name, value in zip(acc_names, reduced_acces):
        self.add_scalar('validation.acc.%s.%s' % (task, name), value,
                        iteration)
    (predicted_mel, post_output, predicted_stop, alignments,
     text_hidden, mel_hidden, text_logit_from_mel_hidden,
     audio_seq2seq_alignments,
     speaker_logit_from_mel, speaker_logit_from_mel_hidden,
     text_lengths, mel_lengths, SE_alignments) = y_pred
    text_target, mel_target, speaker_target, stop_target = y
    # Regroup the stop targets and keep only the first value per group
    # (assumes targets are stored in groups of
    # stop_target.size(1)/predicted_stop.size(1) per decoder step —
    # TODO confirm against the data loader).
    stop_target = stop_target.reshape(
        stop_target.size(0), -1,
        int(stop_target.size(1) / predicted_stop.size(1)))
    stop_target = stop_target[:, :, 0]
    # Parameter-histogram logging is intentionally disabled here:
    # for tag, value in model.named_parameters():
    #     tag = tag.replace('.', '/')
    #     self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # plot alignment, mel target and predicted, stop target and predicted
    idx = random.randint(0, alignments.size(0) - 1)
    alignments = alignments.data.cpu().numpy()
    audio_seq2seq_alignments = audio_seq2seq_alignments.data.cpu().numpy()
    SE_alignments = SE_alignments.data.cpu().numpy()
    self.add_image(
        "%s.alignment" % task,
        plot_alignment_to_numpy(alignments[idx].T),
        iteration, dataformats='HWC')
    # plot more alignments
    plot_alignment(alignments[:4],
                   self.ali_path + '/step-%d-%s.pdf' % (iteration, task))
    self.add_image(
        "%s.audio_seq2seq_alignment" % task,
        plot_alignment_to_numpy(audio_seq2seq_alignments[idx].T),
        iteration, dataformats='HWC')
    self.add_image(
        "%s.SE_alignments" % task,
        plot_alignment_to_numpy(SE_alignments[idx].T),
        iteration, dataformats='HWC')
    self.add_image(
        "%s.mel_target" % task,
        plot_spectrogram_to_numpy(mel_target[idx].data.cpu().numpy()),
        iteration, dataformats='HWC')
    self.add_image(
        "%s.mel_predicted" % task,
        plot_spectrogram_to_numpy(predicted_mel[idx].data.cpu().numpy()),
        iteration, dataformats='HWC')
    self.add_image(
        "%s.post_predicted" % task,
        plot_spectrogram_to_numpy(post_output[idx].data.cpu().numpy()),
        iteration, dataformats='HWC')
    self.add_image(
        "%s.stop" % task,
        plot_gate_outputs_to_numpy(
            stop_target[idx].data.cpu().numpy(),
            # torch.sigmoid replaces the deprecated F.sigmoid
            torch.sigmoid(predicted_stop[idx]).data.cpu().numpy()),
        iteration, dataformats='HWC')
def log_validation(self, reduced_loss, model, y, y_pred, iteration):
    """Standard Tacotron2 validation logging to TensorBoard: loss scalar,
    parameter histograms, and alignment / mel / gate plots for a random
    batch element.

    Fix: a full commented-out copy of an older Tacotron2Logger class
    (imports, __init__, log_training, log_validation) that trailed this
    function was removed as dead code.
    """
    self.add_scalar("validation.loss", reduced_loss, iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        self.add_histogram(tag, value.data.cpu().numpy(), iteration)
    # plot alignment, mel target and predicted, gate target and predicted
    idx = random.randint(0, alignments.size(0) - 1)
    self.add_image("alignment",
                   plot_alignment_to_numpy(
                       alignments[idx].data.cpu().numpy().T),
                   iteration, dataformats='HWC')
    self.add_image("mel_target",
                   plot_spectrogram_to_numpy(
                       mel_targets[idx].data.cpu().numpy()),
                   iteration, dataformats='HWC')
    self.add_image("mel_predicted",
                   plot_spectrogram_to_numpy(
                       mel_outputs[idx].data.cpu().numpy()),
                   iteration, dataformats='HWC')
    self.add_image("gate",
                   plot_gate_outputs_to_numpy(
                       gate_targets[idx].data.cpu().numpy(),
                       torch.sigmoid(
                           gate_outputs[idx]).data.cpu().numpy()),
                   iteration, dataformats='HWC')
def log_validation(self, reduced_loss, model, x, y, y_pred, iteration,
                   epoch, sample_rate):
    """Log validation results to wandb: loss, alignment, vocoded audio
    (via ``self.mel2wav``), target/predicted mels, gate, and forward
    attention ratios.

    Fix: disabled TensorBoard calls were kept as bare triple-quoted
    string expressions — no-op statements evaluated at runtime; they are
    now plain comments. Behavior is otherwise unchanged.
    """
    text_padded, input_lengths, mel_padded, max_len, output_lengths = x
    # (TensorBoard scalar logging disabled; wandb is used instead.)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    # plot distribution of parameters
    for tag, value in model.named_parameters():
        tag = tag.replace('.', '/')
        # (TensorBoard histogram disabled; mirrored to wandb below.)
        wandb.log(
            {
                tag: wandb.Histogram(value.data.cpu().numpy()),
                "epoch": epoch,
                "iteration": iteration
            },
            step=iteration)
    # plot alignment, mel target and predicted, gate target and predicted
    idx = random.randint(0, alignments.size(0) - 1)
    text_len = input_lengths[idx].item()
    text_string = sequence_to_text(text_padded[idx].tolist())[:text_len]
    mel_len = get_mel_length(alignments, idx, text_len)
    mel = mel_outputs[idx:idx + 1, :, :mel_len]
    np_wav = self.mel2wav(mel.type('torch.cuda.HalfTensor'))
    np_alignment = plot_alignment_to_numpy(
        alignments[idx].data.cpu().numpy().T, decoding_len=mel_len)
    # (self.add_image("alignment", ...) disabled)
    np_mel_target = plot_spectrogram_to_numpy(
        mel_targets[idx].data.cpu().numpy())
    # (self.add_image("mel_target", ...) disabled)
    np_mel_predicted = plot_spectrogram_to_numpy(
        mel_outputs[idx].data.cpu().numpy())
    # (self.add_image("mel_predicted", ...) disabled)
    np_gate = plot_gate_outputs_to_numpy(
        gate_targets[idx].data.cpu().numpy(),
        torch.sigmoid(gate_outputs[idx]).data.cpu().numpy())
    # (self.add_image("gate", ...) disabled)
    # wandb log
    wandb.log(
        {
            "val/loss": reduced_loss,
            "val/alignment":
            [wandb.Image(np_alignment, caption=text_string)],
            "val/audio": [
                wandb.Audio(np_wav.astype(np.float32),
                            caption=text_string,
                            sample_rate=sample_rate)
            ],
            "val/mel_target": [wandb.Image(np_mel_target)],
            "val/mel_predicted": [wandb.Image(np_mel_predicted)],
            "val/gate": [wandb.Image(np_gate)],
            "epoch": epoch,
            "iteration": iteration
        },
        step=iteration)
    # forward attention ratio
    hop_list = [1]
    for hop_size in hop_list:
        mean_far, batch_far = forward_attention_ratio(
            alignments, input_lengths, hop_size)
        log_name = "mean_forward_attention_ratio.val/hop_size={}".format(
            hop_size)
        wandb.log(
            {
                log_name: mean_far,
                "epoch": epoch,
                "iteration": iteration
            },
            step=iteration)
        log_name = "forward_attention_ratio.val/hop_size={}".format(
            hop_size)
        wandb.log(
            {
                log_name: wandb.Histogram(batch_far.data.cpu().numpy()),
                "epoch": epoch,
                "iteration": iteration
            },
            step=iteration)
def log_validation(self, reduced_loss, model, y, y_pred, iteration, x):
    """Log validation scalars, plots, reconstructed audio for both the
    target and the predicted spectrogram, and a JSON summary of the
    sampled element as a text entry."""
    self.add_scalar("validation.loss", reduced_loss, iteration)
    _, mel_outputs, gate_outputs, alignments = y_pred
    mel_targets, gate_targets = y
    text_inputs = x[0]
    speaker_ids = x[5]
    # Histogram of every model parameter.
    for name, param in model.named_parameters():
        self.add_histogram(name.replace('.', '/'),
                           param.data.cpu().numpy(), iteration)
    # Visualise one random batch element.
    idx = random.randint(0, alignments.size(0) - 1)
    predicted_np = mel_outputs[idx].data.cpu().numpy()
    target_np = mel_targets[idx].data.cpu().numpy()
    self.add_image("alignment",
                   plot_alignment_to_numpy(
                       alignments[idx].data.cpu().numpy().T),
                   iteration, dataformats='HWC')
    self.add_image("mel_predicted",
                   plot_spectrogram_to_numpy(predicted_np),
                   iteration, dataformats='HWC')
    self.add_image("gate",
                   plot_gate_outputs_to_numpy(
                       gate_targets[idx].data.cpu().numpy(),
                       torch.sigmoid(
                           gate_outputs[idx]).data.cpu().numpy()),
                   iteration, dataformats='HWC')
    # Log the synthesized speech reconstructed from the predicted
    # spectrogram.
    audio_predicted = inv_linearspectrogram(predicted_np)
    self.add_audio('audio_predicted', torch.from_numpy(audio_predicted),
                   iteration, sample_rate=default_hparams.sample_rate)
    self.add_image("mel_target",
                   plot_spectrogram_to_numpy(target_np),
                   iteration, dataformats='HWC')
    audio_target = inv_linearspectrogram(target_np)
    self.add_audio('audio_target', torch.from_numpy(audio_target),
                   iteration, sample_rate=default_hparams.sample_rate)
    # Build a JSON summary describing the sampled element.
    spk = int(speaker_ids[idx].data.cpu().numpy().flatten()[0])
    ph_ids = text_inputs[idx].data.cpu().numpy().flatten()
    ms_per_sample = default_hparams.sample_rate / 1000
    summary = dict(
        speaker_id=spk,
        phonemes=sequence_to_text(ph_ids),
        phonemes_size=len(ph_ids),
        validation_loss=float(reduced_loss),
        audio_target_ms=int(len(audio_target) / ms_per_sample),
        audio_predicted_ms=int(len(audio_predicted) / ms_per_sample),
        spectrogram_target_shape=str(target_np.shape),
        spectrogram_predicted_shape=str(predicted_np.shape),
        alignment_shape=str(alignments[idx].data.cpu().numpy().T.shape))
    out_text = json.dumps(summary, indent=4, ensure_ascii=False)
    out_text = f'<pre>{out_text}</pre>'  # HTML tags are supported
    self.add_text('text', out_text, iteration)