Exemple #1
0
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        """Save validation plots as PNG files and log them to wandb.

        One randomly chosen item of the batch is rendered as four figures
        (attention alignment, target mel, predicted mel, gate outputs). Each
        figure is written to ``self.outdir`` as ``<name>_<iteration:08>.png``
        and uploaded to wandb together with the validation loss.

        Args:
            reduced_loss: Value logged under ``"loss/val"``.
            model: Not used by this method.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``;
                the first item is ignored.
            iteration: Used in the PNG file names and as the wandb step.
        """
        _, mel_outputs, gate_outputs, alignments = y_pred
        mel_targets, gate_targets = y

        # plot alignment, mel target and predicted, gate target and predicted
        idx = random.randint(0, alignments.size(0) - 1)

        # Render every figure exactly once; the same PIL image is reused for
        # the file on disk and for the wandb upload.
        align = Image.fromarray(
            plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T))
        target = Image.fromarray(
            plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()))
        output = Image.fromarray(
            plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()))
        gate = Image.fromarray(
            plot_gate_outputs_to_numpy(
                gate_targets[idx].data.cpu().numpy(),
                torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()))

        for prefix, image in (("align", align), ("target", target),
                              ("output", output), ("gate", gate)):
            image.save(
                os.path.join(self.outdir, f'{prefix}_{iteration:08}.png'))

        # NOTE(review): all four images carry the caption 'att'; this looks
        # like a copy-paste leftover but is kept to avoid changing what is
        # shown in existing dashboards.
        wandb.log({
            "loss/val": reduced_loss,
            "alignment": wandb.Image(align, caption='att'),
            "mel_target": wandb.Image(target, caption='att'),
            "mel_predicted": wandb.Image(output, caption='att'),
            "gate": wandb.Image(gate, caption='att'),
        }, step=iteration)
Exemple #2
0
    def log_teacher_forced_validation(self, reduced_loss, model, y, y_pred, iteration, val_teacher_force_till, val_p_teacher_forcing, diagonality, avg_prob):
        """Log teacher-forced validation scalars, histograms and plots.

        Writes three scalars (loss, attention diagonality, average max
        attention weight), one histogram per model parameter, and
        alignment / mel / gate images for up to the first two batch items.

        Args:
            reduced_loss: Logged as ``teacher_forced_validation.loss``.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: Sequence starting with ``mel_targets, gate_targets``; any
                further items are ignored.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
            iteration: Global step attached to everything that is logged.
            val_teacher_force_till: Not used by this method.
            val_p_teacher_forcing: Not used by this method.
            diagonality: Logged as the attention alignment diagonality.
            avg_prob: Logged as the average max attention weight.
        """
        self.add_scalar("teacher_forced_validation.loss", reduced_loss, iteration)
        self.add_scalar("teacher_forced_validation.attention_alignment_diagonality", diagonality, iteration)
        self.add_scalar("teacher_forced_validation.average_max_attention_weight", avg_prob, iteration)
        _, mel_outputs, gate_outputs, alignments = y_pred
        mel_targets, gate_targets, *_ = y

        # plot distribution of parameters
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            self.add_histogram(tag, value.data.cpu().numpy(), iteration)

        # plot alignment, mel target and predicted, gate target and predicted
        # for the first two batch items (the original comments call these the
        # longest and 2nd longest audio files -- presumably the batch is
        # sorted by length; confirm against the collate function).
        # Clamp to the batch size so a batch of one does not raise IndexError.
        n_plots = min(2, len(alignments))
        for idx in range(n_plots):
            # Tags keep their historical names: no suffix for the first item,
            # "2" for the second.
            suffix = "" if idx == 0 else str(idx + 1)
            self.add_image(
                "teacher_forced_alignment" + suffix,
                plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T),
                iteration, dataformats='HWC')
            self.add_image(
                "mel_target" + suffix,
                plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()),
                iteration, dataformats='HWC')
            self.add_image(
                "mel_predicted" + suffix,
                plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()),
                iteration, dataformats='HWC')
            self.add_image(
                "gate" + suffix,
                plot_gate_outputs_to_numpy(
                    gate_targets[idx].data.cpu().numpy(),
                    torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
                iteration, dataformats='HWC')
Exemple #3
0
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        """Record the validation loss, parameter histograms and sample plots.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: 3-tuple ``(mel_targets, gate_targets, alignment_targets)``;
                the third item is unpacked but not used.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
            iteration: Global step attached to everything that is logged.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, predicted_mels, predicted_gates, attention = y_pred
        target_mels, target_gates, _alignment_targets = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Pick one random batch item and plot its alignment, mels and gates.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        self.add_image(
            "alignment", alignment_plot, iteration, dataformats='HWC')

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image(
            "mel_target", target_plot, iteration, dataformats='HWC')

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image(
            "mel_predicted", predicted_plot, iteration, dataformats='HWC')

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(torch.sigmoid(predicted_gates[sample])))
        self.add_image("gate", gate_plot, iteration, dataformats='HWC')
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        """Send the validation loss and four sample plots to ``self.wandb``.

        Only wandb is used here; no scalar or histogram is written through
        ``add_scalar`` / ``add_histogram``.

        Args:
            reduced_loss: Logged under ``"validation loss"``.
            model: Not used by this method.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
            iteration: Not used by this method (no step is passed to wandb).
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        _, predicted_mels, predicted_gates, attention = y_pred
        target_mels, target_gates = y

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(torch.sigmoid(predicted_gates[sample])))

        tracker = self.wandb
        payload = {"validation loss": reduced_loss}
        payload["alignment"] = tracker.Image(alignment_plot)
        payload["spectrogram"] = tracker.Image(target_plot)
        payload["mel_spec"] = tracker.Image(predicted_plot)
        payload["gate"] = tracker.Image(gate_plot)
        tracker.log(payload)
Exemple #5
0
    def log_validation(self, reduced_loss, model, y, y_pred, gst_scores,
                       iteration):
        """Log validation loss, histograms, GST scores and sample plots.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 5-tuple; items 1-3 are
                ``mel_outputs, gate_outputs, alignments``, the first and
                last items are ignored.
            gst_scores: Tensor that is moved to CPU, transposed and plotted
                as a whole (it is not indexed by the sampled batch item).
            iteration: Global step attached to everything that is logged.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, predicted_mels, predicted_gates, attention, _ = y_pred
        target_mels, target_gates = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        attention_np = as_numpy(attention[sample]).T
        scores_np = as_numpy(gst_scores).T
        target_np = as_numpy(target_mels[sample])
        predicted_np = as_numpy(predicted_mels[sample])

        self.add_image(
            "alignment", plot_alignment_to_numpy(attention_np), iteration)
        self.add_image(
            "gst_scores", plot_gst_scores_to_numpy(scores_np), iteration)
        self.add_image(
            "mel_target", plot_spectrogram_to_numpy(target_np), iteration)
        self.add_image(
            "mel_predicted", plot_spectrogram_to_numpy(predicted_np),
            iteration)

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(F.sigmoid(predicted_gates[sample])))
        self.add_image("gate", gate_plot, iteration)
Exemple #6
0
 def log_infer(self, reduced_loss, model, y, y_pred, iteration, val_teacher_force_till, val_p_teacher_forcing, diagonality, avg_prob):
     """Log inference-mode validation scalars and plots.

     Writes three scalars (loss, attention diagonality, average max
     attention weight) and alignment / mel / gate images for up to the
     first two batch items.

     Args:
         reduced_loss: Logged as ``infer.loss``.
         model: Not used by this method.
         y: Sequence starting with ``mel_targets, gate_targets``; any
             further items are ignored.
         y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
         iteration: Global step attached to everything that is logged.
         val_teacher_force_till: Not used by this method.
         val_p_teacher_forcing: Not used by this method.
         diagonality: Logged as the attention alignment diagonality.
         avg_prob: Logged as the average max attention weight.
     """
     self.add_scalar("infer.loss", reduced_loss, iteration)
     self.add_scalar("infer.attention_alignment_diagonality", diagonality, iteration)
     self.add_scalar("infer.average_max_attention_weight", avg_prob, iteration)
     _, mel_outputs, gate_outputs, alignments = y_pred
     mel_targets, gate_targets, *_ = y

     # plot alignment, mel target and predicted, gate target and predicted
     # for the first two batch items (the original comments call these the
     # longest and 2nd longest audio files -- presumably the batch is sorted
     # by length; confirm against the collate function).
     # Clamp to the batch size so a batch of one does not raise IndexError.
     n_plots = min(2, len(alignments))
     for idx in range(n_plots):
         # Tags keep their historical names: no suffix for the first item,
         # "2" for the second.
         suffix = "" if idx == 0 else str(idx + 1)
         self.add_image(
             "infer_alignment" + suffix,
             plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T),
             iteration, dataformats='HWC')
         self.add_image(
             "infer_mel_target" + suffix,
             plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()),
             iteration, dataformats='HWC')
         self.add_image(
             "infer_mel_predicted" + suffix,
             plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()),
             iteration, dataformats='HWC')
         self.add_image(
             "infer_gate" + suffix,
             plot_gate_outputs_to_numpy(
                 gate_targets[idx].data.cpu().numpy(),
                 torch.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
             iteration, dataformats='HWC')
Exemple #7
0
    def log_validation(self,
                       reduced_loss,
                       model,
                       y,
                       y_pred,
                       iteration,
                       model_name="",
                       log_embedding=False):
        """Log validation loss, histograms, plots and optionally embeddings.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed; its
                ``speaker_embedding`` is read when ``log_embedding`` is true.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
            iteration: Global step attached to everything that is logged.
            model_name: Inserted into the embedding tag ``emb_<model_name>``.
            log_embedding: When true, also log the speaker embedding weights
                with the row indices (as strings) as metadata.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        def axis2_first(array):
            # Every plot has its axis 2 moved to position 0 before logging.
            return np.moveaxis(array, 2, 0)

        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, predicted_mels, predicted_gates, attention = y_pred
        target_mels, target_gates = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        self.add_image("alignment", axis2_first(alignment_plot), iteration)

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image("mel_target", axis2_first(target_plot), iteration)

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image("mel_predicted", axis2_first(predicted_plot), iteration)

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(torch.sigmoid(predicted_gates[sample])))
        self.add_image("gate", axis2_first(gate_plot), iteration)

        if not log_embedding:
            return

        weights = model.speaker_embedding.weight.detach().cpu().numpy()
        labels = list(map(str, range(model.speaker_embedding.num_embeddings)))
        self.add_embedding(
            weights,
            labels,
            global_step=iteration,
            tag='emb_{}'.format(model_name))
Exemple #8
0
    def log_infer(self, reduced_loss, model, y, y_pred, iteration,
                  val_teacher_force_till, val_p_teacher_forcing, diagonality,
                  avg_prob):
        """Log inference-mode validation scalars and plots.

        Writes three scalars (loss, attention diagonality, average max
        attention weight) and, for up to the first five batch items,
        alignment, target mel, predicted mel, optional GAN mel and gate
        images.

        Args:
            reduced_loss: Logged as ``infer.loss``.
            model: Not used by this method.
            y: Sequence starting with ``mel_targets, gate_targets``; any
                further items are ignored.
            y_pred: Indexable sequence starting with ``mel_outputs,
                mel_outputs_postnet, gate_outputs, alignments``; element
                ``[8][0]`` is read as the GAN mel output (may be ``None``).
            iteration: Global step attached to everything that is logged.
            val_teacher_force_till: Not used by this method.
            val_p_teacher_forcing: Not used by this method.
            diagonality: Logged as the attention alignment diagonality.
            avg_prob: Logged as the average max attention weight.
        """
        self.add_scalar("infer.loss", reduced_loss, iteration)
        self.add_scalar("infer.attention_alignment_diagonality", diagonality,
                        iteration)
        self.add_scalar("infer.average_max_attention_weight", avg_prob,
                        iteration)
        mel_outputs, mel_outputs_postnet, gate_outputs, alignments, *_ = y_pred
        # Prefer the postnet output for plotting when the model produced one.
        if mel_outputs_postnet is not None:
            mel_outputs = mel_outputs_postnet
        mel_outputs_GAN = y_pred[8][0]
        mel_targets, gate_targets, *_ = y
        # Cut the prediction to at most mel_targets.shape[1] along dim 1.
        mel_outputs = mel_outputs[:, :mel_targets.shape[1], :]

        plot_n_files = 5
        # Never index past the end of the batch: a batch with fewer than
        # plot_n_files items used to raise IndexError here.
        n_plots = min(plot_n_files, len(alignments))
        # plot infer alignment, mel target and predicted, gate predicted
        for idx in range(n_plots):  # plot first x items of the batch
            # First item keeps the bare tag; later items get their index.
            str_idx = '' if idx == 0 else idx
            self.add_image(f"infer_alignment{str_idx}",
                           plot_alignment_to_numpy(
                               alignments[idx].data.cpu().numpy().T),
                           iteration,
                           dataformats='HWC')
            self.add_image(f"infer_mel_target{str_idx}",
                           plot_spectrogram_to_numpy(
                               mel_targets[idx].data.cpu().numpy()),
                           iteration,
                           dataformats='HWC')
            self.add_image(f"infer_mel_predicted{str_idx}",
                           plot_spectrogram_to_numpy(
                               mel_outputs[idx].data.cpu().numpy()),
                           iteration,
                           dataformats='HWC')
            if mel_outputs_GAN is not None:
                self.add_image(f"mel_predicted_GAN{str_idx}",
                               plot_spectrogram_to_numpy(
                                   mel_outputs_GAN[idx].data.cpu().numpy()),
                               iteration,
                               dataformats='HWC')
            self.add_image(f"infer_gate{str_idx}",
                           plot_gate_outputs_to_numpy(
                               gate_targets[idx].data.cpu().numpy(),
                               torch.sigmoid(
                                   gate_outputs[idx]).data.cpu().numpy()),
                           iteration,
                           dataformats='HWC')
Exemple #9
0
    def log_validation(self,
                       reduced_loss,
                       model,
                       y,
                       y_pred,
                       iteration,
                       speaker_acc=0,
                       augment_acc=0):
        """Log validation loss, classifier accuracies, histograms and plots.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: 4-tuple ``(mel_targets, gate_targets, speaker_id, labels)``;
                only the first two are used.
            y_pred: 8-tuple; items 1-3 are
                ``mel_outputs, gate_outputs, alignments``, the remaining
                items are unpacked but not used.
            iteration: Global step attached to everything that is logged.
            speaker_acc: Logged as ``Speaker_classifier_ACC``.
            augment_acc: Logged as ``Augment_classifier_ACC``.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        def last_axis_first(array):
            # Wrap the plot in a tensor and move its last axis to the front.
            return torch.from_numpy(array).permute(2, 0, 1)

        self.add_scalar("validation.loss", reduced_loss, iteration)
        self.add_scalar("Speaker_classifier_ACC", speaker_acc, iteration)
        self.add_scalar("Augment_classifier_ACC", augment_acc, iteration)
        (_, predicted_mels, predicted_gates, attention,
         _speaker_output, _augmentation_output, _, _) = y_pred
        target_mels, target_gates, _speaker_id, _labels = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        self.add_image("alignment", last_axis_first(alignment_plot), iteration)

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image("mel_target", last_axis_first(target_plot), iteration)

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image(
            "mel_predicted", last_axis_first(predicted_plot), iteration)

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(F.sigmoid(predicted_gates[sample])))
        self.add_image("gate", last_axis_first(gate_plot), iteration)
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        """Log validation loss, histograms, sample plots and vocoded audio.

        The plots use one randomly chosen batch item; the audio is always
        produced from the first item of ``mel_outputs`` via
        ``self.melgan.inference``.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
            iteration: Global step attached to everything that is logged.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, predicted_mels, predicted_gates, attention = y_pred
        target_mels, target_gates = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        self.add_image(
            "alignment", alignment_plot, iteration, dataformats='HWC')

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image(
            "mel_target", target_plot, iteration, dataformats='HWC')

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image(
            "mel_predicted", predicted_plot, iteration, dataformats='HWC')

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(torch.sigmoid(predicted_gates[sample])))
        self.add_image("gate", gate_plot, iteration, dataformats='HWC')

        # Vocode the first predicted mel; a 2-D mel gets a leading axis added
        # before it is handed to the vocoder.
        first_mel = predicted_mels.cpu()[0]
        if first_mel.dim() == 2:
            first_mel = first_mel.unsqueeze(0)
        waveform = self.melgan.inference(first_mel)
        self.add_audio(
            'audio',
            waveform,
            global_step=iteration,
            sample_rate=self.sampling_rate,
            walltime=None)
Exemple #11
0
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        """Log validation results to both this writer and wandb.

        The loss, alignment, target mel and predicted mel go to both
        backends; parameter histograms and the gate plot are only written
        through ``add_histogram`` / ``add_image``.

        Args:
            reduced_loss: Logged as ``validation.loss`` on both backends.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
            iteration: Global step / wandb step for everything logged.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        wandb.log({'validation.loss': reduced_loss}, step=iteration)
        _, predicted_mels, predicted_gates, attention = y_pred
        target_mels, target_gates = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        self.add_image("alignment", alignment_plot, iteration)
        alignment_entry = wandb.Image(alignment_plot, caption="Alignment")
        wandb.log({"alignment": [alignment_entry]}, step=iteration)

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image("mel_target", target_plot, iteration)
        target_entry = wandb.Image(target_plot, caption="Mel target")
        wandb.log({"mel_target": [target_entry]}, step=iteration)

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image("mel_predicted", predicted_plot, iteration)
        predicted_entry = wandb.Image(predicted_plot, caption="Mel predicted")
        wandb.log({"mel_predicted": [predicted_entry]}, step=iteration)

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(torch.sigmoid(predicted_gates[sample])))
        self.add_image("gate", gate_plot, iteration)
Exemple #12
0
    def log_validation(self, reduced_loss, model, x, y, y_pred, iteration,
                       hparams):
        """Log validation loss, histograms, plots, audio and input text.

        One random batch item is plotted, synthesised through
        ``synthesis_griffin_lim`` (from both the target and the predicted
        mel), and its symbol ids from ``x[0]`` are mapped through
        ``_id_to_symbol`` and logged as text.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed.
            x: Model inputs; ``x[0][idx]`` is read as a sequence of symbol
                ids.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple ``(_, mel_outputs, gate_outputs, alignments)``.
            iteration: Global step attached to everything that is logged.
            hparams: Passed to ``synthesis_griffin_lim``; its
                ``sampling_rate`` is used for the audio entries.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, predicted_mels, predicted_gates, attention = y_pred
        target_mels, target_gates = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        self.add_image("alignment", alignment_plot, iteration)

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image("mel_target", target_plot, iteration)

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image("mel_predicted", predicted_plot, iteration)

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(F.sigmoid(predicted_gates[sample])))
        self.add_image("gate", gate_plot, iteration)

        # Audio reconstructed from the target mel and from the predicted mel.
        target_audio = synthesis_griffin_lim(
            target_mels[sample].unsqueeze(0), hparams)
        self.add_audio(
            "audio_from_target", target_audio, iteration,
            hparams.sampling_rate)

        predicted_audio = synthesis_griffin_lim(
            predicted_mels[sample].unsqueeze(0), hparams)
        self.add_audio(
            "audio_from_predicted", predicted_audio, iteration,
            hparams.sampling_rate)

        # Decode the input symbol ids of the sampled item back into a string.
        symbols = []
        for symbol_id in as_numpy(x[0][sample]):
            symbols.append(_id_to_symbol[symbol_id])
        self.add_text("text", ''.join(symbols), iteration)
Exemple #13
0
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        """Log validation loss, histograms, plots and optional VAE latents.

        When ``self.use_vae`` is true, ``y_pred`` must hold eight items and
        two extra latent-space images are logged from ``mus`` and
        ``emotions``; otherwise ``y_pred`` must hold four items.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4- or 8-tuple as described above; items 1-3 are
                ``mel_outputs, gate_outputs, alignments``.
            iteration: Global step attached to everything that is logged.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        if not self.use_vae:
            _, predicted_mels, predicted_gates, attention = y_pred
        else:
            (_, predicted_mels, predicted_gates, attention,
             latent_means, _, _, emotion_labels) = y_pred
        target_mels, target_gates = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Plot alignment, target/predicted mel and gates of one random item.
        sample = random.randint(0, attention.size(0) - 1)

        alignment_plot = plot_alignment_to_numpy(as_numpy(attention[sample]).T)
        self.add_image(
            "alignment", alignment_plot, iteration,
            dataformats=self.dataformat)

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image(
            "mel_target", target_plot, iteration,
            dataformats=self.dataformat)

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image(
            "mel_predicted", predicted_plot, iteration,
            dataformats=self.dataformat)

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(torch.sigmoid(predicted_gates[sample])))
        self.add_image(
            "gate", gate_plot, iteration, dataformats=self.dataformat)

        if not self.use_vae:
            return

        # Latent-space views built from the whole batch.
        scatter_plot = plot_scatter(latent_means, emotion_labels)
        self.add_image(
            "latent_dim (regular)", scatter_plot, iteration,
            dataformats=self.dataformat)

        tsne_plot = plot_tsne(latent_means, emotion_labels)
        self.add_image(
            "latent_dim (t-sne)", tsne_plot, iteration,
            dataformats=self.dataformat)
Exemple #14
0
    def log_validation(self, reduced_loss, model, y, y_pred, iteration, stft):
        """Log validation loss, histograms, plots and a saved audio sample.

        The first batch item is passed to ``plot_spectrogram`` and
        ``save_audio`` (both receive ``self.logdir``), while the mel and
        gate images use one randomly chosen item. No alignment image is
        logged by this method.

        Args:
            reduced_loss: Logged as the ``validation.loss`` scalar.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple
                ``(decoder_outputs, mel_outputs, gate_outputs, alignment)``.
            iteration: Global step attached to everything that is logged.
            stft: Passed through to ``save_audio``.
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        decoder_mels, predicted_mels, predicted_gates, attention = y_pred
        target_mels, target_gates = y

        # One histogram per parameter; '.' in the name becomes '/' in the tag.
        for name, param in model.named_parameters():
            self.add_histogram(
                name.replace('.', '/'), as_numpy(param), iteration)

        # Random item for the image plots below.
        sample = random.randint(0, predicted_mels.size(0) - 1)

        # Fixed first item for the spectrogram figure and the audio dump.
        first = 0
        plot_spectrogram(
            as_numpy(target_mels[first]),
            as_numpy(decoder_mels[first]),
            as_numpy(predicted_mels[first]),
            as_numpy(attention[first]),
            self.logdir,
            iteration,
            append="eval")

        save_audio(predicted_mels[first], self.logdir, iteration, stft, False)

        target_plot = plot_spectrogram_to_numpy(as_numpy(target_mels[sample]))
        self.add_image("mel_target", target_plot, iteration)

        predicted_plot = plot_spectrogram_to_numpy(
            as_numpy(predicted_mels[sample]))
        self.add_image("mel_predicted", predicted_plot, iteration)

        gate_plot = plot_gate_outputs_to_numpy(
            as_numpy(target_gates[sample]),
            as_numpy(F.sigmoid(predicted_gates[sample])))
        self.add_image("gate", gate_plot, iteration)
Exemple #15
0
    def log_teacher_forced_validation(self, reduced_loss, model, y, y_pred,
                                      iteration, val_teacher_force_till,
                                      val_p_teacher_forcing, diagonality,
                                      avg_prob):
        """Log teacher-forced validation scalars, histograms and plots.

        Writes three scalars (loss, attention diagonality, average max
        attention weight), one histogram per model parameter and, for up to
        the first five batch items, alignment, target mel, predicted mel,
        optional GAN mel, element-wise squared error and gate images.

        Args:
            reduced_loss: Logged as ``teacher_forced_validation.loss``.
            model: Object whose ``named_parameters()`` are histogrammed.
            y: Sequence starting with ``mel_targets, gate_targets``; any
                further items are ignored.
            y_pred: Indexable sequence starting with ``mel_outputs,
                mel_outputs_postnet, gate_outputs, alignments``; element
                ``[8][0]`` is read as the GAN mel output (may be ``None``).
            iteration: Global step attached to everything that is logged.
            val_teacher_force_till: Not used by this method.
            val_p_teacher_forcing: Not used by this method.
            diagonality: Logged as the attention alignment diagonality.
            avg_prob: Logged as the average max attention weight.
        """
        self.add_scalar("teacher_forced_validation.loss", reduced_loss,
                        iteration)
        self.add_scalar(
            "teacher_forced_validation.attention_alignment_diagonality",
            diagonality, iteration)
        self.add_scalar(
            "teacher_forced_validation.average_max_attention_weight", avg_prob,
            iteration)
        mel_outputs, mel_outputs_postnet, gate_outputs, alignments, *_ = y_pred
        # Prefer the postnet output for plotting when the model produced one.
        if mel_outputs_postnet is not None:
            mel_outputs = mel_outputs_postnet
        mel_outputs_GAN = y_pred[8][0]
        mel_targets, gate_targets, *_ = y
        # Cut the prediction to at most mel_targets.shape[1] along dim 1.
        mel_outputs = mel_outputs[:, :mel_targets.shape[1], :]
        mel_MSE_map = torch.nn.MSELoss(reduction='none')(mel_outputs,
                                                         mel_targets)
        # Pin one corner element of every map to 20.0 so the colour scale of
        # the error plot stays usable (workaround from the original author).
        mel_MSE_map[:, -1, -1] = 20.0

        # plot distribution of parameters
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            self.add_histogram(tag, value.data.cpu().numpy(), iteration)

        plot_n_files = 5
        # Never index past the end of the batch: a batch with fewer than
        # plot_n_files items used to raise IndexError here.
        n_plots = min(plot_n_files, len(alignments))
        # plot alignment, mel target and predicted, gate target and predicted
        for idx in range(n_plots):  # plot first x items of the batch
            # First item keeps the bare tag; later items get their index.
            str_idx = '' if idx == 0 else idx
            self.add_image(f"teacher_forced_alignment{str_idx}",
                           plot_alignment_to_numpy(
                               alignments[idx].data.cpu().numpy().T),
                           iteration,
                           dataformats='HWC')
            self.add_image(f"mel_target{str_idx}",
                           plot_spectrogram_to_numpy(
                               mel_targets[idx].data.cpu().numpy()),
                           iteration,
                           dataformats='HWC')
            self.add_image(f"mel_predicted{str_idx}",
                           plot_spectrogram_to_numpy(
                               mel_outputs[idx].data.cpu().numpy()),
                           iteration,
                           dataformats='HWC')
            if mel_outputs_GAN is not None:
                self.add_image(f"mel_predicted_GAN{str_idx}",
                               plot_spectrogram_to_numpy(
                                   mel_outputs_GAN[idx].data.cpu().numpy()),
                               iteration,
                               dataformats='HWC')
            self.add_image(f"mel_squared_error{str_idx}",
                           plot_spectrogram_to_numpy(
                               mel_MSE_map[idx].data.cpu().numpy()),
                           iteration,
                           dataformats='HWC')
            self.add_image(f"gate{str_idx}",
                           plot_gate_outputs_to_numpy(
                               gate_targets[idx].data.cpu().numpy(),
                               torch.sigmoid(
                                   gate_outputs[idx]).data.cpu().numpy()),
                           iteration,
                           dataformats='HWC')
Exemple #16
0
    def log_validation(self, reduced_loss, reduced_losses, reduced_acces, model, y, y_pred, iteration, task):
        """Log validation scalars, alignment plots and spectrogram images for one task.

        Args:
            reduced_loss: Total validation loss, logged as ``validation.loss.<task>``.
            reduced_losses: Indexable holding at least nine component losses, read
                at positions 0-8 and logged as recon, recon_post, stop, contr,
                consi, spenc, spcla, texcl and spadv respectively.
            reduced_acces: Indexable holding at least three accuracies, read at
                positions 0-2 and logged as spenc, spcla and texcl respectively.
            model: Not used by this method; kept so the signature is unchanged.
            y: 4-tuple ``(text_target, mel_target, speaker_target, stop_target)``.
            y_pred: 13-tuple of model outputs, unpacked below.
            iteration: Global step passed to every ``add_scalar``/``add_image`` call
                and interpolated into the alignment PDF file name.
            task: Name interpolated into every tag and into the PDF file name.
        """
        self.add_scalar('validation.loss.%s' % task, reduced_loss, iteration)

        loss_names = ('recon', 'recon_post', 'stop', 'contr', 'consi',
                      'spenc', 'spcla', 'texcl', 'spadv')
        # Index explicitly (rather than zip) so a too-short sequence still raises.
        for position, name in enumerate(loss_names):
            self.add_scalar('validation.loss.%s.%s' % (task, name),
                            reduced_losses[position], iteration)

        acc_names = ('spenc', 'spcla', 'texcl')
        for position, name in enumerate(acc_names):
            self.add_scalar('validation.acc.%s.%s' % (task, name),
                            reduced_acces[position], iteration)

        # Only some outputs are plotted; the rest are unpacked so that a y_pred
        # of unexpected length fails here instead of further down.
        (predicted_mel, post_output, predicted_stop, alignments,
         _text_hidden, _mel_hidden, _text_logit_from_mel_hidden,
         audio_seq2seq_alignments,
         _speaker_logit_from_mel, _speaker_logit_from_mel_hidden,
         _text_lengths, _mel_lengths, SE_alignments) = y_pred

        _text_target, mel_target, _speaker_target, stop_target = y

        # stop_target is `ratio` times longer than predicted_stop along dim 1:
        # group it into chunks of `ratio` and keep the first entry of each chunk
        # so both have the same length for plotting.
        ratio = stop_target.size(1) // predicted_stop.size(1)
        stop_target = stop_target.reshape(stop_target.size(0), -1, ratio)[:, :, 0]

        # Pick one random batch item to visualise.
        idx = random.randint(0, alignments.size(0) - 1)

        alignments = alignments.data.cpu().numpy()
        audio_seq2seq_alignments = audio_seq2seq_alignments.data.cpu().numpy()
        SE_alignments = SE_alignments.data.cpu().numpy()

        self.add_image(
            "%s.alignment" % task,
            plot_alignment_to_numpy(alignments[idx].T),
            iteration, dataformats='HWC')

        # Also dump the first (up to) four alignments of the batch to a PDF.
        plot_alignment(alignments[:4], self.ali_path + '/step-%d-%s.pdf' % (iteration, task))

        self.add_image(
            "%s.audio_seq2seq_alignment" % task,
            plot_alignment_to_numpy(audio_seq2seq_alignments[idx].T),
            iteration, dataformats='HWC')

        self.add_image(
            "%s.SE_alignments" % task,
            plot_alignment_to_numpy(SE_alignments[idx].T),
            iteration, dataformats='HWC')

        self.add_image(
            "%s.mel_target" % task,
            plot_spectrogram_to_numpy(mel_target[idx].data.cpu().numpy()),
            iteration, dataformats='HWC')

        self.add_image(
            "%s.mel_predicted" % task,
            plot_spectrogram_to_numpy(predicted_mel[idx].data.cpu().numpy()),
            iteration, dataformats='HWC')

        self.add_image(
            "%s.post_predicted" % task,
            plot_spectrogram_to_numpy(post_output[idx].data.cpu().numpy()),
            iteration, dataformats='HWC')

        # Tensor.sigmoid() is what F.sigmoid delegates to, minus the
        # deprecation warning emitted by many PyTorch releases.
        self.add_image(
            "%s.stop" % task,
            plot_gate_outputs_to_numpy(
                stop_target[idx].data.cpu().numpy(),
                predicted_stop[idx].sigmoid().data.cpu().numpy()),
            iteration, dataformats='HWC')
Exemple #17
0
    def log_validation(self, reduced_loss, model, y, y_pred, iteration):
        """Record the validation loss, parameter histograms and sample plots.

        Args:
            reduced_loss: Value logged under ``validation.loss``.
            model: Object whose ``named_parameters()`` are logged as histograms.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple whose last three items are the mel outputs, gate
                outputs and alignments; the first item is ignored.
            iteration: Global step attached to every logged entry.
        """
        def as_numpy(tensor):
            # Detach, move to host memory and convert to a NumPy array.
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, mel_outputs, gate_outputs, alignments = y_pred
        mel_targets, gate_targets = y

        # One histogram per parameter, with dots in the name turned into
        # slashes for the tag.
        for name, param in model.named_parameters():
            self.add_histogram(name.replace('.', '/'), as_numpy(param), iteration)

        # Visualise a single, randomly chosen item of the batch.
        sample = random.randint(0, alignments.size(0) - 1)

        attention_plot = plot_alignment_to_numpy(as_numpy(alignments[sample]).T)
        self.add_image("alignment", attention_plot, iteration, dataformats='HWC')

        target_plot = plot_spectrogram_to_numpy(as_numpy(mel_targets[sample]))
        self.add_image("mel_target", target_plot, iteration, dataformats='HWC')

        predicted_plot = plot_spectrogram_to_numpy(as_numpy(mel_outputs[sample]))
        self.add_image("mel_predicted", predicted_plot, iteration, dataformats='HWC')

        # Gate outputs are passed through a sigmoid before plotting.
        gate_truth = as_numpy(gate_targets[sample])
        gate_probs = as_numpy(torch.sigmoid(gate_outputs[sample]))
        gate_plot = plot_gate_outputs_to_numpy(gate_truth, gate_probs)
        self.add_image("gate", gate_plot, iteration, dataformats='HWC')


# import random
# import torch.nn.functional as F
# from tensorboardX import SummaryWriter
# from plotting_utils import plot_alignment_to_numpy, plot_spectrogram_to_numpy
# from plotting_utils import plot_gate_outputs_to_numpy

# class Tacotron2Logger(SummaryWriter):
#     def __init__(self, logdir):
#         super(Tacotron2Logger, self).__init__(logdir)

#     def log_training(self, reduced_loss, grad_norm, learning_rate, duration,
#                      iteration):
#             self.add_scalar("training.loss", reduced_loss, iteration)
#             self.add_scalar("grad.norm", grad_norm, iteration)
#             self.add_scalar("learning.rate", learning_rate, iteration)
#             self.add_scalar("duration", duration, iteration)

#     def log_validation(self, reduced_loss, model, y, y_pred, iteration):
#         self.add_scalar("validation.loss", reduced_loss, iteration)
#         _, mel_outputs, gate_outputs, alignments = y_pred
#         mel_targets, gate_targets = y

#         # plot distribution of parameters
#         for tag, value in model.named_parameters():
#             tag = tag.replace('.', '/')
#             self.add_histogram(tag, value.data.cpu().numpy(), iteration)

#         # plot alignment, mel target and predicted, gate target and predicted
#         idx = random.randint(0, alignments.size(0) - 1)
#         self.add_image(
#             "alignment",
#             plot_alignment_to_numpy(alignments[idx].data.cpu().numpy().T),
#             iteration)
#         self.add_image(
#             "mel_target",
#             plot_spectrogram_to_numpy(mel_targets[idx].data.cpu().numpy()),
#             iteration)
#         self.add_image(
#             "mel_predicted",
#             plot_spectrogram_to_numpy(mel_outputs[idx].data.cpu().numpy()),
#             iteration)
#         self.add_image(
#             "gate",
#             plot_gate_outputs_to_numpy(
#                 gate_targets[idx].data.cpu().numpy(),
#                 F.sigmoid(gate_outputs[idx]).data.cpu().numpy()),
#             iteration)
Exemple #18
0
    def log_validation(self, reduced_loss, model, x, y, y_pred, iteration,
                       epoch, sample_rate):
        """Send validation metrics, plots and one synthesized sample to wandb.

        Args:
            reduced_loss: Value logged under ``val/loss``.
            model: Object whose ``named_parameters()`` are logged as histograms.
            x: 5-tuple; only the first two items (padded text and input
                lengths) are used here.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple whose last three items are the mel outputs, gate
                outputs and alignments; the first item is ignored.
            iteration: Used both as the wandb ``step`` and as a logged value.
            epoch: Logged alongside every entry.
            sample_rate: Sample rate passed to ``wandb.Audio``.
        """
        text_padded, input_lengths, _mel_padded, _max_len, _output_lengths = x
        _, mel_outputs, gate_outputs, alignments = y_pred
        mel_targets, gate_targets = y

        # Every wandb.log call below carries the same progress markers.
        progress = {"epoch": epoch, "iteration": iteration}

        # One histogram per parameter; dots in the name become slashes.
        for name, param in model.named_parameters():
            histogram_tag = name.replace('.', '/')
            wandb.log(
                {histogram_tag: wandb.Histogram(param.data.cpu().numpy()),
                 **progress},
                step=iteration)

        # Visualise a single, randomly chosen item of the batch.
        sample = random.randint(0, alignments.size(0) - 1)

        text_len = input_lengths[sample].item()
        # NOTE(review): the slice is applied to the decoded string, not to the
        # id sequence - confirm sequence_to_text emits one character per id.
        caption = sequence_to_text(text_padded[sample].tolist())[:text_len]

        mel_len = get_mel_length(alignments, sample, text_len)
        trimmed_mel = mel_outputs[sample:sample + 1, :, :mel_len]

        # NOTE(review): the cast below hard-codes a CUDA half tensor, so this
        # path presumably requires a GPU - confirm before running on CPU.
        waveform = self.mel2wav(trimmed_mel.type('torch.cuda.HalfTensor'))

        alignment_img = plot_alignment_to_numpy(
            alignments[sample].data.cpu().numpy().T, decoding_len=mel_len)
        mel_target_img = plot_spectrogram_to_numpy(
            mel_targets[sample].data.cpu().numpy())
        mel_predicted_img = plot_spectrogram_to_numpy(
            mel_outputs[sample].data.cpu().numpy())
        gate_img = plot_gate_outputs_to_numpy(
            gate_targets[sample].data.cpu().numpy(),
            torch.sigmoid(gate_outputs[sample]).data.cpu().numpy())

        # Loss, plots and audio go out in a single wandb entry.
        wandb.log(
            {
                "val/loss": reduced_loss,
                "val/alignment": [wandb.Image(alignment_img, caption=caption)],
                "val/audio": [
                    wandb.Audio(waveform.astype(np.float32),
                                caption=caption,
                                sample_rate=sample_rate)
                ],
                "val/mel_target": [wandb.Image(mel_target_img)],
                "val/mel_predicted": [wandb.Image(mel_predicted_img)],
                "val/gate": [wandb.Image(gate_img)],
                **progress,
            },
            step=iteration)

        # Forward attention ratio: mean as a scalar, per-item values as a
        # histogram, for each configured hop size.
        for hop in (1,):
            mean_far, batch_far = forward_attention_ratio(
                alignments, input_lengths, hop)

            mean_tag = "mean_forward_attention_ratio.val/hop_size={}".format(hop)
            wandb.log({mean_tag: mean_far, **progress}, step=iteration)

            batch_tag = "forward_attention_ratio.val/hop_size={}".format(hop)
            wandb.log(
                {batch_tag: wandb.Histogram(batch_far.data.cpu().numpy()),
                 **progress},
                step=iteration)
Exemple #19
0
    def log_validation(self, reduced_loss, model, y, y_pred, iteration, x):
        """Log validation loss, histograms, plots, audio and a text summary.

        Args:
            reduced_loss: Value logged under ``validation.loss``; its ``float()``
                is also included in the text summary.
            model: Object whose ``named_parameters()`` are logged as histograms.
            y: Pair ``(mel_targets, gate_targets)``.
            y_pred: 4-tuple whose last three items are the mel outputs, gate
                outputs and alignments; the first item is ignored.
            iteration: Global step attached to every logged entry.
            x: Indexable batch input; ``x[0]`` is read as the text inputs and
                ``x[5]`` as the speaker ids.
        """
        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, mel_outputs, gate_outputs, alignments = y_pred
        mel_targets, gate_targets = y
        text_inputs = x[0]
        speaker_ids = x[5]

        # plot distribution of parameters
        for tag, value in model.named_parameters():
            tag = tag.replace('.', '/')
            self.add_histogram(tag, value.data.cpu().numpy(), iteration)

        # Pick one random batch item to visualise.
        idx = random.randint(0, alignments.size(0) - 1)

        # Detach and copy each tensor to host memory once; the arrays are
        # reused for plotting, audio inversion and the shape summary below.
        # The helpers are assumed not to modify their input in place.
        alignment_np = alignments[idx].data.cpu().numpy().T
        mel_predicted_np = mel_outputs[idx].data.cpu().numpy()
        mel_target_np = mel_targets[idx].data.cpu().numpy()

        self.add_image("alignment",
                       plot_alignment_to_numpy(alignment_np),
                       iteration,
                       dataformats='HWC')
        self.add_image("mel_predicted",
                       plot_spectrogram_to_numpy(mel_predicted_np),
                       iteration,
                       dataformats='HWC')
        self.add_image("gate",
                       plot_gate_outputs_to_numpy(
                           gate_targets[idx].data.cpu().numpy(),
                           torch.sigmoid(
                               gate_outputs[idx]).data.cpu().numpy()),
                       iteration,
                       dataformats='HWC')

        # Record the synthesized audio so the result can be listened to.
        audio_predicted = inv_linearspectrogram(mel_predicted_np)
        self.add_audio('audio_predicted',
                       torch.from_numpy(audio_predicted),
                       iteration,
                       sample_rate=default_hparams.sample_rate)
        self.add_image("mel_target",
                       plot_spectrogram_to_numpy(mel_target_np),
                       iteration,
                       dataformats='HWC')
        audio_target = inv_linearspectrogram(mel_target_np)
        self.add_audio('audio_target',
                       torch.from_numpy(audio_target),
                       iteration,
                       sample_rate=default_hparams.sample_rate)

        # Build a JSON summary describing the chosen sample.
        spk = int(speaker_ids[idx].data.cpu().numpy().flatten()[0])
        ph_ids = text_inputs[idx].data.cpu().numpy().flatten()
        samples_per_ms = default_hparams.sample_rate / 1000
        summary = dict(speaker_id=spk,
                       phonemes=sequence_to_text(ph_ids),
                       phonemes_size=len(ph_ids),
                       validation_loss=float(reduced_loss),
                       audio_target_ms=int(len(audio_target) / samples_per_ms),
                       audio_predicted_ms=int(
                           len(audio_predicted) / samples_per_ms),
                       spectrogram_target_shape=str(mel_target_np.shape),
                       spectrogram_predicted_shape=str(mel_predicted_np.shape),
                       alignment_shape=str(alignment_np.shape))
        out_text = json.dumps(summary, indent=4, ensure_ascii=False)
        out_text = f'<pre>{out_text}</pre>'  # HTML tags are supported
        self.add_text('text', out_text, iteration)