Beispiel #1
0
 def test_compute_monophone_ppg(self):
     """Check the shape and total mass of the computed monophone PPGs.

     Reads the waveform at ``self.wav_path``, computes its monophone PPGs,
     and asserts that (a) the second dimension equals the number of rows of
     the monophone transform and (b) the sum of all entries matches the
     first dimension to one decimal place (presumably one unit of
     probability mass per frame -- TODO confirm).
     """
     dependencies = ppg.DependenciesPPG()
     waveform = feat.read_wav_kaldi(self.wav_path)
     posteriors = ppg.compute_monophone_ppg(
         waveform,
         dependencies.nnet,
         dependencies.lda,
         dependencies.monophone_trans,
     )

     expected_dim = dependencies.monophone_trans.num_rows
     self.assertEqual(posteriors.shape[1], expected_dim)
     self.assertAlmostEqual(posteriors.sum(), posteriors.shape[0], places=1)
Beispiel #2
0
def image_ppg(ppg_np):
    """Draw the monophone reduction of a PPG as an image.

    The input is wrapped in a ``Matrix``, reduced with the monophone
    transform from ``ppg.DependenciesPPG()``, converted back to numpy,
    transposed, and rendered with ``imshow`` on a new 10x6 figure.

    Args:
        ppg_np: numpy array holding the PPG to display.

    Returns:
        ax: the axes the image was drawn on (canvas information).
        im: the object returned by ``ax.imshow`` (image information).
    """
    deps = ppg.DependenciesPPG()
    reduced = ppg.reduce_ppg_dim(Matrix(ppg_np), deps.monophone_trans)
    # Transposed before plotting; presumably this puts frames on the
    # horizontal axis -- TODO confirm against the PPG layout.
    heatmap = reduced.numpy().T

    # The figure handle is not needed by callers; only the axes are returned.
    _, ax = plt.subplots(figsize=(10, 6))
    im = ax.imshow(
        heatmap,
        aspect="auto",
        origin="lower",
        interpolation='none',
    )
    return ax, im
    def get_monophone_ppg(self) -> ndarray:
        """Compute, store, and return the monophone PPG of this utterance.

        The waveform is converted with ``read_wav_kaldi_internal`` and passed
        to ``ppg.compute_monophone_ppg`` together with the models from
        ``ppg.DependenciesPPG()`` and this object's ``kaldi_shift``. The
        result is stored on ``self.monophone_ppg`` before being returned.

        Requires non-empty waveform, fs, and kaldi_shift.

        Returns:
            The monophone ppgs in numpy ndarray format.

        Raises:
            ValueError: If ``kaldi_shift`` is below 1, if the waveform is
                empty, or if ``fs`` is negative.
        """
        if self.kaldi_shift < 1:  # ms
            # Interpolate explicitly: unlike logging calls, exception
            # constructors do not apply %-formatting to extra arguments, so
            # passing the value separately left a literal "%d" in the message.
            raise ValueError('Invalid frame kaldi frame shift parameter %d.'
                             % self.kaldi_shift)
        # NOTE(review): fs == 0 passes this check -- confirm whether a zero
        # sampling frequency should also be rejected here.
        if self.wav.size == 0 or self.fs < 0:
            raise ValueError('To perform alignment, the object must contain '
                             'valid speech data and sampling frequency.')

        wav_kaldi = read_wav_kaldi_internal(self.wav, self.fs)
        ppg_deps = ppg.DependenciesPPG()
        self.monophone_ppg = ppg.compute_monophone_ppg(wav_kaldi, ppg_deps.nnet,
                                                       ppg_deps.lda,
                                                       ppg_deps.monophone_trans,
                                                       self.kaldi_shift)
        return self.monophone_ppg
Beispiel #4
0
    # NOTE(review): this is the tail of a function whose signature and
    # earlier statements are not visible here. `denoiser` and `fs` are used
    # below without being bound in the visible lines -- presumably they are
    # defined earlier in the function or at module level; confirm.
    logging.debug('Denoiser strength: %f', denoiser_strength)
    logging.debug('Denoiser mode: %s', denoiser_mode)

    # Build the hyper-parameters and an STFT object configured from them.
    # `taco_stft` is not referenced again in the visible part of the function.
    hparams = create_hparams_stage()
    taco_stft = TacotronSTFT(hparams.filter_length, hparams.hop_length,
                             hparams.win_length, hparams.n_acoustic_feat_dims,
                             hparams.sampling_rate, hparams.mel_fmin,
                             hparams.mel_fmax)

    # Load models.
    # The Tacotron weights come from the 'state_dict' entry of the checkpoint;
    # the return value of eval() is deliberately discarded.
    tacotron_model = load_model(hparams)
    tacotron_model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
    _ = tacotron_model.eval()
    waveglow_model = load_waveglow_model(waveglow_path)

    deps = ppg.DependenciesPPG()

    # Run the conversion only when the teacher utterance file exists;
    # otherwise just log a warning and fall through to 'Done!'.
    if os.path.isfile(teacher_utt_path):
        logging.info('Perform AC on %s', teacher_utt_path)
        # Pipeline: PPG from the teacher utterance -> get_inference output
        # (named ac_mel) -> WaveGlow waveform -> denoised waveform.
        teacher_ppg = get_ppg(teacher_utt_path, deps)
        ac_mel = get_inference(teacher_ppg, tacotron_model, is_clip)
        ac_wav = waveglow_audio(ac_mel, waveglow_model, waveglow_sigma, True)
        # Keep column 0 of the denoiser output, move it to CPU, and convert
        # it to a transposed numpy array before writing.
        ac_wav = denoiser(ac_wav,
                          strength=denoiser_strength)[:, 0].cpu().numpy().T

        # The result is always written as 'ac.wav' inside output_dir.
        output_file = os.path.join(output_dir, 'ac.wav')
        wavfile.write(output_file, fs, ac_wav)
    else:
        logging.warning('Missing %s', teacher_utt_path)

    logging.info('Done!')
Beispiel #5
0
 def test_ppg_dependencies(self):
     """Verify that constructing ``ppg.DependenciesPPG`` yields an object."""
     self.assertIsNotNone(ppg.DependenciesPPG())