def test_compute_monophone_ppg(self):
    """Check the dimensionality and total mass of the monophone PPGs.

    The PPG matrix computed for ``self.wav_path`` must have as many
    columns as the monophone transform has rows, and all of its entries
    together must sum to the number of rows of the PPG matrix (to one
    decimal place) -- consistent with every row summing to one.
    """
    dependencies = ppg.DependenciesPPG()
    waveform = feat.read_wav_kaldi(self.wav_path)
    result = ppg.compute_monophone_ppg(
        waveform,
        dependencies.nnet,
        dependencies.lda,
        dependencies.monophone_trans,
    )

    # One output dimension per row of the monophone transform.
    expected_dim = dependencies.monophone_trans.num_rows
    actual_dim = result.shape[1]
    self.assertEqual(actual_dim, expected_dim)

    # The grand total must match the row count, compared to 1 decimal place.
    self.assertAlmostEqual(result.sum(), result.shape[0], places=1)
def image_ppg(ppg_np):
    """Draw a PPG as an image after reducing it with the monophone transform.

    Args:
        ppg_np: The PPG as a numpy array.

    Returns:
        ax: The axes (canvas) the image was drawn on.
        im: The image object returned by ``ax.imshow``.
    """
    dependencies = ppg.DependenciesPPG()
    reduced = ppg.reduce_ppg_dim(Matrix(ppg_np), dependencies.monophone_trans)

    # Transpose the reduced matrix before plotting it.
    heat_map = reduced.numpy().T

    # Only the axes are needed here; the figure handle is not kept.
    _, ax = plt.subplots(figsize=(10, 6))
    im = ax.imshow(
        heat_map,
        aspect="auto",
        origin="lower",
        interpolation='none',
    )
    return ax, im
def get_monophone_ppg(self) -> ndarray:
    """Compute, store and return the monophone PPG of this utterance.

    Reads ``self.wav``, ``self.fs`` and ``self.kaldi_shift``; the result is
    stored in ``self.monophone_ppg`` before being returned.

    Returns:
        The monophone ppgs in numpy ndarray format.

    Raises:
        ValueError: If ``self.kaldi_shift`` is smaller than 1 (ms), if the
            waveform is empty, or if the sampling frequency is negative.
    """
    if self.kaldi_shift < 1:  # ms
        # Exceptions do not interpolate logging-style arguments, so the
        # message has to be formatted explicitly before it is raised.
        raise ValueError('Invalid frame kaldi frame shift parameter %s.'
                         % (self.kaldi_shift,))
    # NOTE(review): fs == 0 passes this check; presumably a negative value
    # marks "not set" -- confirm whether zero should be rejected as well.
    if self.wav.size == 0 or self.fs < 0:
        raise ValueError('To perform alignment, the object must contain '
                         'valid speech data and sampling frequency.')

    wav_kaldi = read_wav_kaldi_internal(self.wav, self.fs)
    ppg_deps = ppg.DependenciesPPG()
    self.monophone_ppg = ppg.compute_monophone_ppg(
        wav_kaldi,
        ppg_deps.nnet,
        ppg_deps.lda,
        ppg_deps.monophone_trans,
        self.kaldi_shift,
    )
    return self.monophone_ppg
# NOTE(review): this is the tail of a routine whose header is not visible
# here; denoiser_strength, denoiser_mode, checkpoint_path, waveglow_path,
# teacher_utt_path, is_clip, waveglow_sigma, output_dir, fs and denoiser are
# all defined outside this span.
logging.debug('Denoiser strength: %f', denoiser_strength)
logging.debug('Denoiser mode: %s', denoiser_mode)

# Build the STFT object from the hyper-parameters.
# NOTE(review): taco_stft is not used again in the visible span -- confirm
# whether it is still needed.
hparams = create_hparams_stage()
taco_stft = TacotronSTFT(hparams.filter_length, hparams.hop_length,
                         hparams.win_length, hparams.n_acoustic_feat_dims,
                         hparams.sampling_rate, hparams.mel_fmin,
                         hparams.mel_fmax)

# Load models.
tacotron_model = load_model(hparams)
# The checkpoint is indexed by 'state_dict' to obtain the weights to load.
tacotron_model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
# The return value of eval() is deliberately discarded.
_ = tacotron_model.eval()
waveglow_model = load_waveglow_model(waveglow_path)
deps = ppg.DependenciesPPG()

# Only run the conversion when the teacher utterance file exists on disk;
# otherwise just log a warning.
if os.path.isfile(teacher_utt_path):
    logging.info('Perform AC on %s', teacher_utt_path)
    # PPG of the teacher utterance -> mel prediction -> waveform.
    teacher_ppg = get_ppg(teacher_utt_path, deps)
    ac_mel = get_inference(teacher_ppg, tacotron_model, is_clip)
    # NOTE(review): the meaning of the trailing True flag is not visible here.
    ac_wav = waveglow_audio(ac_mel, waveglow_model, waveglow_sigma, True)
    # Denoise, keep index 0 along the second axis, move the result to the CPU
    # and convert it to a transposed numpy array for writing.
    ac_wav = denoiser(ac_wav, strength=denoiser_strength)[:, 0].cpu().numpy().T
    output_file = os.path.join(output_dir, 'ac.wav')
    wavfile.write(output_file, fs, ac_wav)
else:
    logging.warning('Missing %s', teacher_utt_path)
logging.info('Done!')
def test_ppg_dependencies(self):
    """Smoke test: constructing ``ppg.DependenciesPPG`` yields a non-None object."""
    self.assertIsNotNone(ppg.DependenciesPPG())