예제 #1
0
    def _store_entry(self, index, linear, w):
        """Save the mel, text and gate arrays of one sample as ``.npy`` files.

        The three arrays are written under ``self._path / 'mel'``,
        ``self._path / 'text'`` and ``self._path / 'gate'``. Each file is named
        after ``self._waves[index]`` with its suffix replaced by ``.npy``.

        Args:
            index: Position of the sample in ``self._waves`` / ``self._texts``.
            linear: 2-D array whose second axis is the frame axis; it may hold
                at most ``hp.mel_len * hp.r`` frames. Presumably a linear-scale
                spectrogram of shape (n_freq, n_frames) -- confirm with caller.
            w: Not used by this method.

        Raises:
            AssertionError: If ``linear`` has too many frames, or the
                normalized text does not fit in ``hp.text_len`` characters.
        """
        hp = self.hparams
        basename = self._waves[index].with_suffix(".npy").name

        seq_len = hp.mel_len * hp.r
        lin_len = linear.shape[1]
        # sanity check: the spectrogram must fit in the fixed-size output
        assert lin_len <= seq_len, (
            "spectrogram has %d frames, more than the limit of %d"
            % (lin_len, seq_len))

        # mel spectrograms
        mel = np.dot(self.mel_basis, linear)  # transform to mel scale
        # NOTE(review): _padding is assumed to return an array of the requested
        # (seq_len, n_mels) shape -- confirm against its definition.
        mel = self._padding(mel.T, (seq_len, hp.n_mels))
        # clip before the log so that zeros do not turn into -inf
        mel = np.log(np.clip(mel, 1e-5, None))  # normalize to log scale
        # each row now carries r consecutive frames (given the shape above)
        mel = mel.reshape(-1, hp.n_mels * hp.r)
        np.save(self._path / 'mel' / basename, mel)

        # sequence of character
        text = text_normalize(self._texts[index], hp.vocab)
        # strict '<' guarantees at least one '~' padding character is appended
        assert len(text) < hp.text_len, (
            "normalized text has %d characters, limit is %d"
            % (len(text), hp.text_len - 1))
        text = text.ljust(hp.text_len, '~')
        text = [self._char2idx[ch] for ch in text]
        np.save(self._path / 'text' / basename, text)

        # gate: 0 up to index lin_len // r, 1 from there to the end
        # The `np.int` alias was removed in NumPy 1.24; builtin `int` is the
        # type it referred to, so the stored dtype is unchanged.
        gate = np.zeros(hp.mel_len, dtype=int)
        gate[lin_len // hp.r:] = 1
        np.save(self._path / 'gate' / basename, gate)
예제 #2
0
    def _store_entry(self, index, linear, w):
        """Write the linear, mel and text arrays of one sample to ``.npy`` files.

        Output goes to ``self._path / 'linear'``, ``self._path / 'mel'`` and
        ``self._path / 'text'``; every file shares one name, derived from
        ``self._waves[index]`` with its suffix replaced by ``.npy``.

        Args:
            index: Position of the sample in ``self._waves`` / ``self._texts``.
            linear: 2-D array whose second axis is the frame axis; it may hold
                at most ``n_frames * r`` frames. Presumably a linear-scale
                spectrogram of shape (n_freq, n_frames) -- confirm with caller.
            w: Not used by this method.

        Raises:
            AssertionError: If ``linear`` has too many frames, or the
                normalized text is not shorter than ``text_len``.
        """
        hparams = self.hparams
        out_name = Path(self._waves[index]).with_suffix(".npy").name

        max_frames = hparams.n_frames * hparams.r
        n_valid = linear.shape[1]
        assert n_valid <= max_frames  # sanity check on the input length

        # linear spectrogram: pad along the frame axis, normalize, store
        linear_out = audio.normalize(
            self._padding(linear.T, (max_frames, linear.shape[0])), hparams)
        np.save(self._path / 'linear' / out_name, linear_out)

        # mel spectrogram: project onto the mel basis, pad, normalize, then
        # reshape so that every row has n_mels * r values
        mel_out = self._padding(
            np.dot(self.mel_basis, linear).T, (max_frames, hparams.n_mels))
        mel_out = audio.normalize(mel_out, hparams).reshape(
            -1, hparams.n_mels * hparams.r)
        np.save(self._path / 'mel' / out_name, mel_out)

        # text: normalize, right-pad with '~' to text_len, map chars to indices
        chars = text_normalize(self._texts[index], hparams.vocab)
        assert len(chars) < hparams.text_len
        padded_chars = chars.ljust(hparams.text_len, '~')
        char_ids = [self._char2idx[symbol] for symbol in padded_chars]
        np.save(self._path / 'text' / out_name, char_ids)
예제 #3
0
def synthesize(args):
    """Synthesize a waveform from a text file with a trained Tacotron model.

    Reads the text in ``args.f_text``, loads model parameters from
    ``args.f_model`` and writes the resulting audio to ``args.f_output`` with
    sample rate ``hp.sr``.

    Side effect: sets the module-level ``hp.batch_size`` to 1.

    Args:
        args: Object providing the ``f_text``, ``f_model`` and ``f_output``
            attributes (presumably a parsed argparse namespace).
    """
    symbol_to_id = {symbol: idx for idx, symbol in enumerate(hp.vocab)}

    with open(args.f_text, 'r') as fin:
        raw_text = fin.read()

    # normalize, keep at most text_len - 1 characters, then pad with '~' so
    # the sequence is exactly text_len long
    text = text_normalize(raw_text, hp.vocab)[:hp.text_len - 1]
    n_padding = hp.text_len - len(text)
    text += '~' * n_padding
    token_ids = np.array([symbol_to_id[ch] for ch in text]).reshape(-1)

    # a single utterance is synthesized per call
    hp.batch_size = 1

    # build the network in inference mode and restore its weights
    net = Tacotron(hp)
    net.training = False
    net.load_parameters(args.f_model)

    # the graph is constructed first; the input data is filled in afterwards
    x_txt = nn.Variable([hp.batch_size, hp.text_len])
    _, mag, _ = net(x_txt)
    x_txt.d = np.expand_dims(token_ids, axis=0)

    mag.forward(clear_buffer=True)
    spectrogram = mag.d[0].copy()
    waveform = synthesize_from_spec(spectrogram, hp)
    wavfile.write(args.f_output, rate=hp.sr, data=waveform)  # write a sample
예제 #4
0
def synthesize(args):
    """Predict mel features from a text file with a trained Tacotron2 model.

    Reads the text in ``args.f_text``, loads model parameters from
    ``args.f_model`` and saves the predicted mels to ``args.f_output`` via
    ``np.save`` as an array of shape ``(1, hp.n_mels, n_frames)``.

    Side effect: sets the module-level ``hp.batch_size`` to 1.

    Args:
        args: Object providing the ``f_text``, ``f_model`` and ``f_output``
            attributes (presumably a parsed argparse namespace).
    """
    symbol_to_id = {symbol: idx for idx, symbol in enumerate(hp.vocab)}

    with open(args.f_text, 'r') as fin:
        raw_text = fin.read()

    # normalize, keep at most text_len - 1 characters, then pad with '~' so
    # the sequence is exactly text_len long
    text = text_normalize(raw_text, hp.vocab)[:hp.text_len - 1]
    n_padding = hp.text_len - len(text)
    text += '~' * n_padding
    token_ids = np.array([symbol_to_id[ch] for ch in text]).reshape(-1)

    # a single utterance is synthesized per call
    hp.batch_size = 1

    # build the network in inference mode and restore its weights
    net = Tacotron2(hp)
    net.training = False
    net.load_parameters(args.f_model)

    # the graph is constructed first; the input data is filled in afterwards
    x_txt = nn.Variable([hp.batch_size, hp.text_len])
    _, mels, _, _ = net(x_txt)
    x_txt.d = np.expand_dims(token_ids, axis=0)

    mels.forward(clear_buffer=True)
    # regroup the output into single frames of n_mels values, then move the
    # mel axis in front of the frame axis before saving
    mel_frames = mels.d.copy().reshape(1, -1, hp.n_mels)
    np.save(args.f_output, np.transpose(mel_frames, (0, 2, 1)))