예제 #1
0
def inference():
    """Run text inference for the 'val' split using the 'pff' preset.

    Builds default hyper-parameters, loads a WaveGlow checkpoint, passes the
    preset JSON path and the defaults to ``load_hparams``, restores the
    inference model via ``load_inference_model`` and calls
    ``inference_texts`` once per split with ``postnet=False``.
    """
    hparams = create_hparams()
    preset = 'pff'
    vocoder_name = 'griffin_lim'
    checkpoint_step = 320 * 1000

    # NOTE: the WaveGlow network is loaded and moved to the GPU
    # unconditionally, even though 'griffin_lim' is the vocoder selected
    # above; it is still forwarded to ``inference_texts``.
    checkpoint = torch.load(
        '/data1/hfzeng/work/tacotron/original_tacotron/waveglow/waveglow_old.pt')
    waveglow = checkpoint['model']
    waveglow.remove_weightnorm()
    waveglow.cuda().eval()

    hparams = load_hparams('./presets/{}.json'.format(preset), hparams)
    model = load_inference_model(hparams, preset, checkpoint_step)

    # Only the validation split is processed.
    for split in ('val',):
        split_texts = get_texts(_type=split)
        inference_texts(
            model,
            hparams,
            split_texts,
            checkpoint_step,
            preset,
            vocoder=vocoder_name,
            waveglow=waveglow,
            _type=split,
            postnet=False,
        )
예제 #2
0
def main():
    """Wire up config, data handler and batcher, then load and train the model.

    Everything model-related runs inside a single ``tf.Session`` context:
    the graph is built, ``load()`` is called, and then ``train()``.
    """
    cfg = load_hparams()
    handler = DataHandler(cfg)
    batch_source = Batcher(handler)

    with tf.Session() as sess:
        net = Model(session=sess, config=cfg, batcher=batch_source)
        net.build_graph(sess)
        net.load()
        net.train()
예제 #3
0
def test_inference():
    """Print the encoder outputs for the first sentence of ./sentence.txt.

    Uses the 'position-encoding-train' preset and the checkpoint at step
    770k; only the model's encoder ``inference`` is exercised.
    """
    preset = 'position-encoding-train'
    hparams = load_hparams('./presets/{}.json'.format(preset),
                           create_hparams())
    checkpoint_step = 770 * 1000

    # Inputs are prepared before the model is restored.
    sentences = get_texts('./sentence.txt')
    seq, pos = prepare_inputs(hparams, text=sentences[0])

    model = load_inference_model(hparams, preset, checkpoint_step)
    print(model.encoder.inference(seq, pos))
예제 #4
0
import os
from unittest import TestCase
from data.data_manager import DataManager, LabeledPairDataManager
from hparams import load_hparams

# Directory containing this test module; the fixture path below is built
# relative to it rather than to the current working directory.
abspath = os.path.dirname(os.path.abspath(__file__))

# Hyper-parameters shared by every test case in this module.
_fixture_dir = os.path.join(abspath,
                            '../data/test_files/datasets/unlabeled_pair/')
hparams = load_hparams(_fixture_dir)


class TestDataManager(TestCase):
    """Tests for ``DataManager`` constructed from the module-level ``hparams``."""

    def setUp(self):
        """Create a fresh ``DataManager`` for each test; ``sess`` starts as None."""
        self.sess = None
        self.data_manager = DataManager(hparams=hparams)

    def tearDown(self):
        """Drop the manager created in ``setUp``."""
        del self.data_manager

    def test_indexing(self):
        """``indexing`` must turn the sample sentence into the expected id list."""
        expected = [2, 4, 0, 0, 0, 5, 6, 1, 1, 3]
        actual = self.data_manager.indexing('1 4 6 8 2 3')

        self.assertListEqual(expected, actual)


class TestLabeledPairDataManager(TestCase):
    def setUp(self):
        self.sess = None
        self.data_manager = LabeledPairDataManager(hparams=hparams,
                                                    fmax=8000)

    print('melspectrogram.shape', melspectrogram.shape)
    print(melspectrogram)

    plt.figure(figsize=(10, 4))
    plt.subplot(2, 1, 1)
    librosa.display.specshow(np.log(melspectrogram),
                             y_axis='mel',
                             x_axis='time',
                             hop_length=256,
                             fmin=0,
                             fmax=8000)
    plt.title('log Mel spectrogram')
    plt.subplot(2, 1, 2)
    hparams = load_hparams()
    log_mel = get_mel("LJ001-0002.wav", hparams)
    librosa.display.specshow(log_mel.data.numpy(),
                             y_axis='mel',
                             x_axis='time',
                             hop_length=256,
                             fmin=0,
                             fmax=8000)
    plt.title('original Mel spectrogram')
    plt.tight_layout()
    plt.show()

    audio_signal = librosa.feature.inverse.mel_to_audio(melspectrogram,
                                                        sr=22050,
                                                        n_fft=1024,
                                                        hop_length=256,