Code Example #1
    def __init_model(self):
        input_text = "你好, 很高兴认识你"
        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=config.baker_mapper_pretrained_path)
        input_ids = self.processor.text_to_sequence(input_text, inference=True)
        # tacotron2_config = AutoConfig.from_pretrained( config.tacotron2_baker )
        # self.tacotron2 = TFAutoModel.from_pretrained( config=tacotron2_config, pretrained_path=config.tacotron2_pretrained_path, training=False,  name="tacotron2" )
        tacotron2_config = AutoConfig.from_pretrained(config.tacotron2_baker)
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=tacotron2_config,
            pretrained_path=None,
            is_build=False,  # don't build the model if you want to save it to pb (TF related bug)
            name="tacotron2")
        self.tacotron2.setup_window(win_front=5, win_back=5)
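        # Trace one dummy inference to build the model graph before loading the
        # weights and exporting the model as a SavedModel.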
        _, mel_outputs, stop_token_prediction, alignment_history = self.tacotron2.inference(
            tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
            tf.convert_to_tensor([len(input_ids)], tf.int32),
            tf.convert_to_tensor([0], dtype=tf.int32))
        self.tacotron2.load_weights(config.tacotron2_pretrained_path)
        tf.saved_model.save(self.tacotron2,
                            "./test_saved",
                            signatures=self.tacotron2.inference)
        self.tacotron2 = tf.saved_model.load("./test_saved")

        mb_melgan_config = AutoConfig.from_pretrained(
            config.multiband_melgan_baker)
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=mb_melgan_config,
            pretrained_path=config.multiband_melgan_pretrained_path,
            name="mb_melgan")

        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=config.baker_mapper_pretrained_path)
Code Example #2
    def __init__(self):
        self.converter = opencc.OpenCC('tw2s.json')
        tts_model_dir = os.environ['TTS_MODEL_DIR']
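        # If the local model directory is missing, download the archive from the
        # 'dailybrief' bucket and extract it next to the target directory.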
        if not os.path.exists(tts_model_dir):
            parent_dir = os.path.dirname(tts_model_dir)
            zip_file_path = os.path.join(parent_dir, 'model_files.zip')
            download_blob('dailybrief', 'models/model_files.zip', zip_file_path)
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(parent_dir)
        tacotron2_config_file = os.path.join(
            tts_model_dir, 'config/tacotron2.baker.v1.yaml')
        mb_melgan_config_file = os.path.join(
            tts_model_dir, 'config/multiband_melgan.baker.v1.yaml')
        tacotron2_config = AutoConfig.from_pretrained(tacotron2_config_file)
        mb_melgan_config = AutoConfig.from_pretrained(mb_melgan_config_file)
        text2mel_model_file = os.path.join(
            tts_model_dir, 'models/tacotron-model-100000.h5')
        vocoder_model_file = os.path.join(tts_model_dir, 'models/generator-920000.h5')
        baker_mapper_file = os.path.join(tts_model_dir, 'models/baker_mapper.json')

        self.text2mel_model = TFAutoModel.from_pretrained(
            config=tacotron2_config,
            pretrained_path=text2mel_model_file,
            name='tacotron2'
        )
        self.vocoder_model = TFAutoModel.from_pretrained(
            config=mb_melgan_config,
            pretrained_path=vocoder_model_file,
            name='mb_melgan'
        )
        self.processor = AutoProcessor.from_pretrained(pretrained_path=baker_mapper_file)
Code Example #3
    def __init__(self):
        if (Text2SpeechModel == "dc_tts"):
            self.g = Graph(mode="synthesize")
            print("Text2Speech Tensorflow Graph loaded")
        elif (Text2SpeechModel == "RTVC"):
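            # Real-Time Voice Cloning: load the encoder, synthesizer, and vocoder,
            # then embed a reference utterance (ref.wav) for later synthesis.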
            enc_model_fpath = os.path.join(
                root_file_path, "RTVC", "encoder/saved_models/pretrained.pt")
            syn_model_dir = os.path.join(
                root_file_path, "RTVC",
                "synthesizer/saved_models/logs-pretrained")
            voc_model_fpath = os.path.join(
                root_file_path, "RTVC",
                "vocoder/saved_models/pretrained/pretrained.pt")
            encoder.load_model(enc_model_fpath)
            self.synthesizer = Synthesizer(os.path.join(
                syn_model_dir, "taco_pretrained"),
                                           low_mem=False)
            vocoder.load_model(voc_model_fpath)
            in_fpath = os.path.join("/",
                                    *root_file_path.split("/")[:-1],
                                    "REF/refaudioRTVC/ref.wav")
            preprocessed_wav = encoder.preprocess_wav(in_fpath)
            original_wav, sampling_rate = librosa.load(in_fpath)
            preprocessed_wav = encoder.preprocess_wav(original_wav,
                                                      sampling_rate)
            embed = encoder.embed_utterance(preprocessed_wav)
            self.embeds = [embed]
        elif (Text2SpeechModel == "AudioSynth"):
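            # TensorFlowTTS pipeline: Tacotron2 (text2mel) + MelGAN-STFT (vocoder),
            # warmed up with a single do_synthesis call.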
            taco_pretrained_config_path = os.path.join(
                root_file_path,
                'AudioSynth/TensorFlowTTS/examples/tacotron2/conf/tacotron2.v1.yaml'
            )
            tacotron2_config = AutoConfig.from_pretrained(
                taco_pretrained_config_path)
            taco_path = os.path.join(root_file_path,
                                     "AudioSynth/tacotron2-120k.h5")
            self.tacotron2 = TFAutoModel.from_pretrained(
                config=tacotron2_config,
                pretrained_path=taco_path,
                training=False,
                name="tacotron2")

            melgan_stft_pretrained_config_path = os.path.join(
                root_file_path,
                'AudioSynth/TensorFlowTTS/examples/melgan.stft/conf/melgan.stft.v1.yaml'
            )
            melgan_stft_config = AutoConfig.from_pretrained(
                melgan_stft_pretrained_config_path)
            melgan_stft_path = os.path.join(root_file_path,
                                            "AudioSynth/melgan.stft-2M.h5")
            self.melgan_stft = TFAutoModel.from_pretrained(
                config=melgan_stft_config,
                pretrained_path=melgan_stft_path,
                name="melgan_stft")
            self.processor = AutoProcessor.from_pretrained(
                pretrained_path=os.path.join(
                    root_file_path, "AudioSynth/ljspeech_mapper.json"))
            mels, alignment_history, audios = do_synthesis(
                "Hello, how can I help you today?", self.tacotron2,
                self.melgan_stft, "TACOTRON", "MELGAN-STFT", self.processor)
Code Example #4
def init_speech():
    global sf
    global tf
    global TFAutoModel
    global AutoProcessor
    import soundfile as sf
    import tensorflow as tf
    from tensorflow_tts.inference import TFAutoModel
    from tensorflow_tts.inference import AutoProcessor

    global fastspeech2
    global mb_melgan
    global processor
    # initialize fastspeech2 model.
    fastspeech2 = TFAutoModel.from_pretrained(
        "tensorspeech/tts-fastspeech2-ljspeech-en")

    # initialize mb_melgan model
    mb_melgan = TFAutoModel.from_pretrained(
        "tensorspeech/tts-mb_melgan-ljspeech-en")

    # inference
    processor = AutoProcessor.from_pretrained(
        "tensorspeech/tts-fastspeech2-ljspeech-en")
    inference("Hello sir")
    debug("Speech", "init")
Code Example #5
    def __init__(self):
        # initialize tts model. fastspeech2 or tacotron2
        self.tts_model = TFAutoModel.from_pretrained(
            "tensorspeech/tts-fastspeech2-kss-ko")
        # initialize mb_melgan model
        self.mb_melgan = TFAutoModel.from_pretrained(
            "tensorspeech/tts-mb_melgan-kss-ko")
        # inference
        self.processor = AutoProcessor.from_pretrained(
            "tensorspeech/tts-fastspeech2-kss-ko")
Code Example #6
def test_auto_model(config_path):
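    # Build the model from its config alone (no pretrained weights), then
    # round-trip it through save_pretrained / from_pretrained.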
    config = AutoConfig.from_pretrained(pretrained_path=config_path)
    model = TFAutoModel.from_pretrained(pretrained_path=None, config=config)

    # test save_pretrained
    config.save_pretrained("./test_saved")
    model.save_pretrained("./test_saved")

    # test from_pretrained
    config = AutoConfig.from_pretrained("./test_saved/config.yml")
    model = TFAutoModel.from_pretrained("./test_saved/model.h5", config=config)
Code Example #7
    def __init_model(self):
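        # Load Tacotron2 (text2mel), MB-MelGAN (vocoder), and the Baker text processor.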
        tacotron2_config = AutoConfig.from_pretrained(config.tacotron2_baker)
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=tacotron2_config, pretrained_path=config.tacotron2_pretrained_path, training=False, name="tacotron2")
        self.tacotron2.setup_window(win_front=5, win_back=5)

        mb_melgan_config = AutoConfig.from_pretrained(config.multiband_melgan_baker)
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=mb_melgan_config, pretrained_path=config.multiband_melgan_pretrained_path, name="mb_melgan")

        self.processor = AutoProcessor.from_pretrained(pretrained_path=config.baker_mapper_pretrained_path)
Code Example #8
    def __init__(self):
        # Limit the fraction of GPU memory allocated to this process
        gpu_options = tf.compat.v1.GPUOptions(
            per_process_gpu_memory_fraction=0.8)
        conf = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
        # Elastic memory allocation (grow as needed)
        #conf.gpu_options.allow_growth = True
        session = tf.compat.v1.Session(config=conf)

        # Load the Tacotron2 config and trained model
        module_path = os.path.dirname(os.path.abspath(__file__))
        tacotron2_config = AutoConfig.from_pretrained(
            os.path.join(module_path,
                         'examples/tacotron2/conf/tacotron2.song44k.v5.yaml'))
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=tacotron2_config,
            pretrained_path=os.path.join(
                module_path,
                "examples/tacotron2/exp/train.tacotron2.song44k.v5/checkpoints/model-68000.h5"
            ),
            name="tacotron2")

        # Load the FastSpeech2 config and trained model
        fastspeech2_config = AutoConfig.from_pretrained(
            os.path.join(
                module_path,
                'examples/fastspeech2/conf/fastspeech2.song44k.v5.1.yaml'))
        self.fastspeech2 = TFAutoModel.from_pretrained(
            config=fastspeech2_config,
            pretrained_path=os.path.join(
                module_path,
                "examples/fastspeech2/exp/train.fastspeech2.song44k.v5.1/checkpoints/model-600000.h5"
            ),
            name="fastspeech2")

        # Load the MB-MelGAN config and trained model
        mb_melgan_config = AutoConfig.from_pretrained(
            os.path.join(
                module_path,
                'examples/multiband_melgan/conf/multiband_melgan.ko.song44k.v5.1.yaml'
            ))
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=mb_melgan_config,
            pretrained_path=os.path.join(
                module_path,
                "examples/multiband_melgan/exp/train.multiband_melgan.ko.song44k.v5.1/checkpoints/generator-1740000.h5"
            ),
            name="mb_melgan")

        # processor: load the character-to-id mapper
        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=os.path.join(module_path,
                                         "test/files/kss_mapper.json"))
Code Example #9
    def __init__(self):
        # Limit the fraction of GPU memory allocated to this process
        gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8) 
        conf = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
        # Elastic memory allocation (grow as needed)
        #conf.gpu_options.allow_growth = True
        session = tf.compat.v1.Session(config=conf)

        # Load the Tacotron2 config and trained model
        module_path = os.path.dirname(os.path.abspath(__file__))        
        # pdb.set_trace()
        tacotron2_config = AutoConfig.from_pretrained(os.path.join(module_path,'./examples/tacotron2/conf/tacotron2.song8k.v3.yaml'))
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=tacotron2_config,
            pretrained_path=os.path.join(module_path,"./examples/tacotron2/exp/train.tacotron2.song8k.v3/checkpoints/model-68000.h5"),
            name="tacotron2"
        )

        # Load the FastSpeech2 config and trained model
        fastspeech2_config = AutoConfig.from_pretrained(os.path.join(module_path,'./examples/fastspeech2/conf/fastspeech2.song8k.v3.yaml'))
        self.fastspeech2 = TFAutoModel.from_pretrained(
            config=fastspeech2_config,
            # pretrained_path=os.path.join(module_path,"./examples/fastspeech2/exp/train.fastspeech2.song8k.v1.1/checkpoints/model-200000.h5"),
            pretrained_path=os.path.join(module_path,"./examples/fastspeech2/exp/train.fastspeech2.song8k.v3/checkpoints/model-200000.h5"),
            name="fastspeech2"
        )

        # fastspeech1_config = AutoConfig.from_pretrained(os.path.join(module_path,'examples/fastspeech/conf/fastspeech.v3_song44k_v51.yaml'))
        # self.fastspeech1 = TFAutoModel.from_pretrained(
        #     config=fastspeech1_config,
        #     pretrained_path=os.path.join(module_path,"examples/fastspeech/exp/train.fastspeech.song.v41/checkpoints/model-200000.h5"),
        #     name="fastspeech1"
        # )          
        # resizing positional embedding
        # self.fastspeech1._build()
        # self.fastspeech1.save_weights("./resize.h5")
        # self.fastspeech1.resize_positional_embeddings(8000)
        # self.fastspeech1.load_weights("./resize.h5", by_name=True, skip_mismatch=True)   

        # Load the MB-MelGAN config and trained model
        mb_melgan_config = AutoConfig.from_pretrained(os.path.join(module_path,'./examples/multiband_melgan/conf/multiband_melgan.ko.8k.v3.yaml'))
        self.mb_melgan = TFAutoModel.from_pretrained(
            config=mb_melgan_config,
            pretrained_path=os.path.join(module_path,"./examples/multiband_melgan/exp/train.multiband_melgan.ko.song8k.v3/checkpoints/generator-1000000.h5"),
            name="mb_melgan"
        )

        # processor: load the character-to-id mapper
        self.processor = AutoProcessor.from_pretrained(pretrained_path=os.path.join(module_path,"test/files/kss_mapper.json"))
Code Example #10
File: models.py  Project: X-CCS/TensorFlowTTS_chinese
    def _converter_model(self):
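        # Convert Tacotron2 to a TFLite model and run it through the TFLite
        # interpreter; MB-MelGAN and the processor are loaded as regular models.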
        with open( config.tacotron2_baker ) as f:
            conf = yaml.load(f, Loader=yaml.Loader)
        conf = Tacotron2Config(**conf["tacotron2_params"])
        self.tacotron2 = TFTacotron2(config=conf, training=False, name="tacotron2", enable_tflite_convertible=True)
        self.tacotron2.setup_window(win_front=5, win_back=5)
        self.tacotron2.setup_maximum_iterations(1000) # be careful
        self.tacotron2._build()
        self.tacotron2.load_weights(config.tacotron2_pretrained_path)
        tacotron2_concrete_function = self.tacotron2.inference_tflite.get_concrete_function()
        converter = tf.lite.TFLiteConverter.from_concrete_functions( [tacotron2_concrete_function] )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.target_spec.supported_ops = [ tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS ]
        tflite_model = converter.convert()
        with open('tacotron2.tflite', 'wb') as f:
            f.write(tflite_model)
        
        print('Model size is %f MBs.' % (len(tflite_model) / 1024 / 1024.0) )

        #tacotron2_config = AutoConfig.from_pretrained( config.tacotron2_baker )
        #self.tacotron2 = TFAutoModel.from_pretrained( config=tacotron2_config, pretrained_path='tacotron2.tflite', training=False,  name="tacotron2" )
        #self.tacotron2.setup_window(win_front=5, win_back=5)
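        # Load the converted TFLite model and query its input/output tensor details.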
        self.interpreter = tf.lite.Interpreter(model_path='tacotron2.tflite')
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        mb_melgan_config = AutoConfig.from_pretrained( config.multiband_melgan_baker )
        self.mb_melgan = TFAutoModel.from_pretrained( config=mb_melgan_config, pretrained_path=config.multiband_melgan_pretrained_path, name="mb_melgan" )

        self.processor = AutoProcessor.from_pretrained(pretrained_path=config.baker_mapper_pretrained_path)
Code Example #11
# The Original Code is Copyright (C) 2020 Voxell Technologies.
# All rights reserved.

import soundfile as sf
import tensorflow as tf
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

processor = AutoProcessor.from_pretrained("../ljspeech_mapper.json")

# initialize tacotron2 model
config = AutoConfig.from_pretrained("../tacotron2/conf/tacotron2.v1.yaml")
tacotron2 = TFAutoModel.from_pretrained(
  config=config, 
  pretrained_path=None,
  is_build=True,
  name="tacotron2"
)

tacotron2.setup_window(win_front=6, win_back=6)
tacotron2.setup_maximum_iterations(3000)

tacotron2.load_weights("../tacotron2/checkpoints/model-120000.h5")
# tf.saved_model.save(tacotron2, "../tacotron2/inference", signatures=tacotron2.inference)
# tacotron2 = tf.saved_model.load("../tacotron2/inference")

# initialize melgan model
melgan_config = AutoConfig.from_pretrained('../melgan/conf/melgan.v1.yaml')
melgan = TFAutoModel.from_pretrained(
  config=melgan_config,
  pretrained_path="../melgan/checkpoints/generator-1500000.h5"
Code Example #12
import yaml
import numpy as np
import matplotlib.pyplot as plt

import scipy.io.wavfile
import re
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import AutoProcessor

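# Load FastSpeech2 (text2mel) and MB-MelGAN (vocoder) from local config files and checkpoints.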
fastspeech2_config = AutoConfig.from_pretrained('pretrained/fastspeech2_config.yml')
#fastspeech2_config.max_position_embeddings = 20000
fastspeech2 = TFAutoModel.from_pretrained(
    config=fastspeech2_config,
    pretrained_path="pretrained/fastspeech2-150k.h5",
    name="fastspeech2"
)

mb_melgan_config = AutoConfig.from_pretrained('pretrained/mb.melgan_config.yml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
    pretrained_path="pretrained/mb.melgan-940k.h5",
    name="mb_melgan"
)

processor = AutoProcessor.from_pretrained(pretrained_path="pretrained/ljspeech_mapper.json")


def do_synthesis(input_text, text2mel_model, vocoder_model, text2mel_name, vocoder_name):
    input_ids = processor.text_to_sequence(input_text)
Code Example #13
from tensorflow_tts.configs.tacotron2 import Tacotron2Config

from tensorflow_tts.models.tacotron2 import TFTacotron2

from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

import datetime

starttime = datetime.datetime.now()

# Tacotron2
tacotron2_config = AutoConfig.from_pretrained('examples/tacotron2/conf/tacotron2.baker.v1.yaml')
tacotron2 = TFAutoModel.from_pretrained(
    config=tacotron2_config,
    # pretrained_path="trained/model-60000.h5",
    # pretrained_path="trained/taco_char_server_126k.h5",
    pretrained_path="trained/taco_local_word_new_82k.h5",
    name="tacotron2"
)

# FastSpeech2
fastspeech2_config = AutoConfig.from_pretrained('examples/fastspeech2/conf/fastspeech2.baker.v2.yaml')
fastspeech2 = TFAutoModel.from_pretrained(
    config=fastspeech2_config,
    pretrained_path="trained/fs_local_135k.h5",
    name="fastspeech2"
)

# MB-MelGAN
mb_melgan_config = AutoConfig.from_pretrained('examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
Code Example #14
File: test_auto.py  Project: orikama/TensorflowTTS
def test_auto_model(config_path):
    config = AutoConfig.from_pretrained(pretrained_path=config_path)
    model = TFAutoModel.from_pretrained(config=config, pretrained_path=None)
Code Example #15
# The Original Code is Copyright (C) 2020 Voxell Technologies.
# All rights reserved.

import soundfile as sf
import tensorflow as tf
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

processor = AutoProcessor.from_pretrained("../ljspeech_mapper.json")

# initialize fastspeech2 model.
config = AutoConfig.from_pretrained('../fastspeech2/conf/fastspeech2.v1.yaml')
fastspeech2 = TFAutoModel.from_pretrained(
    config=config,
    pretrained_path="../fastspeech2/checkpoints/model-150000.h5",
    is_build=True,
    name="fastspeech2")

# initialize melgan model
melgan_config = AutoConfig.from_pretrained('../melgan/conf/melgan.v1.yaml')
melgan = TFAutoModel.from_pretrained(
    config=melgan_config,
    pretrained_path="../melgan/checkpoints/generator-1670000.h5")

input_text = "how much wood would a woodchuck chuck if a woodchuck could chuck wood?"
input_ids = processor.text_to_sequence(input_text)

# fastspeech2 inference
mel_before, mel_after, duration_outputs, _, _ = fastspeech2.inference(
    input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32),
Code Example #16
# )

# # FastSpeech2
# fastspeech2_config = AutoConfig.from_pretrained('examples/fastspeech2/conf/fastspeech2.baker.v2.yaml')
# fastspeech2 = TFAutoModel.from_pretrained(
#     config=fastspeech2_config,
#     pretrained_path="trained/fastspeech2-200k.h5",
#     name="fastspeech2"
# )

# MB-MelGAN
mb_melgan_config = AutoConfig.from_pretrained(
    'examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
    pretrained_path="trained/mb.melgan_word_428k.h5",  # "trained/mb.melgan-1M.h5"
    # is_build=False,  # don't build model if you want to save it to pb. (TF related bug)
    name="mb_melgan")

# LJSpeechProcessor
# processor = AutoProcessor.from_pretrained("./tensorflow_tts/processor/pretrained/ljspeech_mapper.json")
# processor = AutoProcessor.from_pretrained("trained/baker_mapper_mix.json")
processor = AutoProcessor.from_pretrained("trained/baker_mapper_word.json")

# save tacotron2 to pb
# def save_tacotron2_pb():
#     input_text = "i love you so much."
#     input_ids = processor.text_to_sequence(input_text)
#
#     tacotron2.setup_window(win_front=6, win_back=6)
#     tacotron2.setup_maximum_iterations(3000)
Code Example #17
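    # Checkpoint paths for the FastSpeech2 text2mel model and the MB-MelGAN
    # vocoder are taken from the command line.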
    parser = argparse.ArgumentParser(description='Start TTS and Vocoder')
    parser.add_argument(
        '--path_fs',
        default="examples/fastspeech2_libritts/outdir_libri/checkpoints/model-855000.h5")
    parser.add_argument('--path_mb',
                        default="checks/mb_melgan_or/mb.melgan-940k.h5")

    args = parser.parse_args()

    fastspeech2_config = AutoConfig.from_pretrained(
        'examples/fastspeech2/conf/fastspeech2.v1.yaml')
    fastspeech2 = TFAutoModel.from_pretrained(
        config=fastspeech2_config,
        pretrained_path=args.path_fs,  # "examples/fastspeech2_libritts/outdir_libri/checkpoints/model-855000.h5"
        #training=False,
        name="fastspeech2")

    mb_melgan_config = AutoConfig.from_pretrained(
        'examples/multiband_melgan/conf/multiband_melgan.v1.yaml')
    mb_melgan = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=args.path_mb,  # "checks/mb_melgan_or/mb.melgan-940k.h5"
        name="mb_melgan")

    processor = AutoProcessor.from_pretrained(
        pretrained_path="dump_ljspeech/ljspeech_mapper.json")

    app.run(host='0.0.0.0', port=5454)
Code Example #18
# import IPython.display as ipd
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
from tensorflow_tts.configs.tacotron2 import Tacotron2Config

from tensorflow_tts.models.tacotron2 import TFTacotron2

from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor
import datetime

starttime = datetime.datetime.now()

# Tacotron2
tacotron2_config = AutoConfig.from_pretrained(
    'examples/tacotron2/conf/tacotron2.baker.v1.yaml')
tacotron2 = TFAutoModel.from_pretrained(
    config=tacotron2_config,
    pretrained_path="trained/taco_server_word_60k.h5",
    name="tacotron2")

# # FastSpeech2
# fastspeech2_config = AutoConfig.from_pretrained('examples/fastspeech2/conf/fastspeech2.baker.v2.yaml')
# fastspeech2 = TFAutoModel.from_pretrained(
#     config=fastspeech2_config,
#     pretrained_path="trained/fastspeech2-200k.h5",
#     name="fastspeech2"
# )

# MB-MelGAN
mb_melgan_config = AutoConfig.from_pretrained(
    'examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
Code Example #19
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

# The first run downloads two NLTK data packages:
"""
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Unzipping corpora/cmudict.zip.
"""

tacotron2_config = AutoConfig.from_pretrained(
    'TensorFlowTTS/examples/tacotron2/conf/tacotron2.baker.v1.yaml')
tacotron2 = TFAutoModel.from_pretrained(config=tacotron2_config,
                                        pretrained_path="tacotron2-100k.h5",
                                        training=False,
                                        name="tacotron2")
mb_melgan_config = AutoConfig.from_pretrained(
    'TensorFlowTTS/examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml'
)
mb_melgan = TFAutoModel.from_pretrained(config=mb_melgan_config,
                                        pretrained_path="mb.melgan-920k.h5",
                                        name="mb_melgan")

processor = AutoProcessor.from_pretrained(
    pretrained_path="./baker_mapper.json")


def do_synthesis(input_text, text2mel_model, vocoder_model, text2mel_name,
                 vocoder_name):
    input_ids = processor.text_to_sequence(input_text, inference=True)
Code Example #20
# The Original Code is Copyright (C) 2020 Voxell Technologies.
# All rights reserved.

import soundfile as sf
import tensorflow as tf

from tensorflow_tts.inference import TFAutoModel, AutoConfig, AutoProcessor

processor = AutoProcessor.from_pretrained("../ljspeech_mapper.json")

# initialize fastspeech model.
fs_config = AutoConfig.from_pretrained('../fastspeech/conf/fastspeech.v1.yaml')
fastspeech = TFAutoModel.from_pretrained(
    config=fs_config,
    pretrained_path="../fastspeech/checkpoints/model-195000.h5")

# initialize melgan model
melgan_config = AutoConfig.from_pretrained('../melgan/conf/melgan.v1.yaml')
melgan = TFAutoModel.from_pretrained(
    config=melgan_config,
    pretrained_path="../melgan/checkpoints/generator-1670000.h5")

ids = processor.text_to_sequence(
    "how much wood would a woodchuck chuck if a woodchuck could chuck wood?")
print(ids)
ids = tf.expand_dims(ids, 0)
# fastspeech inference

masked_mel_before, masked_mel_after, duration_outputs = fastspeech.inference(