def __init_model(self):
    input_text = "你好, 很高兴认识你"
    self.processor = AutoProcessor.from_pretrained(
        pretrained_path=config.baker_mapper_pretrained_path)
    input_ids = self.processor.text_to_sequence(input_text, inference=True)
    # tacotron2_config = AutoConfig.from_pretrained(config.tacotron2_baker)
    # self.tacotron2 = TFAutoModel.from_pretrained(config=tacotron2_config, pretrained_path=config.tacotron2_pretrained_path, training=False, name="tacotron2")
    tacotron2_config = AutoConfig.from_pretrained(config.tacotron2_baker)
    self.tacotron2 = TFAutoModel.from_pretrained(
        config=tacotron2_config,
        pretrained_path=None,
        is_build=False,  # don't build model if you want to save it to pb. (TF related bug)
        name="tacotron2")
    self.tacotron2.setup_window(win_front=5, win_back=5)
    _, mel_outputs, stop_token_prediction, alignment_history = self.tacotron2.inference(
        tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        tf.convert_to_tensor([len(input_ids)], tf.int32),
        tf.convert_to_tensor([0], dtype=tf.int32))
    self.tacotron2.load_weights(config.tacotron2_pretrained_path)
    tf.saved_model.save(self.tacotron2, "./test_saved",
                        signatures=self.tacotron2.inference)
    self.tacotron2 = tf.saved_model.load("./test_saved")
    mb_melgan_config = AutoConfig.from_pretrained(config.multiband_melgan_baker)
    self.mb_melgan = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=config.multiband_melgan_pretrained_path,
        name="mb_melgan")
    self.processor = AutoProcessor.from_pretrained(
        pretrained_path=config.baker_mapper_pretrained_path)

def __init__(self):
    self.converter = opencc.OpenCC('tw2s.json')
    tts_model_dir = os.environ['TTS_MODEL_DIR']
    if not os.path.exists(tts_model_dir):
        parent_dir = os.path.dirname(tts_model_dir)
        zip_file_path = os.path.join(parent_dir, 'model_files.zip')
        download_blob('dailybrief', 'models/model_files.zip', zip_file_path)
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(parent_dir)
    tacotron2_config_file = os.path.join(
        tts_model_dir, 'config/tacotron2.baker.v1.yaml')
    mb_melgan_config_file = os.path.join(
        tts_model_dir, 'config/multiband_melgan.baker.v1.yaml')
    tacotron2_config = AutoConfig.from_pretrained(tacotron2_config_file)
    mb_melgan_config = AutoConfig.from_pretrained(mb_melgan_config_file)
    text2mel_model_file = os.path.join(
        tts_model_dir, 'models/tacotron-model-100000.h5')
    vocoder_model_file = os.path.join(tts_model_dir, 'models/generator-920000.h5')
    baker_mapper_file = os.path.join(tts_model_dir, 'models/baker_mapper.json')
    self.text2mel_model = TFAutoModel.from_pretrained(
        config=tacotron2_config,
        pretrained_path=text2mel_model_file,
        name='tacotron2')
    self.vocoder_model = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=vocoder_model_file,
        name='mb_melgan')
    self.processor = AutoProcessor.from_pretrained(pretrained_path=baker_mapper_file)

def __init__(self):
    if Text2SpeechModel == "dc_tts":
        self.g = Graph(mode="synthesize")
        print("Text2Speech Tensorflow Graph loaded")
    elif Text2SpeechModel == "RTVC":
        enc_model_fpath = os.path.join(
            root_file_path, "RTVC", "encoder/saved_models/pretrained.pt")
        syn_model_dir = os.path.join(
            root_file_path, "RTVC", "synthesizer/saved_models/logs-pretrained")
        voc_model_fpath = os.path.join(
            root_file_path, "RTVC", "vocoder/saved_models/pretrained/pretrained.pt")
        encoder.load_model(enc_model_fpath)
        self.synthesizer = Synthesizer(
            os.path.join(syn_model_dir, "taco_pretrained"), low_mem=False)
        vocoder.load_model(voc_model_fpath)
        in_fpath = os.path.join("/", *root_file_path.split("/")[:-1],
                                "REF/refaudioRTVC/ref.wav")
        preprocessed_wav = encoder.preprocess_wav(in_fpath)
        original_wav, sampling_rate = librosa.load(in_fpath)
        preprocessed_wav = encoder.preprocess_wav(original_wav, sampling_rate)
        embed = encoder.embed_utterance(preprocessed_wav)
        self.embeds = [embed]
    elif Text2SpeechModel == "AudioSynth":
        taco_pretrained_config_path = os.path.join(
            root_file_path,
            'AudioSynth/TensorFlowTTS/examples/tacotron2/conf/tacotron2.v1.yaml')
        tacotron2_config = AutoConfig.from_pretrained(taco_pretrained_config_path)
        taco_path = os.path.join(root_file_path, "AudioSynth/tacotron2-120k.h5")
        self.tacotron2 = TFAutoModel.from_pretrained(
            config=tacotron2_config,
            pretrained_path=taco_path,
            training=False,
            name="tacotron2")
        melgan_stft_pretrained_config_path = os.path.join(
            root_file_path,
            'AudioSynth/TensorFlowTTS/examples/melgan.stft/conf/melgan.stft.v1.yaml')
        melgan_stft_config = AutoConfig.from_pretrained(
            melgan_stft_pretrained_config_path)
        melgan_stft_path = os.path.join(root_file_path,
                                        "AudioSynth/melgan.stft-2M.h5")
        self.melgan_stft = TFAutoModel.from_pretrained(
            config=melgan_stft_config,
            pretrained_path=melgan_stft_path,
            name="melgan_stft")
        self.processor = AutoProcessor.from_pretrained(
            pretrained_path=os.path.join(root_file_path,
                                         "AudioSynth/ljspeech_mapper.json"))
        mels, alignment_history, audios = do_synthesis(
            "Hello, how can I help you today?", self.tacotron2, self.melgan_stft,
            "TACOTRON", "MELGAN-STFT", self.processor)

def init_speech():
    global sf
    global tf
    global TFAutoModel
    global AutoProcessor
    import soundfile as sf
    import tensorflow as tf
    from tensorflow_tts.inference import TFAutoModel
    from tensorflow_tts.inference import AutoProcessor

    global fastspeech2
    global mb_melgan
    global processor
    # initialize fastspeech2 model.
    fastspeech2 = TFAutoModel.from_pretrained(
        "tensorspeech/tts-fastspeech2-ljspeech-en")
    # initialize mb_melgan model
    mb_melgan = TFAutoModel.from_pretrained(
        "tensorspeech/tts-mb_melgan-ljspeech-en")
    # inference
    processor = AutoProcessor.from_pretrained(
        "tensorspeech/tts-fastspeech2-ljspeech-en")
    inference("Hello sir")
    debug("Speech", "init")

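# The `inference("Hello sir")` call above refers to a helper that is not shown in
# this snippet. Below is a minimal sketch of what such a helper might look like,
# assuming the globals initialized above and the standard TensorFlowTTS
# FastSpeech2 / MB-MelGAN inference API; the function body, output path, and
# 22.05 kHz sample rate are assumptions, not the original implementation.
def inference(text, out_path="warmup.wav"):
    # convert text to character/phoneme ids
    input_ids = processor.text_to_sequence(text)
    # text-to-mel with FastSpeech2
    _, mel_after, _, _, _ = fastspeech2.inference(
        input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        speaker_ids=tf.convert_to_tensor([0], dtype=tf.int32),
        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32))
    # mel-to-audio with MB-MelGAN, then write a wav file
    audio = mb_melgan.inference(mel_after)[0, :, 0]
    sf.write(out_path, audio.numpy(), 22050, "PCM_16")
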
def __init__(self):
    # initialize tts model. fastspeech2 or tacotron2
    self.tts_model = TFAutoModel.from_pretrained(
        "tensorspeech/tts-fastspeech2-kss-ko")
    # initialize mb_melgan model
    self.mb_melgan = TFAutoModel.from_pretrained(
        "tensorspeech/tts-mb_melgan-kss-ko")
    # text-to-id processor used at inference time
    self.processor = AutoProcessor.from_pretrained(
        "tensorspeech/tts-fastspeech2-kss-ko")

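# A minimal usage sketch for the Korean (KSS) models loaded above, assuming the
# standard TensorFlowTTS FastSpeech2 / MB-MelGAN inference API and that
# `tensorflow` and `soundfile` are imported as `tf` and `sf`; the method name
# and the 22.05 kHz sample rate are assumptions.
def synthesize(self, text, out_path="out.wav"):
    input_ids = self.processor.text_to_sequence(text)
    _, mel_after, _, _, _ = self.tts_model.inference(
        input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        speaker_ids=tf.convert_to_tensor([0], dtype=tf.int32),
        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32))
    audio = self.mb_melgan.inference(mel_after)[0, :, 0]
    sf.write(out_path, audio.numpy(), 22050, "PCM_16")
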
def test_auto_model(config_path):
    config = AutoConfig.from_pretrained(pretrained_path=config_path)
    model = TFAutoModel.from_pretrained(pretrained_path=None, config=config)

    # test save_pretrained
    config.save_pretrained("./test_saved")
    model.save_pretrained("./test_saved")

    # test from_pretrained
    config = AutoConfig.from_pretrained("./test_saved/config.yml")
    model = TFAutoModel.from_pretrained("./test_saved/model.h5", config=config)

def __init_model(self):
    tacotron2_config = AutoConfig.from_pretrained(config.tacotron2_baker)
    self.tacotron2 = TFAutoModel.from_pretrained(
        config=tacotron2_config,
        pretrained_path=config.tacotron2_pretrained_path,
        training=False,
        name="tacotron2")
    self.tacotron2.setup_window(win_front=5, win_back=5)
    mb_melgan_config = AutoConfig.from_pretrained(config.multiband_melgan_baker)
    self.mb_melgan = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=config.multiband_melgan_pretrained_path,
        name="mb_melgan")
    self.processor = AutoProcessor.from_pretrained(
        pretrained_path=config.baker_mapper_pretrained_path)

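# A companion sketch for the initializer above, showing how the loaded
# Tacotron2 / MB-MelGAN pair is typically driven. The Tacotron2 call signature
# mirrors the inference call used in the saved-model variant earlier in this file;
# the method name `synthesize`, the use of `sf`/`tf` imports, and the 24 kHz
# sample rate of the Baker models are assumptions.
def synthesize(self, text, out_path="output.wav"):
    input_ids = self.processor.text_to_sequence(text, inference=True)
    # text-to-mel (attention windowing was already set up in __init_model)
    _, mel_outputs, stop_token_prediction, alignment_history = self.tacotron2.inference(
        tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        tf.convert_to_tensor([len(input_ids)], tf.int32),
        tf.convert_to_tensor([0], dtype=tf.int32))
    # mel-to-audio
    audio = self.mb_melgan.inference(mel_outputs)[0, :, 0]
    sf.write(out_path, audio.numpy(), 24000, "PCM_16")
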
def __init__(self):
    # limit how much GPU memory the process may allocate
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8)
    conf = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
    # elastic memory allocation
    # conf.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=conf)

    # tacotron: load config and trained model
    module_path = os.path.dirname(os.path.abspath(__file__))
    tacotron2_config = AutoConfig.from_pretrained(
        os.path.join(module_path,
                     'examples/tacotron2/conf/tacotron2.song44k.v5.yaml'))
    self.tacotron2 = TFAutoModel.from_pretrained(
        config=tacotron2_config,
        pretrained_path=os.path.join(
            module_path,
            "examples/tacotron2/exp/train.tacotron2.song44k.v5/checkpoints/model-68000.h5"),
        name="tacotron2")

    # fastspeech: load config and trained model
    fastspeech2_config = AutoConfig.from_pretrained(
        os.path.join(module_path,
                     'examples/fastspeech2/conf/fastspeech2.song44k.v5.1.yaml'))
    self.fastspeech2 = TFAutoModel.from_pretrained(
        config=fastspeech2_config,
        pretrained_path=os.path.join(
            module_path,
            "examples/fastspeech2/exp/train.fastspeech2.song44k.v5.1/checkpoints/model-600000.h5"),
        name="fastspeech2")

    # mel gan: load config and trained model
    mb_melgan_config = AutoConfig.from_pretrained(
        os.path.join(
            module_path,
            'examples/multiband_melgan/conf/multiband_melgan.ko.song44k.v5.1.yaml'))
    self.mb_melgan = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=os.path.join(
            module_path,
            "examples/multiband_melgan/exp/train.multiband_melgan.ko.song44k.v5.1/checkpoints/generator-1740000.h5"),
        name="mb_melgan")

    # processor - load the character-to-id mapper
    self.processor = AutoProcessor.from_pretrained(
        pretrained_path=os.path.join(module_path, "test/files/kss_mapper.json"))

def __init__(self):
    # limit how much GPU memory the process may allocate
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.8)
    conf = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
    # elastic memory allocation
    # conf.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=conf)

    # tacotron: load config and trained model
    module_path = os.path.dirname(os.path.abspath(__file__))
    # pdb.set_trace()
    tacotron2_config = AutoConfig.from_pretrained(
        os.path.join(module_path,
                     './examples/tacotron2/conf/tacotron2.song8k.v3.yaml'))
    self.tacotron2 = TFAutoModel.from_pretrained(
        config=tacotron2_config,
        pretrained_path=os.path.join(
            module_path,
            "./examples/tacotron2/exp/train.tacotron2.song8k.v3/checkpoints/model-68000.h5"),
        name="tacotron2")

    # fastspeech: load config and trained model
    fastspeech2_config = AutoConfig.from_pretrained(
        os.path.join(module_path,
                     './examples/fastspeech2/conf/fastspeech2.song8k.v3.yaml'))
    self.fastspeech2 = TFAutoModel.from_pretrained(
        config=fastspeech2_config,
        # pretrained_path=os.path.join(module_path, "./examples/fastspeech2/exp/train.fastspeech2.song8k.v1.1/checkpoints/model-200000.h5"),
        pretrained_path=os.path.join(
            module_path,
            "./examples/fastspeech2/exp/train.fastspeech2.song8k.v3/checkpoints/model-200000.h5"),
        name="fastspeech2")

    # fastspeech1_config = AutoConfig.from_pretrained(os.path.join(module_path, 'examples/fastspeech/conf/fastspeech.v3_song44k_v51.yaml'))
    # self.fastspeech1 = TFAutoModel.from_pretrained(
    #     config=fastspeech1_config,
    #     pretrained_path=os.path.join(module_path, "examples/fastspeech/exp/train.fastspeech.song.v41/checkpoints/model-200000.h5"),
    #     name="fastspeech1"
    # )

    # resizing positional embedding
    # self.fastspeech1._build()
    # self.fastspeech1.save_weights("./resize.h5")
    # self.fastspeech1.resize_positional_embeddings(8000)
    # self.fastspeech1.load_weights("./resize.h5", by_name=True, skip_mismatch=True)

    # mel gan: load config and trained model
    mb_melgan_config = AutoConfig.from_pretrained(
        os.path.join(module_path,
                     './examples/multiband_melgan/conf/multiband_melgan.ko.8k.v3.yaml'))
    self.mb_melgan = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=os.path.join(
            module_path,
            "./examples/multiband_melgan/exp/train.multiband_melgan.ko.song8k.v3/checkpoints/generator-1000000.h5"),
        name="mb_melgan")

    # processor - load the character-to-id mapper
    self.processor = AutoProcessor.from_pretrained(
        pretrained_path=os.path.join(module_path, "test/files/kss_mapper.json"))

def _converter_model(self):
    with open(config.tacotron2_baker) as f:
        conf = yaml.load(f, Loader=yaml.Loader)
    conf = Tacotron2Config(**conf["tacotron2_params"])
    self.tacotron2 = TFTacotron2(config=conf,
                                 training=False,
                                 name="tacotron2",
                                 enable_tflite_convertible=True)
    self.tacotron2.setup_window(win_front=5, win_back=5)
    self.tacotron2.setup_maximum_iterations(1000)  # be careful
    self.tacotron2._build()
    self.tacotron2.load_weights(config.tacotron2_pretrained_path)

    tacotron2_concrete_function = self.tacotron2.inference_tflite.get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions(
        [tacotron2_concrete_function])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS
    ]
    tflite_model = converter.convert()
    with open('tacotron2.tflite', 'wb') as f:
        f.write(tflite_model)
    print('Model size is %f MBs.' % (len(tflite_model) / 1024 / 1024.0))

    # tacotron2_config = AutoConfig.from_pretrained(config.tacotron2_baker)
    # self.tacotron2 = TFAutoModel.from_pretrained(config=tacotron2_config, pretrained_path='tacotron2.tflite', training=False, name="tacotron2")
    # self.tacotron2.setup_window(win_front=5, win_back=5)
    self.interpreter = tf.lite.Interpreter(model_path='tacotron2.tflite')
    self.interpreter.allocate_tensors()
    self.input_details = self.interpreter.get_input_details()
    self.output_details = self.interpreter.get_output_details()

    mb_melgan_config = AutoConfig.from_pretrained(config.multiband_melgan_baker)
    self.mb_melgan = TFAutoModel.from_pretrained(
        config=mb_melgan_config,
        pretrained_path=config.multiband_melgan_pretrained_path,
        name="mb_melgan")
    self.processor = AutoProcessor.from_pretrained(
        pretrained_path=config.baker_mapper_pretrained_path)

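# A sketch of how the TFLite interpreter created above can be driven, loosely
# following the TensorFlowTTS TFLite example. The ordering of input_details
# (ids, input lengths, speaker ids) and the output index holding the mel
# spectrogram are assumptions and should be checked against
# self.input_details / self.output_details; the method name is hypothetical.
def _infer_tflite(self, input_text):
    input_ids = self.processor.text_to_sequence(input_text, inference=True)
    # the converted model has dynamic input shapes, so resize per utterance
    self.interpreter.resize_tensor_input(self.input_details[0]['index'], [1, len(input_ids)])
    self.interpreter.resize_tensor_input(self.input_details[1]['index'], [1])
    self.interpreter.resize_tensor_input(self.input_details[2]['index'], [1])
    self.interpreter.allocate_tensors()
    input_data = (
        tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        tf.convert_to_tensor([len(input_ids)], tf.int32),
        tf.convert_to_tensor([0], dtype=tf.int32))
    for detail, tensor in zip(self.input_details, input_data):
        self.interpreter.set_tensor(detail['index'], tensor)
    self.interpreter.invoke()
    # assumed: output index 1 holds the mel spectrogram; vocode with MB-MelGAN
    mel_outputs = self.interpreter.get_tensor(self.output_details[1]['index'])
    return self.mb_melgan.inference(mel_outputs)[0, :, 0]
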
# The Original Code is Copyright (C) 2020 Voxell Technologies.
# All rights reserved.
import soundfile as sf
import tensorflow as tf

from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

processor = AutoProcessor.from_pretrained("../ljspeech_mapper.json")

# initialize tacotron2 model
config = AutoConfig.from_pretrained("../tacotron2/conf/tacotron2.v1.yaml")
tacotron2 = TFAutoModel.from_pretrained(
    config=config,
    pretrained_path=None,
    is_build=True,
    name="tacotron2"
)
tacotron2.setup_window(win_front=6, win_back=6)
tacotron2.setup_maximum_iterations(3000)
tacotron2.load_weights("../tacotron2/checkpoints/model-120000.h5")
# tf.saved_model.save(tacotron2, "../tacotron2/inference", signatures=tacotron2.inference)
# tacotron2 = tf.saved_model.load("../tacotron2/inference")

# initialize melgan model
melgan_config = AutoConfig.from_pretrained('../melgan/conf/melgan.v1.yaml')
melgan = TFAutoModel.from_pretrained(
    config=melgan_config,
    pretrained_path="../melgan/checkpoints/generator-1500000.h5"

import yaml
import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile
import re

from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import AutoProcessor

fastspeech2_config = AutoConfig.from_pretrained('pretrained/fastspeech2_config.yml')
# fastspeech2_config.max_position_embeddings = 20000
fastspeech2 = TFAutoModel.from_pretrained(
    config=fastspeech2_config,
    pretrained_path="pretrained/fastspeech2-150k.h5",
    name="fastspeech2"
)

mb_melgan_config = AutoConfig.from_pretrained('pretrained/mb.melgan_config.yml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
    pretrained_path="pretrained/mb.melgan-940k.h5",
    name="mb_melgan"
)

processor = AutoProcessor.from_pretrained(pretrained_path="pretrained/ljspeech_mapper.json")


def do_synthesis(input_text, text2mel_model, vocoder_model, text2mel_name, vocoder_name):
    input_ids = processor.text_to_sequence(input_text)

from tensorflow_tts.configs.tacotron2 import Tacotron2Config
from tensorflow_tts.models.tacotron2 import TFTacotron2
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

starttime = datetime.datetime.now()

# Tacotron2
tacotron2_config = AutoConfig.from_pretrained('examples/tacotron2/conf/tacotron2.baker.v1.yaml')
tacotron2 = TFAutoModel.from_pretrained(
    config=tacotron2_config,
    # pretrained_path="trained/model-60000.h5",
    # pretrained_path="trained/taco_char_server_126k.h5",
    pretrained_path="trained/taco_local_word_new_82k.h5",
    name="tacotron2"
)

# FastSpeech2
fastspeech2_config = AutoConfig.from_pretrained('examples/fastspeech2/conf/fastspeech2.baker.v2.yaml')
fastspeech2 = TFAutoModel.from_pretrained(
    config=fastspeech2_config,
    pretrained_path="trained/fs_local_135k.h5",
    name="fastspeech2"
)

# MB-MelGAN
mb_melgan_config = AutoConfig.from_pretrained('examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(

def test_auto_model(config_path):
    config = AutoConfig.from_pretrained(pretrained_path=config_path)
    model = TFAutoModel.from_pretrained(config=config, pretrained_path=None)

# The Original Code is Copyright (C) 2020 Voxell Technologies.
# All rights reserved.
import soundfile as sf
import tensorflow as tf

from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

processor = AutoProcessor.from_pretrained("../ljspeech_mapper.json")

# initialize fastspeech2 model.
config = AutoConfig.from_pretrained('../fastspeech2/conf/fastspeech2.v1.yaml')
fastspeech2 = TFAutoModel.from_pretrained(
    config=config,
    pretrained_path="../fastspeech2/checkpoints/model-150000.h5",
    is_build=True,
    name="fastspeech2")

# initialize melgan model
melgan_config = AutoConfig.from_pretrained('../melgan/conf/melgan.v1.yaml')
melgan = TFAutoModel.from_pretrained(
    config=melgan_config,
    pretrained_path="../melgan/checkpoints/generator-1670000.h5")

input_text = "how much wood would a woodchuck chuck if a woodchuck could chuck wood?"
input_ids = processor.text_to_sequence(input_text)

# fastspeech2 inference
mel_before, mel_after, duration_outputs, _, _ = fastspeech2.inference(
    input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32),

# )

# # FastSpeech2
# fastspeech2_config = AutoConfig.from_pretrained('examples/fastspeech2/conf/fastspeech2.baker.v2.yaml')
# fastspeech2 = TFAutoModel.from_pretrained(
#     config=fastspeech2_config,
#     pretrained_path="trained/fastspeech2-200k.h5",
#     name="fastspeech2"
# )

# MB-MelGAN
mb_melgan_config = AutoConfig.from_pretrained(
    'examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
    pretrained_path="trained/mb.melgan_word_428k.h5",  # "trained/mb.melgan-1M.h5"
    # is_build=False,  # don't build model if you want to save it to pb. (TF related bug)
    name="mb_melgan")

# LJSpeechProcessor
# processor = AutoProcessor.from_pretrained("./tensorflow_tts/processor/pretrained/ljspeech_mapper.json")
# processor = AutoProcessor.from_pretrained("trained/baker_mapper_mix.json")
processor = AutoProcessor.from_pretrained("trained/baker_mapper_word.json")

# save tacotron2 to pb
# def save_tacotron2_pb():
#     input_text = "i love you so much."
#     input_ids = processor.text_to_sequence(input_text)
#
#     tacotron2.setup_window(win_front=6, win_back=6)
#     tacotron2.setup_maximum_iterations(3000)

parser = argparse.ArgumentParser(description='Start TTS and Vocoder')
parser.add_argument(
    '--path_fs',
    default="examples/fastspeech2_libritts/outdir_libri/checkpoints/model-855000.h5")
parser.add_argument('--path_mb', default="checks/mb_melgan_or/mb.melgan-940k.h5")
args = parser.parse_args()

fastspeech2_config = AutoConfig.from_pretrained(
    'examples/fastspeech2/conf/fastspeech2.v1.yaml')
fastspeech2 = TFAutoModel.from_pretrained(
    config=fastspeech2_config,
    pretrained_path=args.path_fs,  # "examples/fastspeech2_libritts/outdir_libri/checkpoints/model-855000.h5"
    # training=False,
    name="fastspeech2")

mb_melgan_config = AutoConfig.from_pretrained(
    'examples/multiband_melgan/conf/multiband_melgan.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,
    pretrained_path=args.path_mb,  # "checks/mb_melgan_or/mb.melgan-940k.h5"
    name="mb_melgan")

processor = AutoProcessor.from_pretrained(
    pretrained_path="dump_ljspeech/ljspeech_mapper.json")

app.run(host='0.0.0.0', port=5454)

# import IPython.display as ipd
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
from tensorflow_tts.configs.tacotron2 import Tacotron2Config
from tensorflow_tts.models.tacotron2 import TFTacotron2
from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

starttime = datetime.datetime.now()

# Tacotron2
tacotron2_config = AutoConfig.from_pretrained(
    'examples/tacotron2/conf/tacotron2.baker.v1.yaml')
tacotron2 = TFAutoModel.from_pretrained(
    config=tacotron2_config,
    pretrained_path="trained/taco_server_word_60k.h5",
    name="tacotron2")

# # FastSpeech2
# fastspeech2_config = AutoConfig.from_pretrained('examples/fastspeech2/conf/fastspeech2.baker.v2.yaml')
# fastspeech2 = TFAutoModel.from_pretrained(
#     config=fastspeech2_config,
#     pretrained_path="trained/fastspeech2-200k.h5",
#     name="fastspeech2"
# )

# MB-MelGAN
mb_melgan_config = AutoConfig.from_pretrained(
    'examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(
    config=mb_melgan_config,

from tensorflow_tts.inference import AutoConfig
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor

# Two nltk data packages are downloaded here:
"""
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Unzipping corpora/cmudict.zip.
"""

tacotron2_config = AutoConfig.from_pretrained(
    'TensorFlowTTS/examples/tacotron2/conf/tacotron2.baker.v1.yaml')
tacotron2 = TFAutoModel.from_pretrained(config=tacotron2_config,
                                        pretrained_path="tacotron2-100k.h5",
                                        training=False,
                                        name="tacotron2")

mb_melgan_config = AutoConfig.from_pretrained(
    'TensorFlowTTS/examples/multiband_melgan/conf/multiband_melgan.baker.v1.yaml')
mb_melgan = TFAutoModel.from_pretrained(config=mb_melgan_config,
                                        pretrained_path="mb.melgan-920k.h5",
                                        name="mb_melgan")

processor = AutoProcessor.from_pretrained(
    pretrained_path="./baker_mapper.json")


def do_synthesis(input_text, text2mel_model, vocoder_model, text2mel_name, vocoder_name):
    input_ids = processor.text_to_sequence(input_text, inference=True)

# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# The Original Code is Copyright (C) 2020 Voxell Technologies.
# All rights reserved.
import soundfile as sf
import tensorflow as tf

from tensorflow_tts.inference import TFAutoModel, AutoConfig, AutoProcessor

processor = AutoProcessor.from_pretrained("../ljspeech_mapper.json")

# initialize fastspeech model.
fs_config = AutoConfig.from_pretrained('../fastspeech/conf/fastspeech.v1.yaml')
fastspeech = TFAutoModel.from_pretrained(
    config=fs_config,
    pretrained_path="../fastspeech/checkpoints/model-195000.h5")

# initialize melgan model
melgan_config = AutoConfig.from_pretrained('../melgan/conf/melgan.v1.yaml')
melgan = TFAutoModel.from_pretrained(
    config=melgan_config,
    pretrained_path="../melgan/checkpoints/generator-1670000.h5")

ids = processor.text_to_sequence(
    "how much wood would a woodchuck chuck if a woodchuck could chuck wood?")
print(ids)
ids = tf.expand_dims(ids, 0)

# fastspeech inference
masked_mel_before, masked_mel_after, duration_outputs = fastspeech.inference(