def tts(model, text, p=0, speaker_id=0, fast=True, figures=True):
    """Synthesize ``text`` with ``model`` and show an inline audio player.

    Args:
        model: Model object handed straight to ``synthesis.tts``.
        text: Text to synthesize.
        p: Forwarded unchanged to ``synthesis.tts``.
        speaker_id: Forwarded unchanged to ``synthesis.tts``.
        fast: Forwarded unchanged to ``synthesis.tts``.
        figures: When true, ``visualize`` is called with the alignment and
            the spectrogram returned by the synthesizer.

    Returns:
        None. The audio is shown through ``IPython.display.display``.
    """
    # Imported lazily, as in the original, so the project module is only
    # required when synthesis is actually requested.
    from synthesis import tts as synthesize

    samples, attention, spec, _mel = synthesize(model, text, p, speaker_id, fast)

    if figures:
        visualize(attention, spec)

    # ``fs`` is a module-level name supplying the playback rate.
    player = Audio(samples, rate=fs)
    IPython.display.display(player)
def tts(model, text, p=0, speaker_id=0, fast=True, figures=True):
    """Synthesize ``text`` with ``model`` and write the audio to a WAV file.

    The file is written to the relative path ``out/speaker<speaker_id>.wav``.

    Args:
        model: Model object handed straight to ``synthesis.tts``.
        text: Text to synthesize.
        p: Forwarded unchanged to ``synthesis.tts``.
        speaker_id: Forwarded unchanged to ``synthesis.tts``; also used to
            build the output file name.
        fast: Forwarded unchanged to ``synthesis.tts``.
        figures: When true, ``visualize`` is called with the alignment and
            the spectrogram returned by the synthesizer.

    Returns:
        None.
    """
    from synthesis import tts as synthesize

    samples, attention, spec, _mel = synthesize(model, text, p, speaker_id, fast)

    if figures:
        visualize(attention, spec)

    # An earlier revision played the audio inline through IPython instead of
    # writing it to disk.
    # NOTE(review): ``librosa.output`` was removed in librosa 0.8 -- confirm
    # the pinned librosa version before relying on this call.
    out_path = 'out/speaker' + str(speaker_id) + '.wav'
    librosa.output.write_wav(out_path, samples, fs)
def tts(model, text, p=0, speaker_id=None, fast=True, figures=True,
        sample_rate=22500):
    """Synthesize ``text`` with ``model`` and play it, blocking until done.

    Args:
        model: Model object handed straight to ``synthesis.tts``.
        text: Text to synthesize.
        p: Forwarded unchanged to ``synthesis.tts``.
        speaker_id: Forwarded unchanged to ``synthesis.tts``.
        fast: Forwarded unchanged to ``synthesis.tts``.
        figures: When true, ``visualize`` is called with the alignment and
            the spectrogram returned by the synthesizer.
        sample_rate: Rate passed to ``sd.play``. The default reproduces the
            value that used to be hard-coded here.

    Returns:
        None.
    """
    from synthesis import tts as _tts

    waveform, alignment, spectrogram, _mel = _tts(
        model, text, p, speaker_id, fast)

    if figures:
        visualize(alignment, spectrogram)

    # NOTE(review): the historical default of 22500 looks like a typo for
    # 22050 Hz; confirm against the model's configured sample rate. It is
    # kept as the default so existing callers hear exactly what they did
    # before; pass ``sample_rate`` explicitly to override.
    sd.play(waveform, sample_rate)
    # Block until playback has finished.
    sd.wait()
def tts(model, text, file_path, p=0, speaker_id=None, fast=True):
    """Synthesize ``text`` with ``model`` and save the waveform to a file.

    Args:
        model: Model object handed straight to ``synthesis.tts``.
        text: Text to synthesize.
        file_path: Destination passed to ``audio.save_wav``.
        p: Forwarded unchanged to ``synthesis.tts``.
        speaker_id: Forwarded unchanged to ``synthesis.tts``.
        fast: Forwarded unchanged to ``synthesis.tts``.

    Returns:
        None.
    """
    from synthesis import tts as synthesize
    import audio

    outputs = synthesize(model, text, p, speaker_id, fast)
    # Only the waveform is needed; the other three values are discarded.
    samples, _alignment, _spectrogram, _mel = outputs

    # Original author's note on the resulting file: 22050, 353 kbps, 16 bit,
    # mono (not verifiable from this function alone).
    audio.save_wav(samples, file_path)
def generate_cloned_samples(cloning_texts_location=None, no_speakers=108,
                            fast=True, p=0):
    """Synthesize the cloning texts for every speaker and collect the mels.

    For each speaker id in ``range(no_speakers)`` every cloning text is run
    through ``synthesis.tts`` and the returned mel value is kept. The nested
    result is pickled and also returned.

    Side effects:
        * Changes the process working directory to ``./deepvoice3_pytorch``
          and does not restore it.
        * Adds entries to and re-parses the global ``hparams.hparams``.
        * Sets ``synthesis._frontend`` and ``train._frontend``.
        * Writes ``./Cloning_Audio/speakers_cloned_voices_mel.p`` (relative
          to the new working directory).
        * Prints progress information.

    Args:
        cloning_texts_location: Path of a text file with the cloning texts.
            Currently only defaulted and never read: the texts are the
            hard-coded list below.
        no_speakers: Number of speaker ids to synthesize, starting at 0.
        fast: Forwarded unchanged to ``synthesis.tts``.
        p: Forwarded unchanged to ``synthesis.tts``.

    Returns:
        A list indexed as ``result[speaker_id][text_index]`` holding the mel
        value returned by ``synthesis.tts`` for that speaker and text.
    """
    # Name of the project checkout, expected below the current directory.
    name = "deepvoice3_pytorch"

    # Every relative path below is resolved against the checkout. The
    # project imports are deliberately placed after this call, presumably so
    # they resolve from the checkout as well -- TODO confirm.
    os.chdir(join(expanduser("."), name))

    import hparams
    import synthesis
    import train
    from deepvoice3_pytorch import frontend
    from train import build_model
    from train import load_checkpoint
    from synthesis import tts as _tts

    # Pre-trained checkpoint; it can be fetched from
    # https://www.dropbox.com/s/uzmtzgcedyu531k/20171222_deepvoice3_vctk108_checkpoint_step000300000.pth
    checkpoint_path = "20171222_deepvoice3_vctk108_checkpoint_step000300000.pth"
    if not exists(checkpoint_path):
        # Only a hint is printed; loading below is still attempted.
        print("Dowload the Pre-Trained Network!!")

    # The preset file describes the hyper parameters. Per the original
    # notes it comes from master, while the code itself is expected to be at
    # commit 0421749 because the checkpoint is not compatible with the
    # current master.
    preset = "./presets/deepvoice3_vctk.json"

    # Newly added params. Need to inject dummy values.
    # NOTE(review): add_hparam is called unconditionally (an existence guard
    # was commented out in the original) -- confirm it tolerates the
    # function being run more than once in the same process.
    dummy_hparams = [
        ("fmin", 0),
        ("fmax", 0),
        ("rescaling", False),
        ("rescaling_max", 0.999),
        ("allow_clipping_in_normalization", False),
    ]
    for dummy, v in dummy_hparams:
        hparams.hparams.add_hparam(dummy, v)

    # Load parameters from the preset.
    with open(preset) as f:
        hparams.hparams.parse_json(f.read())

    # Tell the project we are using multi-speaker DeepVoice3.
    hparams.hparams.builder = "deepvoice3_multispeaker"

    # Inject the English frontend text processor into both modules.
    english_frontend = getattr(frontend, "en")
    synthesis._frontend = english_frontend
    train._frontend = english_frontend

    model = build_model()
    # Presumably optimizer=None and reset_optimizer=True -- confirm against
    # train.load_checkpoint.
    model = load_checkpoint(checkpoint_path, model, None, True)

    cloning_texts = ["this is the first", "this is the first"]
    if cloning_texts_location is None:
        cloning_texts_location = "./Cloning_Audio/cloning_text.txt"
    # TODO: read ``cloning_texts`` from ``cloning_texts_location``; until
    # then the hard-coded list above is what gets synthesized.

    all_speakers = []
    for speaker_id in range(no_speakers):
        print("The Speaker being cloned speaker-{}".format(speaker_id))
        speaker_cloning_mel = []
        for text in cloning_texts:
            # Only the mel output is kept.
            _waveform, _alignment, _spectrogram, mel = _tts(
                model, text, p, speaker_id, fast)
            speaker_cloning_mel.append(mel)
        all_speakers.append(speaker_cloning_mel)

    # Pickling: all_speakers[speaker_id][cloned_audio_number]
    with open("./Cloning_Audio/speakers_cloned_voices_mel.p", "wb") as fp:
        pickle.dump(all_speakers, fp)

    print("")
    # NOTE(review): if the mel lengths differ between entries this builds a
    # ragged array, which NumPy >= 1.24 rejects with ValueError -- confirm
    # the NumPy version this is expected to run on.
    print(np.array(all_speakers).shape)

    return all_speakers
def tts(model, text, p=0, speaker_id=5, fast=True, figures=True):
    """Synthesize ``text`` with ``model`` and return the waveform.

    Args:
        model: Model object handed straight to ``synthesis.tts``.
        text: Text to synthesize.
        p: Forwarded unchanged to ``synthesis.tts``.
        speaker_id: Forwarded unchanged to ``synthesis.tts``.
        fast: Forwarded unchanged to ``synthesis.tts``.
        figures: Accepted but not used by this function.

    Returns:
        The first of the four values returned by ``synthesis.tts``.
    """
    from synthesis import tts as synthesize

    outputs = synthesize(model, text, p, speaker_id, fast)
    # Alignment, spectrogram and mel are produced but not needed here.
    samples, _alignment, _spectrogram, _mel = outputs
    return samples
def tts(model, text, filename, p=0, speaker_id=None, fast=True, figures=True):
    """Synthesize ``text`` with ``model`` and save the waveform to ``filename``.

    Args:
        model: Model object handed straight to ``synthesis.tts``.
        text: Text to synthesize.
        filename: Destination passed to ``save_wav``.
        p: Forwarded unchanged to ``synthesis.tts``.
        speaker_id: Forwarded unchanged to ``synthesis.tts``.
        fast: Forwarded unchanged to ``synthesis.tts``.
        figures: Accepted but not used by this function.

    Returns:
        None.
    """
    from synthesis import tts as synthesize

    outputs = synthesize(model, text, p, speaker_id, fast)
    samples, _alignment, _spectrogram, _mel = outputs

    # The waveform is written as returned; no amplitude normalisation is
    # applied in this function.
    save_wav(samples, filename)