Code example #1
0
def generate_cloned_samples(model,
                            cloning_text_path=None,
                            no_speakers=108,
                            fast=True,
                            p=0):
    """Synthesize cloning utterances for every speaker and pickle the mels.

    For each speaker id in ``range(no_speakers)``, every cloning sentence is
    synthesized with ``_tts`` and the resulting mel spectrograms are
    collected.  The accumulated result is re-pickled after each speaker so a
    partial run survives interruption.

    Parameters
    ----------
    model : synthesis model, forwarded unchanged to ``_tts``.
    cloning_text_path : optional path to a text file with one cloning
        sentence per line; defaults to "./Cloning_Audio/cloning_text.txt".
        If the file cannot be read, two built-in sample sentences are used.
    no_speakers : number of speaker ids to clone (ids 0 .. no_speakers-1).
    fast : forwarded to ``_tts``.
    p : forwarded to ``_tts``.

    Returns
    -------
    list
        ``all_speakers[speaker_id][cloning_text_index]`` -> mel spectrogram.
        Also written to "./Cloning_Audio/speakers_cloned_voices_mel.p".
    """
    if cloning_text_path is None:
        cloning_text_path = "./Cloning_Audio/cloning_text.txt"

    try:
        # One cloning sentence per line.
        with open(cloning_text_path) as fp:
            cloning_texts = fp.read().splitlines()
    except OSError:
        # Fall back to the built-in sample sentences when the file is
        # missing or unreadable, preserving the previous behavior.
        cloning_texts = ["this is the first", "this is the second"]

    all_speakers = []

    for speaker_id in range(no_speakers):
        speaker_cloning_mel = []
        for text in cloning_texts:
            waveform, alignment, spectrogram, mel = _tts(
                model, text, p, speaker_id, fast)
            speaker_cloning_mel.append(mel)
        all_speakers.append(speaker_cloning_mel)
        # Checkpoint after every speaker: a long run that dies partway
        # through still leaves a usable pickle on disk.
        with open("./Cloning_Audio/speakers_cloned_voices_mel.p",
                  "wb") as fp:
            pickle.dump(all_speakers, fp)

    print("Shape of all speakers:", np.array(all_speakers).shape)

    return all_speakers
Code example #2
0
def tts(model, text, p=0, speaker_id=0, fast=True, figures=True):
    """Synthesize *text* with *model*, optionally plot, and play the audio.

    Parameters
    ----------
    model : synthesis model forwarded to ``dv3.synthesis.tts``.
    text : sentence to synthesize.
    p : forwarded to the synthesizer.
    speaker_id : id of the speaker voice to use.
    fast : forwarded to the synthesizer.
    figures : when True, plot the alignment and spectrogram via
        ``visualize``.

    Returns
    -------
    The synthesized waveform (previously discarded); it is also played
    inline with ``IPython.display.Audio`` at sample rate ``fs``.
    """
    from dv3.synthesis import tts as _tts
    waveform, alignment, spectrogram, mel = _tts(model, text, p, speaker_id,
                                                 fast)
    if figures:
        visualize(alignment, spectrogram)
    IPython.display.display(Audio(waveform, rate=fs))
    # Return the waveform so callers can save or post-process it; callers
    # that ignored the old implicit None are unaffected.
    return waveform