Ejemplo n.º 1
0
    model.seq2seq.decoder.max_decoder_steps = max_decoder_steps

    os.makedirs(dst_dir, exist_ok=True)
    with open(text_list_file_path, "rb") as f:
        lines = f.readlines()
        for idx, line in enumerate(lines):
            text = line.decode("utf-8")[:-1]
            words = nltk.word_tokenize(text)
            waveform, alignment, _, _ = tts(
                model, text, p=replace_pronunciation_prob, speaker_id=speaker_id, fast=True)
            dst_wav_path = join(dst_dir, "{}_{}{}.wav".format(
                idx, checkpoint_name, file_name_suffix))
            dst_alignment_path = join(
                dst_dir, "{}_{}{}_alignment.png".format(idx, checkpoint_name,
                                                        file_name_suffix))
            plot_alignment(alignment.T, dst_alignment_path,
                           info="{}, {}".format(hparams.builder, basename(checkpoint_path)))
            audio.save_wav(waveform, dst_wav_path)
            from os.path import basename, splitext
            name = splitext(basename(text_list_file_path))[0]
            if output_html:
                print("""
{}

({} chars, {} words)

<audio controls="controls" >
<source src="/audio/{}/{}/{}" autoplay/>
Your browser does not support the audio element.
</audio>

<div align="center"><img src="/audio/{}/{}/{}" /></div>
Ejemplo n.º 2
0
    model.load_state_dict(checkpoint["state_dict"])
    model.decoder.max_decoder_steps = max_decoder_steps
    model.make_generation_fast_()

    os.makedirs(dst_dir, exist_ok=True)

    with open(text_list_file_path, "rb") as f:
        lines = f.readlines()
        for idx, line in enumerate(lines):
            text = line.decode("utf-8")[:-1]
            words = nltk.word_tokenize(text)
            # print("{}: {} ({} chars, {} words)".format(idx, text, len(text), len(words)))
            waveform, alignment, _, _ = tts(model, text)
            dst_wav_path = join(dst_dir, "{}{}.wav".format(idx, file_name_suffix))
            dst_alignment_path = join(dst_dir, "{}{}_alignment.png".format(idx, file_name_suffix))
            plot_alignment(alignment.T, dst_alignment_path,
                           info="deepvoice3, {}".format(checkpoint_path))
            audio.save_wav(waveform, dst_wav_path)
            from os.path import basename, splitext
            name = splitext(basename(text_list_file_path))[0]
            print("""
{}

({} chars, {} words)

<audio controls="controls" >
<source src="/audio/deepvoice3/{}/{}{}.wav" autoplay/>
Your browser does not support the audio element.
</audio>

<div align="center"><img src="/audio/deepvoice3/{}/{}{}_alignment.png" /></div>
                  """.format(text, len(text), len(words),
Ejemplo n.º 3
0
def synthesis(checkpoint_path, preset, dst_dir, srt_path, face_path):
    """Synthesize one WAV + alignment plot per subtitle entry in an SRT file.

    Loads a DeepVoice3 model from ``checkpoint_path`` (optionally overriding
    hyper parameters from the JSON ``preset`` file), then runs TTS for every
    entry yielded by ``load_srt(srt_path, face_path)`` and writes the audio
    and an attention-alignment PNG into ``dst_dir``.

    Args:
        checkpoint_path: Path to the combined model checkpoint to load.
        preset: Optional path to a JSON hyper-parameter preset, or ``None``.
        dst_dir: Output directory; created if it does not exist.
        srt_path: Subtitle file providing timing, speaker id and text.
        face_path: Forwarded to ``load_srt`` (speaker/face mapping).
    """
    global _frontend
    # NOTE(review): split seq2seq/postnet checkpoints are not wired up here,
    # so the combined-checkpoint branch below is always the one taken.
    checkpoint_seq2seq_path = None
    checkpoint_postnet_path = None
    max_decoder_steps = 500
    replace_pronunciation_prob = 0.0

    # Load preset if specified, then finalize hyper parameters.
    if preset is not None:
        with open(preset) as f:
            hparams.parse_json(f.read())
    hparams.parse("")
    assert hparams.name == "deepvoice3"

    # Share the text frontend with the train module so its helpers agree.
    _frontend = getattr(frontend, hparams.frontend)
    print(_frontend)
    import train
    train._frontend = _frontend
    from train import plot_alignment, build_model

    # Model
    model = build_model()

    # Load seq2seq and postnet weights separately when both halves are
    # given; otherwise load a single combined checkpoint.
    if checkpoint_postnet_path is not None and checkpoint_seq2seq_path is not None:
        checkpoint = _load(checkpoint_seq2seq_path)
        model.seq2seq.load_state_dict(checkpoint["state_dict"])
        checkpoint = _load(checkpoint_postnet_path)
        model.postnet.load_state_dict(checkpoint["state_dict"])
    else:
        checkpoint = _load(checkpoint_path)
        model.load_state_dict(checkpoint["state_dict"])

    model.seq2seq.decoder.max_decoder_steps = max_decoder_steps

    os.makedirs(dst_dir, exist_ok=True)

    task = load_srt(srt_path, face_path)
    for idx, entry in enumerate(task):
        # Entry layout appears to be [?, start, end, speaker_id, text, ...]
        # -- TODO confirm against load_srt's return value.
        speaker_id = entry[3]
        text = entry[4]

        words = nltk.word_tokenize(text)
        file_name = "{} speaker_{} {}-{}".format(idx, speaker_id, entry[1],
                                                 entry[2])
        print(text)
        waveform, alignment, _, _ = tts(model,
                                        text,
                                        p=replace_pronunciation_prob,
                                        speaker_id=speaker_id,
                                        fast=True)
        dst_wav_path = join(dst_dir, "{}.wav".format(file_name))
        dst_alignment_path = join(dst_dir,
                                  "{}_alignment.png".format(file_name))
        plot_alignment(alignment.T,
                       dst_alignment_path,
                       info="{}, {}".format(hparams.builder,
                                            basename(checkpoint_path)))
        audio.save_wav(waveform, dst_wav_path)
        print(
            idx, ": {}\n ({} chars, {} words)".format(text, len(text),
                                                      len(words)))

    print(
        "Finished! Check out {} for generated audio samples.".format(dst_dir))
Ejemplo n.º 4
0
    model.seq2seq.decoder.max_decoder_steps = max_decoder_steps

    os.makedirs(out_dir, exist_ok=True)

    with open(text_file, "r") as file_reader:
        lines = file_reader.readlines()
        lines = [line.strip("\n") for line in lines]

        for idx, line in enumerate(lines):
            text = line[:-1]
            waveform, alignment, _, _ = tts(model,
                                            text,
                                            speaker_id=speaker_id,
                                            fast=True)
            out_wav_path = join(out_dir,
                                f"{idx}_{checkpoint_name}_synthesized.wav")

            out_alignment_path = join(
                out_dir, f"{idx}_{checkpoint_name}_synthesized_alignment.png")

            plot_alignment(alignment.T,
                           out_alignment_path,
                           info=f"{cfg.builder}, {basename(checkpoint_path)}")

            audio.save_wav(waveform, out_wav_path)

    print(f"Synthesis complete. Generated audio samples saved in {out_dir}")

    sys.exit(0)