import os
import sys

import soundfile as sf


def synthesize(text):
    # `lang` is assumed to be a module-level language code (e.g. "en")
    model_input = text + "|00-" + lang + "|" + lang

    # Add the Multilingual_Text_to_Speech dependency to the import path
    sys.path.append(
        os.path.join(os.path.dirname(__file__),
                     "dependencies/Multilingual_Text_to_Speech"))

    # Force a fresh import of `utils` so it resolves from the new path
    if "utils" in sys.modules:
        del sys.modules["utils"]

    from synthesize import synthesize
    from utils import build_model

    # Load Multilingual pretrained model
    model = build_model(
        os.path.abspath("./dependencies/checkpoints/generated_switching.pyt"))
    model.eval()

    # Generate the spectrogram
    spectrogram = synthesize(model, "|" + model_input)

    # Add the WaveRNN dependency to the import path
    sys.path.append(
        os.path.join(os.path.dirname(__file__), "dependencies/WaveRNN"))

    if "utils" in sys.modules: del sys.modules["utils"]

    from models.fatchord_version import WaveRNN
    from utils import hparams as hp
    from gen_wavernn import generate
    import torch

    # Load WaveRNN pretrained model
    hp.configure(
        os.path.join(os.path.dirname(__file__),
                     "dependencies/WaveRNN/hparams.py"))
    model = WaveRNN(
        rnn_dims=hp.voc_rnn_dims,
        fc_dims=hp.voc_fc_dims,
        bits=hp.bits,
        pad=hp.voc_pad,
        upsample_factors=hp.voc_upsample_factors,
        feat_dims=hp.num_mels,
        compute_dims=hp.voc_compute_dims,
        res_out_dims=hp.voc_res_out_dims,
        res_blocks=hp.voc_res_blocks,
        hop_length=hp.hop_length,
        sample_rate=hp.sample_rate,
        mode=hp.voc_mode).to(
            torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    model.load(
        os.path.join(os.path.dirname(__file__),
                     "dependencies/checkpoints/wavernn_weight.pyt"))

    waveform = generate(model, spectrogram, hp.voc_gen_batched, hp.voc_target,
                        hp.voc_overlap)

    # Save the generated waveform (using soundfile, as in Example #3)
    sf.write("./temp/result.wav", waveform, hp.sample_rate)
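
A minimal usage sketch for the function above, assuming `lang` is defined at module level and the checkpoints under dependencies/ are in place:

lang = "en"                 # assumed module-level language code
synthesize("Hello world")   # writes ./temp/result.wav
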
def get_wavernn_model(model_path):
    # Fall back to CPU when CUDA is unavailable
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                    fc_dims=hp.voc_fc_dims,
                    bits=hp.bits,
                    pad=hp.voc_pad,
                    upsample_factors=hp.voc_upsample_factors,
                    feat_dims=hp.num_mels,
                    compute_dims=hp.voc_compute_dims,
                    res_out_dims=hp.voc_res_out_dims,
                    res_blocks=hp.voc_res_blocks,
                    hop_length=hp.hop_length,
                    sample_rate=hp.sample_rate,
                    mode=hp.voc_mode).to(device)

    model.load(model_path)
    return model
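
A short usage sketch, assuming the hparams file ships with the WaveRNN dependency (the paths here mirror the checkpoints used above):

hp.configure("dependencies/WaveRNN/hparams.py")   # assumed hparams location
model = get_wavernn_model("dependencies/checkpoints/wavernn_weight.pyt")
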
Example #3
print("Cur Dir", os.getcwd())

if "utils" in sys.modules:
    del sys.modules["utils"]

sys.path.append(WAVERNN_FOLDER)

from gen_wavernn import generate
from utils import hparams as hp
from models.fatchord_version import WaveRNN

hp.configure(WAVERNN_FOLDER + '/hparams.py')
model = WaveRNN(rnn_dims=hp.voc_rnn_dims, fc_dims=hp.voc_fc_dims, bits=hp.bits, pad=hp.voc_pad, upsample_factors=hp.voc_upsample_factors,
                feat_dims=hp.num_mels, compute_dims=hp.voc_compute_dims, res_out_dims=hp.voc_res_out_dims, res_blocks=hp.voc_res_blocks,
                hop_length=hp.hop_length, sample_rate=hp.sample_rate, mode=hp.voc_mode).to('cpu')
model.load(CHECKPOINTS_FOLDER + "/" + wavernn_chpt)

y = []

ix = 1
while os.path.exists(CHR_FOLDER + "/" + str(ix) + ".npy"):
    print("Found", CHR_FOLDER + "/" + str(ix) + ".npy")
    y.append(np.load(CHR_FOLDER + "/" + str(ix) + ".npy"))
    ix += 1

for idx, s in enumerate(y, start=1):
    waveform = generate(model, s, hp.voc_gen_batched,
                        hp.voc_target, hp.voc_overlap)
    sf.write("wg-" + str(idx) + ".wav", waveform, hp.sample_rate)
Example #4
    # Instantiate WaveRNN Model
    voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                        fc_dims=hp.voc_fc_dims,
                        bits=hp.bits,
                        pad=hp.voc_pad,
                        upsample_factors=hp.voc_upsample_factors,
                        feat_dims=hp.num_mels,
                        compute_dims=hp.voc_compute_dims,
                        res_out_dims=hp.voc_res_out_dims,
                        res_blocks=hp.voc_res_blocks,
                        hop_length=hp.hop_length,
                        sample_rate=hp.sample_rate,
                        mode='MOL').to(device)

    voc_model.load('quick_start/voc_weights/latest_weights.pyt')

    print('\nInitialising Tacotron Model...\n')

    # Instantiate Tacotron Model
    tts_model = Tacotron(embed_dims=hp.tts_embed_dims,
                         num_chars=len(symbols),
                         encoder_dims=hp.tts_encoder_dims,
                         decoder_dims=hp.tts_decoder_dims,
                         n_mels=hp.num_mels,
                         fft_bins=hp.num_mels,
                         postnet_dims=hp.tts_postnet_dims,
                         encoder_K=hp.tts_encoder_K,
                         lstm_dims=hp.tts_lstm_dims,
                         postnet_K=hp.tts_postnet_K,
                         num_highways=hp.tts_num_highways,
                         dropout=hp.tts_dropout,
                         stop_threshold=hp.tts_stop_threshold).to(device)
Example #5
        # Instantiate WaveRNN Model
        voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                            fc_dims=hp.voc_fc_dims,
                            bits=hp.bits,
                            pad=hp.voc_pad,
                            upsample_factors=hp.voc_upsample_factors,
                            feat_dims=hp.num_mels,
                            compute_dims=hp.voc_compute_dims,
                            res_out_dims=hp.voc_res_out_dims,
                            res_blocks=hp.voc_res_blocks,
                            hop_length=hp.hop_length,
                            sample_rate=hp.sample_rate,
                            mode=hp.voc_mode).to(device)

        voc_load_path = args.voc_weights if args.voc_weights else paths.voc_latest_weights
        voc_model.load(voc_load_path)

    print('\nInitialising Forward TTS Model...\n')
    tts_model = ForwardTacotron(
        embed_dims=hp.forward_embed_dims,
        num_chars=len(phonemes),
        durpred_rnn_dims=hp.forward_durpred_rnn_dims,
        durpred_conv_dims=hp.forward_durpred_conv_dims,
        durpred_dropout=hp.forward_durpred_dropout,
        pitch_rnn_dims=hp.forward_pitch_rnn_dims,
        pitch_conv_dims=hp.forward_pitch_conv_dims,
        pitch_dropout=hp.forward_pitch_dropout,
        pitch_emb_dims=hp.forward_pitch_emb_dims,
        pitch_proj_dropout=hp.forward_pitch_proj_dropout,
        rnn_dim=hp.forward_rnn_dims,
        postnet_k=hp.forward_postnet_K,
Example #6
    model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                    fc_dims=hp.voc_fc_dims,
                    bits=hp.bits,
                    pad=hp.voc_pad,
                    upsample_factors=hp.voc_upsample_factors,
                    feat_dims=hp.num_mels,
                    compute_dims=hp.voc_compute_dims,
                    res_out_dims=hp.voc_res_out_dims,
                    res_blocks=hp.voc_res_blocks,
                    hop_length=hp.hop_length,
                    sample_rate=hp.sample_rate,
                    mode=hp.voc_mode).to(device)

    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    voc_weights = args.voc_weights if args.voc_weights else paths.voc_latest_weights

    model.load(voc_weights)

    simple_table([('Generation Mode', 'Batched' if batched else 'Unbatched'),
                  ('Target Samples', target if batched else 'N/A'),
                  ('Overlap Samples', overlap if batched else 'N/A')])

    if gta:
        save_path = paths.voc_output / 'gta'
    else:
        save_path = paths.voc_output / 'natural'
    save_path.mkdir(parents=False, exist_ok=True)
    print(f'Saving to {save_path}')
    # import pdb; pdb.set_trace()

    if file:
        file = Path(file).expanduser()
Example #7
def thak():
    class Tshamsoo:
        # Note: os.getenv returns strings when a variable is set, so these
        # defaults only apply when the variable is absent from the environment.
        force_cpu = os.getenv('FORCE_CPU', False)
        hp_file = 'hparams.py'
        vocoder = os.getenv('VOCODER', 'wavernn')
        batched = os.getenv('BATCHED', True)
        target = os.getenv('TARGET', None)
        overlap = os.getenv('OVERLAP', None)
        tts_weights = None
        save_attn = os.getenv('SAVE_ATTN', False)
        voc_weights = None
        iters = os.getenv('GL_ITERS', 32)

    args = Tshamsoo()
    if args.vocoder in ['griffinlim', 'gl']:
        args.vocoder = 'griffinlim'
    elif args.vocoder in ['wavernn', 'wr']:
        args.vocoder = 'wavernn'
    else:
        # argparse.ArgumentError requires an argument object, so use ValueError here
        raise ValueError('Must provide a valid vocoder type!')

    hp.configure(args.hp_file)  # Load hparams from file

    tts_weights = args.tts_weights
    save_attn = args.save_attn

    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    if args.vocoder == 'wavernn':
        # set defaults for any arguments that depend on hparams
        if args.target is None:
            args.target = hp.voc_target
        if args.overlap is None:
            args.overlap = hp.voc_overlap
        if args.batched is None:
            args.batched = hp.voc_gen_batched

        batched = args.batched
        target = int(args.target)
        overlap = int(args.overlap)

        print('\nInitialising WaveRNN Model...\n')
        # Instantiate WaveRNN Model
        voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                            fc_dims=hp.voc_fc_dims,
                            bits=hp.bits,
                            pad=hp.voc_pad,
                            upsample_factors=hp.voc_upsample_factors,
                            feat_dims=hp.num_mels,
                            compute_dims=hp.voc_compute_dims,
                            res_out_dims=hp.voc_res_out_dims,
                            res_blocks=hp.voc_res_blocks,
                            hop_length=hp.hop_length,
                            sample_rate=hp.sample_rate,
                            mode=hp.voc_mode).to(device)

        voc_load_path = args.voc_weights if args.voc_weights else paths.voc_latest_weights
        voc_model.load(voc_load_path)
    else:
        voc_model = None
        batched = None
        target = None
        overlap = None

    print('\nInitialising Tacotron Model...\n')

    # Instantiate Tacotron Model
    tts_model = Tacotron(embed_dims=hp.tts_embed_dims,
                         num_chars=len(symbols),
                         encoder_dims=hp.tts_encoder_dims,
                         decoder_dims=hp.tts_decoder_dims,
                         n_mels=hp.num_mels,
                         fft_bins=hp.num_mels,
                         postnet_dims=hp.tts_postnet_dims,
                         encoder_K=hp.tts_encoder_K,
                         lstm_dims=hp.tts_lstm_dims,
                         postnet_K=hp.tts_postnet_K,
                         num_highways=hp.tts_num_highways,
                         dropout=hp.tts_dropout,
                         stop_threshold=hp.tts_stop_threshold).to(device)

    tts_load_path = tts_weights if tts_weights else paths.tts_latest_weights
    tts_model.load(tts_load_path)
    return args, voc_model, tts_model, batched, target, overlap, save_attn
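
A minimal call sketch for the setup helper above, unpacking the tuple it returns:

args, voc_model, tts_model, batched, target, overlap, save_attn = thak()
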
def main():
    # Parse Arguments
    parser = argparse.ArgumentParser(description='TTS Generator')

    parser.add_argument('--mel',
                        type=str,
                        help='[string/path] path to test mel file')

    parser.add_argument('--hp_file',
                        metavar='FILE',
                        default='hparams.py',
                        help='The file to use for the hyperparameters')

    parser.add_argument('--batched',
                        '-b',
                        dest='batched',
                        action='store_true',
                        help='Fast Batched Generation')

    parser.add_argument(
        '--voc_weights',
        type=str,
        help='[string/path] Load in different WaveRNN weights',
        default="pretrained/wave_800K.pyt")

    args = parser.parse_args()

    if not os.path.exists('onnx'):
        os.mkdir('onnx')

    hp.configure(args.hp_file)

    device = torch.device('cpu')
    print('Using device:', device)

    #####
    print('\nInitialising WaveRNN Model...\n')
    # Instantiate WaveRNN Model
    voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                        fc_dims=hp.voc_fc_dims,
                        bits=hp.bits,
                        pad=hp.voc_pad,
                        upsample_factors=hp.voc_upsample_factors,
                        feat_dims=hp.num_mels,
                        compute_dims=hp.voc_compute_dims,
                        res_out_dims=hp.voc_res_out_dims,
                        res_blocks=hp.voc_res_blocks,
                        hop_length=hp.hop_length,
                        sample_rate=hp.sample_rate,
                        mode=hp.voc_mode).to(device)

    voc_load_path = args.voc_weights
    voc_model.load(voc_load_path)

    voc_upsampler = WaveRNNUpsamplerONNX(voc_model, args.batched,
                                         hp.voc_target, hp.voc_overlap)
    voc_infer = WaveRNNONNX(voc_model)

    voc_model.eval()
    voc_upsampler.eval()
    voc_infer.eval()

    opset_version = 11

    with torch.no_grad():
        mels = np.load(args.mel)
        mels = torch.from_numpy(mels)
        mels = mels.unsqueeze(0)
        mels = voc_upsampler.pad_tensor(mels)

        mels_onnx = mels.clone()

        torch.onnx.export(voc_upsampler,
                          mels_onnx,
                          "./onnx/wavernn_upsampler.onnx",
                          opset_version=opset_version,
                          do_constant_folding=True,
                          input_names=["mels"],
                          output_names=["upsample_mels", "aux"])

        mels, aux = voc_upsampler(mels)
        mels = mels[:, 550:-550, :]

        mels, aux = voc_upsampler.fold(mels, aux)

        h1, h2, x = voc_infer.get_initial_parameters(mels)

        aux_split = voc_infer.split_aux(aux)

        b_size, seq_len, _ = mels.size()

        if seq_len:
            m_t = mels[:, 0, :]

            a1_t, a2_t, a3_t, a4_t = \
                (a[:, 0, :] for a in aux_split)

            rnn_input = (m_t, a1_t, a2_t, a3_t, a4_t, h1, h2, x)
            torch.onnx.export(voc_infer,
                              rnn_input,
                              "./onnx/wavernn_rnn.onnx",
                              opset_version=opset_version,
                              do_constant_folding=True,
                              input_names=[
                                  "m_t", "a1_t", "a2_t", "a3_t", "a4_t", "h1",
                                  "h2", "x"
                              ],
                              output_names=["logits", "h1", "h2"])

    print('Done!')
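
A quick sanity-check sketch for the exported upsampler graph, assuming onnxruntime is installed; since the export above does not declare dynamic_axes, the input must match the shape of the padded mels used at export time (the .npy file here is hypothetical):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("./onnx/wavernn_upsampler.onnx")
mels = np.load("padded_mel.npy").astype(np.float32)  # hypothetical file, shape as at export
upsample_mels, aux = sess.run(["upsample_mels", "aux"], {"mels": mels})
print(upsample_mels.shape, aux.shape)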