Example #1
def init_model():
    hparams = create_hparams()

    checkpoint_path = "checkpoints/mellotron_libritts.pt"
    tacotron = load_model(hparams).cpu().eval()
    tacotron.load_state_dict(
        torch.load(checkpoint_path,
                   map_location=torch.device('cpu'))['state_dict'])

    waveglow_path = 'checkpoints/waveglow_256channels_v4.pt'
    waveglow = torch.load(
        waveglow_path, map_location=torch.device('cpu'))['model'].cpu().eval()
    denoiser = Denoiser(waveglow).cpu().eval()
    return (tacotron, waveglow, denoiser)
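
A minimal usage sketch for the tuple returned above (hedged: it only exercises the WaveGlow/denoiser vocoding path shown in the later examples, and `mel` is an assumed, pre-computed mel spectrogram such as the output of a load_mel helper):

tacotron, waveglow, denoiser = init_model()
with torch.no_grad():
    # `mel` is a hypothetical [1, n_mel_channels, T] mel-spectrogram tensor
    audio = denoiser(waveglow.infer(mel, sigma=0.8), 0.01)[:, 0]
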
Example #2

def run_test(model_dir, data_dir, mode, config_path='345M/', beam_width=10):
    config_file = os.path.join('./configs/', config_path + 'config.json')
    vocab_path = os.path.join('./configs/', config_path + 'vocab.json')
    merge_path = os.path.join('./configs/', config_path + 'merges.txt')
    checkpoint_path = model_dir + '/GPT_model.pkl'
    log_filename = model_dir + '/test_data.log'

    config = GPT2Config.from_json_file(config_file)

    create_log(log_filename)
    print("Building model")
    model = load_model(GPT2LMHeadModel(config), checkpoint_path,
                       test=True).cuda()
    model.eval()
    tokenizer = GPT2Tokenizer(vocab_path, merge_path)
    if mode == 'test':
        print('Loading test dataset...')
        test_data_loader = GPT2DataLoader(data_path=data_dir,
                                          vocab_file=vocab_path,
                                          bpe_merges=merge_path,
                                          bucket=2,
                                          batch_size=1,
                                          max_seq_len=512)
Example #3
from data_loader import GPT2DataLoader
from train import run
import os
import torch
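
# NOTE: GPT2Config, GPT2LMHeadModel and load_model are also used below, but their
# home modules are not shown in this snippet. With the `transformers` package they
# would come from `from transformers import GPT2Config, GPT2LMHeadModel`
# (an assumption); load_model is this repo's own checkpoint-loading helper.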

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_size = 'small'
    if model_size == 'small':
        config_path = '117M/config.json'
    elif model_size == 'middle':
        config_path = '345M/config.json'
    elif model_size == 'big':
        config_path = '762M/config.json'
    config = GPT2Config.from_json_file(os.path.join('./configs/', config_path))
    model = load_model(GPT2LMHeadModel(config), "checkpoints/small_fs.pkl")
    model = model.to(device)

    train_data_loader = GPT2DataLoader(data_path='DailyDialog/train_text.txt',
                                       vocab_file='./vocab_file/encoder.json',
                                       bpe_merges='vocab_file/merges.txt',
                                       bucket=2,
                                       batch_size=5,
                                       max_seq_len=512)

    valid_data_loader = GPT2DataLoader(data_path='DailyDialog/test_text.txt',
                                       vocab_file='./vocab_file/encoder.json',
                                       bpe_merges='vocab_file/merges.txt',
                                       bucket=2,
                                       batch_size=5,
                                       max_seq_len=512)
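
The imported `run` presumably drives training with the model and these two loaders; its exact signature is not visible in this snippet, so the commented call below is only a hypothetical sketch:

    # hypothetical: train.run's real arguments are not shown in this example
    # run(model, train_data_loader, valid_data_loader, device=device)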
Example #4
    return logits


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_size = 'middle'
    if model_size == 'small':
        config_path = '117M/config.json'
    elif model_size == 'middle':
        config_path = '345M/config.json'
    elif model_size == 'big':
        config_path = '762M/config.json'
    config = GPT2Config.from_json_file(os.path.join('./configs/', config_path))
    print(config)
    checkpoint_path = "checkpoints/medium_ft.pkl"  # alternatives: 'Cornell_models/GPT_Cornell_models.pkl', "checkpoints/medium_fs.pkl"
    model = load_model(GPT2LMHeadModel(config), checkpoint_path, test=False)
    model = model.to(device)

    # train_data_loader = GPT2DataLoader(data_path='DailyDialog/train_text.txt',
    #                                    vocab_file='./vocab_file/encoder.json',
    #                                    bpe_merges='vocab_file/merges.txt',
    #                                    bucket=2,
    #                                    batch_size=5,
    #                                    max_seq_len=512)
    vocab_file = './configs/345M/vocab.json'
    bpe_merges = './configs/345M/merges.txt'
    #valid_data_loader = GPT2DataLoader(data_path='DailyDialog/test_text.txt',
    #                                   vocab_file=vocab_file,
    #                                   bpe_merges=bpe_merges,
    #                                   bucket=2,
    #                                   batch_size=1,
Example #5
def augmentation(arpabet_dict,
                 audio_paths,
                 target_spk_id_list,
                 output_path,
                 ljs=False):

    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # Step1: Basic Setups
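    # NOTE: `hparams` used below is assumed to be a module-level create_hparams()
    # result (as in the other examples); it is not defined inside this function.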

    if not ljs:
        # default: use the LibriTTS multi-speaker checkpoint
        checkpoint_path = "mellotron_libritts.pt"
    else:
        # use the LJ Speech checkpoint instead
        checkpoint_path = "mellotron_ljs.pt"
    if torch.cuda.is_available():
        tacotron = load_model(hparams).cuda().eval()
    else:
        tacotron = load_model(hparams).eval()
    tacotron.load_state_dict(
        torch.load(checkpoint_path, map_location="cpu")['state_dict'])

    waveglow_path = 'waveglow_256channels_v4.pt'
    if torch.cuda.is_available():
        waveglow = torch.load(waveglow_path)['model'].cuda().eval()
        denoiser = Denoiser(waveglow).cuda().eval()
    else:
        waveglow = torch.load(waveglow_path,
                              map_location="cpu")['model'].eval().cpu()
        denoiser = Denoiser(waveglow).eval()

    arpabet_dict = cmudict.CMUDict(arpabet_dict)
    dataloader = TextMelLoader(audio_paths, hparams)
    datacollate = TextMelCollate(1)

    # Step2: Load
    # Open the source list once before the loop; reopening it in write mode on
    # every iteration would truncate the file each time.
    source_scp = open(os.path.join(output_path, "source.scp"),
                      "w",
                      encoding="utf-8")
    for file_idx in range(len(dataloader)):
        audio_path, text, sid = dataloader.audiopaths_and_text[file_idx]
        source_scp.write("{} {}\n".format(file_idx, audio_path))

        # get audio path, encoded text, pitch contour and mel for gst
        text_encoded = torch.LongTensor(
            text_to_sequence(text, hparams.text_cleaners,
                             arpabet_dict))[None, :]
        pitch_contour = dataloader[file_idx][3][None]
        if torch.cuda.is_available():
            text_encoded = text_encoded.cuda()
            pitch_contour = pitch_contour.cuda()
        mel = load_mel(audio_path)
        # load source data to obtain rhythm using tacotron 2 as a forced aligner
        x, y = tacotron.parse_batch(datacollate([dataloader[file_idx]]))

        # Step3: Perform speaker transfer
        with torch.no_grad():
            # get rhythm (alignment map) using tacotron 2
            mel_outputs, mel_outputs_postnet, gate_outputs, rhythm = tacotron.forward(
                x)
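            # reorder the alignment (rhythm) map into the layout that
            # inference_noattention expects below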
            rhythm = rhythm.permute(1, 0, 2)

        for spk_id in target_spk_id_list:
            speaker_id = torch.LongTensor([spk_id])

            if torch.cuda.is_available():
                speaker_id = speaker_id.cuda()

            with torch.no_grad():
                mel_outputs, mel_outputs_postnet, gate_outputs, _ = tacotron.inference_noattention(
                    (text_encoded, mel, speaker_id, pitch_contour * 0.4,
                     rhythm))

            with torch.no_grad():
                audio = denoiser(
                    waveglow.infer(mel_outputs_postnet, sigma=0.8), 0.01)[:, 0]

            sf.write(
                os.path.join(output_path, "{}-{}.wav".format(file_idx,
                                                             spk_id)),
                audio.detach().cpu().numpy().T, hparams.sampling_rate)

    source_scp.close()
Example #6
    model_size = 'medium'
    if model_size == 'small':
        config_path = '117M/config.json'
        vocab_path = '117M/vocab.json'
        merges_path = '117M/merges.txt'
    elif model_size == 'medium':
        config_path = '345M/config.json'
        vocab_path = '345M/vocab.json'
        merges_path = '345M/merges.txt'
    elif model_size == 'large':
        config_path = '762M/config.json'
        vocab_path = '762M/vocab.json'
        merges_path = '762M/merges.txt'

    config = GPT2Config.from_json_file(os.path.join('./configs/', config_path))
    model = load_model(GPT2LMHeadModel(config), "checkpoints/medium_ft.pkl")
    device = range(torch.cuda.device_count())
    model = torch.nn.DataParallel(model, device_ids=device).cuda()
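    # device_ids enumerates every visible GPU, so DataParallel replicates the model
    # across all of them; .cuda() keeps the master copy on GPU 0.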

    vocab_file = os.path.join('./configs/', vocab_path)
    bpe_merges = os.path.join('./configs/', merges_path)

    train_data_loader = GPT2DataLoader(
        data_path='Data/Cornell_movie_dialogs/dd_train.txt',
        vocab_file=vocab_file,
        bpe_merges=bpe_merges,
        bucket=2,
        batch_size=2,
        max_seq_len=512)

    valid_data_loader = GPT2DataLoader(
Example #7
def main(argv):
    del argv
    save, logdir, figname, logHandler = utils.configuration(FLAGS)

    train_ds, test_ds, placeholder = get_dataset(FLAGS)
    loss, correct_prediction, var_list = utils.load_model(FLAGS, placeholder)

    train_iterator = None
    test_iterator = None

    fix_opt, add_opt, stop_opt = utils.make_optimizer(placeholder, loss,
                                                      var_list)
    fix_accuracy, add_accuracy = correct_prediction

    save_dir, save_file = save
    var_all, var_m1, _ = var_list

    epoch_list, original, proposed = [], [], []

    with tf.Session() as sess:
        with tf.device('/cpu:0'):
            merged_summary = tf.summary.merge_all()
            writer = tf.summary.FileWriter(logdir)
            writer.add_graph(sess.graph)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(var_all)

        print('Learning started. It may take a while...')
        print()
        for i in range(1, FLAGS.epochs + 1):
            logHandler.print_epoch()
            if i == (FLAGS.stop_point + 1):
                logHandler._print('Proposed training...')
                loader = tf.train.Saver(var_m1)
                loader.restore(sess, tf.train.latest_checkpoint(save_dir))

            if i <= FLAGS.stop_point:
                if i % FLAGS.iteration == 0:
                    loader = tf.train.Saver(var_all)
                    loader.restore(sess, tf.train.latest_checkpoint(save_dir))

                    utils.fit_model(sess, add_opt, placeholder, train_iterator,
                                    train_ds, i, FLAGS, logHandler,
                                    merged_summary, writer)

                    origin_test_accuracy = utils.test_validate(
                        sess, fix_accuracy, test_iterator, placeholder,
                        test_ds, FLAGS, logHandler)
                    proposed_test_accuracy = utils.test_validate(
                        sess, add_accuracy, test_iterator, placeholder,
                        test_ds, FLAGS, logHandler)

                else:
                    utils.fit_model(sess, fix_opt, placeholder, train_iterator,
                                    train_ds, i, FLAGS, logHandler,
                                    merged_summary, writer)

                    utils.train_validate(sess, fix_accuracy, train_iterator,
                                         placeholder, train_ds, FLAGS,
                                         logHandler)
                    origin_test_accuracy = utils.test_validate(
                        sess, fix_accuracy, test_iterator, placeholder,
                        test_ds, FLAGS, logHandler)

                    proposed_test_accuracy = utils.test_validate(
                        sess, add_accuracy, test_iterator, placeholder,
                        test_ds, FLAGS, logHandler)
                saver.save(sess, save_file)
            else:
                # loader = tf.train.Saver(var_m1)
                # loader.restore(sess, tf.train.latest_checkpoint(save_dir))
                utils.fit_model(sess, stop_opt, placeholder, train_iterator,
                                train_ds, i, FLAGS, logHandler, merged_summary,
                                writer)

                if train_iterator is not None:
                    sess.run(train_iterator.initializer)
                utils.train_validate(sess, add_accuracy, train_iterator,
                                     placeholder, train_ds, FLAGS, logHandler)
                proposed_test_accuracy = utils.test_validate(
                    sess, add_accuracy, test_iterator, placeholder, test_ds,
                    FLAGS, logHandler)

                origin_test_accuracy = utils.test_validate(
                    sess, fix_accuracy, test_iterator, placeholder, test_ds,
                    FLAGS, logHandler)

            epoch_list.append(i)
            proposed.append(proposed_test_accuracy)
            original.append(origin_test_accuracy)

        # Add_final_train_accuracy = tu.train_validate(sess, add_accuracy, train_iterator,
        #                                         X, Y, dropout_rate, train_ds, FLAGS)
        logHandler._print('Original Accuracy: ')
        origin_test_accuracy = utils.test_validate(sess, fix_accuracy,
                                                   test_iterator, placeholder,
                                                   test_ds, FLAGS, logHandler)

        logHandler._print('Proposed Accuracy: ')
        utils.test_validate(sess, add_accuracy, test_iterator, placeholder,
                            test_ds, FLAGS, logHandler)

        plot_acc(epoch_list, original, proposed, figname)
        saver.save(sess, save_file)
        logHandler._print('Training done successfully')
Example #8
# Experiment parameters:
POPULATION_SIZE = 24
NUMBER_ROLLS = 5
GENERATION_LIMIT = 32
SCORE_LIMIT = 100
MAX_STEPS = 200  # each run should actually have 1000 steps, but capping it saves time

vae = convVAE.ConvVAE()
lstm = lstm_mdn.LSTM_MDN()

#line 37 train_lstm?
#mdn = need
from datasets.generate_lstm_training import LSTMDataset  #line 63 train_lstm
sys.path.insert(1, '/users/alberto/projects/WorldModels/utils')
import train_utils
train_utils.load_model()


# Using this rollout structure, as recommended in:
# http://blog.otoro.net/2017/11/12/evolving-stable-strategies/
def rollout(k, env):
    # k is a controller instance
    # env is the car racing environment

    obs = env.reset()

    done = False
    total_reward = 0
    step_counter = 0

    #while not done:
    while step_counter < MAX_STEPS:
Example #9
    audio_norm = torch.autograd.Variable(audio_norm, requires_grad=False)
    melspec = stft.mel_spectrogram(audio_norm)
    if torch.cuda.is_available():
        melspec = melspec.cuda()
    return melspec

# Step1: Basic Setups
hparams = create_hparams()

stft = TacotronSTFT(hparams.filter_length, hparams.hop_length, hparams.win_length,
                    hparams.n_mel_channels, hparams.sampling_rate, hparams.mel_fmin,
                    hparams.mel_fmax)

checkpoint_path = "mellotron_libritts.pt"
if torch.cuda.is_available():
    tacotron = load_model(hparams).cuda().eval()
else:
    tacotron = load_model(hparams).eval()
tacotron.load_state_dict(torch.load(checkpoint_path, map_location="cpu")['state_dict'])


waveglow_path = 'waveglow_256channels_v4.pt'
if torch.cuda.is_available():
    waveglow = torch.load(waveglow_path)['model'].cuda().eval()
    denoiser = Denoiser(waveglow).cuda().eval()
else:
    waveglow = torch.load(waveglow_path, map_location="cpu")['model'].eval().cpu()
    denoiser = Denoiser(waveglow).eval()


arpabet_dict = cmudict.CMUDict('data/cmu_dictionary')