Example #1
def sampling_around_existing_sentence(s1, num=10):
    # NOTE: vocab based on datasets
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)

    ckpt = torch.load(save_path)
    vae, vae_trainer = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt

    # string to tensor
    s1_tensor = str_to_tensor(s1, vocab, conf)
    s1_tensor = on_cuda(s1_tensor.unsqueeze(0))

    mu, logvar = vae.encode(s1_tensor)
    # scale_tril expects standard deviations: sigma = exp(0.5 * logvar)
    mvn = MultivariateNormal(mu, scale_tril=torch.diag(torch.exp(0.5 * logvar[0])))

    for i in range(num):
        z = mvn.sample()
        h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        G_hidden = (h_0, c_0)
        G_inp = torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token])
        G_inp = on_cuda(G_inp)
        string = conf.start_token + ' '
        while G_inp[0][0].item() != vocab.stoi[conf.end_token]:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
            probs = F.softmax(logit[0], dim=1)
            G_inp = torch.multinomial(probs, 1)
            string += (vocab.itos[G_inp[0][0].item()] + ' ')
        print('----------------------------')
        print(string.encode('utf-8'))
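Note on the sampling step above: `logvar` is a log-variance, so the standard deviation of the approximate posterior is `exp(0.5 * logvar)`. The same distribution can be sampled without building a `MultivariateNormal`, using a plain reparameterized diagonal Gaussian. A minimal sketch (the helper name `sample_around_posterior` is illustrative; `mu` and `logvar` are assumed to come from `vae.encode`, as in the function above):

import torch

def sample_around_posterior(mu, logvar, num=10):
    # Diagonal-Gaussian sampling via the reparameterization trick:
    # z = mu + eps * sigma, with sigma = exp(0.5 * logvar).
    std = torch.exp(0.5 * logvar)                        # (1, n_z)
    eps = torch.randn(num, mu.size(-1), device=mu.device)
    return mu + eps * std                                # (num, n_z)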
Example #2
def generate_sentences(n_examples):
    # NOTE: vocab based on datasets
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)

    ckpt = torch.load(save_path)
    vae, vae_trainer = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt

    for i in range(n_examples):
        z = on_cuda(torch.randn([1, conf.n_z]))
        h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        G_hidden = (h_0, c_0)
        # 2 is the index of start token in vocab stoi
        G_inp = torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token])
        G_inp = on_cuda(G_inp)
        string = conf.start_token + ' '
        # until we hit end token (index 3 in vocab stoi)
        while G_inp[0][0].item() != vocab.stoi[conf.end_token]:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
            probs = F.softmax(logit[0], dim=1)
            G_inp = torch.multinomial(probs, 1)
            string += (vocab.itos[G_inp[0][0].item()] + ' ')
        # print(string.encode('utf-8'))
        print(string)
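The decode loop above (and the ones in Examples #1 and #6) only stops once the end token happens to be sampled, so a poorly trained model can loop for a very long time. Example #8 guards against this with a hard length cap; the sketch below factors that guarded loop into a reusable helper (`decode_from_z` and `max_len` are illustrative names, and the helper assumes the same `on_cuda`, `conf`, and `vocab` objects used throughout these examples):

import torch
import torch.nn.functional as F

def decode_from_z(vae, z, vocab, conf, max_len=20):
    # Multinomial sampling decode with a hard length cap, mirroring Example #8.
    h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
    c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
    G_hidden = (h_0, c_0)
    G_inp = on_cuda(torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token]))
    words = []
    for _ in range(max_len):
        with torch.no_grad():
            logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
        probs = F.softmax(logit[0], dim=1)
        G_inp = torch.multinomial(probs, 1)
        if G_inp[0][0].item() == vocab.stoi[conf.end_token]:
            break
        words.append(vocab.itos[G_inp[0][0].item()])
    return ' '.join(words)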
Example #3
def create_g_input(x, train, vocab, conf):
    # drop the last token so the generator input is the target shifted by one;
    # during training, also apply random word dropout
    G_inp = x[:, 0:x.size(1) - 1].clone()
    if not train:
        return on_cuda(G_inp)

    # random word dropout
    r = np.random.rand(G_inp.size(0), G_inp.size(1))
    for i in range(len(G_inp)):
        for j in range(1, G_inp.size(1)):
            if r[i, j] < conf.word_dropout and G_inp[i, j] not in [
                    vocab.stoi[conf.pad_token], vocab.stoi[conf.end_token]
            ]:
                G_inp[i, j] = vocab.stoi[conf.unk_token]
    return on_cuda(G_inp)
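The nested Python loops above are easy to read but scale poorly with batch size and sequence length. A vectorized sketch of the same word dropout (intended to match the loop's behavior; `conf` and `vocab` are the same objects passed to `create_g_input`, and the NumPy RNG is swapped for `torch.rand`):

import torch

def word_dropout_vectorized(G_inp, vocab, conf):
    # Randomly replace non-special tokens with <unk>, never touching position 0.
    protected = torch.tensor([vocab.stoi[conf.pad_token], vocab.stoi[conf.end_token]],
                             device=G_inp.device)
    drop = torch.rand(G_inp.shape, device=G_inp.device) < conf.word_dropout
    drop[:, 0] = False
    drop &= ~torch.isin(G_inp, protected)
    return on_cuda(G_inp.masked_fill(drop, vocab.stoi[conf.unk_token]))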
Example #4
    def forward(self, x, G_inp, z=None, G_hidden=None):
        # training: no z given, so encode x and sample z via the reparameterization trick
        if z is None:
            batch_size, n_seq = x.size()
            # produce embedding from encoder input
            x = self.embedding(x)
            # h_T of encoder
            E_hidden = self.encoder(x)
            # mean of latent z
            mu = self.hidden_to_mu(E_hidden)
            # log variance of latent z
            logvar = self.hidden_to_logvar(E_hidden)
            # noise sampled from Normal(0, 1)
            z = on_cuda(torch.randn([batch_size, self.n_z]))
            # reparam trick: sample z = mu + eps*sigma for back prop
            z = mu + z * torch.exp(0.5 * logvar)
            # KL-divergence loss
            # kld = -0.5*torch.sum(logvar-mu.pow(2)-logvar.exp()+1, 1).mean()
            kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        else:
            # generation: z was provided externally (e.g. sampled from the prior), no KL term
            kld = None

        # embeddings for generator input
        G_inp = self.embedding(G_inp)
        logit, G_hidden = self.generator(G_inp, z, G_hidden)
        return logit, G_hidden, kld
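For reference, the active KL term above sums over both the batch and the latent dimensions, while the commented-out variant sums over the latent dimension and then averages over the batch. A sketch of the per-example closed-form KL between the diagonal posterior N(mu, sigma^2) and the standard normal prior:

import torch

def kl_divergence(mu, logvar):
    # KL( N(mu, sigma^2) || N(0, I) ) for a diagonal Gaussian, per example,
    # then averaged over the batch (the forward pass above sums instead).
    kld_per_example = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)
    return kld_per_example.mean()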
Example #5
def create_vae(conf, vocab):
    # emb = torchtext.vocab.GloVe(conf.vector, conf.n_embed)
    # vae = VAE(conf, emb)
    vae = VAE(conf)
    vae.embedding.weight.data.copy_(vocab.vectors)
    vae = on_cuda(vae)
    trainer_vae = torch.optim.Adam(vae.parameters(), lr=conf.lr)
    return vae, trainer_vae
Example #6
def interpolate_sentences(num=10):
    # NOTE: vocab based on datasets
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)

    ckpt = torch.load(save_path)
    vae, vae_trainer = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt

    z1 = on_cuda(torch.randn([1, conf.n_z]))
    # z2 = on_cuda(torch.randn([1, conf.n_z]))
    z2 = z1 + on_cuda(0.3 * torch.ones(z1.size()))

    int_z = torch.lerp(z1, z2,
                       on_cuda(torch.linspace(0.0, 1.0, num).unsqueeze(1)))
    # zs to strings
    for i in range(int_z.size()[0]):
        z = int_z[i, :].unsqueeze(0)
        h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        G_hidden = (h_0, c_0)
        G_inp = torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token])
        G_inp = on_cuda(G_inp)
        string = conf.start_token + ' '
        while G_inp[0][0].item() != vocab.stoi[conf.end_token]:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
            probs = F.softmax(logit[0], dim=1)
            G_inp = torch.multinomial(probs, 1)
            string += (vocab.itos[G_inp[0][0].item()] + ' ')
        print('----------------------------')
        print(string.encode('utf-8'))
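The `torch.lerp` call above relies on broadcasting: the endpoints have shape (1, n_z) and the weight has shape (num, 1), so the result is a (num, n_z) batch of evenly spaced latents. An equivalent sketch written out explicitly, using the same `z1`, `z2`, `num`, and `on_cuda` as in the function above:

# z_i = z1 + t_i * (z2 - z1), with t_i evenly spaced in [0, 1]
t = on_cuda(torch.linspace(0.0, 1.0, num).unsqueeze(1))   # (num, 1)
int_z = z1 + t * (z2 - z1)                                # (num, n_z)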
Example #7
    def __init__(self, conf):
        # create vae, load weights
        _, _, _, self.vocab = get_gyafc(conf)
        self.vae, _ = create_vae(conf, self.vocab)
        ckpt = torch.load(conf.vae_model_path)
        self.vae.load_state_dict(ckpt['vae_dict'])
        self.vae.eval()
        del ckpt

        # create linear shift
        self.linear_shift = on_cuda(LinearShift(conf))

        # save conf
        self.conf = conf
        # init
        self.score = 0
        self.eval_done = False

        # load dataset
        self.test = get_formality_set(conf, self.vocab)

        # scoring
        self.extractor = FeatureExtractor(conf.w2v_path, conf.corpus_dict_path)
        self.pt16_ridge = pickle.load(open(conf.pt16_path, 'rb'))
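LinearShift itself is not shown in these examples. Judging only from how it is used here and in Example #8 (two indexable heads `linear_mu[0]` and `linear_logvar[0]`, called as `linear_shift(mu, logvar)`), it is presumably something like the sketch below; this is an inferred reconstruction, and the actual class in the repository may differ:

import torch.nn as nn

class LinearShiftSketch(nn.Module):
    # Illustrative reconstruction only, inferred from its call sites.
    def __init__(self, conf):
        super().__init__()
        self.linear_mu = nn.Sequential(nn.Linear(conf.n_z, conf.n_z))
        self.linear_logvar = nn.Sequential(nn.Linear(conf.n_z, conf.n_z))

    def forward(self, mu, logvar):
        return self.linear_mu(mu), self.linear_logvar(logvar)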
Example #8
    def eval(self, work):
        # evaluates quality of given parameters
        # copy weights to linear shift
        mu_weight = work['mu_weight']
        mu_bias = work['mu_bias']
        var_weight = work['var_weight']
        var_bias = work['var_bias']

        with torch.no_grad():
            self.linear_shift.linear_mu[0].weight.copy_(torch.from_numpy(mu_weight).float())
            self.linear_shift.linear_mu[0].bias.copy_(torch.from_numpy(mu_bias).float())
            self.linear_shift.linear_logvar[0].weight.copy_(torch.from_numpy(var_weight).float())
            self.linear_shift.linear_logvar[0].bias.copy_(torch.from_numpy(var_bias).float())

        batch_scores = []

        for batch in self.test:
            print('New Batch')
            current_batch_scores = []
            current_batch_strings = []
            batch = on_cuda(batch.T)
            # encode batch to mu and logvars
            mu, logvar = self.vae.encode(batch)

            # pass mu and logvar through the linear shift
            new_mu, new_logvar = self.linear_shift(mu, logvar)

            # loop over each example in the batch
            for i in range(new_mu.size()[0]):
                # create distribution
                # scale_tril expects standard deviations: sigma = exp(0.5 * logvar)
                mvn = MultivariateNormal(new_mu[i, :], scale_tril=torch.diag(torch.exp(0.5 * new_logvar[i, :])))

                # sample and decode
                z = mvn.sample().unsqueeze(0)

                h_0 = on_cuda(torch.zeros(self.conf.n_layers_G, 1, self.conf.n_hidden_G))
                c_0 = on_cuda(torch.zeros(self.conf.n_layers_G, 1, self.conf.n_hidden_G))
                G_hidden = (h_0, c_0)
                G_inp = torch.LongTensor(1, 1).fill_(self.vocab.stoi[self.conf.start_token])
                G_inp = on_cuda(G_inp)
                string = ''
                length = 0
                while G_inp[0][0].item() != self.vocab.stoi[self.conf.end_token]:
                    with torch.autograd.no_grad():
                        logit, G_hidden, _ = self.vae(None, G_inp, z, G_hidden)
                    probs = F.softmax(logit[0], dim=1)
                    G_inp = torch.multinomial(probs, 1)
                    if G_inp[0][0].item() != self.vocab.stoi[self.conf.end_token]:
                        string += self.vocab.itos[G_inp[0][0].item()] + ' '
                        length += 1
                    if length >= 20:
                        break
                current_batch_strings.append(string)

            print('Decode on current batch done, scoring now')
            # score on strings
            for i, sent in enumerate(current_batch_strings):
                # PT16 formality
                pt16 = self.get_pt16_score(sent)
                # bleu with original
                # TODO: how to get the original sentence?
                # bleu = self.get_bleu_with_orig(?, sent)
                # current_batch_scores.append(self.conf.pt16_weight*pt16 + self.conf.bleu_weight*bleu)
                current_batch_scores.append(pt16)

            print('Current batch average score:', np.mean(current_batch_scores))
            batch_scores.append(np.mean(current_batch_scores))

        # TODO: process all scores to a single score?
        # score = 0 # TODO
        score = -np.mean(batch_scores)
        self.score = score
        self.eval_done = True
Example #9
        # TODO: change what to save
        optim.dump(conf.optim_filename)
        np.savez_compressed(conf.npz_filename,
                            scores=np.array(all_scores),
                            individuals=all_individuals)

    return optim, all_scores, all_individuals

if __name__ == '__main__':
    with open('configs/default.yaml') as file:
        conf_dict = yaml.load(file, Loader=yaml.FullLoader)
    conf = Namespace(**conf_dict)
    print(conf)
    np.random.seed(conf.seed)
    torch.manual_seed(conf.seed)
    ray.init()
    optim, all_scores, all_individuals = search(conf)
    best_params = optim.recommend()
    best_linear_shift = on_cuda(LinearShift(conf))
    mu_weight = best_params['mu_weight']
    mu_bias = best_params['mu_bias']
    var_weight = best_params['var_weight']
    var_bias = best_params['var_bias']

    with torch.no_grad():
        best_linear_shift.linear_mu[0].weight.copy_(torch.from_numpy(mu_weight.value).float())
        best_linear_shift.linear_mu[0].bias.copy_(torch.from_numpy(mu_bias.value).float())
        best_linear_shift.linear_logvar[0].weight.copy_(torch.from_numpy(var_weight.value).float())
        best_linear_shift.linear_logvar[0].bias.copy_(torch.from_numpy(var_bias.value).float())

    torch.save(best_linear_shift.state_dict(), conf.linear_model_save_path)
Example #10
def create_vae(conf, vocab):
    vae = VAE(conf)
    vae.embedding.weight.data.copy_(vocab.vectors)
    vae = on_cuda(vae)
    trainer_vae = torch.optim.Adam(vae.parameters(), lr=conf.lr)
    return vae, trainer_vae

if __name__ == '__main__':
    with open('configs/default.yaml') as file:
        conf_dict = yaml.load(file, Loader=yaml.FullLoader)
    conf = Namespace(**conf_dict)
    print(conf)
    np.random.seed(conf.seed)
    torch.manual_seed(conf.seed)

    best_linear_shift = on_cuda(LinearShift(conf))
    linear_ckpt = torch.load(conf.linear_model_save_path)
    best_linear_shift.load_state_dict(linear_ckpt)
    best_linear_shift.eval()

    _, _, _, vocab = get_gyafc(conf)
    ckpt = torch.load(conf.vae_model_path)
    vae, _ = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt, linear_ckpt

    test = get_informal_test_set(conf, vocab)

    all_strings = []
    for batch in test:
Example #11
def train():
    # data loading
    # train_iter, test_iter, valid_iter, vocab = get_wiki2(conf)
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)

    # create model, load weights if necessary
    if args.resume_training:
        step, start_epoch, vae, trainer_vae = load_ckpt(conf, save_path, vocab)
    else:
        start_epoch = 0
        step = 0
        vae, trainer_vae = create_vae(conf, vocab)

    all_t_rec_loss = []
    all_t_kl_loss = []
    all_t_loss = []
    all_v_rec_loss = []
    all_v_kl_loss = []
    all_v_loss = []

    # training epochs
    for epoch in tqdm.tqdm(range(start_epoch, conf.epochs), desc='Epochs'):
        vae.train()
        # logging
        train_rec_loss = []
        train_kl_loss = []
        train_loss = []

        for batch in train_iter:
            # batch serves as both the encoder input and the target output for the generator
            batch = on_cuda(batch.T)
            G_inp = create_g_input(batch, True, vocab, conf)
            rec_loss, kl_loss, elbo, kld_coef = train_batch(vae,
                                                            trainer_vae,
                                                            batch,
                                                            G_inp,
                                                            step,
                                                            conf,
                                                            train=True)
            train_rec_loss.append(rec_loss)
            train_kl_loss.append(kl_loss)
            train_loss.append(elbo)

            # log
            if args.to_train:
                writer.add_scalar('ELBO', elbo, step)
                writer.add_scalar('Cross Entropy', rec_loss, step)
                writer.add_scalar('KL Divergence Raw', kl_loss, step)
                writer.add_scalar('KL Annealed Weight', kld_coef, step)
                writer.add_scalar('KL Divergence Weighted', kl_loss * kld_coef,
                                  step)

            # increment step
            step += 1

        # valid
        vae.eval()
        valid_rec_loss = []
        valid_kl_loss = []
        valid_loss = []

        for valid_batch in valid_iter:
            valid_batch = on_cuda(valid_batch.T)
            G_inp = create_g_input(valid_batch, True, vocab, conf)
            with torch.autograd.no_grad():
                rec_loss, kl_loss, elbo, kld_coef = train_batch(vae,
                                                                trainer_vae,
                                                                valid_batch,
                                                                G_inp,
                                                                step,
                                                                conf,
                                                                train=False)
            valid_rec_loss.append(rec_loss)
            valid_kl_loss.append(kl_loss)
            valid_loss.append(elbo)

        all_t_rec_loss.append(train_rec_loss)
        all_t_kl_loss.append(train_kl_loss)
        all_t_loss.append(train_loss)
        all_v_rec_loss.append(valid_rec_loss)
        all_v_kl_loss.append(valid_kl_loss)
        all_v_loss.append(valid_loss)
        mean_t_rec_loss = np.mean(train_rec_loss)
        mean_t_kl_loss = np.mean(train_kl_loss)
        mean_t_loss = np.mean(train_loss)
        mean_v_rec_loss = np.mean(valid_rec_loss)
        mean_v_kl_loss = np.mean(valid_kl_loss)
        mean_v_loss = np.mean(valid_loss)

        # loss_log.set_description_str(f'T_rec: ' + '%.2f'%mean_t_rec_loss +
        #     ' T_kld: ' + '%.2f'%mean_t_kl_loss + ' V_rec: ' +
        #     '%.2f'%mean_v_rec_loss + ' V_kld: ' + '%.2f'%mean_v_kl_loss)
        tqdm.tqdm.write(f'T_rec: {mean_t_rec_loss:.2f} '
                        f'T_kld: {mean_t_kl_loss:.2f} '
                        f'T_ELBO: {mean_t_loss:.2f} '
                        f'V_rec: {mean_v_rec_loss:.2f} '
                        f'V_kld: {mean_v_kl_loss:.2f} '
                        f'V_ELBO: {mean_v_loss:.2f} '
                        f'kld_coef: {kld_coef:.2f}')

        if epoch % 5 == 0:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'vae_dict': vae.state_dict(),
                    'vae_trainer': trainer_vae.state_dict(),
                    'step': step
                }, save_path)

            # NOTE: npz logging still needs fixing; when training is resumed, the file
            # is overwritten with only the losses collected since the resume
            # np.savez_compressed('data/losses_log/losses_wiki2_fixed.npz',
            #                     t_rec=np.array(all_t_rec_loss),
            #                     t_kl=np.array(all_t_kl_loss),
            #                     v_rec=np.array(all_v_rec_loss),
            #                     v_kl=np.array(all_v_kl_loss))

            np.savez_compressed(
                'data/losses_log/losses_gyafc_weightfix3_nodropout_25000crossover_long_0.0005k.npz',
                t_rec=np.array(all_t_rec_loss),
                t_kl=np.array(all_t_kl_loss),
                t_elbo=np.array(all_t_loss),
                v_rec=np.array(all_v_rec_loss),
                v_kl=np.array(all_v_kl_loss),
                v_elbo=np.array(all_v_loss))
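`train_batch` (and with it the exact `kld_coef` schedule) is not included in these examples. KL annealing for text VAEs is typically a monotonically increasing weight on the KL term; the `0.0005k` / `25000crossover` tokens in the npz filename above suggest a logistic schedule with slope 0.0005 and crossover step 25000. A purely illustrative sketch under that assumption (the repository's actual schedule may differ):

import numpy as np

def kld_coef_schedule(step, k=0.0005, x0=25000):
    # Logistic annealing: ramps smoothly from ~0 to ~1, centered at step x0.
    return float(1.0 / (1.0 + np.exp(-k * (step - x0))))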
Example #12
    def init_hidden(self, batch_size):
        h_0 = torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
        c_0 = torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
        self.hidden = (on_cuda(h_0), on_cuda(c_0))