Example #1
def train_ae(whichdecoder, batch, total_loss_ae, start_time, i):
    autoencoder.train()
    optimizer_ae.zero_grad()

    source, target, lengths = batch

    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)
    output = autoencoder(whichdecoder, source, lengths, noise=True)

    flat_output = output.view(-1, ntokens)
    masked_output = flat_output.masked_select(output_mask).view(-1, ntokens)
    loss = criterion_ce(masked_output / args.temp, masked_target)
    loss.backward()

    # `clip_grad_norm` to prevent exploding gradient in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    total_loss_ae += loss.data

    accuracy = None
    if i % args.log_interval == 0 and i > 0:
        probs = F.softmax(masked_output, dim=-1)
        max_vals, max_indices = torch.max(probs, 1)
        accuracy = torch.mean(max_indices.eq(masked_target).float()).data[0]
        cur_loss = total_loss_ae[0] / args.log_interval
        elapsed = time.time() - start_time
        print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
              'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}'.format(
                  epoch, i, len(train1_data),
                  elapsed * 1000 / args.log_interval, cur_loss,
                  math.exp(cur_loss), accuracy))

        with open("{}/log.txt".format(args.outf), 'a') as f:
            f.write('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}\n'.format(
                        epoch, i, len(train1_data),
                        elapsed * 1000 / args.log_interval, cur_loss,
                        math.exp(cur_loss), accuracy))

        total_loss_ae = 0
        start_time = time.time()

    return total_loss_ae, start_time
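The mask/select pattern above (`target.gt(0)` plus `masked_select`) drops padding positions before the cross-entropy loss is computed. A minimal, self-contained sketch of the same idea on toy tensors, using current PyTorch tensor APIs; the shapes and values are illustrative only, not taken from the script:

import torch

ntokens = 5
# fake flattened targets for a batch; 0 is the padding index
target = torch.tensor([3, 1, 0, 4, 0])
# fake decoder logits, one row per target position
output = torch.randn(target.size(0), ntokens)

mask = target.gt(0)                                  # keep non-pad positions
masked_target = target.masked_select(mask)           # shape: (num_non_pad,)
output_mask = mask.unsqueeze(1).expand(-1, ntokens)  # broadcast mask over vocab
masked_output = output.masked_select(output_mask).view(-1, ntokens)
# masked_output and masked_target now align row-for-row and can feed a cross-entropy loss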
Example #2
def train_gan_d_into_ae(whichdecoder, batch):
    autoencoder.train()
    optimizer_ae.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))
    real_hidden = autoencoder(whichdecoder,
                              source,
                              lengths,
                              noise=False,
                              encode_only=True)
    real_hidden.register_hook(grad_hook)
    errD_real = gan_disc(real_hidden)
    errD_real.backward(mone)
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)

    optimizer_ae.step()

    return errD_real
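`grad_hook` is defined elsewhere in the script and is not shown in these examples; its role is to rescale the gradient that flows from the critic back into the encoder codes. A hypothetical hook of that shape (the scaling factor `args.gan_toenc` is an assumed name, not necessarily what the script uses):

def grad_hook(grad):
    # hypothetical: damp the GAN signal before it reaches the encoder parameters
    return grad * args.gan_toenc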
Example #3
def train_gan_d(whichdecoder, batch):
    gan_disc.train()
    optimizer_gan_d.zero_grad()

    # positive samples ----------------------------
    # generate real codes
    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    # batch_size x nhidden
    real_hidden = autoencoder(whichdecoder,
                              source,
                              lengths,
                              noise=False,
                              encode_only=True)

    # loss / backprop
    errD_real = gan_disc(real_hidden)
    errD_real.backward(one)

    # negative samples ----------------------------
    # generate fake codes
    noise = to_gpu(args.cuda, Variable(torch.ones(args.batch_size,
                                                  args.z_size)))
    noise.data.normal_(0, 1)

    # loss / backprop
    fake_hidden = gan_gen(noise)
    errD_fake = gan_disc(fake_hidden.detach())
    errD_fake.backward(mone)

    # gradient penalty
    gradient_penalty = calc_gradient_penalty(gan_disc, real_hidden.data,
                                             fake_hidden.data)
    gradient_penalty.backward()

    optimizer_gan_d.step()
    errD = -(errD_real - errD_fake)

    return errD, errD_real, errD_fake
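The pair of `backward(one)` / `backward(mone)` calls above accumulates the same gradients as building a single scalar objective and calling `backward()` once. A sketch of that equivalent form, assuming `gan_disc` returns a scalar score per call (which is what the `one` / `mone` multipliers imply):

# equivalent single-objective form (sketch only, not from the script)
d_loss = errD_real - errD_fake + gradient_penalty
d_loss.backward()
optimizer_gan_d.step()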
Example #4
def calc_gradient_penalty(netD, real_data, fake_data):
    bsz = real_data.size(0)
    alpha = torch.rand(bsz, 1)
    alpha = alpha.expand(bsz, real_data.size(1))  # only works for 2D XXX
    alpha = to_gpu(args.cuda, Variable(alpha))
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = Variable(interpolates, requires_grad=True)
    disc_interpolates = netD(interpolates)

    grad_output = to_gpu(args.cuda, torch.ones(disc_interpolates.size()))

    gradients = torch.autograd.grad(outputs=disc_interpolates,
                                    inputs=interpolates,
                                    grad_outputs=grad_output,
                                    create_graph=True,
                                    retain_graph=True,
                                    only_inputs=True)[0]
    gradients = gradients.view(gradients.size(0), -1)

    gradient_penalty = (
        (gradients.norm(2, dim=1) - 1)**2).mean() * args.gan_gp_lambda
    return gradient_penalty
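For reference, calc_gradient_penalty implements the standard WGAN-GP penalty: sample a random interpolate between real and fake codes and push the critic's gradient norm there toward 1,

GP = \lambda \, \mathbb{E}_{\hat{x}}\big[(\lVert \nabla_{\hat{x}} D(\hat{x}) \rVert_2 - 1)^2\big], \qquad \hat{x} = \alpha x_{\mathrm{real}} + (1 - \alpha) x_{\mathrm{fake}}, \; \alpha \sim U(0, 1),

with \lambda given by args.gan_gp_lambda.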
Example #5
def train_gan_g():
    gan_gen.train()
    gan_gen.zero_grad()

    noise = to_gpu(args.cuda, Variable(torch.ones(args.batch_size,
                                                  args.z_size)))
    noise.data.normal_(0, 1)
    fake_hidden = gan_gen(noise)
    errG = gan_disc(fake_hidden)
    errG.backward(one)
    optimizer_gan_g.step()

    return errG
Example #6
def classifier_regularize(whichclass, batch):
    autoencoder.train()
    autoencoder.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))
    flippedclass = abs(2 - whichclass)
    labels = to_gpu(args.cuda,
                    Variable(torch.zeros(source.size(0)).fill_(flippedclass)))

    # Train
    code = autoencoder(0, source, lengths, noise=False, encode_only=True)
    code.register_hook(grad_hook_cla)
    scores = classifier(code)
    classify_reg_loss = F.binary_cross_entropy(scores.squeeze(1), labels)
    classify_reg_loss.backward()

    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    return classify_reg_loss
Example #7
def train_classifier(whichclass, batch):
    classifier.train()
    classifier.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    labels = to_gpu(
        args.cuda, Variable(torch.zeros(source.size(0)).fill_(whichclass - 1)))

    # Train
    code = autoencoder(0, source, lengths, noise=False,
                       encode_only=True).detach()
    scores = classifier(code)
    classify_loss = F.binary_cross_entropy(scores.squeeze(1), labels)
    classify_loss.backward()
    optimizer_classify.step()
    classify_loss = classify_loss.cpu().data[0]

    pred = scores.data.round().squeeze(1)
    accuracy = pred.eq(labels.data).float().mean()

    return classify_loss, accuracy
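A hypothetical call pattern for the two classifier routines above; the real training loop lives elsewhere in the script, and `train2_data` plus the iteration order are assumptions (only `train1_data` appears in these examples):

# illustrative only: one classifier update plus one encoder-regularization step per class
for batch1, batch2 in zip(train1_data, train2_data):
    train_classifier(1, batch1)
    train_classifier(2, batch2)
    classifier_regularize(1, batch1)
    classifier_regularize(2, batch2)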
Example #8
def evaluate_autoencoder(whichdecoder, data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths = batch
        source = to_gpu(args.cuda, Variable(source, volatile=True))
        target = to_gpu(args.cuda, Variable(target, volatile=True))

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        hidden = autoencoder(0, source, lengths, noise=False, encode_only=True)

        # output: batch x seq_len x ntokens
        if whichdecoder == 1:
            output = autoencoder(1, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals1, max_indices1 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices1.eq(masked_target).float()).data[0]

            max_values1, max_indices1 = torch.max(output, 2)
            max_indices2 = autoencoder.generate(2, hidden, maxlen=50)
        else:
            output = autoencoder(2, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals2, max_indices2 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices2.eq(masked_target).float()).data[0]

            max_values2, max_indices2 = torch.max(output, 2)
            max_indices1 = autoencoder.generate(1, hidden, maxlen=50)

        total_loss += criterion_ce(masked_output / args.temp,
                                   masked_target).data
        bcnt += 1

        aeoutf_from = "{}/{}_output_decoder_{}_from.txt".format(
            args.outf, epoch, whichdecoder)
        aeoutf_tran = "{}/{}_output_decoder_{}_tran.txt".format(
            args.outf, epoch, whichdecoder)
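        # note: the files are opened in 'w' mode on every batch, so by the end of
        # the loop they only contain sentences from the final batch of data_source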
        with open(aeoutf_from, 'w') as f_from, open(aeoutf_tran,
                                                    'w') as f_trans:
            max_indices1 = \
                max_indices1.view(output.size(0), -1).data.cpu().numpy()
            max_indices2 = \
                max_indices2.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()
            tran_indices = max_indices2 if whichdecoder == 1 else max_indices1
            for t, tran_idx in zip(target, tran_indices):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f_from.write(chars)
                f_from.write("\n")
                # transfer sentence
                chars = " ".join(
                    [corpus.dictionary.idx2word[x] for x in tran_idx])
                f_trans.write(chars)
                f_trans.write("\n")

    return total_loss[0] / len(data_source), all_accuracies / bcnt
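A hypothetical end-of-epoch call to the evaluation routine above; `test1_data` is an assumed name for a held-out data source, mirroring `train1_data`:

test_loss, test_acc = evaluate_autoencoder(1, test1_data, epoch)
print('| end of epoch {:3d} | test loss {:5.2f} | acc {:5.2f}'.format(
    epoch, test_loss, test_acc))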
Example #9


print("Training...")
with open("{}/log.txt".format(args.outf), 'a') as f:
    f.write('Training...\n')

# schedule of increasing GAN training loops
if args.niters_gan_schedule != "":
    gan_schedule = [int(x) for x in args.niters_gan_schedule.split("-")]
else:
    gan_schedule = []
niter_gan = 1

fixed_noise = to_gpu(args.cuda,
                     Variable(torch.ones(args.batch_size, args.z_size)))
fixed_noise.data.normal_(0, 1)
one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1

for epoch in range(1, args.epochs + 1):
    # update gan training schedule
    if epoch in gan_schedule:
        niter_gan += 1
        print("GAN training loop schedule increased to {}".format(niter_gan))
        with open("{}/log.txt".format(args.outf), 'a') as f:
            f.write("GAN training loop schedule increased to {}\n".format(
                niter_gan))

    total_loss_ae1 = 0
    total_loss_ae2 = 0