import numpy as np
import torch
from torch.autograd import Variable
from tqdm import tqdm

G_losses = []
D_losses = []
mmd_list = []
series_list = np.zeros((1, seq_length))

for n in tqdm(range(num_epoch)):
    for n_batch, sample_data in enumerate(data_loader):
        for d in range(D_rounds):
            ### TRAIN DISCRIMINATOR ON FAKE DATA
            discriminator.zero_grad()
            h_d = discriminator.init_hidden()
            h_g = generator.init_hidden()

            # Generate the noise input for the generator
            noise_sample = Variable(noise(len(sample_data), seq_length))

            # Use this line instead if the generator also returns hidden states:
            # dis_fake_data, (h_g_n, c_g_n) = generator.forward(noise_sample, h_g)
            # detach() keeps generator gradients out of the discriminator update
            dis_fake_data = generator.forward(noise_sample, h_g).detach()

            y_pred_fake, (h_d_n, c_d_n) = discriminator(dis_fake_data, h_d)

            loss_fake = loss(y_pred_fake, torch.zeros([len(sample_data), 1]).cuda())
            loss_fake.backward()

            # Train discriminator on real data
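The loop above breaks off at the real-data pass. As a rough guide only, the remaining discriminator step and the generator rounds in this kind of loop usually look like the sketch below; `D_optimizer`, `G_optimizer`, and `G_rounds` are assumed names (mirroring the `D_rounds` convention above), and `loss` is assumed to be a binary criterion such as `nn.BCELoss`:

            # TRAIN DISCRIMINATOR ON REAL DATA (sketch, not the original code)
            real_data = Variable(sample_data.float()).cuda()
            y_pred_real, (h_d_n, c_d_n) = discriminator(real_data, h_d)
            loss_real = loss(y_pred_real, torch.ones([len(sample_data), 1]).cuda())
            loss_real.backward()
            D_optimizer.step()  # assumed optimizer name

        for g in range(G_rounds):  # G_rounds: assumed counterpart of D_rounds
            ### TRAIN GENERATOR: push D's output on fake data towards "real" (1)
            generator.zero_grad()
            h_g = generator.init_hidden()
            h_d = discriminator.init_hidden()
            noise_sample = Variable(noise(len(sample_data), seq_length))
            gen_fake_data = generator.forward(noise_sample, h_g)  # no detach: G needs gradients
            y_pred_gen, _ = discriminator(gen_fake_data, h_d)
            loss_gen = loss(y_pred_gen, torch.ones([len(sample_data), 1]).cuda())
            loss_gen.backward()
            G_optimizer.step()

Note that the fake data is not detached here: the whole point of the generator update is to let gradients flow back through the discriminator into the generator.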
from itertools import chain
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# The encoder Q and decoder/generator P share one optimizer;
# the discriminator D gets its own.
G_optimizer = optim.Adam(chain(Q.parameters(), P.parameters()),
                         lr=5e-5, betas=(0.5, 0.999))
D_optimizer = optim.Adam(D.parameters(), lr=5e-5, betas=(0.5, 0.999))
criterion = nn.CrossEntropyLoss()

# Sample 100k random sentence pairs up front
training_pairs = [
    variables_from_pair(input_lang, output_lang, random.choice(pairs))
    for _ in range(int(1e5))
]

h_Q = Q.init_hidden(batch)
h_P = P.init_hidden(batch)
h_D_enc = D.init_hidden(batch)
h_D_gen = D.init_hidden(batch)

for epoch in range(int(1e5)):
    x = training_pairs[epoch][0]
    x = x.unsqueeze(0).cpu()

    reset_grad([Q, P, D])

    # One-hot encode the input sentence; z is a zero-initialised latent
    # sequence with the same length as x
    x = to_onehot(x.squeeze()).unsqueeze(0)
    z = Variable(torch.zeros(batch, x.size(1), hidden_size))

    # Encode the real sentence with Q and decode from the latent code with P
    E_x, h_E = Q(x.cpu(), h_Q.cpu())
    G_z, h_G = P(z.cpu(), h_P.cpu())
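The snippet relies on two small helpers defined elsewhere, `reset_grad` and `to_onehot`. A minimal sketch of what they could look like, assuming the vocabulary size is exposed as `n_words` on the `Lang` objects (as in the PyTorch seq2seq tutorial these names come from); the exact originals may differ:

vocab_size = input_lang.n_words  # assumed: Lang objects expose n_words

def reset_grad(models):
    # Zero the accumulated gradients of every model before a new update
    for m in models:
        m.zero_grad()

def to_onehot(indices, num_classes=None):
    # One-hot encode a 1-D LongTensor of token ids into (seq_len, num_classes)
    if num_classes is None:
        num_classes = vocab_size
    onehot = torch.zeros(indices.size(0), num_classes)
    onehot[torch.arange(indices.size(0)), indices] = 1.0
    return onehot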