# One extragradient-style update: an extrapolation (look-ahead) step for the
# discriminator, then gradient computation for the generator at the
# extrapolated point.
#
# NOTE(review): relies on names defined elsewhere in this file: `noise`,
# `dataset`, `criterion`, `dis`, `gen`, `ones`, `zeros`, `dis_optimizer`,
# `_batch_size`, `z_dim`, `use_cuda`.

# --- discriminator: compute grads and take an extrapolation step ----------
noise = autograd.Variable(noise)  # NOTE(review): Variable is a no-op on torch>=0.4; kept for compatibility
real = next(dataset)              # fixed idiom: next(it), not it.__next__()

# Standard two-term discriminator loss: real batch scored against the
# "real" targets, generated batch against the "fake" targets.
loss_real = criterion(dis(real), ones)
fake = gen(noise)
loss_fake = criterion(dis(fake), zeros)

# Placeholder for a gradient penalty (e.g. WGAN-GP); currently disabled (0).
gradient_penalty = 0
loss_d = loss_real + loss_fake + gradient_penalty

# Gradients w.r.t. discriminator parameters only. The redundant parentheses
# around dis.parameters() were removed — they did not form a tuple.
# create_graph=True keeps the graph alive; presumably needed when the
# gradient penalty term is active — TODO confirm it is required when
# gradient_penalty == 0, as it costs memory.
grad_d = torch.autograd.grad(
    loss_d, inputs=dis.parameters(), create_graph=True)
for p, g in zip(dis.parameters(), grad_d):
    p.grad = g  # manually install grads so the optimizer can consume them
dis_optimizer.extrapolation()  # extragradient look-ahead, not a plain .step()

# --- generator: fresh batch and targets, grads at the extrapolated D ------
noise = torch.randn(_batch_size, z_dim)
ones = Variable(torch.ones(_batch_size))
zeros = Variable(torch.zeros(_batch_size))
if use_cuda:
    noise = noise.cuda()
    ones = ones.cuda()
    zeros = zeros.cuda()
noise = autograd.Variable(noise)

fake = gen(noise)
# Non-saturating generator loss: push D(fake) toward the "real" label.
loss_g = criterion(dis(fake), ones)
grad_g = torch.autograd.grad(
    loss_g, inputs=gen.parameters(), create_graph=True)
for p, g in zip(gen.parameters(), grad_g):
    p.grad = g