# Imports assumed by the snippets below; Generator, Discriminator, visualize,
# args and device are defined elsewhere in the project.
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as vutils
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm


def train(dataset: Dataset):
    writer = SummaryWriter(log_dir="./log" + '/' + args.type + '_' + args.opt +
                           '_lr' + str(args.lr))
    train_set = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size)
    G = Generator(args.noise_size).to(device)
    D = Discriminator(args.type).to(device)
    # optimizer_G = torch.optim.Adam(G.parameters(), lr=args.lr)
    # optimizer_D = torch.optim.Adam(D.parameters(), lr=args.lr)
    if args.opt == 'rms':
        optimizer_G = torch.optim.RMSprop(G.parameters(), lr=args.lr)
        optimizer_D = torch.optim.RMSprop(D.parameters(), lr=args.lr)
    else:  # sgd
        optimizer_G = torch.optim.SGD(G.parameters(), lr=args.lr)
        optimizer_D = torch.optim.SGD(D.parameters(), lr=args.lr)

    for epoch in range(args.epochs):
        G.train()
        D.train()
        loss_G_avg = 0.0
        loss_D_avg = 0.0
        for real_data in train_set:
            # Update D
            real_data = real_data.to(device)  # real samples
            noise = torch.randn(real_data.size(0), args.noise_size).to(device)  # random noise
            fake_data = G(noise)  # generated (fake) samples
            # Maximize log(D(x)) + log(1 - D(G(z))); fake_data is detached so this
            # backward pass does not propagate gradients into G.
            # Use real_data.size(0) rather than args.batch_size so the last,
            # possibly smaller, batch does not break the broadcast.
            if args.type == 'wgan':
                loss_D = -(D(real_data) - D(fake_data.detach())).mean()
            else:
                loss_D = -(torch.log(D(real_data)) + torch.log(
                    torch.ones(real_data.size(0)).to(device) -
                    D(fake_data.detach()))).mean()
            optimizer_D.zero_grad()
            loss_D.backward()
            optimizer_D.step()
            loss_D_avg += loss_D.item()
            # For WGAN, clip the critic's weights after each update
            if args.type == 'wgan':
                for p in D.parameters():
                    p.data.clamp_(-args.wgan_c, args.wgan_c)
            D.zero_grad()

            # Update G
            noise = torch.randn(real_data.size(0), args.noise_size).to(device)  # random noise
            fake_data = G(noise)  # generated (fake) samples
            if args.type == 'wgan':
                loss_G = -D(fake_data).mean()
            else:
                # Minimize log(1 - D(G(z)))
                loss_G = (torch.log(
                    torch.ones(real_data.size(0)).to(device) - D(fake_data))).mean()
            optimizer_G.zero_grad()
            loss_G.backward()
            optimizer_G.step()
            loss_G_avg += loss_G.item()
            G.zero_grad()

        loss_G_avg /= len(train_set)
        loss_D_avg /= len(train_set)
        print('Epoch {} loss_G: {:.6f} loss_D: {:.6f}'.format(
            epoch + 1, loss_G_avg, loss_D_avg))
        writer.add_scalar('train/G_loss', loss_G_avg, epoch + 1, walltime=epoch + 1)
        writer.add_scalar('train/D_loss', loss_D_avg, epoch + 1, walltime=epoch + 1)
        writer.flush()
        if (epoch + 1) % 10 == 0:
            visualize(G, D, dataset.get_numpy_data(), epoch + 1,
                      args.type + '/' + args.opt + '_lr' + str(args.lr))
    writer.close()
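# A minimal sketch of how train() might be driven from the command line. The flag
# names below mirror the attributes read from `args` above, but the defaults and the
# dataset class are guesses, not the project's actual CLI.
def example_train_entry_point():
    import argparse
    global args, device  # train() reads these as module-level globals

    parser = argparse.ArgumentParser()
    parser.add_argument('--type', default='gan', choices=['gan', 'wgan'])
    parser.add_argument('--opt', default='rms', choices=['rms', 'sgd'])
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--noise_size', type=int, default=2)
    parser.add_argument('--wgan_c', type=float, default=0.01)  # WGAN weight-clipping bound
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # MyPointDataset is a hypothetical Dataset subclass exposing get_numpy_data(),
    # standing in for whatever dataset the project actually uses.
    train(MyPointDataset())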
# Assumed to be a method of a trainer class defined elsewhere in the project.
def train_GAN(self, data_loader_train, device):
    lr = 0.0002
    netG = Generator().to(device)
    netD = Discriminator().to(device)
    # Initialize BCELoss function
    criterion = nn.BCELoss()
    # Create batch of latent vectors that we will use to visualize
    # the progression of the generator
    nz = 100
    fixed_noise = torch.randn(64, nz, 1, 1, device=device)
    beta1 = 0.5
    # Establish convention for real and fake labels during training
    real_label = 1.
    fake_label = 0.
    # Setup Adam optimizers for both G and D
    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

    # Training Loop
    # Lists to keep track of progress
    img_list = []
    G_losses = []
    D_losses = []
    iters = 0
    num_epochs = 150

    print("Starting Training Loop...")
    # For each epoch
    for epoch in range(num_epochs):
        # For each batch in the dataloader
        with tqdm(total=len(data_loader_train)) as t:
            for i, data in enumerate(data_loader_train, 0):
                ############################
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                ###########################
                ## Train with all-real batch
                netD.zero_grad()
                # Format batch
                real_cpu = data[0].to(device)
                b_size = real_cpu.size(0)
                label = torch.full((b_size, ), real_label,
                                   dtype=torch.float, device=device)
                # Forward pass real batch through D
                output = netD(real_cpu).view(-1)
                # Calculate loss on all-real batch
                errD_real = criterion(output, label)
                # Calculate gradients for D in backward pass
                errD_real.backward()
                D_x = output.mean().item()

                ## Train with all-fake batch
                # Generate batch of latent vectors
                noise = torch.randn(b_size, nz, 1, 1, device=device)
                # Generate fake image batch with G
                fake = netG(noise)
                label.fill_(fake_label)
                # Classify all fake batch with D
                output = netD(fake.detach()).view(-1)
                # Calculate D's loss on the all-fake batch
                errD_fake = criterion(output, label)
                # Calculate the gradients for this batch
                errD_fake.backward()
                D_G_z1 = output.mean().item()
                # Add the gradients from the all-real and all-fake batches
                errD = errD_real + errD_fake
                # Update D
                optimizerD.step()

                ############################
                # (2) Update G network: maximize log(D(G(z)))
                ###########################
                netG.zero_grad()
                label.fill_(real_label)  # fake labels are real for generator cost
                # Since we just updated D, perform another forward pass of all-fake batch through D
                output = netD(fake).view(-1)
                # Calculate G's loss based on this output
                errG = criterion(output, label)
                # Calculate gradients for G
                errG.backward()
                D_G_z2 = output.mean().item()
                # Update G
                optimizerG.step()

                # Output training stats
                t.set_postfix(epoch='{0}'.format(epoch),
                              loss_g='{:05.3f}'.format(errG.item()),
                              loss_d='{:05.3f}'.format(errD.item()))
                t.update()
                # if i % 50 == 0:
                #     print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                #           % (epoch, num_epochs, i, len(data_loader_train),
                #              errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

                # Save Losses for plotting later
                G_losses.append(errG.item())
                D_losses.append(errD.item())

                # Check how the generator is doing by saving G's output on fixed_noise
                if (iters % 10 == 0) or ((epoch == num_epochs - 1) and
                                         (i == len(data_loader_train) - 1)):
                    with torch.no_grad():
                        fake = netG(fixed_noise).detach().cpu()
                    img_list.append(
                        vutils.make_grid(fake, padding=2, normalize=True))
                iters += 1
    return G_losses, D_losses, img_list
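# A minimal sketch of how the values returned by train_GAN() might be consumed,
# assuming matplotlib is available; the file names below are illustrative and
# `trainer` stands in for whatever object actually owns train_GAN().
def plot_gan_progress(G_losses, D_losses, img_list):
    import matplotlib.pyplot as plt
    import numpy as np

    # Plot generator and discriminator losses over training iterations
    plt.figure(figsize=(10, 5))
    plt.title("Generator and Discriminator Loss During Training")
    plt.plot(G_losses, label="G")
    plt.plot(D_losses, label="D")
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.legend()
    plt.savefig("gan_losses.png")

    # Save the most recent grid of samples generated from fixed_noise
    vutils.save_image(img_list[-1], "gan_samples.png")
    # Or render it with matplotlib: make_grid returns C x H x W, imshow wants H x W x C
    plt.figure(figsize=(8, 8))
    plt.axis("off")
    plt.imshow(np.transpose(img_list[-1].numpy(), (1, 2, 0)))
    plt.savefig("gan_samples_grid.png")

# Example call (trainer is a hypothetical instance of the class owning train_GAN):
# G_losses, D_losses, img_list = trainer.train_GAN(data_loader_train, device)
# plot_gan_progress(G_losses, D_losses, img_list)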