def run_GAN(n_epoch=2, batch_size=50, use_gpu=False, dis_lr=1e-4, gen_lr=1e-3,
            n_update_dis=1, n_update_gen=1, update_max=None):
    # loading data
    trainloader, testloader = load_dataset(batch_size=batch_size)

    # initialize models
    Dis_model = Discriminator()
    Gen_model = Generator()
    if use_gpu:
        Dis_model = Dis_model.cuda()
        Gen_model = Gen_model.cuda()

    # assign loss function and optimizer to D and G
    D_criterion = torch.nn.BCELoss()
    D_optimizer = optim.SGD(Dis_model.parameters(), lr=dis_lr, momentum=0.9)
    G_criterion = torch.nn.BCELoss()
    G_optimizer = optim.SGD(Gen_model.parameters(), lr=gen_lr, momentum=0.9)

    train_GAN(Dis_model, Gen_model, D_criterion, G_criterion, D_optimizer,
              G_optimizer, trainloader, n_epoch, batch_size, n_update_dis,
              n_update_gen, update_max=update_max)
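# load_dataset is not shown in this snippet. Below is only a minimal sketch that
# satisfies the call above; the dataset choice (MNIST via torchvision), the
# normalization and the "./data" root are assumptions, not part of the original code.
import torch
import torchvision
import torchvision.transforms as transforms


def load_dataset(batch_size):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),  # scale pixels to [-1, 1]
    ])
    train_set = torchvision.datasets.MNIST(root="./data", train=True,
                                           download=True, transform=transform)
    test_set = torchvision.datasets.MNIST(root="./data", train=False,
                                          download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                              shuffle=True)
    testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size,
                                             shuffle=False)
    return trainloader, testloader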
class GAN_Manager:
    def __init__(self, discriminator_in_nodes, generator_out_nodes, ps_model,
                 ps_model_type, device):
        self.discriminator = Discriminator(in_nodes=discriminator_in_nodes).to(device)
        self.discriminator.apply(self.__weights_init)
        self.generator = Generator(out_nodes=generator_out_nodes).to(device)
        self.generator.apply(self.__weights_init)
        self.loss = nn.BCELoss()
        self.ps_model = ps_model
        self.ps_model_type = ps_model_type

    def get_generator(self):
        return self.generator

    def train_GAN(self, train_parameters, device):
        epochs = train_parameters["epochs"]
        train_set = train_parameters["train_set"]
        lr = train_parameters["lr"]
        shuffle = train_parameters["shuffle"]
        batch_size = train_parameters["batch_size"]
        BETA = train_parameters["BETA"]

        data_loader_train = torch.utils.data.DataLoader(train_set,
                                                        batch_size=batch_size,
                                                        shuffle=shuffle)

        g_optimizer = optim.Adam(self.generator.parameters(), lr=lr)
        d_optimizer = optim.Adam(self.discriminator.parameters(), lr=lr)

        for epoch in range(epochs):
            epoch += 1
            total_G_loss = 0
            total_D_loss = 0
            total_prop_loss = 0
            total_d_pred_real = 0
            total_d_pred_fake = 0
            for batch in data_loader_train:
                covariates_X_control, ps_score_control, y_f = batch
                covariates_X_control = covariates_X_control.to(device)
                covariates_X_control_size = covariates_X_control.size(0)
                ps_score_control = ps_score_control.squeeze().to(device)

                # 1. Train Discriminator
                real_data = covariates_X_control
                # Generate fake data
                fake_data = self.generator(
                    self.__noise(covariates_X_control_size)).detach()
                # Train D
                d_error, d_pred_real, d_pred_fake = self.__train_discriminator(
                    d_optimizer, real_data, fake_data)
                total_D_loss += d_error
                total_d_pred_real += d_pred_real
                total_d_pred_fake += d_pred_fake

                # 2. Train Generator
                # Generate fake data
                fake_data = self.generator(self.__noise(covariates_X_control_size))
                # Train G
                error_g, prop_loss = self.__train_generator(
                    g_optimizer, fake_data, BETA, ps_score_control, device)
                total_G_loss += error_g
                total_prop_loss += prop_loss

            if epoch % 1000 == 0:
                print("Epoch: {0}, D_loss: {1}, D_score_real: {2}, D_score_Fake: {3}, "
                      "G_loss: {4}, Prop_loss: {5}".format(
                          epoch, total_D_loss, total_d_pred_real,
                          total_d_pred_fake, total_G_loss, total_prop_loss))

    def eval_GAN(self, eval_size, device):
        treated_g = self.generator(self.__noise(eval_size))
        ps_score_list_treated = self.__get_propensity_score(treated_g, device)
        return treated_g, ps_score_list_treated

    def __cal_propensity_loss(self, ps_score_control, gen_treated, device):
        # squared difference between the propensity scores of the generated
        # (treated) samples and the real control samples
        ps_score_list_treated = self.__get_propensity_score(gen_treated, device)
        ps_score_treated = torch.tensor(ps_score_list_treated).to(device)
        ps_score_control = ps_score_control.to(device)
        prop_loss = torch.sum(
            (torch.sub(ps_score_treated.float(), ps_score_control.float())) ** 2)
        return prop_loss

    def __get_propensity_score(self, gen_treated, device):
        if self.ps_model_type == Constants.PS_MODEL_NN:
            return self.__get_propensity_score_NN(gen_treated, device)
        else:
            return self.__get_propensity_score_LR(gen_treated)

    def __get_propensity_score_LR(self, gen_treated):
        ps_score_list_treated = self.ps_model.predict_proba(
            gen_treated.cpu().detach().numpy())[:, -1].tolist()
        return ps_score_list_treated

    def __get_propensity_score_NN(self, gen_treated, device):
        # Assign Treated
        Y = np.ones(gen_treated.size(0))
        eval_set = Utils.convert_to_tensor(gen_treated.cpu().detach().numpy(), Y)
        ps_eval_parameters_NN = {"eval_set": eval_set}
        ps_score_list_treated = self.ps_model.eval(ps_eval_parameters_NN, device,
                                                   eval_from_GAN=True)
        return ps_score_list_treated
    @staticmethod
    def __noise(_size):
        n = Variable(torch.normal(mean=0, std=1,
                                  size=(_size, Constants.GAN_GENERATOR_IN_NODES)))
        # print(n.size())
        if torch.cuda.is_available():
            return n.cuda()
        return n

    @staticmethod
    def __weights_init(m):
        if type(m) == nn.Linear:
            nn.init.xavier_uniform_(m.weight)
            torch.nn.init.zeros_(m.bias)

    @staticmethod
    def __real_data_target(size):
        data = Variable(torch.ones(size, 1))
        if torch.cuda.is_available():
            return data.cuda()
        return data

    @staticmethod
    def __fake_data_target(size):
        data = Variable(torch.zeros(size, 1))
        if torch.cuda.is_available():
            return data.cuda()
        return data

    def __train_discriminator(self, optimizer, real_data, fake_data):
        # Reset gradients
        optimizer.zero_grad()

        # 1.1 Train on Real Data
        prediction_real = self.discriminator(real_data)
        real_score = torch.mean(prediction_real).item()
        # Calculate error and back propagate
        error_real = self.loss(prediction_real,
                               self.__real_data_target(real_data.size(0)))
        error_real.backward()

        # 1.2 Train on Fake Data
        prediction_fake = self.discriminator(fake_data)
        fake_score = torch.mean(prediction_fake).item()
        # Calculate error and backpropagate
        error_fake = self.loss(prediction_fake,
                               self.__fake_data_target(real_data.size(0)))
        error_fake.backward()

        # 1.3 Update weights with gradients
        optimizer.step()
        loss_D = error_real + error_fake

        # Return error
        return loss_D.item(), real_score, fake_score

    def __train_generator(self, optimizer, fake_data, BETA, ps_score_control, device):
        # 2. Train Generator
        # Reset gradients
        optimizer.zero_grad()
        # Sample noise and generate fake data
        predicted_D = self.discriminator(fake_data)
        # Calculate error and back propagate
        ps_score_control = ps_score_control.to(device)
        fake_data = fake_data.to(device)
        error_g = self.loss(predicted_D,
                            self.__real_data_target(predicted_D.size(0)))
        prop_loss = self.__cal_propensity_loss(ps_score_control, fake_data, device)
        error = error_g + (BETA * prop_loss)
        error.backward()
        # Update weights with gradients
        optimizer.step()
        # Return error
        return error_g.item(), prop_loss.item()
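# A hedged usage sketch for GAN_Manager above. The node counts, propensity model
# (ps_model), control-group dataset (control_set) and hyper-parameter values are
# placeholders, not taken from the original code; only the train_parameters keys
# mirror what train_GAN actually reads.
gan = GAN_Manager(discriminator_in_nodes=25, generator_out_nodes=25,
                  ps_model=ps_model, ps_model_type=Constants.PS_MODEL_NN,
                  device=device)
train_parameters = {
    "epochs": 10000,
    "train_set": control_set,
    "lr": 1e-4,
    "shuffle": True,
    "batch_size": 64,
    "BETA": 1,
}
gan.train_GAN(train_parameters, device)
gen_treated, ps_scores_treated = gan.eval_GAN(eval_size=64, device=device)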
ip = CosLinear(in_features=99, out_features=args.number_of_class)
#############################################################################################
discriminator_activation_function = torch.relu
d_hidden_size = 1024
d_output_size = 1
# d_learning_rate = 2e-4
sgd_momentum = 0.9

D = Discriminator(
    input_size=128,
    hidden_size=d_hidden_size,
    output_size=d_output_size,
    f=discriminator_activation_function,
).cuda()
D = torch.nn.DataParallel(D, device_ids=gpu_ids)
d_optimizer = torch.optim.SGD(
    D.parameters(),
    lr=config.d_learning_rate,
    momentum=sgd_momentum,
    weight_decay=5e-4,
)
##############################################################################################
# ip = softmaxLinear(in_features=512, out_features=args.number_of_class)
# fr_loss_sup = RingLoss(loss_weight=0.01)
fr_loss_sup = CenterLoss(num_classes=args.number_of_class, dim_hidden=99)
criterion = mixed_loss_FR_batch(
    fr_ip=ip,
    fr_loss=CosLoss(num_cls=args.number_of_class, alpha=0.4),
    fr_loss_sup=fr_loss_sup,
def train():
    input_channels = 3
    lr = 0.01
    momentum = 0.5
    epochs = 200
    lambda_pixel = 300

    # gen = Gen(100)
    gen_model = Generator(input_channels, input_channels)
    disc_model = Discriminator(input_channels, 2)

    # optimizer_G = optim.Adam(gen_model.parameters(), lr=lr)
    # optimizer_D = optim.Adam(disc_model.parameters(), lr=lr)
    optimizer_G = optim.SGD(gen_model.parameters(), lr=lr, momentum=momentum)
    optimizer_D = optim.SGD(disc_model.parameters(), lr=lr, momentum=momentum)

    pixel_loss = nn.L1Loss()
    disc_loss = nn.CrossEntropyLoss()

    if use_cuda:
        gen_model = gen_model.cuda()
        disc_model = disc_model.cuda()
        pixel_loss = pixel_loss.cuda()
        disc_loss = disc_loss.cuda()

    # prepare real/cartoon face lists
    real_lines = open('real_face.txt', 'r').readlines()[:1000]
    cartoon_lines = open('cartoon_face.txt', 'r').readlines()[:1000]
    train_loader = GenertorData(real_lines, cartoon_lines, batch_size, input_size)

    epoch_g_loss = []
    epoch_d_loss = []
    fw_log = open('log.txt', 'w')

    for epoch in range(epochs):
        train_loss_G = 0
        train_loss_D = 0
        # for batch_idx, (data, target) in enumerate(train_loader):
        for batch_idx in range(len(train_loader)):
            data, target = train_loader[batch_idx]
            data, target = data.to(device), target.to(device)
            real_target, fake_target = generate_label(data.size(0))

            # train generator
            optimizer_G.zero_grad()
            fake = gen_model(data)
            fake_pred = disc_model(fake)
            # adversarial term: push the discriminator to classify G's output as real
            loss_G_adv = disc_loss(fake_pred, real_target)
            loss_G = loss_G_adv + lambda_pixel * pixel_loss(fake, target)
            loss_G.backward()
            optimizer_G.step()
            train_loss_G += loss_G.item()

            # train discriminator (only on a periodically selected batch)
            if (batch_idx / 50) == epoch % (len(train_loader) / 50):
                # if loss_D > 0.05:
                optimizer_D.zero_grad()
                fake = gen_model(data).detach()  # D update should not backprop into G
                real_pred = disc_model(target)
                fake_pred = disc_model(fake)
                disc_loss_real = disc_loss(real_pred, real_target)
                disc_loss_fake = disc_loss(fake_pred, fake_target)
                loss_D = disc_loss_real + disc_loss_fake
                loss_D.backward()
                optimizer_D.step()
                train_loss_D = loss_D.item()

            if batch_idx % 50 == 0:
                print("GAN train Epochs %d %d/%d G_loss %.6f D_loss %.6f" %
                      (epoch, batch_idx, len(train_loader), loss_G.item(),
                       train_loss_D))

        epoch_g_loss.append(loss_G.item())
        epoch_d_loss.append(train_loss_D)
        torch.save(
            gen_model.state_dict(),
            "model/gen_cartoon_model_epoch_" + str(epoch) + '_gloss' +
            str(loss_G.item())[:6] + '_d_loss' + str(train_loss_D)[:6] + ".pt")
        fw_log.write(str(epoch) + ' ' + str(epoch_g_loss) + '\n')
        fw_log.write(str(epoch) + ' ' + str(epoch_d_loss) + '\n')

    draw(epoch_g_loss, epoch_d_loss)
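# generate_label is not defined in this snippet. Because the discriminator is
# built with two output classes and trained with nn.CrossEntropyLoss, it
# presumably returns integer class targets; this is only a minimal sketch, and
# the mapping real -> class 1, fake -> class 0 is an assumption.
def generate_label(batch_size):
    real_target = torch.ones(batch_size, dtype=torch.long).to(device)
    fake_target = torch.zeros(batch_size, dtype=torch.long).to(device)
    return real_target, fake_target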
netG_B2A.apply(weights_init_normal)
netD_A.apply(weights_init_normal)
netD_B.apply(weights_init_normal)

# Losses
criterion_GAN = torch.nn.MSELoss()
criterion_cycle = torch.nn.L1Loss()
criterion_identity = torch.nn.L1Loss()
criterion_BCE = torch.nn.BCEWithLogitsLoss()

# Optimizers & LR schedulers
optimizer_G = torch.optim.Adam(itertools.chain(netG_A2B.parameters(),
                                               netG_B2A.parameters()),
                               lr=opt.lr, betas=(0.5, 0.999))
optimizer_D_A = torch.optim.Adam(netD_A.parameters(), lr=opt.lr, betas=(0.5, 0.999))
optimizer_D_B = torch.optim.Adam(netD_B.parameters(), lr=opt.lr, betas=(0.5, 0.999))

lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(
    optimizer_G,
    lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
lr_scheduler_D_A = torch.optim.lr_scheduler.LambdaLR(
    optimizer_D_A,
    lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
lr_scheduler_D_B = torch.optim.lr_scheduler.LambdaLR(
    optimizer_D_B,
    lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
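# The LambdaLR helper used for the schedulers above is not shown. A sketch of a
# common implementation, assuming the usual CycleGAN recipe of a constant
# learning rate until decay_start_epoch followed by a linear decay to zero:
class LambdaLR:
    def __init__(self, n_epochs, offset, decay_start_epoch):
        assert (n_epochs - decay_start_epoch) > 0, \
            "Decay must start before the training ends"
        self.n_epochs = n_epochs
        self.offset = offset
        self.decay_start_epoch = decay_start_epoch

    def step(self, epoch):
        # multiplier applied to the base lr: 1.0 until decay_start_epoch,
        # then linearly down to 0.0 at n_epochs
        return 1.0 - max(0, epoch + self.offset - self.decay_start_epoch) / \
            (self.n_epochs - self.decay_start_epoch)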
########################################################## GAN ################################################################
if config.re_type_gan:
    lr_p = pow(20, 1.0 / lr_change1)
    lr_d = lambda x_step: (lr_p ** x_step) / (int(x_step > lr_change1) * 4 + 1) / (
        int(x_step > lr_change2) * 4 + 1)
    discriminator_activation_function = torch.relu
    d_hidden_size = 1024
    d_output_size = 1
    sgd_momentum = 0.9

    D = Discriminator(input_size=99,
                      hidden_size=d_hidden_size,
                      output_size=d_output_size,
                      f=discriminator_activation_function).cuda()
    D = torch.nn.DataParallel(D, device_ids=gpu_ids)
    d_optimizer = torch.optim.Adam(D.parameters(),
                                   lr=config.d_learning_rate,
                                   weight_decay=5e-4)
    # scheduler_d_optimizer = optim.lr_scheduler.MultiStepLR(d_optimizer, milestones=[lr_change1, lr_change2], gamma=0.2)
    scheduler_d_optimizer = optim.lr_scheduler.LambdaLR(d_optimizer, lr_lambda=lr_d)
    criterion_B = torch.nn.BCELoss().cuda()
    criterion_B = torch.nn.DataParallel(criterion_B, device_ids=gpu_ids)

############################################################################## log ########################################
iter_num = 0
train_loss = 0
correct = 0
total = 0
eval_loss = 0
eval_loss_v = 0
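# The lr_d lambda above produces a multiplier that grows geometrically (reaching
# 20x at step lr_change1) and is additionally divided by 5 once x_step exceeds
# lr_change1 and by another 5 once it exceeds lr_change2. A standalone way to
# sanity-check the shape of the schedule; the lr_change values here are
# placeholders, not the ones used in training:
_lr_change1, _lr_change2 = 30, 60
_lr_p = pow(20, 1.0 / _lr_change1)
_lr_d = lambda s: (_lr_p ** s) / (int(s > _lr_change1) * 4 + 1) / (int(s > _lr_change2) * 4 + 1)
print([round(_lr_d(s), 2) for s in (0, _lr_change1, _lr_change1 + 1, _lr_change2 + 1)])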
def train(dataset: Dataset):
    writer = SummaryWriter(log_dir="./log" + '/' + args.type + '_' + args.opt +
                           '_lr' + str(args.lr))
    train_set = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size)
    G = Generator(args.noise_size).to(device)
    D = Discriminator(args.type).to(device)

    # optimizer_G = torch.optim.Adam(G.parameters(), lr=args.lr)
    # optimizer_D = torch.optim.Adam(D.parameters(), lr=args.lr)
    if args.opt == 'rms':
        optimizer_G = torch.optim.RMSprop(G.parameters(), lr=args.lr)
        optimizer_D = torch.optim.RMSprop(D.parameters(), lr=args.lr)
    else:  # sgd
        optimizer_G = torch.optim.SGD(G.parameters(), lr=args.lr)
        optimizer_D = torch.optim.SGD(D.parameters(), lr=args.lr)

    for epoch in range(args.epochs):
        G.train()
        D.train()
        loss_G_avg = 0.0
        loss_D_avg = 0.0
        for real_data in train_set:
            # update D
            real_data = real_data.to(device)  # real data
            noise = torch.randn(real_data.size(0), args.noise_size).to(device)  # random noise
            fake_data = G(noise).to(device)  # generated (fake) data
            # log(D(x)) + log(1 - D(G(z))); fake_data is detached here so the
            # backward pass does not reach G
            if args.type == 'wgan':
                loss_D = -(D(real_data) - D(fake_data.detach())).mean()
            else:
                loss_D = -(torch.log(D(real_data)) +
                           torch.log(torch.ones(args.batch_size).to(device) -
                                     D(fake_data.detach()))).mean()
            optimizer_D.zero_grad()
            loss_D.backward()
            optimizer_D.step()
            loss_D_avg += loss_D.item()
            # for WGAN, clip the critic's parameters
            if args.type == 'wgan':
                for p in D.parameters():
                    p.data.clamp_(-args.wgan_c, args.wgan_c)
            D.zero_grad()

            # update G
            noise = torch.randn(real_data.size(0), args.noise_size).to(device)  # random noise
            fake_data = G(noise).to(device)  # generated (fake) data
            if args.type == 'wgan':
                loss_G = -D(fake_data).mean()
            else:
                loss_G = (torch.log(torch.ones(args.batch_size).to(device) -
                                    D(fake_data))).mean()  # log(1 - D(G(z)))
            optimizer_G.zero_grad()
            loss_G.backward()
            optimizer_G.step()
            loss_G_avg += loss_G.item()
            G.zero_grad()

        loss_G_avg /= len(train_set)
        loss_D_avg /= len(train_set)
        print('Epoch {} loss_G: {:.6f} loss_D: {:.6f}'.format(
            epoch + 1, loss_G_avg, loss_D_avg))
        writer.add_scalar('train/G_loss', loss_G_avg, epoch + 1, walltime=epoch + 1)
        writer.add_scalar('train/D_loss', loss_D_avg, epoch + 1, walltime=epoch + 1)
        writer.flush()
        if (epoch + 1) % 10 == 0:
            visualize(G, D, dataset.get_numpy_data(), epoch + 1,
                      args.type + '/' + args.opt + '_lr' + str(args.lr))
    writer.close()
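# Generator and Discriminator are not shown above. A minimal sketch consistent
# with the calls Generator(args.noise_size) and Discriminator(args.type): the
# only reason D is given the GAN type is that a WGAN critic outputs an unbounded
# score (no sigmoid), while the vanilla GAN discriminator outputs a probability.
# The layer sizes (2-D toy data, 64 hidden units) are assumptions.
import torch.nn as nn


class Generator(nn.Module):
    def __init__(self, noise_size, data_dim=2, hidden=64):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(noise_size, hidden), nn.ReLU(),
                                 nn.Linear(hidden, data_dim))

    def forward(self, z):
        return self.net(z)


class Discriminator(nn.Module):
    def __init__(self, gan_type, data_dim=2, hidden=64):
        super().__init__()
        layers = [nn.Linear(data_dim, hidden), nn.ReLU(), nn.Linear(hidden, 1)]
        if gan_type != 'wgan':  # vanilla GAN: output a probability
            layers.append(nn.Sigmoid())
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x).squeeze(-1)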
def train_GAN(self, data_loader_train, device):
    lr = 0.0002
    netG = Generator().to(device)
    netD = Discriminator().to(device)

    # Initialize BCELoss function
    criterion = nn.BCELoss()

    # Create batch of latent vectors that we will use to visualize
    # the progression of the generator
    nz = 100
    fixed_noise = torch.randn(64, nz, 1, 1, device=device)
    beta1 = 0.5

    # Establish convention for real and fake labels during training
    real_label = 1.
    fake_label = 0.

    # Setup Adam optimizers for both G and D
    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

    # Training Loop

    # Lists to keep track of progress
    img_list = []
    G_losses = []
    D_losses = []
    iters = 0
    num_epochs = 150

    print("Starting Training Loop...")
    # For each epoch
    for epoch in range(num_epochs):
        # For each batch in the dataloader
        with tqdm(total=len(data_loader_train)) as t:
            for i, data in enumerate(data_loader_train, 0):
                ############################
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                ###########################
                ## Train with all-real batch
                netD.zero_grad()
                # Format batch
                real_cpu = data[0].to(device)
                b_size = real_cpu.size(0)
                label = torch.full((b_size, ), real_label, dtype=torch.float,
                                   device=device)
                # Forward pass real batch through D
                output = netD(real_cpu).view(-1)
                # Calculate loss on all-real batch
                errD_real = criterion(output, label)
                # Calculate gradients for D in backward pass
                errD_real.backward()
                D_x = output.mean().item()

                ## Train with all-fake batch
                # Generate batch of latent vectors
                noise = torch.randn(b_size, nz, 1, 1, device=device)
                # Generate fake image batch with G
                fake = netG(noise)
                label.fill_(fake_label)
                # Classify all fake batch with D
                output = netD(fake.detach()).view(-1)
                # Calculate D's loss on the all-fake batch
                errD_fake = criterion(output, label)
                # Calculate the gradients for this batch
                errD_fake.backward()
                D_G_z1 = output.mean().item()
                # Add the gradients from the all-real and all-fake batches
                errD = errD_real + errD_fake
                # Update D
                optimizerD.step()

                ############################
                # (2) Update G network: maximize log(D(G(z)))
                ###########################
                netG.zero_grad()
                label.fill_(real_label)  # fake labels are real for generator cost
                # Since we just updated D, perform another forward pass of
                # all-fake batch through D
                output = netD(fake).view(-1)
                # Calculate G's loss based on this output
                errG = criterion(output, label)
                # Calculate gradients for G
                errG.backward()
                D_G_z2 = output.mean().item()
                # Update G
                optimizerG.step()

                # Output training stats
                t.set_postfix(epoch='{0}'.format(epoch),
                              loss_g='{:05.3f}'.format(errG.item()),
                              loss_d='{:05.3f}'.format(errD.item()))
                t.update()
                # if i % 50 == 0:
                #     print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                #           % (epoch, num_epochs, i, len(data_loader_train),
                #              errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

                # Save Losses for plotting later
                G_losses.append(errG.item())
                D_losses.append(errD.item())

                # Check how the generator is doing by saving G's output on fixed_noise
                if (iters % 10 == 0) or ((epoch == num_epochs - 1) and
                                         (i == len(data_loader_train) - 1)):
                    with torch.no_grad():
                        fake = netG(fixed_noise).detach().cpu()
                    img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

                iters += 1

    return G_losses, D_losses, img_list
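# A hedged usage sketch for the values returned by train_GAN above: plot the loss
# curves and write the most recent fixed-noise grid to disk. The "trainer" object,
# file names and the matplotlib usage are assumptions, not part of the original code.
import matplotlib.pyplot as plt
import torchvision.utils as vutils

G_losses, D_losses, img_list = trainer.train_GAN(data_loader_train, device)

plt.figure(figsize=(10, 5))
plt.plot(G_losses, label="G")
plt.plot(D_losses, label="D")
plt.xlabel("iterations")
plt.ylabel("loss")
plt.legend()
plt.savefig("gan_losses.png")

# img_list holds grids produced by vutils.make_grid; save the most recent one
vutils.save_image(img_list[-1], "fixed_noise_samples.png")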