data_loader.set_data_list(data_list[stage]) data_gen[stage] = DataGenerator(data_loader, generator_config[stage]) # - GPUs os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpus']) torch.backends.cudnn.enabled = True # - model model = Classifier(out_channels=2) if args.checkpoint is not None: model.load_state_dict(torch.load(args.checkpoint)) print('Load checkpoint:', args.checkpoint) if torch.cuda.device_count() > 0: model = model.cuda() model.zero_grad() # - optimizer optim = Optimizer(config['optimizer'])(model) optim.zero_grad() weight = torch.tensor([0.1, 0.99]) if torch.cuda.device_count() > 0: weight = weight.cuda() criterion = torch.nn.CrossEntropyLoss(weight) def F1_score(predis, labels): return 2 * torch.sum(predis * labels) / torch.sum(predis + labels)
def train(seed=0, dataset='grid', samplers=(UniformDatasetSampler, UniformLatentSampler), latent_dim=2, model_dim=256, device='cuda', conditional=False, learning_rate=2e-4, betas=(0.5, 0.9), batch_size=256, iterations=400, n_critic=5, objective='gan', gp_lambda=10, output_dir='results', plot=False, spec_norm=True): experiment_name = [ seed, dataset, samplers[0].__name__, samplers[1].__name__, latent_dim, model_dim, device, conditional, learning_rate, betas[0], betas[1], batch_size, iterations, n_critic, objective, gp_lambda, plot, spec_norm ] experiment_name = '_'.join([str(p) for p in experiment_name]) results_dir = os.path.join(output_dir, experiment_name) network_dir = os.path.join(results_dir, 'networks') eval_log = os.path.join(results_dir, 'eval.log') os.makedirs(results_dir, exist_ok=True) os.makedirs(network_dir, exist_ok=True) eval_file = open(eval_log, 'w') if plot: samples_dir = os.path.join(results_dir, 'samples') os.makedirs(samples_dir, exist_ok=True) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) data, labels = load_data(dataset) data_dim, num_classes = data.shape[1], len(set(labels)) data_sampler = samplers[0]( torch.tensor(data).float(), torch.tensor(labels).long()) if conditional else samplers[0]( torch.tensor(data).float()) noise_sampler = samplers[1]( latent_dim, labels) if conditional else samplers[1](latent_dim) if conditional: test_data, test_labels = load_data(dataset, split='test') test_dataset = TensorDataset( torch.tensor(test_data).to(device).float(), torch.tensor(test_labels).to(device).long()) test_dataloader = DataLoader(test_dataset, batch_size=4096) G = Generator(latent_dim + num_classes, model_dim, data_dim).to(device).train().train() D = Discriminator(model_dim, data_dim + num_classes, spec_norm=spec_norm).to(device).train() C_real = Classifier(model_dim, data_dim, num_classes).to(device).train() C_fake = Classifier(model_dim, data_dim, num_classes).to(device).train() C_fake.load_state_dict(deepcopy(C_real.state_dict())) C_real_optimizer = optim.Adam(C_real.parameters(), lr=2 * learning_rate) C_fake_optimizer = optim.Adam(C_fake.parameters(), lr=2 * learning_rate) C_crit = nn.CrossEntropyLoss() else: G = Generator(latent_dim, model_dim, data_dim).to(device).train() D = Discriminator(model_dim, data_dim, spec_norm=spec_norm).to(device).train() D_optimizer = optim.Adam(D.parameters(), lr=learning_rate, betas=betas) G_optimizer = optim.Adam(G.parameters(), lr=learning_rate, betas=betas) if objective == 'gan': fake_target = torch.zeros(batch_size, 1).to(device) real_target = torch.ones(batch_size, 1).to(device) elif objective == 'wgan': grad_target = torch.ones(batch_size, 1).to(device) elif objective == 'hinge': bound = torch.zeros(batch_size, 1).to(device) sub = torch.ones(batch_size, 1).to(device) stats = {'D': [], 'G': [], 'C_it': [], 'C_real': [], 'C_fake': []} if plot: fixed_latent_batch = noise_sampler.get_batch(20000) sample_figure = plt.figure(num=0, figsize=(5, 5)) loss_figure = plt.figure(num=1, figsize=(10, 5)) if conditional: accuracy_figure = plt.figure(num=2, figsize=(10, 5)) for it in range(iterations + 1): # Train Discriminator data_batch = data_sampler.get_batch(batch_size) latent_batch = noise_sampler.get_batch(batch_size) if conditional: x_real, y_real = data_batch[0].to(device), data_batch[1].to(device) real_sample = torch.cat([x_real, y_real], dim=1) z_fake, y_fake = latent_batch[0].to(device), latent_batch[1].to( device) x_fake = G(torch.cat([z_fake, y_fake], dim=1)).detach() fake_sample = torch.cat([x_fake, y_fake], dim=1) else: x_real = data_batch.to(device) real_sample = x_real z_fake = latent_batch.to(device) x_fake = G(z_fake).detach() fake_sample = x_fake D.zero_grad() real_pred = D(real_sample) fake_pred = D(fake_sample) if is_recorded(data_sampler): data_sampler.record(real_pred.detach().cpu().numpy()) if is_weighted(data_sampler): weights = torch.tensor( data_sampler.get_weights()).to(device).float().view( real_pred.shape) else: weights = torch.ones_like(real_pred).to(device) if objective == 'gan': D_loss = F.binary_cross_entropy(fake_pred, fake_target).mean() + ( weights * F.binary_cross_entropy(real_pred, real_target)).mean() stats['D'].append(D_loss.item()) elif objective == 'wgan': alpha = torch.rand(batch_size, 1).expand(real_sample.size()).to(device) interpolate = (alpha * real_sample + (1 - alpha) * fake_sample).requires_grad_(True) gradients = torch.autograd.grad(outputs=D(interpolate), inputs=interpolate, grad_outputs=grad_target, create_graph=True, retain_graph=True, only_inputs=True)[0] gradient_penalty = (gradients.norm(2, dim=1) - 1).pow(2).mean() * gp_lambda D_loss = fake_pred.mean() - (real_pred * weights).mean() stats['D'].append(-D_loss.item()) D_loss += gradient_penalty elif objective == 'hinge': D_loss = -(torch.min(real_pred - sub, bound) * weights).mean() - torch.min(-fake_pred - sub, bound).mean() stats['D'].append(D_loss.item()) D_loss.backward() D_optimizer.step() # Train Generator if it % n_critic == 0: G.zero_grad() latent_batch = noise_sampler.get_batch(batch_size) if conditional: z_fake, y_fake = latent_batch[0].to( device), latent_batch[1].to(device) x_fake = G(torch.cat([z_fake, y_fake], dim=1)) fake_pred = D(torch.cat([x_fake, y_fake], dim=1)) else: z_fake = latent_batch.to(device) x_fake = G(z_fake) fake_pred = D(x_fake) if objective == 'gan': G_loss = F.binary_cross_entropy(fake_pred, real_target).mean() stats['G'].extend([G_loss.item()] * n_critic) elif objective == 'wgan': G_loss = -fake_pred.mean() stats['G'].extend([-G_loss.item()] * n_critic) elif objective == 'hinge': G_loss = -fake_pred.mean() stats['G'].extend([-G_loss.item()] * n_critic) G_loss.backward() G_optimizer.step() if conditional: # Train fake classifier C_fake.train() C_fake.zero_grad() C_fake_loss = C_crit(C_fake(x_fake.detach()), y_fake.argmax(1)) C_fake_loss.backward() C_fake_optimizer.step() # Train real classifier C_real.train() C_real.zero_grad() C_real_loss = C_crit(C_real(x_real), y_real.argmax(1)) C_real_loss.backward() C_real_optimizer.step() if it % 5 == 0: C_real.eval() C_fake.eval() real_correct, fake_correct, total = 0.0, 0.0, 0.0 for idx, (sample, label) in enumerate(test_dataloader): real_correct += ( C_real(sample).argmax(1).view(-1) == label).sum() fake_correct += ( C_fake(sample).argmax(1).view(-1) == label).sum() total += sample.shape[0] stats['C_it'].append(it) stats['C_real'].append(real_correct.item() / total) stats['C_fake'].append(fake_correct.item() / total) line = f"{it}\t{stats['D'][-1]:.3f}\t{stats['G'][-1]:.3f}" if conditional: line += f"\t{stats['C_real'][-1]*100:.3f}\t{stats['C_fake'][-1]*100:.3f}" print(line, eval_file) if plot: if conditional: z_fake, y_fake = fixed_latent_batch[0].to( device), fixed_latent_batch[1].to(device) x_fake = G(torch.cat([z_fake, y_fake], dim=1)) else: z_fake = fixed_latent_batch.to(device) x_fake = G(z_fake) generated = x_fake.detach().cpu().numpy() plt.figure(0) plt.clf() plt.scatter(generated[:, 0], generated[:, 1], marker='.', color=(0, 1, 0, 0.01)) plt.axis('equal') plt.xlim(-1, 1) plt.ylim(-1, 1) plt.savefig(os.path.join(samples_dir, f'{it}.png')) plt.figure(1) plt.clf() plt.plot(stats['G'], label='Generator') plt.plot(stats['D'], label='Discriminator') plt.legend() plt.savefig(os.path.join(results_dir, 'loss.png')) if conditional: plt.figure(2) plt.clf() plt.plot(stats['C_it'], stats['C_real'], label='Real') plt.plot(stats['C_it'], stats['C_fake'], label='Fake') plt.legend() plt.savefig(os.path.join(results_dir, 'accuracy.png')) save_model(G, os.path.join(network_dir, 'G_trained.pth')) save_model(D, os.path.join(network_dir, 'D_trained.pth')) save_stats(stats, os.path.join(results_dir, 'stats.pth')) if conditional: save_model(C_real, os.path.join(network_dir, 'C_real_trained.pth')) save_model(C_fake, os.path.join(network_dir, 'C_fake_trained.pth')) eval_file.close()