def evaluate_validation(self, G, D, eval_generator=True, eval_discriminator=True, norm_g=1, norm_d=1):
    if G.invalid or D.invalid:  # do not evaluate if G or D are invalid
        logger.warning("invalid D or G")
        return
    if eval_discriminator:
        D.error = 0
    if eval_generator:
        G.error = 0
    n = 0
    G, D = tools.cuda(G), tools.cuda(D)  # load everything on gpu (cuda)
    for images, _ in self.validation_loader:
        images = tools.cuda(Variable(images))
        n += 1
        if eval_discriminator:
            D.do_eval(G, images)
        if eval_generator:
            G.do_eval(D, images)
    if eval_discriminator:
        D.error /= n * norm_d
    if eval_generator:
        G.error /= n * norm_g
    G, D = G.cpu(), D.cpu()  # move variables back from gpu to cpu
def calc_global_metrics(self, best_discriminators, images):
    if Generator.noise_images is None:
        Generator.noise_images = tools.cuda(torch.FloatTensor(len(images), *images[0].shape).uniform_(-1, 1))
    D = tools.cuda(best_discriminators[0])
    labels = tools.cuda(Tensor(torch.ones(images.size(0))))
    fake_labels = tools.cuda(Tensor(torch.zeros(images.size(0))))
    if config.evolution.fitness.generator.startswith("validation_loss_"):
        # strip the "validation_" prefix (11 chars) to resolve the loss method (e.g. "loss_...")
        loss_function = getattr(self, config.evolution.fitness.generator[11:])
        fake_data = self(self.generate_noise(images.size(0)))
        fake_decision = D(fake_data)
        error = loss_function(D, fake_decision, images).item()
        self.fitness_values = [error]
    elif config.evolution.fitness.generator == "rel_avg":
        with torch.no_grad():
            real_decision = D(images)
            fake_data = self(self.generate_noise(images.size(0)))
            fake_decision = D(fake_data)
            train_score = torch.mean(torch.sigmoid(real_decision))
            gen_score = torch.mean(torch.sigmoid(fake_decision))
            noise_score = torch.mean(torch.sigmoid(D(Generator.noise_images)))
            d_conf = (1 + train_score - noise_score) / 2
            value = -d_conf * gen_score
            self.fitness_values = [value.item()]
    elif config.evolution.fitness.generator == "rel_avg2":
        with torch.no_grad():
            real_decision = D(images)
            fake_data = self(self.generate_noise(images.size(0)))
            fake_decision = D(fake_data)
            noise_decision = D(Generator.noise_images)
            error = (binary_cross_entropy_with_logits(real_decision.view(-1) - torch.mean(fake_decision.view(-1)), fake_labels) +
                     binary_cross_entropy_with_logits(fake_decision.view(-1) - torch.mean(real_decision.view(-1)), labels) +
                     binary_cross_entropy_with_logits(noise_decision.view(-1) - torch.mean(real_decision.view(-1)), labels)) / 3
            self.fitness_values = [error.item()]
    elif config.evolution.fitness.generator == "rel_avg3":
        with torch.no_grad():
            real_decision = D(images)
            fake_data = self(self.generate_noise(images.size(0)))
            fake_decision = D(fake_data)
            noise_decision = D(Generator.noise_images)
            mean_noise = torch.mean(noise_decision)
            error = (binary_cross_entropy_with_logits(real_decision.view(-1) - (torch.mean(fake_decision.view(-1)) + mean_noise) / 2, fake_labels) +
                     binary_cross_entropy_with_logits(fake_decision.view(-1) - (torch.mean(real_decision.view(-1)) + mean_noise) / 2, labels)) / 2
            self.fitness_values = [error.item()]
    D.cpu()
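# Illustrative sketch: the "rel_avg2"/"rel_avg3" branches above follow the
# relativistic average (RaGAN-style) formulation, where each logit is shifted
# by the mean logit of the opposing sample type before BCE-with-logits is
# applied. A minimal standalone two-term version, assuming raw logit tensors
# `real_logits` and `fake_logits` of the same batch size:
import torch
from torch.nn.functional import binary_cross_entropy_with_logits

def rasgan_d_loss(real_logits, fake_logits):
    ones = torch.ones_like(real_logits)
    zeros = torch.zeros_like(fake_logits)
    # real samples should score above the average fake; fakes below the average real
    return (binary_cross_entropy_with_logits(real_logits - fake_logits.mean(), ones) +
            binary_cross_entropy_with_logits(fake_logits - real_logits.mean(), zeros)) / 2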
def evaluate_validation(self, G, D, eval_generator=True, eval_discriminator=True): if G.invalid or D.invalid: # do not evaluate if G or D are invalid logger.warning("invalid D or G") return torch.cuda.empty_cache() G, D = tools.cuda(G), tools.cuda(D) G.eval(), D.eval() G.win_rate, D.win_rate = 0, 0 n = 0 while n < config.evolution.fitness.evaluation_batches: image_loader = self.eval_batches if config.evolution.evaluation.same_batches and self.eval_batches else self.validation_loader for images, _ in image_loader: if config.evolution.evaluation.same_batches and image_loader != self.eval_batches: self.eval_batches.append((images, _)) n += 1 images = tools.cuda(images) if eval_discriminator: D.do_eval(G, images) # FIXME always eval D when skill rating is enabled if eval_generator: G.do_eval(D, images) G.win_rate = 1 - D.win_rate if n >= config.evolution.fitness.evaluation_batches: break D.win_rate /= n G.win_rate = 1 - D.win_rate if eval_discriminator: D.calc_skill_rating(G) if eval_generator: G.calc_skill_rating(D) logger.debug(f"eval GLICKO G: {G.skill_rating} {G.win_rate}, D: {D.skill_rating} {D.win_rate}") G, D = G.cpu(), D.cpu() # move variables back from gpu to cpu torch.cuda.empty_cache()
def evaluate_validation(self, G, D, eval_generator=True, eval_discriminator=True): if G.invalid or D.invalid: # do not evaluate if G or D are invalid logger.warning("invalid D or G") return torch.cuda.empty_cache() G, D = tools.cuda(G), tools.cuda(D) G.eval(), D.eval() G.win_rate, D.win_rate = 0, 0 n = 0 for images, _ in self.validation_loader: if n + 1 > config.evolution.fitness.evaluation_batches: break n += 1 images = tools.cuda(images) G.win_rate, D.win_rate = 0, 0 D.do_eval(G, images) # D.win_rate /= n if eval_discriminator: D.calc_skill_rating(G) if eval_generator: G.win_rate = 1 - D.win_rate G.calc_skill_rating(D) print("eval GLICKO G:", G.skill_rating, G.win_rate, ", D:", D.skill_rating, D.win_rate) G, D = G.cpu(), D.cpu() # move variables back from gpu to cpu torch.cuda.empty_cache()
def train_step(self, G, images): """Train the discriminator on real+fake""" self.zero_grad() if self.labels is None: self.labels = tools.cuda(Tensor(torch.ones(images.size(0)))) self.labels = self.labels * 0.9 if config.gan.label_smoothing else self.labels if self.fake_labels is None: self.fake_labels = tools.cuda(Tensor(torch.zeros(images.size(0)))) self.fake_labels = self.fake_labels + 0.1 if config.gan.label_smoothing else self.fake_labels error, real_decision, fake_decision, fake_data = self.loss(G, images) if self.use_gradient_penalty(): gradient_penalty = self.gradient_penalty(images.data, fake_data.data) error += gradient_penalty error.backward() self.optimizer.step() # clip weights for WGAN if self.genome.gan_type == "wgan" and not self.use_gradient_penalty(): clip_value = 0.01 for p in self.parameters(): p.data.clamp_(-clip_value, clip_value) self.calc_metrics(G, error.item(), real_decision, fake_decision) return error.item()
def train_evaluate(self, G, D, train_generator=True, train_discriminator=True, norm_g=1, norm_d=1):
    if G.invalid or D.invalid:  # do not evaluate if G or D are invalid
        logger.warning("invalid D or G")
        return
    torch.cuda.empty_cache()
    n, ng = 0, 0
    G.error = G.error or 0
    D.error = D.error or 0
    g_error = G.error
    d_error = D.error
    d_fitness_value, g_fitness_value = D.fitness_value, G.fitness_value
    G, D = tools.cuda(G), tools.cuda(D)  # load everything on gpu (cuda)
    G.train()
    D.train()
    while n < config.gan.batches_limit:
        for images, _ in self.train_loader:
            # if n == 0: print(images[0].mean())
            n += 1
            if n > config.gan.batches_limit:
                break
            images = tools.cuda(Variable(images))
            if train_discriminator:
                D.do_train(G, images)
            if train_generator and n % config.gan.critic_iterations == 0:
                ng += 1
                G.do_train(D, images)
    if train_discriminator:
        D.error = d_error + (D.error - d_error) / (n * norm_d)
        D.fitness_value = d_fitness_value + (D.fitness_value - d_fitness_value) / (n * norm_d)
        G.fitness_value = g_fitness_value + (G.fitness_value - g_fitness_value) / (n * norm_g)
    if train_generator:
        G.error = g_error + (G.error - g_error) / (ng * norm_g)
    G, D = G.cpu(), D.cpu()  # move variables back from gpu to cpu
    torch.cuda.empty_cache()
def train_step(self, G, images): """Train the discriminator on real+fake""" self.zero_grad() if self.labels is None: self.labels = tools.cuda(Tensor(torch.ones(images.size(0)))) self.labels = self.labels * 0.9 if config.gan.label_smoothing else self.labels if self.fake_labels is None: self.fake_labels = tools.cuda(Tensor(torch.zeros(images.size(0)))) self.fake_labels = self.fake_labels + 0.1 if config.gan.label_smoothing else self.fake_labels # 1A: Train D on real real_error, real_decision = self.step_real(images) if config.gan.type == "wgan": real_error.backward(tensor_constants.ONE) # compute/store gradients, but don't change params elif config.gan.type not in ["rsgan", "rasgan"]: real_error.backward() # compute/store gradients, but don't change params # 1B: Train D on fake fake_error, fake_data, fake_decision = self.step_fake(G, batch_size=images.size()[0]) if config.gan.type == "wgan": fake_error.backward(tensor_constants.MONE) elif config.gan.type not in ["rsgan", "rasgan"]: fake_error.backward() if config.gan.type == "rsgan": real_error = self.criterion(real_decision.view(-1) - fake_decision.view(-1), self.labels) real_error.backward() fake_error = tools.cuda(torch.FloatTensor([0])) elif config.gan.type == "rasgan": real_error = (self.criterion(real_decision.view(-1) - torch.mean(fake_decision.view(-1)), self.labels) + self.criterion(torch.mean(fake_decision.view(-1)) - real_decision.view(-1), self.fake_labels))/2 real_error.backward() fake_error = tools.cuda(torch.FloatTensor([0])) if config.evolution.fitness.discriminator == "AUC": # full_decision = np.concatenate((real_decision.cpu().data.numpy().flatten(), fake_decision.cpu().data.numpy().flatten())) # full_labels = np.concatenate((np.ones(real_decision.size()[0]), np.zeros(fake_decision.size()[0]))) # self.fitness_value -= roc_auc_score(full_labels, full_decision) # self.fitness_value -= average_precision_score(full_labels, full_decision) # self.fitness_value += 1 - accuracy_score(full_labels, full_decision>0.5) # self.fitness_value += np.random.rand() self.fitness_value += abs(accuracy_score(np.zeros(fake_decision.size()[0]), fake_decision.cpu().data.numpy().flatten()>0.5) - accuracy_score(np.ones(real_decision.size()[0]), real_decision.cpu().data.numpy().flatten()>0.5)) if config.gan.discriminator.use_gradient_penalty: gradient_penalty = self.gradient_penalty(images.data, fake_data.data) gradient_penalty.backward() self.optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward() self.calc_win_rate(real_decision, fake_decision, G) # Wasserstein distance if config.gan.type == "wgan": return (real_error - fake_error).item() return (real_error + fake_error).item()
def inception_score(imgs, batch_size=32, resize=False, splits=1):
    """Computes the inception score of the generated images imgs

    imgs -- Torch dataset of (3xHxW) numpy images normalized in the range [-1, 1]
    batch_size -- batch size for feeding into Inception v3
    resize -- whether to upsample images to 299x299 before scoring
    splits -- number of splits
    """
    N = len(imgs)
    assert batch_size > 0
    assert N > batch_size

    # Set up dataloader
    dataloader = torch.utils.data.DataLoader(imgs, batch_size=batch_size)

    # Load inception model
    inception_model = tools.cuda(inception_v3(pretrained=True, transform_input=False))
    inception_model.eval()
    up = tools.cuda(nn.Upsample(size=(299, 299), mode='bilinear', align_corners=True))

    def get_pred(x):
        if x.size()[1] == 1:
            x = torch.cat((x, x, x), 1)  # replicate grayscale to 3 channels
        if resize:
            x = up(x)
        x = inception_model(x)
        return F.softmax(x, dim=1).data.cpu().numpy()

    # Get predictions
    preds = np.zeros((N, 1000))
    for i, batch in enumerate(dataloader, 0):
        batch = tools.cuda(batch)
        batchv = Variable(batch)
        batch_size_i = batch.size()[0]
        preds[i * batch_size:i * batch_size + batch_size_i] = get_pred(batchv)

    # Now compute the mean KL divergence
    split_scores = []
    for k in range(splits):
        part = preds[k * (N // splits): (k + 1) * (N // splits), :]
        py = np.mean(part, axis=0)
        scores = []
        for i in range(part.shape[0]):
            pyx = part[i, :]
            scores.append(entropy(pyx, py))
        split_scores.append(np.exp(np.mean(scores)))

    return np.mean(split_scores), np.std(split_scores)
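# Illustrative note: inception_score above computes IS = exp(mean_x KL(p(y|x) || p(y))),
# where p(y|x) is the Inception v3 softmax for one image and p(y) is its mean over
# the split. A hypothetical call, assuming `fake_images` is a dataset of generated
# samples already scaled to [-1, 1]:
#
#     mean_is, std_is = inception_score(fake_images, batch_size=32, resize=True, splits=10)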
def gradient_penalty(self, real_data, fake_data):
    batch_size = real_data.size()[0]
    alpha = torch.rand(batch_size, 1, 1, 1)
    alpha = tools.cuda(alpha.expand_as(real_data))
    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = interpolates.clone().detach().requires_grad_(True)
    disc_interpolates = tools.cuda(self(interpolates))
    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=tools.cuda(torch.ones(disc_interpolates.size())),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]
    # flatten per sample so the 2-norm is taken over all gradient components,
    # as in the standard WGAN-GP formulation
    gradients = gradients.view(batch_size, -1)
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * config.gan.discriminator.gradient_penalty_lambda
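# Illustrative sketch: gradient_penalty implements the WGAN-GP regularizer
# lambda * E[(||grad_x_hat D(x_hat)||_2 - 1)^2] on points x_hat interpolated
# between real and fake samples. A hypothetical critic step using it, assuming
# `D`, `G`, an optimizer, and a `real` batch are set up elsewhere:
#
#     fake = G(G.generate_noise(real.size(0))).detach()
#     d_loss = D(fake).mean() - D(real).mean() + D.gradient_penalty(real.data, fake.data)
#     d_loss.backward()
#     D.optimizer.step()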
def getFIDScore(self):
    self.Gen_network = self.Gen_network.to(self.device)
    self.Gen_network.eval()
    noise = torch.randn(1000, 100, 1, 1, device=self.device)
    generated_images = self.Gen_network(noise).detach()
    self.Gen_network.zero_grad()
    self.Gen_network.cpu()
    torch.cuda.empty_cache()
    # Get FID score for the model:
    global base_fid_statistics, inception_model
    if base_fid_statistics is None and inception_model is None:
        base_fid_statistics, inception_model = generative_score.initialize_fid(self.dataloader, sample_size=1000)
    inception_model = tools.cuda(inception_model)
    m1, s1 = fid_score.calculate_activation_statistics(
        generated_images.data.cpu().numpy(), inception_model,
        cuda=tools.is_cuda_available(), dims=2048)
    inception_model.cpu()
    m2, s2 = base_fid_statistics
    ret = fid_score.calculate_frechet_distance(m1, s1, m2, s2)
    torch.cuda.empty_cache()
    return ret
def forward(self, x):
    self.scale = tools.cuda(self.scale) if x.is_cuda else self.scale.cpu()
    x = x.mul(self.scale)
    if self.bias is not None:
        dims = [1, 1] if len(x.size()) == 4 else []
        x += self.bias.view(1, -1, *dims).expand_as(x)
    return x
def initialize_fid(train_loader, sample_size=1000):
    global base_fid_statistics, inception_model
    if inception_model is None:
        inception_model = InceptionV3([InceptionV3.BLOCK_INDEX_BY_DIM[config.evolution.fitness.fid_dimension]])
    inception_model = tools.cuda(inception_model)
    if base_fid_statistics is None:
        print("calculate base fid statistics")
        # TODO see a better way to load images from the train dataset
        train_images = []
        for images, _ in train_loader:
            train_images += list(images.numpy())
            if len(train_images) > sample_size:
                train_images = train_images[:sample_size]
                break
        train_images = np.array(train_images)
        base_fid_statistics = fid_score.calculate_activation_statistics(
            train_images, inception_model, cuda=tools.is_cuda_available(),
            dims=config.evolution.fitness.fid_dimension)
        inception_model.cpu()
        print("completed..")
    return base_fid_statistics, inception_model
def generate_noise(self, batch_size, volatile=False, cuda=True):
    with torch.set_grad_enabled(not volatile):
        gen_input = tools.cuda(torch.randn([batch_size] + list(self.input_shape[1:]), requires_grad=True), condition=cuda)
    return gen_input
def train_evaluate(self, G, D, norm_g=1, norm_d=1):
    if config.evolution.evaluation.reset_optimizer:
        D.reset_optimizer_state()
        G.reset_optimizer_state()
    if G.invalid or D.invalid:  # do not evaluate if G or D are invalid
        logger.warning("invalid D or G")
        return
    torch.cuda.empty_cache()
    n, ng = 0, 0
    G.error = G.error or 0
    D.error = D.error or 0
    g_error = G.error
    d_error = D.error
    d_fitness_value, g_fitness_value = D.fitness_value, G.fitness_value
    G, D = tools.cuda(G), tools.cuda(D)  # load everything on gpu (cuda)
    G.train()
    D.train()
    G.win_rate, D.win_rate = 0, 0
    while n < config.gan.batches_limit:
        for images, _ in self.train_loader_iter:
            if n + 1 > config.gan.batches_limit:
                break
            n += 1
            images = tools.cuda(images)
            D.do_train(G, images)
            if n % config.gan.critic_iterations == 0:
                ng += 1
                G.do_train(D, images)
        else:
            # for/else: runs only when the loader is exhausted (no break);
            # restart the iterator and keep training until the batch limit
            self.train_loader_iter = iter(self.train_loader)
    D.error = d_error + (D.error - d_error) / (n * norm_d)
    new_d_fitness = (D.fitness_value - d_fitness_value) / n
    new_g_fitness = (G.fitness_value - g_fitness_value) / ng
    D.fitness_value = d_fitness_value + new_d_fitness / norm_d
    G.fitness_value = g_fitness_value + new_g_fitness / norm_g
    G.error = g_error + (G.error - g_error) / (ng * norm_g)
    D.win_rate /= n
    G.win_rate = 1 - D.win_rate
    D.calc_skill_rating(G)
    G.calc_skill_rating(D)
    # print("train GLICKO G:", G.skill_rating, G.win_rate, ", D:", D.skill_rating, D.win_rate)
    G.cpu(), D.cpu()  # move variables back from gpu to cpu
    torch.cuda.empty_cache()
def calc_activations(df, image_shape, batch_size):
    inception_model = tools.cuda(InceptionV3([InceptionV3.BLOCK_INDEX_BY_DIM[2048]]))
    images = get_image_data(df, image_shape).reshape((-1, *image_shape))
    print(images.shape)
    act = get_activations(images, inception_model, batch_size=batch_size, dims=2048,
                          cuda=tools.is_cuda_available(), verbose=True)
    del inception_model
    torch.cuda.empty_cache()
    print(act.shape)
    return act, images
def train_step(self, D, images):
    self.inception_score_mean = 0
    batch_size = images.size(0)
    # 2. Train G on D's response (but DO NOT train D on these labels)
    self.zero_grad()
    if self.real_labels is None:
        self.real_labels = tools.cuda(Tensor(torch.ones(batch_size)))
        self.real_labels = self.real_labels * 0.9 if config.gan.label_smoothing else self.real_labels
    if self.fake_labels is None:
        self.fake_labels = tools.cuda(Tensor(torch.zeros(images.size(0))))
        self.fake_labels = self.fake_labels + 0.1 if config.gan.label_smoothing else self.fake_labels
    error, decision = self.loss(D, images)
    error.backward()
    self.optimizer.step()  # Only optimizes G's parameters
    self.calc_metrics(D, error.item(), decision, images)
    return error.item()
def step_real(self, images):
    real_decision = self(images)
    if config.gan.type in ["wgan", "rsgan", "rasgan"]:
        return real_decision.mean(), real_decision
    elif config.gan.type == "lsgan":
        return 0.5 * torch.mean((real_decision - 1) ** 2), real_decision
    labels = tools.cuda(Variable(torch.ones(images.size(0))))
    labels = labels * 0.9 if config.gan.label_smoothing else labels
    return self.criterion(real_decision.view(-1), labels), real_decision
def fid_images(generated_images):
    global base_fid_statistics, inception_model
    inception_model = tools.cuda(inception_model, use_cuda)
    start_time = time.time()
    m1, s1 = fid_score.calculate_activation_statistics(
        generated_images.numpy(), inception_model,
        cuda=tools.is_cuda_available(use_cuda),
        dims=config.evolution.fitness.fid_dimension,
        batch_size=config.evolution.fitness.fid_batch_size)
    print("FID: calc activation --- %s seconds ---" % (time.time() - start_time))
    inception_model.cpu()
    m2, s2 = base_fid_statistics
    ret = fid_score.calculate_frechet_distance(m1, s1, m2, s2)
    return ret
def train_step(self, D, images):
    self.inception_score_mean = 0
    batch_size = images.size(0)
    # 2. Train G on D's response (but DO NOT train D on these labels)
    self.zero_grad()
    if self.real_labels is None:
        self.real_labels = tools.cuda(Tensor(torch.ones(batch_size)))
        self.real_labels = self.real_labels * 0.9 if config.gan.label_smoothing else self.real_labels
    error, decision = self.step(D, batch_size)
    if config.gan.type == "wgan":
        error.backward(tensor_constants.ONE)
    elif config.gan.type == "rsgan":
        real_decision = D(images)
        error = self.criterion(decision.view(-1) - real_decision.view(-1), self.real_labels)
        error.backward()
    elif config.gan.type == "rasgan":
        real_decision = D(images)
        labels_zeros = tools.cuda(Tensor(torch.zeros(images.size(0))))
        error = (self.criterion(real_decision.view(-1) - torch.mean(decision.view(-1)), labels_zeros) +
                 self.criterion(torch.mean(decision.view(-1)) - real_decision.view(-1), self.real_labels)) / 2
        error.backward()
    else:
        error.backward()
    if config.evolution.fitness.generator == "AUC":
        labels = np.ones(images.size(0))
        self.fitness_value += 1 - accuracy_score(labels, decision.cpu() > 0.5)
    self.optimizer.step()  # Only optimizes G's parameters
    # the wgan case returned the same value, so no special-casing is needed
    return error.item()
def fid_images(generated_images):
    global base_fid_statistics, inception_model
    inception_model = tools.cuda(inception_model)
    m1, s1 = fid_score.calculate_activation_statistics(
        generated_images.data.cpu().numpy(), inception_model,
        cuda=tools.is_cuda_available(), dims=config.evolution.fitness.fid_dimension)
    inception_model.cpu()
    m2, s2 = base_fid_statistics
    ret = fid_score.calculate_frechet_distance(m1, s1, m2, s2)
    return ret
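# Illustrative sketch: the fid_* helpers above delegate to
# fid_score.calculate_frechet_distance, which computes the Frechet distance
# between two Gaussians fitted to Inception activations:
#     FID = ||m1 - m2||^2 + Tr(s1 + s2 - 2 * sqrt(s1 @ s2))
# A minimal numpy version for reference, assuming well-conditioned covariances:
import numpy as np
from scipy import linalg

def frechet_distance(m1, s1, m2, s2):
    diff = m1 - m2
    covmean, _ = linalg.sqrtm(s1.dot(s2), disp=False)  # matrix square root of s1 @ s2
    if np.iscomplexobj(covmean):
        covmean = covmean.real  # drop tiny imaginary parts caused by numerics
    return diff.dot(diff) + np.trace(s1) + np.trace(s2) - 2 * np.trace(covmean)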
def train_evaluate(self, G, D, batches_limit):
    logger.debug(f"train: G({G.genome.gan_type}) x D({D.genome.gan_type}), batches: {batches_limit}")
    if config.evolution.evaluation.reset_optimizer:
        D.reset_optimizer_state()
        G.reset_optimizer_state()
    if G.invalid or D.invalid:  # do not evaluate if G or D are invalid
        logger.warning("invalid D or G")
        return
    torch.cuda.empty_cache()
    n = 0
    G, D = tools.cuda(G), tools.cuda(D)  # load everything on gpu (cuda)
    G.train()
    D.train()
    G.win_rate, D.win_rate = 0, 0
    while n < batches_limit:
        image_loader = self.batches if config.evolution.evaluation.same_batches and self.batches else self.train_loader
        for images, _ in image_loader:
            if config.evolution.evaluation.same_batches and image_loader != self.batches:
                self.batches.append((images, _))
            n += 1
            images = tools.cuda(images)
            if n % config.gan.generator_iterations == 0:
                D.do_train(G, images)
            if n % config.gan.critic_iterations == 0:
                G.do_train(D, images)
            if n >= batches_limit:  # use the parameter, not config.gan.batches_limit
                break
    D.win_rate /= n
    G.win_rate = 1 - D.win_rate
    D.calc_skill_rating(G)
    G.calc_skill_rating(D)
    # print("train GLICKO G:", G.skill_rating, G.win_rate, ", D:", D.skill_rating, D.win_rate)
    G.cpu(), D.cpu()  # move variables back from gpu to cpu
    torch.cuda.empty_cache()
def step(self, D, batch_size, gen_input=None):
    if gen_input is None:
        gen_input = self.generate_noise(batch_size)
    fake_data = self(gen_input)
    fake_decision = D(fake_data)
    if config.gan.type in ["wgan", "rsgan", "rasgan"]:
        return fake_decision.mean(), fake_decision
    elif config.gan.type == "lsgan":
        return 0.5 * torch.mean((fake_decision - 1) ** 2), fake_decision
    real_labels = tools.cuda(Variable(torch.ones(batch_size)))
    real_labels = real_labels * 0.9 if config.gan.label_smoothing else real_labels
    return self.criterion(fake_decision.view(-1), real_labels), fake_decision
def initialize_fid(train_loader, size=1000):
    global base_fid_statistics, inception_model
    if inception_model is None:
        inception_model = build_inception_model()
    inception_model = tools.cuda(inception_model, use_cuda)
    if base_fid_statistics is None:
        logger.info("calculate base fid statistics: %d", size)
        base_fid_statistics = fid_score.calculate_activation_statistics(
            train_loader.dataset, inception_model,
            dims=config.evolution.fitness.fid_dimension, size=size,
            batch_size=config.evolution.fitness.fid_batch_size)
        inception_model.cpu()
def load_data(batch_size, dataset_name, images_per_model, run_dirs):
    df = pd.DataFrame()
    image_shape = None
    noise_data = None
    for run_dir, generation in run_dirs:
        target_size = len(df) + images_per_model
        if generation is None:
            config.gan.dataset = dataset_name
            # config.gan.dataset_resize = [64, 64]
            dataset = GanTrain.create_dataset()
            train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, drop_last=True)
            # load images from dataset
            for images, labels in train_loader:
                image_shape = images.shape[1:]
                df_new = pd.DataFrame(images.numpy().reshape((-1, np.prod(image_shape))))
                df_new["model"] = dataset_name
                df_new["run_dir"] = run_dir
                df_new["generation"] = None
                df_new["y"] = np.zeros(len(images)) if len(labels.shape) > 1 else labels.numpy()
                df = df.append(df_new)  # note: DataFrame.append was removed in pandas 2.0 (use pd.concat there)
                if len(df) >= target_size:
                    break
        else:
            if noise_data is None:
                noise_data = Generator().generate_noise(images_per_model, volatile=True)
                print("noise data created", noise_data.shape)
            last_model = sorted(glob.glob(os.path.join(run_dir, "generations", f"{generation:03}", "generator.pkl")))[-1]
            best_generator = tools.cuda(Generator.load(last_model))
            n = 0
            while len(df) < target_size:
                noise = noise_data[n:min(n + batch_size, len(noise_data))]
                n += batch_size
                images = best_generator(noise).detach().cpu().numpy()
                image_shape = images.shape[1:]
                df_new = pd.DataFrame(images.reshape((-1, np.prod(image_shape))))
                df_new["model"] = f"{run_dir}|{generation}"
                df_new["run_dir"] = run_dir
                df_new["generation"] = generation
                df_new["y"] = np.zeros(len(images))
                df = df.append(df_new)
                del noise
                if len(df) >= target_size:
                    break
            best_generator = best_generator.cpu()
            torch.cuda.empty_cache()
    print(df.describe())
    return df, image_shape
def step_fake(self, G, batch_size):
    gen_input = G.generate_noise(batch_size)
    fake_data = G(gen_input).detach()  # detach to avoid training G on these labels
    fake_decision = self(fake_data)
    if config.gan.type in ["wgan", "rsgan", "rasgan"]:
        return fake_decision.mean(), fake_data, fake_decision
    elif config.gan.type == "lsgan":
        return 0.5 * torch.mean(fake_decision ** 2), fake_data, fake_decision
    fake_labels = tools.cuda(Variable(torch.zeros(batch_size)))
    fake_labels = fake_labels + 0.1 if config.gan.label_smoothing else fake_labels
    return self.criterion(fake_decision.view(-1), fake_labels), fake_data, fake_decision
def fid_images(dataloader, size=1000):
    global base_fid_statistics, inception_model
    inception_model = tools.cuda(inception_model, use_cuda)
    start_time = time.time()
    m1, s1 = fid_score.calculate_activation_statistics(
        dataloader, inception_model, dims=config.evolution.fitness.fid_dimension,
        size=size, batch_size=config.evolution.fitness.fid_batch_size)
    print("FID: calc activation --- %s seconds ---" % (time.time() - start_time))
    inception_model.cpu()
    m2, s2 = base_fid_statistics
    ret = fid_score.calculate_frechet_distance(m1, s1, m2, s2)
    return ret
def test_serialization(self):
    images = tools.cuda(Variable(torch.randn(5, 100)).view(5, 1, 10, 10))
    input_shape = images[0].size()
    discriminator = Discriminator(output_size=1, input_shape=[1] + list(input_shape))
    discriminator.setup()
    generator = Generator(output_size=input_shape)
    generator.setup()
    generator = tools.cuda(generator)
    discriminator = tools.cuda(discriminator)
    discriminator.do_train(generator, images)
    generator.do_train(discriminator, images)

    # save and load the discriminator
    discriminator_path = f"{self.test_path}/discriminator.pkl"
    discriminator.save(discriminator_path)
    loaded_discriminator = Discriminator.load(discriminator_path)
    self.assert_state_dict_equal(discriminator.state_dict(), loaded_discriminator.state_dict())

    # save and load the generator
    generator_path = f"{self.test_path}/generator.pkl"
    generator.save(generator_path)
    loaded_generator = Generator.load(generator_path)
    loaded_generator = tools.cuda(loaded_generator)
    generator = tools.cuda(generator)
    self.assert_state_dict_equal(generator.state_dict(), loaded_generator.state_dict())

    # check that the loaded generator generates images in the same way as the original generator
    noise = generator.generate_noise(1, volatile=True)
    diff = generator(noise) - loaded_generator(noise)
    self.assertAlmostEqual(0, diff.sum().item(), 6)

    # execute a train step and now it should be different
    generator.do_train(tools.cuda(discriminator), images)
    self.assertFalse(generator(noise).equal(loaded_generator(noise)))
def train_gan(self):
    torch.cuda.empty_cache()
    # Constants for training
    nz = 100
    # Beta1 hyperparam for Adam optimizers
    beta1 = 0.5
    # Set random seed for reproducibility
    manualSeed = 999
    b_size = 64
    # manualSeed = random.randint(1, 10000)  # use if you want new results
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    self.Gen_network = tools.cuda(self.Gen_network)
    self.Disc_network = tools.cuda(self.Disc_network)
    if self.init_g_model:
        self.Gen_network.apply(self.weights_init)
        self.optimizerG = optim.Adam(self.Gen_network.parameters(), lr=self.descriptor.lrate, betas=(beta1, 0.999))
        self.init_g_model = False
    if self.init_d_model:
        self.Disc_network.apply(self.weights_init)
        self.optimizerD = optim.Adam(self.Disc_network.parameters(), lr=self.descriptor.lrate, betas=(beta1, 0.999))
        self.init_d_model = False

    # Different loss functions
    if self.loss_function == 0:
        criterion = torch.nn.BCELoss()
    elif self.loss_function in (2, 3):  # was `== 2 or 3`, which is always true
        BCE_stable = torch.nn.BCEWithLogitsLoss()

    # Establish convention for real and fake labels during training
    real_label = 1
    fake_label = 0
    label = torch.full((b_size,), real_label, device=self.device)
    f_label = torch.full((b_size,), fake_label, device=self.device)

    print("Starting Training Loop...")
    # TODO make these progress-tracking lists attributes of self
    img_list = []
    G_losses = []
    D_losses = []
    iters = 0

    # For each epoch
    for epoch in range(self.epochs):
        # For each batch in the dataloader
        for i, data in enumerate(self.dataloader, 0):
            # 20 batches per epoch: 64 bsize * 20 batches -> 1280 samples per epoch
            if i > 20:
                break

            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            ## Train with all-real batch
            self.Disc_network.zero_grad()
            # Format batch
            real_cpu = data[0].to(self.device)
            # Forward pass real batch through D
            y_pred = self.Disc_network(real_cpu).view(-1)
            # Calculate loss on all-real batch
            if self.loss_function == 0:  # BCE loss
                errD_real = criterion(y_pred, label)
                errD_real.backward()
            elif self.loss_function == 1:  # MSE loss
                errD_real = torch.mean((y_pred - label) ** 2)
                errD_real.backward()
            elif self.loss_function == 4:  # Hinge loss
                errD_real = torch.mean(torch.nn.ReLU()(1.0 - y_pred))
                errD_real.backward()
            D_x = y_pred.mean().item()

            ## Train with all-fake batch
            # Generate batch of latent vectors
            noise = torch.randn(b_size, nz, 1, 1, device=self.device)
            # Generate fake image batch with G
            fake = self.Gen_network(noise)
            # label.fill_(fake_label)
            # Classify all fake batch with D
            y_pred_fake = self.Disc_network(fake.detach()).view(-1)
            if self.loss_function == 0:  # BCE loss
                errD_fake = criterion(y_pred_fake, f_label)
                errD_fake.backward()
                errD = errD_real + errD_fake
            elif self.loss_function == 1:  # MSE loss
                errD_fake = torch.mean(y_pred_fake ** 2)
                errD_fake.backward()
                errD = errD_real + errD_fake
            elif self.loss_function == 2:  # RSGAN
                errD = BCE_stable(y_pred - y_pred_fake, label)
                errD.backward()
            elif self.loss_function == 3:  # RaGAN
                # Add the gradients from the all-real and all-fake batches
                errD = (BCE_stable(y_pred - torch.mean(y_pred_fake), label) +
                        BCE_stable(y_pred_fake - torch.mean(y_pred), f_label)) / 2
                errD.backward()
            elif self.loss_function == 4:  # Hinge loss
                errD = torch.mean(torch.nn.ReLU()(1.0 + y_pred_fake))
                # Calculate the gradients for this batch
                errD.backward()
            D_G_z1 = y_pred_fake.mean().item()
            # Update D
            self.optimizerD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            self.Gen_network.zero_grad()
            # Since we just updated D, perform another forward pass of the all-fake batch through D
            y_pred_fake = self.Disc_network(fake).view(-1)
            # Calculate G's loss based on this output
            if self.loss_function == 0:  # BCE
                errG = criterion(y_pred_fake, label)
            elif self.loss_function == 1:  # MSE
                errG = torch.mean((y_pred_fake - label) ** 2)
            elif self.loss_function == 2:  # RSGAN
                y_pred = self.Disc_network(real_cpu).view(-1)
                errG = BCE_stable(y_pred_fake - y_pred, label)
            elif self.loss_function == 3:  # RaGAN
                y_pred = self.Disc_network(real_cpu).view(-1)
                errG = (BCE_stable(y_pred - torch.mean(y_pred_fake), f_label) +
                        BCE_stable(y_pred_fake - torch.mean(y_pred), label)) / 2
            elif self.loss_function == 4:  # Hinge loss
                errG = -torch.mean(y_pred_fake)
            # Calculate gradients for G
            errG.backward()
            D_G_z2 = y_pred_fake.mean().item()
            # Update G
            self.optimizerG.step()

            # Output training stats
            if i % 10 == 0:
                print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                      % (epoch, self.epochs, i, 20, errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

            # Save losses for plotting later
            G_losses.append(errG.item())
            D_losses.append(errD.item())

            # Check how the generator is doing by saving G's output on fixed noise
            if i % 20 == 0:
                with torch.no_grad():
                    # Create a batch of latent vectors to visualize the progression of the generator
                    fixed_noise = torch.randn(64, nz, 1, 1, device=self.device)
                    fake = self.Gen_network(fixed_noise).detach().cpu()
                img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
                path = 'C:/Users/RACHIT/Desktop/CAGAN/output_images/' + str(self.indi_no) + '/'
                size_figure_grid = 5
                fig, ax = plt.subplots(size_figure_grid, size_figure_grid, figsize=(5, 5))
                # use r/c as grid indices so the batch index `i` is not clobbered
                for r, c in itertools.product(range(size_figure_grid), range(size_figure_grid)):
                    ax[r, c].get_xaxis().set_visible(False)
                    ax[r, c].get_yaxis().set_visible(False)
                path = path + 'Gen_' + str(self.gen_no) + '_Offspring_' + str(self.offspring) + '.png'
                for k in range(5 * 5):
                    r = k // 5
                    c = k % 5
                    ax[r, c].cla()
                    ax[r, c].imshow(fake[k, 0].cpu().data.numpy(), cmap='gray')
                fig.savefig(path)
                plt.close()
            iters += 1

    self.Gen_network = self.Gen_network.cpu()
    self.Disc_network = self.Disc_network.cpu()
    torch.cuda.empty_cache()

###################################################################################################
# ########################################## TEST MAIN FUNCTION ###################################
###################################################################################################
# def main():
#     # Root directory for dataset
#     dataroot = "mnist_png/training"
#     image_size = 64
#     dataset = dset.ImageFolder(root=dataroot,
#                                transform=transforms.Compose([
#                                    transforms.Grayscale(1),
#                                    transforms.Resize(image_size),
#                                    transforms.CenterCrop(image_size),
#                                    transforms.ToTensor(),
#                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
#                                ]))
#     # Create the dataloader
#     dataloader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True, num_workers=2)
#
#     input_channel = 1
#     output_dim = 64
#     lrate = 0.001
#     lossfunction = 3
#     epochs = 50
#     my_gan_descriptor = GANDescriptor(input_channel, output_dim, lossfunction, lrate, dataloader, epochs, 1)
#
#     # g_layer = np.random.randint(2, 11)  # Number of hidden layers
#     g_layer = 6
#     # g_activations = [1 for x in range(g_layer - 1)]
#     g_activations = [1, 1, 1, 0, 1]
#     gchannels = {5: [512, 256, 128, 64, 64],
#                  6: [512, 512, 256, 128, 64, 64],
#                  7: [512, 512, 256, 256, 128, 64, 64],
#                  8: [512, 512, 256, 256, 128, 128, 64, 64],
#                  9: [512, 512, 256, 256, 128, 128, 64, 64, 64],
#                  10: [512, 512, 512, 256, 256, 128, 128, 64, 64, 64]}
#     g_opChannels = gchannels[g_layer]
#
#     g_weight_init = 0
#     g_loop = 1
#     nz = 100
#     my_gan_descriptor.gan_generator_initialization(g_layer, input_channel, output_dim, g_opChannels,
#                                                    g_weight_init, g_activations, g_loop)
#
#     # d_layer = np.random.randint(2, 9)  # Number of hidden layers
#     d_layer = 6
#     d_weight_init = 0
#     # d_activations = [0 for x in range(d_layer - 1)]
#     d_activations = [1, 0, 0, 1, 1]
#     dchannels = {5: [64, 128, 256, 512, 512],
#                  6: [64, 64, 128, 256, 512, 512],
#                  7: [64, 64, 128, 128, 256, 512, 512],
#                  8: [64, 64, 128, 128, 256, 256, 512, 512],
#                  9: [64, 64, 128, 128, 256, 256, 512, 512, 512],
#                  10: [64, 64, 64, 128, 128, 256, 256, 512, 512, 512]}
#     d_opChannels = dchannels[d_layer]
#     d_loop = 1
#     my_gan_descriptor.gan_discriminator_initialization(d_layer, input_channel, output_dim, d_opChannels,
#                                                        d_weight_init, d_activations, d_loop)
#
#     individual = GAN(my_gan_descriptor)
#     print(individual.Gen_network)
#     print(individual.Disc_network)
#     individual.train_gan()
#     print(individual.getFIDScore())
#
# if __name__ == '__main__':
#     # freeze_support() here if program needs to be frozen
#     main()
def generate(self, input_shape, g_pop, d_pop, epoch, num_epochs, train_loader, validation_loader):
    if epoch % config.stats.print_interval != 0 and epoch != num_epochs - 1:
        return
    generators = g_pop.sorted()
    discriminators = d_pop.sorted()
    G = g_pop.best()
    D = d_pop.best()
    G.eval()
    D.eval()
    # this should never occur!
    if G.invalid or D.invalid:
        logger.error("invalid D or G")
        return
    self.input_shape = input_shape
    if self.test_noise is None:
        self.test_noise = G.generate_noise(config.stats.num_generated_samples, volatile=True).cpu()
        # display noise only once
        # grid_noise = vutils.make_grid(self.test_noise.data, normalize=True, scale_each=True, nrow=4)
        # self.writer.add_image('Image/Noise', grid_noise)
    if config.stats.calc_rmse_score:
        rmse_score.initialize(train_loader, config.evolution.fitness.fid_sample_size)
        for g in generators:
            g.calc_rmse_score()
    if config.stats.calc_inception_score:
        for g in generators:
            g.inception_score()
        self.writer.add_scalars('Training/Inception_score', {"Best_G": G.inception_score_mean}, epoch)
    if G.fid_score is not None:
        self.writer.add_scalars('Training/Fid_score', {"Best_G": G.fid_score}, epoch)
    self.save_data(epoch, g_pop, d_pop,
                   config.stats.save_best_model and
                   (epoch == num_epochs - 1 or epoch % config.stats.save_best_interval == 0))
    self.writer.add_scalars('Training/Trained_samples', {
        "Best_D": D.trained_samples,
        "Best_G": G.trained_samples,
        "D": sum([p.trained_samples for p in discriminators]) / len(discriminators),
        "G": sum([p.trained_samples for p in generators]) / len(generators)
    }, epoch)
    self.writer.add_scalars('Training/Loss', {"Best_D": D.error, "Best_G": G.error}, epoch)
    self.writer.add_scalars('Training/Fitness', {"Best_D": D.fitness(), "Best_G": G.fitness()}, epoch)
    self.writer.add_scalars('Training/Generation', {"Best_D": D.genome.generation, "Best_G": G.genome.generation}, epoch)
    self.writer.add_histogram('Training/Loss/D', np.array([p.error for p in discriminators]), epoch)
    self.writer.add_histogram('Training/Loss/G', np.array([p.error for p in generators]), epoch)
    self.writer.add_histogram('Training/Trained_samples/D', np.array([p.trained_samples for p in discriminators]), epoch)
    self.writer.add_histogram('Training/Trained_samples/G', np.array([p.trained_samples for p in generators]), epoch)

    # generate images with the best performing G's
    for i, gen in enumerate(generators[:config.stats.print_best_amount]):
        image_path = None
        if i == 0:
            image_path = '%s/images/generated-%05d.png' % (self.writer.file_writer.get_logdir(), epoch)
        grid = self.generate_image(gen, path=image_path)
        self.writer.add_image('Image/Best_G/%d' % i, grid, epoch)

    # write architectures for best G and D
    self.writer.add_text('Graph/Best_G', str([str(p) for p in generators[:config.stats.print_best_amount]]), epoch)
    self.writer.add_text('Graph/Best_D', str([str(p) for p in discriminators[:config.stats.print_best_amount]]), epoch)

    # apply best G and D on the validation dataset
    # FIXME: the validation dataset was already evaluated at this point. Just reuse the data.
    if config.stats.display_validation_stats:
        d_errors_real, d_errors_fake, g_errors = [], [], []
        for n, (images, _) in enumerate(validation_loader):
            images = tools.cuda(Variable(images))
            batch_size = images.size(0)
            d_errors_real.append(D.step_real(images))
            fake_error, _ = D.step_fake(G, batch_size)
            d_errors_fake.append(fake_error)
            g_errors.append(G.step(D, batch_size).data[0])
        # display validation metrics
        self.writer.add_scalars('Validation/D/Loss', {
            'Real': np.mean(d_errors_real),
            'Fake': np.mean(d_errors_fake)
        }, epoch)
        self.writer.add_scalars('Validation/Loss', {
            'Best_D': np.mean(d_errors_real + d_errors_fake),
            'Best_G': np.mean(g_errors)
        }, epoch)

    # display architecture metrics
    self.writer.add_scalars('Architecture/Layers', {
        'Best_D': len(D.genome.genes),
        'Best_G': len(G.genome.genes),
        'D': np.mean([len(p.genome.genes) for p in discriminators]),
        'G': np.mean([len(p.genome.genes) for p in generators])
    }, epoch)
    self.writer.add_histogram('Architecture/Layers/D', np.array([len(p.genome.genes) for p in discriminators]), epoch)
    self.writer.add_histogram('Architecture/Layers/G', np.array([len(p.genome.genes) for p in generators]), epoch)
    self.writer.add_scalars('Architecture/Invalid', {
        'D': sum([p.invalid for p in discriminators]),
        'G': sum([p.invalid for p in generators])
    }, epoch)
    self.writer.add_scalars('Architecture/Species', {
        "D": len(d_pop.species_list),
        "G": len(g_pop.species_list)
    }, epoch)
    self.writer.add_scalars('Architecture/Speciation_Threshold', {
        "D": int(d_pop.speciation_threshold),
        "G": int(g_pop.speciation_threshold)
    }, epoch)
    best_d_used = np.mean([g.used for g in D.genome.genes])
    best_g_used = np.mean([g.used for g in G.genome.genes])
    d_used = np.mean([np.mean([g.used for g in p.genome.genes]) for p in discriminators])
    g_used = np.mean([np.mean([g.used for g in p.genome.genes]) for p in generators])
    self.writer.add_scalars('Architecture/Genes_reuse', {
        'Best_D': best_d_used,
        'Best_G': best_g_used,
        'D': d_used,
        'G': g_used
    }, epoch)
    logger.debug("\n%s: D: %s G: %s", epoch, D.error, G.error)
    logger.debug(G)
    logger.debug(G.model)
    logger.debug(D)
    logger.debug(D.model)
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        logger.debug((f"memory_allocated: {torch.cuda.memory_allocated()}, "
                      f"max_memory_allocated: {torch.cuda.max_memory_allocated()}, "
                      f"memory_cached: {torch.cuda.memory_cached()}, "
                      f"max_memory_cached: {torch.cuda.max_memory_cached()}"))
    if config.stats.notify and \
            (self.last_notification is None or
             (datetime.now() - self.last_notification).seconds // 60 > config.stats.min_notification_interval):
        self.last_notification = datetime.now()
        notify(f"Epoch {epoch}: G {G.fitness():.2f}, D: {D.error:.2f}")
    # graph plotting
    # dummy_input = Variable(torch.randn(28, 28)).cuda()
    # self.writer.add_graph(D.model, (dummy_input, ))
    # dummy_input = Variable(torch.randn(10, 10)).cuda()
    # self.writer.add_graph(G.model, (dummy_input, ))
    # flush writer to avoid memory issues
    self.writer.scalar_dict = {}
import torch

from util import tools

ONE = tools.cuda(torch.tensor(1.0))
MONE = tools.cuda(torch.tensor(-1.0))