def generate_digit_samples(self, path_to_model_checkpoint, num_of_pics, digit):
    loaded_model = utils.load_model(
        path_to_model_checkpoint=path_to_model_checkpoint)
    if not os.path.exists("results_pics/"):
        os.makedirs("results_pics/")
    utils.generate_images(latent_dim=self.arch_dict['latent_code'],
                          loaded_model=loaded_model,
                          num_of_pics=num_of_pics,
                          digit=digit)
def evaluate(args, current_iter, gen, device, inception_model=None,
             eval_iter=None):
    """Evaluate the model over args.n_eval_batches mini-batches."""
    calc_fid = (inception_model is not None) and (eval_iter is not None)
    num_batches = args.n_eval_batches
    gen.eval()
    fake_list, real_list = [], []
    conditional = args.cGAN

    for i in range(1, num_batches + 1):
        if conditional:
            class_id = i % args.num_classes
        else:
            class_id = None
        fake = utils.generate_images(gen, device, args.batch_size,
                                     args.gen_dim_z, args.gen_distribution,
                                     class_id=class_id)
        if calc_fid and i <= args.n_fid_batches:
            fake_list.append((fake.cpu().numpy() + 1.0) / 2.0)
            real_list.append((next(eval_iter)[0].cpu().numpy() + 1.0) / 2.0)

        # Save generated images.
        root = args.eval_image_root
        if conditional:
            root = os.path.join(root, "class_id_{:04d}".format(i))
        if not os.path.isdir(root):
            os.makedirs(root)
        fn = "image_iter_{:07d}_batch_{:04d}.png".format(current_iter, i)
        torchvision.utils.save_image(fake, os.path.join(root, fn),
                                     nrow=4, normalize=True, scale_each=True)

    # Calculate FID scores.
    if calc_fid:
        fake_images = np.concatenate(fake_list)
        real_images = np.concatenate(real_list)
        mu_fake, sigma_fake = metrics.fid.calculate_activation_statistics(
            fake_images, inception_model, args.batch_size, device=device)
        mu_real, sigma_real = metrics.fid.calculate_activation_statistics(
            real_images, inception_model, args.batch_size, device=device)
        fid_score = metrics.fid.calculate_frechet_distance(
            mu_fake, sigma_fake, mu_real, sigma_real)
    else:
        fid_score = -1000

    gen.train()
    return fid_score
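The `metrics.fid` helpers used above are not part of this excerpt. For context, the Fréchet distance they compute between the Gaussians fitted to real and fake Inception activations is ||mu_r - mu_f||^2 + Tr(Sigma_r + Sigma_f - 2(Sigma_r Sigma_f)^{1/2}). The sketch below is a minimal NumPy/SciPy version of that formula, not the project's actual implementation; the function name `frechet_distance` and the `eps` parameter are illustrative.

import numpy as np
from scipy import linalg

def frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    # ||mu1 - mu2||^2 term.
    diff = mu1 - mu2
    # Matrix square root of the product of the covariances.
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        # Add a small offset to the diagonals for numerical stability.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * np.trace(covmean)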
def train(dataset, epochs, generator, discriminator, generator_loss,
          discriminator_loss, generator_optimizer, discriminator_optimizer,
          loss_object, test_dataset, checkpoint, checkpoint_prefix):
    for epoch in range(epochs):
        print('epoch #{}/{}'.format(epoch + 1, epochs))
        start = time.time()

        for idx, (input_image, target) in enumerate(dataset):
            # print('processing batch {}'.format(idx))
            train_step(input_image, target, generator, discriminator,
                       generator_loss, discriminator_loss,
                       generator_optimizer, discriminator_optimizer,
                       loss_object)

        # clear_output(wait=True)
        for inp, tar in test_dataset.take(1):
            generate_images(generator, inp, tar, epoch + 1)

        # Saving (checkpoint) the model every 20 epochs.
        if (epoch + 1) % 20 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

        print('Time taken for epoch {} is {} sec\n'.format(
            epoch + 1, time.time() - start))
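The train_step called above is not shown in this snippet. Below is a minimal sketch of a pix2pix-style step matching the call signature; the exact signatures of generator_loss and discriminator_loss (including whether they take loss_object) are assumptions, not the project's code.

import tensorflow as tf

def train_step(input_image, target, generator, discriminator,
               generator_loss, discriminator_loss,
               generator_optimizer, discriminator_optimizer, loss_object):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        gen_output = generator(input_image, training=True)
        # Discriminator sees (input, target) as real and (input, generated) as fake.
        disc_real_output = discriminator([input_image, target], training=True)
        disc_generated_output = discriminator([input_image, gen_output], training=True)
        gen_loss = generator_loss(disc_generated_output, gen_output, target, loss_object)
        disc_loss = discriminator_loss(disc_real_output, disc_generated_output, loss_object)

    generator_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
    discriminator_gradients = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(
        zip(generator_gradients, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(
        zip(discriminator_gradients, discriminator.trainable_variables))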
def next_mini_batch(self):
    start = self.index_in_epoch
    self.index_in_epoch += self.mb_size
    self.current_epoch += self.mb_size / len(self.x_train)

    # Adapt the length of the permutation array.
    if not len(self.perm_array) == len(self.x_train):
        self.perm_array = np.arange(len(self.x_train))

    # Shuffle once at the start of an epoch.
    if start == 0:
        np.random.shuffle(self.perm_array)

    # At the end of the epoch:
    if self.index_in_epoch > self.x_train.shape[0]:
        np.random.shuffle(self.perm_array)  # shuffle the data
        start = 0  # start the next epoch
        self.index_in_epoch = self.mb_size  # set the index to the mini-batch size

        if self.train_on_augmented_data:
            # Use augmented data for the next epoch.
            self.x_train_aug = utils.normalize_data(utils.generate_images(self.x_train))
            self.y_train_aug = self.y_train

    end = self.index_in_epoch

    if self.train_on_augmented_data:
        # Use augmented data.
        x_tr = self.x_train_aug[self.perm_array[start:end]]
        y_tr = self.y_train_aug[self.perm_array[start:end]]
    else:
        # Use original data.
        x_tr = self.x_train[self.perm_array[start:end]]
        y_tr = self.y_train[self.perm_array[start:end]]

    return x_tr, y_tr
lossD_real = lossfunc(netD(real_images), one_labels)
lossD_fake = lossfunc(netD(fake_images.detach()), zero_labels)
lossD = lossD_real + lossD_fake

netD.zero_grad()
lossD.backward()
optimizerD.step()

##########################
#   Training generator   #
##########################
# fake_images = netG(noise)
lossG = lossfunc(netD(fake_images), one_labels)

netG.zero_grad()
lossG.backward()
optimizerG.step()

if i % 100 == 0:
    print('Epoch [{}/{}], step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}'.format(
        epoch + 1, opt.num_epochs, i + 1, num_batches, lossD.item(), lossG.item()))

generate_images(epoch, opt.output_path, fixed_noise, opt.num_test_samples,
                opt.nsize, netG, device, use_fixed=opt.use_fixed)

# Save gif:
save_gif(opt.output_path, opt.fps, fixed_noise=opt.use_fixed)
loss_D_A_real = gan_loss(disc_target_real, tf.ones_like(disc_target_real))
loss_D_A_fake = gan_loss(disc_target_fake, tf.zeros_like(disc_target_fake))
loss_D_A = (loss_D_A_real + loss_D_A_fake) * 0.5

loss_D_B_real = gan_loss(disc_source_real, tf.ones_like(disc_source_real))
loss_D_B_fake = gan_loss(disc_source_fake, tf.zeros_like(disc_source_fake))
loss_D_B = (loss_D_B_real + loss_D_B_fake) * 0.5

gradients_of_G_A = netG_A_tape.gradient(loss_G_A, netG_A.variables)
gradients_of_G_B = netG_B_tape.gradient(loss_G_B, netG_B.variables)
gradients_of_D_A = netD_A_tape.gradient(loss_D_A, netD_A.variables)
gradients_of_D_B = netD_B_tape.gradient(loss_D_B, netD_B.variables)

optimizer_G.apply_gradients(zip(gradients_of_G_A, netG_A.variables))
optimizer_G.apply_gradients(zip(gradients_of_G_B, netG_B.variables))
optimizer_D.apply_gradients(zip(gradients_of_D_A, netD_A.variables))
optimizer_D.apply_gradients(zip(gradients_of_D_B, netD_B.variables))

loss_G_A_log.append(loss_G_A.numpy())

plt.plot(loss_G_A_log)
netG_A.save_weights("./in_ckpt_2gen100/")

for ((src_image, _), (tar_image, _)) in dataset.take(1):
    generate_images(netG_A, src_image, tar_image, plots=plots)
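The generator losses loss_G_A and loss_G_B are computed elsewhere in this project and not shown above. In a standard CycleGAN formulation they combine an adversarial term with a cycle-consistency term; the sketch below illustrates that idea under those assumptions (LAMBDA_CYCLE, cycle_loss, and generator_loss are illustrative names, not this project's code).

LAMBDA_CYCLE = 10.0  # cycle-consistency weight from the CycleGAN paper; assumed here

def cycle_loss(real_image, cycled_image):
    # L1 distance between an image and its reconstruction after a round trip.
    return tf.reduce_mean(tf.abs(real_image - cycled_image))

def generator_loss(disc_fake_output):
    # The generator tries to make the discriminator output "real" on fakes.
    return gan_loss(disc_fake_output, tf.ones_like(disc_fake_output))

# For example, inside the same GradientTape blocks one might compute:
# loss_G_A = generator_loss(disc_target_fake) + LAMBDA_CYCLE * (
#     cycle_loss(src_image, cycled_src) + cycle_loss(tar_image, cycled_tar))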
def train_graph(self, sess, x_train, y_train, x_valid, y_valid, n_epoch=1,
                train_on_augmented_data=False):
    # Train on original or augmented data.
    self.train_on_augmented_data = train_on_augmented_data

    # Training and validation data.
    self.x_train = x_train
    self.y_train = y_train
    self.x_valid = x_valid
    self.y_valid = y_valid

    # Use augmented data.
    if self.train_on_augmented_data:
        print('generate new set of images')
        self.x_train_aug = utils.normalize_data(utils.generate_images(self.x_train))
        self.y_train_aug = self.y_train

    # Parameters.
    mb_per_epoch = self.x_train.shape[0] / self.mb_size
    train_loss, train_acc, valid_loss, valid_acc = [], [], [], []

    # Start the timer.
    start = datetime.datetime.now()
    print(datetime.datetime.now().strftime('%d-%m-%Y %H:%M:%S'), ': start training')
    print('learnrate = ', self.learn_rate, ', n_epoch = ', n_epoch,
          ', mb_size = ', self.mb_size)

    # Loop over mini-batches.
    for i in range(int(n_epoch * mb_per_epoch) + 1):

        # Adapt the learning rate.
        self.learn_rate_pos = int(self.current_epoch // self.learn_rate_step_size)
        if not self.learn_rate == self.learn_rate_array[self.learn_rate_pos]:
            self.learn_rate = self.learn_rate_array[self.learn_rate_pos]
            print(datetime.datetime.now() - start,
                  ': set learn rate to %.6f' % self.learn_rate)

        # Get a new batch.
        x_batch, y_batch = self.next_mini_batch()

        # Run the graph.
        sess.run(self.train_step_tf, feed_dict={self.x_data_tf: x_batch,
                                                self.y_data_tf: y_batch,
                                                self.keep_prob_tf: self.keep_prob,
                                                self.learn_rate_tf: self.learn_rate})

        # Store losses and accuracies.
        if i % int(self.log_step * mb_per_epoch) == 0 or i == int(n_epoch * mb_per_epoch):

            self.n_log_step += 1  # for logging the results

            feed_dict_train = {
                self.x_data_tf: self.x_train[self.perm_array[:len(self.x_valid)]],
                self.y_data_tf: self.y_train[self.perm_array[:len(self.y_valid)]],
                self.keep_prob_tf: 1.0}

            feed_dict_valid = {self.x_data_tf: self.x_valid,
                               self.y_data_tf: self.y_valid,
                               self.keep_prob_tf: 1.0}

            # Summaries for TensorBoard.
            if self.use_tb_summary:
                train_summary = sess.run(self.merged, feed_dict=feed_dict_train)
                valid_summary = sess.run(self.merged, feed_dict=feed_dict_valid)
                self.train_writer.add_summary(train_summary, self.n_log_step)
                self.valid_writer.add_summary(valid_summary, self.n_log_step)

            train_loss.append(sess.run(self.cross_entropy_tf, feed_dict=feed_dict_train))
            train_acc.append(self.accuracy_tf.eval(session=sess, feed_dict=feed_dict_train))
            valid_loss.append(sess.run(self.cross_entropy_tf, feed_dict=feed_dict_valid))
            valid_acc.append(self.accuracy_tf.eval(session=sess, feed_dict=feed_dict_valid))

            print('%.2f epoch: train/val loss = %.4f/%.4f, train/val acc = %.4f/%.4f' % (
                self.current_epoch, train_loss[-1], valid_loss[-1],
                train_acc[-1], valid_acc[-1]))

    # Concatenate losses and accuracies and assign them to the tensor variables.
    tl_c = np.concatenate([self.train_loss_tf.eval(session=sess), train_loss], axis=0)
    vl_c = np.concatenate([self.valid_loss_tf.eval(session=sess), valid_loss], axis=0)
    ta_c = np.concatenate([self.train_acc_tf.eval(session=sess), train_acc], axis=0)
    va_c = np.concatenate([self.valid_acc_tf.eval(session=sess), valid_acc], axis=0)

    sess.run(tf.assign(self.train_loss_tf, tl_c, validate_shape=False))
    sess.run(tf.assign(self.valid_loss_tf, vl_c, validate_shape=False))
    sess.run(tf.assign(self.train_acc_tf, ta_c, validate_shape=False))
    sess.run(tf.assign(self.valid_acc_tf, va_c, validate_shape=False))

    print('running time for training: ', datetime.datetime.now() - start)

    return None
def push_to_dynamodb(table_name, collection_name, image_data):
    """
    Given the name of a Dynamo table and some image data, push it into DynamoDB.
    """
    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table(table_name)

    with table.batch_writer() as batch:
        for i, image in enumerate(image_data, start=1):
            print('Pushing image %d with ID %s' % (i, image['image_no_calc']))
            batch.put_item(
                Item={
                    'MiroID': image['image_no_calc'],
                    'MiroCollection': collection_name,
                    'ReindexShard': 'default',
                    'ReindexVersion': 1,
                    'data': json.dumps(image, separators=(',', ':'))
                }
            )
            # Pause briefly every 50 items to avoid hammering DynamoDB.
            if i % 50 == 0:
                time.sleep(5)


if __name__ == '__main__':
    args = docopt.docopt(__doc__)

    image_data = generate_images(bucket=args['--bucket'], key=args['--key'])
    push_to_dynamodb(
        table_name=args['--table'],
        collection_name=args['--collection'],
        image_data=image_data
    )
def main():
    args = parsing()

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    BATCH_SIZE = 64
    TRAINING_RATIO = 5
    GRADIENT_PENALTY_WEIGHT = 10  # As per the paper

    X_train = get_mnist()
    generator = make_generator()
    discriminator = make_discriminator()

    # The generator_model is used when we want to train the generator layers.
    # As such, we ensure that the discriminator layers are not trainable.
    # Note that once we compile this model, updating .trainable will have no effect
    # within it. As such, it won't cause problems if we later set
    # discriminator.trainable = True for the discriminator_model, as long as we
    # compile the generator_model first.
    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False

    generator_input = Input(shape=(100,))
    generator_layers = generator(generator_input)
    discriminator_layers_for_generator = discriminator(generator_layers)
    generator_model = Model(inputs=[generator_input],
                            outputs=[discriminator_layers_for_generator])

    # We use the Adam parameters from Gulrajani et al.
    generator_model.compile(optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
                            loss=wloss)

    # Now that the generator_model is compiled, we can make the discriminator
    # layers trainable.
    for layer in discriminator.layers:
        layer.trainable = True
    for layer in generator.layers:
        layer.trainable = False
    discriminator.trainable = True
    generator.trainable = False

    # The discriminator_model is more complex. It takes both real image samples and
    # random noise seeds as input. The noise seed is run through the generator model
    # to get generated images. Both real and generated images are then run through
    # the discriminator. Although we could concatenate the real and generated images
    # into a single tensor, we don't (see model compilation for why).
    real_samples = Input(shape=X_train.shape[1:])
    generator_input_for_discriminator = Input(shape=(100,))
    generated_samples_for_discriminator = generator(generator_input_for_discriminator)
    discriminator_output_from_generator = discriminator(generated_samples_for_discriminator)
    discriminator_output_from_real_samples = discriminator(real_samples)

    # We also need to generate weighted averages of real and generated samples,
    # to use for the gradient norm penalty.
    averaged_samples = RandomWeightedAverage(BATCH_SIZE)(
        [real_samples, generated_samples_for_discriminator])

    # We then run these samples through the discriminator as well. Note that we never
    # really use the discriminator output for these samples - we're only running them
    # to get the gradient norm for the gradient penalty loss.
    averaged_samples_out = discriminator(averaged_samples)

    # The gradient penalty loss function requires the input averaged samples to get
    # gradients. However, Keras loss functions can only have two arguments, y_true and
    # y_pred. We get around this by making a partial() of the function with the
    # averaged samples here.
    partial_gp_loss = partial(gradient_penalty_wloss,
                              averaged_samples=averaged_samples,
                              gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT)
    # Functions need names or Keras will throw an error.
    partial_gp_loss.__name__ = 'gradient_penalty'

    # Keras requires that inputs and outputs have the same number of samples. This is
    # why we didn't concatenate the real samples and generated samples before passing
    # them to the discriminator: if we had, it would create an output with
    # 2 * BATCH_SIZE samples, while the output of the "averaged" samples for the
    # gradient penalty would have only BATCH_SIZE samples.
    # If we don't concatenate the real and generated samples, however, we get three
    # outputs: one for the generated samples, one for the real samples, and one for
    # the averaged samples, all of size BATCH_SIZE. This works neatly!
    discriminator_model = Model(
        inputs=[real_samples, generator_input_for_discriminator],
        outputs=[
            discriminator_output_from_real_samples,
            discriminator_output_from_generator,
            averaged_samples_out
        ])

    # We use the Adam parameters from Gulrajani et al. We use the Wasserstein loss for
    # both the real and generated samples, and the gradient penalty loss for the
    # averaged samples.
    discriminator_model.compile(optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
                                loss=[wloss, wloss, partial_gp_loss])

    # We make three label vectors for training. positive_y is the label vector for
    # real samples, with value 1. negative_y is the label vector for generated
    # samples, with value -1. The dummy_y vector is passed to the gradient_penalty
    # loss function and is not used.
    positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
    negative_y = -positive_y
    dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32)

    d_loss = []
    discriminator_loss = []
    generator_loss = []

    print('Training...')
    for epoch in range(100):
        print("---epoch: %d---" % epoch)
        np.random.shuffle(X_train)
        print("Number of batches: ", int(X_train.shape[0] // BATCH_SIZE))
        minibatches_size = BATCH_SIZE * TRAINING_RATIO

        for i in range(int(X_train.shape[0] // (BATCH_SIZE * TRAINING_RATIO))):
            print("batch: ", i)
            discriminator_minibatches = X_train[i * minibatches_size:
                                                (i + 1) * minibatches_size]

            # Train the discriminator TRAINING_RATIO times per generator update.
            for j in range(TRAINING_RATIO):
                image_batch = discriminator_minibatches[j * BATCH_SIZE:
                                                        (j + 1) * BATCH_SIZE]
                noise = np.random.rand(BATCH_SIZE, 100).astype(np.float32)
                discriminator_loss.append(
                    discriminator_model.train_on_batch(
                        [image_batch, noise],
                        [positive_y, negative_y, dummy_y]))

            d_loss.append(np.mean(np.array(discriminator_loss[-5:]), axis=0))
            generator_loss.append(
                generator_model.train_on_batch(np.random.rand(BATCH_SIZE, 100),
                                               positive_y))

        generate_images(generator, args.output_dir, epoch)

    loss_dict = {"g_loss": generator_loss, "d_loss": d_loss}
    with open("loss.pkl", "wb") as fo:
        pickle.dump(loss_dict, fo)
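The snippet compiles against wloss and gradient_penalty_wloss, which are defined elsewhere in the project. A minimal sketch following the WGAN-GP formulation of Gulrajani et al., written with the Keras backend; treat the implementations below as an illustration of the idea rather than this project's exact code.

from keras import backend as K

def wloss(y_true, y_pred):
    # Wasserstein loss: labels are +1 / -1, so this is plus/minus the critic's mean score.
    return K.mean(y_true * y_pred)

def gradient_penalty_wloss(y_true, y_pred, averaged_samples, gradient_penalty_weight):
    # Penalise the critic when the gradient norm at the interpolated samples drifts from 1.
    gradients = K.gradients(y_pred, averaged_samples)[0]
    gradients_sqr = K.square(gradients)
    # Sum over all non-batch dimensions to get the squared L2 norm per sample.
    gradients_sqr_sum = K.sum(gradients_sqr,
                              axis=np.arange(1, len(gradients_sqr.shape)))
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
    return K.mean(gradient_penalty)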
* We start by iterating over the dataset.
* The generator gets the input image and produces a generated output.
* The discriminator receives the input_image and the generated image as its first input. The second input is the input_image and the target_image.
* Next, we calculate the generator and the discriminator losses.
* Then we calculate the gradients of the losses with respect to the generator and the discriminator variables (inputs) and apply them with the optimizers.
* This entire procedure is shown in the images below.

## Generate Images

* After training, it's time to generate some images!
* We pass images from the test dataset to the generator.
* The generator then translates the input image into the output we expect.
* The last step is to plot the predictions and **voila!**
"""

EPOCHS = 150

train(train_dataset, EPOCHS, generator, discriminator, generator_loss,
      discriminator_loss, generator_optimizer, discriminator_optimizer,
      loss_object, test_dataset, checkpoint, checkpoint_prefix)

# # Restoring the latest checkpoint in checkpoint_dir.
# checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

# Run the trained model on a few examples from the test dataset.
for idx, (inp, tar) in enumerate(test_dataset.take(5)):
    generate_images(generator, inp, tar, 'final_{}'.format(idx))
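The generate_images helper used in this loop (and in the train function earlier) is defined elsewhere in the tutorial. A minimal sketch of such a plotting helper, assuming matplotlib, images scaled to [-1, 1], and that the fourth argument is used to name the saved figure; the file-naming scheme below is an assumption.

import matplotlib.pyplot as plt

def generate_images(model, test_input, target, name):
    prediction = model(test_input, training=True)
    plt.figure(figsize=(15, 5))
    display_list = [test_input[0], target[0], prediction[0]]
    titles = ['Input Image', 'Ground Truth', 'Predicted Image']
    for i in range(3):
        plt.subplot(1, 3, i + 1)
        plt.title(titles[i])
        # Rescale from [-1, 1] to [0, 1] for display.
        plt.imshow(display_list[i] * 0.5 + 0.5)
        plt.axis('off')
    plt.savefig('generated_{}.png'.format(name))
    plt.close()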
def evaluate(args, current_iter, gen, device, inception_model=None,
             eval_iter=None, to_save=False):
    """Evaluate the model over args.n_eval_batches mini-batches."""
    calc_fid = (inception_model is not None) and (eval_iter is not None)
    num_batches = args.n_eval_batches
    gen.eval()
    fake_list, real_list = [], []
    conditional = args.cGAN
    # Per-class image buffers for intra-FID (assumes conditional mode).
    class_fake_dict = {x: [] for x in range(args.num_classes)}
    class_real_dict = {x: [] for x in range(args.num_classes)}

    for i in range(1, num_batches + 1):
        if conditional:
            class_id = i % args.num_classes
        else:
            class_id = None
        fake = utils.generate_images(gen, device, args.batch_size,
                                     args.gen_dim_z, args.gen_distribution,
                                     class_id=class_id)
        if calc_fid and i <= args.n_fid_batches:
            real_data_sample = next(eval_iter)
            # Group the real images by class label.
            for real_class_label in range(args.num_classes):
                real_labels = real_data_sample[1].cpu().numpy()
                these_real_ims = real_data_sample[0].cpu().numpy()[
                    real_labels == real_class_label]
                # Match the [0, 1] scaling used for the fake images.
                class_real_dict[real_class_label].append((these_real_ims + 1.0) / 2.0)
            real_list.append((real_data_sample[0].cpu().numpy() + 1.0) / 2.0)
            class_fake_dict[class_id].append((fake.cpu().numpy() + 1.0) / 2.0)
            fake_list.append((fake.cpu().numpy() + 1.0) / 2.0)

        if to_save:
            # Save generated images.
            root = args.eval_image_root
            if conditional:
                root = os.path.join(root, "class_id_{:04d}".format(i))
            if not os.path.isdir(root):
                os.makedirs(root)
            fn = "image_iter_{:07d}_batch_{:04d}.png".format(current_iter, i)
            torchvision.utils.save_image(fake, os.path.join(root, fn),
                                         nrow=4, normalize=True, scale_each=True)

    # Prune the per-class dicts.
    class_real_dict = prune_dict(class_real_dict)
    class_fake_dict = prune_dict(class_fake_dict)

    # Calculate intra-FID scores.
    for class_idx in range(args.num_classes):
        real_images = class_real_dict[class_idx]
        fake_images = class_fake_dict[class_idx]
        print("Class Number: {} | Number of real images {}. Number of fake images {}"
              .format(class_idx, len(real_images), len(fake_images)))
        mu_fake, sigma_fake = metrics.fid.calculate_activation_statistics(
            fake_images, inception_model, args.batch_size, device=device)
        mu_real, sigma_real = metrics.fid.calculate_activation_statistics(
            real_images, inception_model, args.batch_size, device=device)
        fid_score = metrics.fid.calculate_frechet_distance(
            mu_fake, sigma_fake, mu_real, sigma_real)
        print("Class Label {} || Fid Score {}".format(class_idx, fid_score))

    # Calculate the overall FID score.
    if calc_fid:
        fake_images = np.concatenate(fake_list)
        real_images = np.concatenate(real_list)
        print("Number of real images {}. Number of fake images {}".format(
            len(real_images), len(fake_images)))
        mu_fake, sigma_fake = metrics.fid.calculate_activation_statistics(
            fake_images, inception_model, args.batch_size, device=device)
        mu_real, sigma_real = metrics.fid.calculate_activation_statistics(
            real_images, inception_model, args.batch_size, device=device)
        fid_score = metrics.fid.calculate_frechet_distance(
            mu_fake, sigma_fake, mu_real, sigma_real)
    else:
        fid_score = -1000

    gen.train()
    return fid_score