import itertools as itt
import logging
import time
from pathlib import Path

import numpy as np
import tensorflow as tf
import tqdm

# Assumed source of mnist_score / mnist_frechet_distance used by the eval
# variant below (tensorflow_gan's MNIST example utilities).
from tensorflow_gan.examples.mnist import util as tfgan_mnist

# get_fixed_random, generate_images, train_epoch, get_forward_pass_fn and
# get_train_step_fn are project-local helpers defined elsewhere in this repo.


def train(config, gen, disc_f, disc_h, disc_j, model_en, train_data):
    """Basic training loop: alternate discriminator and generator/encoder updates."""
    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)
    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)

    # Define logging to TensorBoard
    summary_writer = tf.summary.create_file_writer(
        f'{config.result_path}/{config.model}_{config.dataset}_'
        f'{time.strftime("%Y-%m-%d--%H-%M-%S")}')
    # fixed_z / fixed_c are only used for visualization.
    fixed_z, fixed_c = get_fixed_random(config, num_to_generate=100)

    # Define metrics
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()

    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    for epoch in range(config.num_epochs):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()
        train_epoch(train_data, gen, disc_f, disc_h, disc_j, model_en,
                    disc_optimizer, gen_en_optimizer, metric_loss_disc,
                    metric_loss_gen_en, config.train_batch_size,
                    config.num_cont_noise, config)
        epoch_time = time.time() - start_time

        # Log results
        logging.info(
            f'Epoch {epoch + 1}: Disc_loss: {metric_loss_disc.result()}, '
            f'Gen_loss: {metric_loss_gen_en.result()}, Time: {epoch_time}')
        with summary_writer.as_default():
            tf.summary.scalar('Generator and Encoder loss',
                              metric_loss_gen_en.result(), step=epoch)
            tf.summary.scalar('Discriminator loss',
                              metric_loss_disc.result(), step=epoch)
        metric_loss_gen_en.reset_states()
        metric_loss_disc.reset_states()

        # Generated images
        gen_image = generate_images(gen, fixed_z, fixed_c, config)
        with summary_writer.as_default():
            tf.summary.image('Generated Images',
                             tf.expand_dims(gen_image, axis=0), step=epoch)
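# A minimal sketch of what `get_fixed_random` could return; the real helper is
# defined elsewhere in this repo, so the shapes and the noise/code split below
# (`config.noise_dim`, `config.num_classes`) are illustrative assumptions only.
def get_fixed_random_sketch(config, num_to_generate=100):
    # Continuous noise z ~ N(0, I).
    z = tf.random.normal([num_to_generate, config.noise_dim])
    # One-hot categorical code c, cycling through the classes so a 10x10
    # visualization grid covers every class.
    labels = tf.math.mod(tf.range(num_to_generate), config.num_classes)
    c = tf.one_hot(labels, depth=config.num_classes)
    return z, c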
def train(config, gen, disc_f, disc_h, disc_j, model_en, train_data,
          train_data_repeat, model_copies=None):
    """Variant that additionally tracks the duality gap (DG) and supports
    checkpointing, model loading and evaluation-only runs."""
    if config.load_model or config.do_eval:
        # Locate the checkpoint directory of a previous run by matching the
        # command line logged in cmdlog.txt against the current config.
        if config.load_from_further:
            config_dir = Path('~/models/dg_further/configs').expanduser()
        else:
            config_dir = Path('~/models/dg/exp/configs').expanduser()
        for cdir in config_dir.iterdir():
            ldir = cdir / 'logs'
            try:
                cmdline = (ldir / 'cmdlog.txt').read_text()
                cparams = dict(k[2:].split('=') for k in cmdline.split()
                               if k.startswith('--'))
                print(cparams)
                if (cparams['dataset'] == config.dataset
                        and cparams['train_dg'] == str(config.train_dg)
                        and cparams['steps_dg'] == str(config.steps_dg)
                        and cparams['conditional'] == str(config.conditional)):
                    checkpoint_dir = list(ldir.glob('*/checkpoints'))[0]
                    break
            except Exception:  # skip runs without a readable/matching cmdlog
                pass
        else:
            raise ValueError('Model not found')

    train_data_iterator = iter(train_data_repeat)
    image, label = next(train_data_iterator)
    # Trace the forward-pass functions once on a sample batch. For DG runs the
    # "const" versions hold one player fixed while its frozen copy is updated.
    if config.train_dg or config.do_eval or config.load_model:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_g_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image, label, model_copies[gen], disc_f, disc_h,
                                disc_j, model_copies[model_en],
                                config.train_batch_size, config.num_cont_noise,
                                config, update_d=False)
        forward_pass_fn_g_const(image, label, gen, model_copies[disc_f],
                                model_copies[disc_h], model_copies[disc_j],
                                model_en, config.train_batch_size,
                                config.num_cont_noise, config, update_g=False)
    else:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                                model_en, config.train_batch_size,
                                config.num_cont_noise, config)
        forward_pass_fn_g_const = None

    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)
    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)
    dg_optimizer_d = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)
    dg_optimizer_g = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_gen_en,
                                        beta_2=config.beta_2_gen_en)

    out_dir = (f'{config.result_path}/{config.model}_{config.dataset}_'
               f'{time.strftime("%Y-%m-%d--%H-%M-%S")}')
    # Define logging to TensorBoard
    summary_writer = tf.summary.create_file_writer(out_dir)
    # fixed_z / fixed_c are only used for visualization.
    fixed_z, fixed_c = get_fixed_random(config, num_to_generate=100)

    # Define metrics
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()
    metric_loss_dg = tf.keras.metrics.Mean()

    # Trace the train-step functions once on the same sample batch.
    if config.train_dg or config.do_eval or config.load_model:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_g_const = get_train_step_fn(forward_pass_fn_g_const)
        train_step_fn_d_const(image, label, model_copies[gen], disc_f, disc_h,
                              disc_j, model_copies[model_en], disc_optimizer,
                              gen_en_optimizer, metric_loss_disc,
                              metric_loss_gen_en, config.train_batch_size,
                              config.num_cont_noise, config, update_d=False)
        train_step_fn_g_const(image, label, gen, model_copies[disc_f],
                              model_copies[disc_h], model_copies[disc_j],
                              model_en, disc_optimizer, gen_en_optimizer,
                              metric_loss_disc, metric_loss_gen_en,
                              config.train_batch_size, config.num_cont_noise,
                              config, update_g=False)
    else:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                              model_en, disc_optimizer, gen_en_optimizer,
                              metric_loss_disc, metric_loss_gen_en,
                              config.train_batch_size, config.num_cont_noise,
                              config)
        train_step_fn_g_const = None

    ckpt = tf.train.Checkpoint(gen=gen, disc_f=disc_f, disc_h=disc_h,
                               disc_j=disc_j, model_en=model_en)
    if config.save_model:
        ckpt_mgr = tf.train.CheckpointManager(ckpt, out_dir + '/checkpoints/',
                                              config.num_epochs + 1)
        ckpt_mgr.save(0)
    if config.load_model and not config.do_eval:
        ckpt.restore(
            tf.train.latest_checkpoint(str(checkpoint_dir))).assert_consumed()

    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    for epoch in range(0, config.num_epochs + config.do_eval):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()
        if config.do_eval:
            ckpt.restore(
                str(checkpoint_dir) + f'/ckpt-{epoch}').assert_consumed()
        train_epoch(train_step_fn_d_const, train_step_fn_g_const,
                    forward_pass_fn_d_const, forward_pass_fn_g_const,
                    train_data, train_data_iterator, gen, disc_f, disc_h,
                    disc_j, model_en, disc_optimizer, gen_en_optimizer,
                    dg_optimizer_g, dg_optimizer_d, metric_loss_disc,
                    metric_loss_gen_en, metric_loss_dg,
                    config.train_batch_size, config.num_cont_noise, config,
                    model_copies=model_copies)
        epoch_time = time.time() - start_time

        # Log results
        logging.info(
            f'Epoch {epoch + 1}: Disc_loss: {metric_loss_disc.result()}, '
            f'Gen_loss: {metric_loss_gen_en.result()}, Time: {epoch_time}, '
            f'DG: {metric_loss_dg.result()}')
        with summary_writer.as_default():
            tf.summary.scalar('Generator and Encoder loss',
                              metric_loss_gen_en.result(), step=epoch)
            tf.summary.scalar('Discriminator loss',
                              metric_loss_disc.result(), step=epoch)
            tf.summary.scalar('Duality Gap', metric_loss_dg.result(),
                              step=epoch)
        metric_loss_gen_en.reset_states()
        metric_loss_dg.reset_states()
        metric_loss_disc.reset_states()

        # Generate images
        gen_image = generate_images(gen, fixed_z, fixed_c, config)
        with summary_writer.as_default():
            tf.summary.image('Generated Images',
                             tf.expand_dims(gen_image, axis=0), step=epoch)
        if config.save_model:
            ckpt_mgr.save(epoch + 1)
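# A toy illustration of the duality gap that `metric_loss_dg` averages: for a
# game with value V(x, y), DG(x0, y0) = max_y V(x0, y) - min_x V(x, y0), which
# is non-negative and zero only at an equilibrium. For V(x, y) = x * y on
# [-1, 1]^2 the inner max/min are analytic; the trainer instead approximates
# them by taking dg_optimizer steps on the frozen model copies. Illustrative
# only, not this repo's estimator.
def toy_duality_gap(x0, y0):
    # max over y in [-1, 1] of x0 * y is |x0|; min over x of x * y0 is -|y0|.
    return abs(x0) + abs(y0)  # zero exactly at the (0, 0) saddle point

assert toy_duality_gap(0.0, 0.0) == 0.0
assert toy_duality_gap(0.5, -0.25) == 0.75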
def train(config, gen, disc_f, disc_h, disc_j, model_en, train_data, test_data,
          train_data_repeat, model_copies=None):
    """Variant with DG tracking plus per-epoch evaluation: MNIST Frechet
    distance / score and sample dumps, on both the train and test splits."""
    if config.eval_metrics:
        @tf.function
        def get_mnist_eval_metrics(real, fake):
            frechet = tfgan_mnist.mnist_frechet_distance(real, fake, 1)
            score = tfgan_mnist.mnist_score(fake, 1)
            return tf.stack(list(map(tf.stop_gradient, (frechet, score))))

    if config.load_model or config.do_eval:
        # Locate the checkpoint directory of a previous run by matching the
        # command line logged in cmdlog.txt against the current config.
        if config.load_from_further:
            # config_dir = Path('~/models/dg_further/configs').expanduser()
            config_dir = Path('~/models/dg_further_2/configs').expanduser()
        else:
            config_dir = Path('~/models/dg/exp/configs').expanduser()
        for cdir in config_dir.iterdir():
            ldir = cdir / 'logs'
            try:
                cmdline = (ldir / 'cmdlog.txt').read_text()
                cparams = dict(k[2:].split('=') for k in cmdline.split()
                               if k.startswith('--'))
                print(cparams)
                if (cparams['dataset'] == config.dataset
                        and cparams['train_dg'] == str(config.train_dg)
                        and cparams['steps_dg'] == str(config.steps_dg)
                        and cparams['conditional'] == str(config.conditional)):
                    checkpoint_dir = list(ldir.glob('*/checkpoints'))[0]
                    break
            except Exception:  # skip runs without a readable/matching cmdlog
                pass
        else:
            raise ValueError('Model not found: {}'.format(
                [config.dataset, config.train_dg, config.steps_dg,
                 config.conditional]))

    train_data_iterator = iter(train_data_repeat)
    image, label = next(train_data_iterator)
    # Trace the forward-pass functions once on a sample batch. For DG runs the
    # "const" versions hold one player fixed while its frozen copy is updated.
    if config.train_dg or config.do_eval or config.load_model:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_g_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image, label, model_copies[gen], disc_f, disc_h,
                                disc_j, model_copies[model_en],
                                config.train_batch_size, config.num_cont_noise,
                                config, update_d=False)
        forward_pass_fn_g_const(image, label, gen, model_copies[disc_f],
                                model_copies[disc_h], model_copies[disc_j],
                                model_en, config.train_batch_size,
                                config.num_cont_noise, config, update_g=False)
    else:
        forward_pass_fn_d_const = get_forward_pass_fn()
        forward_pass_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                                model_en, config.train_batch_size,
                                config.num_cont_noise, config)
        forward_pass_fn_g_const = None

    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)
    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)
    dg_optimizer_d = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)
    dg_optimizer_g = tf.optimizers.Adam(learning_rate=config.lr_dg,
                                        beta_1=config.beta_1_gen_en,
                                        beta_2=config.beta_2_gen_en)

    out_dir = (f'{config.result_path}/{config.model}_{config.dataset}_'
               f'{time.strftime("%Y-%m-%d--%H-%M-%S")}')
    # Define logging to TensorBoard
    summary_writer = tf.summary.create_file_writer(out_dir)
    # fixed_z / fixed_c are only used for visualization.
    fixed_z, fixed_c = get_fixed_random(config, num_to_generate=100)

    # Define metrics
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()
    metric_loss_dg = tf.keras.metrics.Mean()

    # Trace the train-step functions once on the same sample batch.
    if config.train_dg or config.do_eval or config.load_model:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_g_const = get_train_step_fn(forward_pass_fn_g_const)
        train_step_fn_d_const(image, label, model_copies[gen], disc_f, disc_h,
                              disc_j, model_copies[model_en], disc_optimizer,
                              gen_en_optimizer, metric_loss_disc,
                              metric_loss_gen_en, config.train_batch_size,
                              config.num_cont_noise, config, update_d=False)
        train_step_fn_g_const(image, label, gen, model_copies[disc_f],
                              model_copies[disc_h], model_copies[disc_j],
                              model_en, disc_optimizer, gen_en_optimizer,
                              metric_loss_disc, metric_loss_gen_en,
                              config.train_batch_size, config.num_cont_noise,
                              config, update_g=False)
    else:
        train_step_fn_d_const = get_train_step_fn(forward_pass_fn_d_const)
        train_step_fn_d_const(image, label, gen, disc_f, disc_h, disc_j,
                              model_en, disc_optimizer, gen_en_optimizer,
                              metric_loss_disc, metric_loss_gen_en,
                              config.train_batch_size, config.num_cont_noise,
                              config)
        train_step_fn_g_const = None

    ckpt = tf.train.Checkpoint(gen=gen, disc_f=disc_f, disc_h=disc_h,
                               disc_j=disc_j, model_en=model_en)
    if config.save_model:
        ckpt_mgr = tf.train.CheckpointManager(ckpt, out_dir + '/checkpoints/',
                                              config.num_epochs + 1)
        ckpt_mgr.save(0)
    if config.load_model and not config.do_eval:
        ckpt.restore(
            tf.train.latest_checkpoint(str(checkpoint_dir))).assert_consumed()

    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    for epoch in range(0, config.num_epochs + config.do_eval):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()
        if config.do_eval:
            ckpt.restore(
                str(checkpoint_dir) + f'/ckpt-{epoch}').assert_consumed()

        datasets = [(train_data, 'train')]
        if config.do_eval:
            datasets.append((test_data, 'test'))
        for dataset, dataset_name in datasets:
            if config.eval_metrics:
                if config.only_eval_last and epoch < config.num_epochs:
                    continue
                num_points_eval = 10000 if config.only_eval_last else 1000
                if config.debug:
                    num_points_eval = 100
                eval_metrics_batches = num_points_eval // config.train_batch_size
                num_points_eval = eval_metrics_batches * config.train_batch_size
                real_images = []
                fake_images = []
                frechet = []
                score = []
                for batch in tqdm.tqdm(
                        itt.islice(dataset, 0, eval_metrics_batches),
                        total=eval_metrics_batches):
                    z, c = get_fixed_random(
                        config, num_to_generate=config.train_batch_size)
                    fake = generate_images(gen, z, c, config, do_plot=False)
                    real = batch[0]
                    # Rescale both splits from [0, 1] to [-1, 1].
                    real, fake = map(lambda x: x * 2. - 1., (real, fake))
                    if len(real_images) < 200 or config.only_save:
                        real_images.extend([np.array(i) for i in real])
                        fake_images.extend([np.array(i) for i in fake])
                    if config.only_save:
                        continue
                    if config.dataset == 'mnist':
                        real, fake = map(
                            lambda x: tf.image.resize(x, [28, 28]),
                            (real, fake))
                        f, s = get_mnist_eval_metrics(real, fake)
                        frechet.append(f)
                        score.append(s)
                    elif config.dataset == 'cifar10':
                        frechet.append(0.)
                        score.append(0.)
                    elif config.dataset == 'fashion_mnist':
                        frechet.append(0.)
                        score.append(0.)

                # Average per-batch metrics (the max guards against empty
                # lists when config.only_save skipped the computation).
                frechet = sum(frechet) / max(1, len(frechet))
                score = sum(score) / max(1, len(score))
                with summary_writer.as_default():
                    tf.summary.scalar(f'Frechet Distance ({dataset_name})',
                                      frechet, step=epoch)
                    tf.summary.scalar(f'Score ({dataset_name})', score,
                                      step=epoch)
                # Map images back from [-1, 1] to uint8 and dump to disk.
                real_images, fake_images = map(
                    np.array, (real_images, fake_images))
                real_images, fake_images = map(
                    lambda x: ((x + 1.) / 2. * 255.).round().astype(np.uint8),
                    (real_images, fake_images))
                np.save(f'logs/real-{epoch}.npy', real_images)
                np.save(f'logs/fake-{epoch}.npy', fake_images)
                continue

            train_epoch(train_step_fn_d_const, train_step_fn_g_const,
                        forward_pass_fn_d_const, forward_pass_fn_g_const,
                        train_data, train_data_iterator, gen, disc_f, disc_h,
                        disc_j, model_en, disc_optimizer, gen_en_optimizer,
                        dg_optimizer_g, dg_optimizer_d, metric_loss_disc,
                        metric_loss_gen_en, metric_loss_dg,
                        config.train_batch_size, config.num_cont_noise, config,
                        model_copies=model_copies)
            epoch_time = time.time() - start_time

            # Log results
            logging.info(
                f'{dataset_name} - Epoch {epoch + 1}: '
                f'Disc_loss: {metric_loss_disc.result()}, '
                f'Gen_loss: {metric_loss_gen_en.result()}, '
                f'Time: {epoch_time}, DG: {metric_loss_dg.result()}')
            with summary_writer.as_default():
                tf.summary.scalar(
                    f'Generator and Encoder loss ({dataset_name})',
                    metric_loss_gen_en.result(), step=epoch)
                tf.summary.scalar(f'Discriminator loss ({dataset_name})',
                                  metric_loss_disc.result(), step=epoch)
                tf.summary.scalar(f'Duality Gap ({dataset_name})',
                                  metric_loss_dg.result(), step=epoch)
            metric_loss_gen_en.reset_states()
            metric_loss_dg.reset_states()
            metric_loss_disc.reset_states()

            # Generate images
            gen_image = generate_images(gen, fixed_z, fixed_c, config)
            with summary_writer.as_default():
                tf.summary.image('Generated Images',
                                 tf.expand_dims(gen_image, axis=0), step=epoch)
        if config.save_model:
            ckpt_mgr.save(epoch + 1)
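# Hedged sketch of the Frechet distance that get_mnist_eval_metrics reports.
# Given Gaussian fits (mu, Sigma) of real and fake feature activations,
#   FID = ||mu_r - mu_f||^2 + Tr(Sigma_r + Sigma_f - 2 (Sigma_r Sigma_f)^(1/2)).
# This NumPy/SciPy version is for illustration; the trainer actually defers to
# tfgan_mnist.mnist_frechet_distance, which extracts features with a MNIST
# classifier before applying the same formula.
from scipy import linalg

def frechet_distance_sketch(feats_real, feats_fake):
    mu_r, mu_f = feats_real.mean(axis=0), feats_fake.mean(axis=0)
    cov_r = np.cov(feats_real, rowvar=False)
    cov_f = np.cov(feats_fake, rowvar=False)
    covmean = linalg.sqrtm(cov_r @ cov_f)
    if np.iscomplexobj(covmean):
        covmean = covmean.real  # drop tiny imaginary parts from sqrtm
    diff = mu_r - mu_f
    return diff @ diff + np.trace(cov_r + cov_f - 2.0 * covmean)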
def train(config, gen, disc_f, disc_h, disc_j, model_en, train_data):
    """Variant that also logs a grid of real images and periodically snapshots
    model weights."""
    # Define optimizers
    disc_optimizer = tf.optimizers.Adam(learning_rate=config.lr_disc,
                                        beta_1=config.beta_1_disc,
                                        beta_2=config.beta_2_disc)
    gen_en_optimizer = tf.optimizers.Adam(learning_rate=config.lr_gen_en,
                                          beta_1=config.beta_1_gen_en,
                                          beta_2=config.beta_2_gen_en)

    # Computed once so the TensorBoard logs and the weight snapshots share one
    # directory (two separate strftime calls could straddle a second boundary).
    basedir = (f'{config.result_path}/{config.model}_{config.dataset}_'
               f'{time.strftime("%Y-%m-%d--%H-%M-%S")}')
    # Define logging to TensorBoard
    summary_writer = tf.summary.create_file_writer(basedir)
    # fixed_z / fixed_c are only used for visualization.
    fixed_z, fixed_c = get_fixed_random(config, num_to_generate=100)

    # Identity "generator" so the plotting helper can render real images too.
    def fake_gene(x, *args, **kwargs):
        return x

    # Log one reference grid of real (unaugmented) images.
    for image, label in train_data:
        image, image_aug = tf.split(image, 2, axis=-1)
        real_image = generate_images(fake_gene, image[:100], label[:100], config)
        with summary_writer.as_default():
            tf.summary.image('Real Images', tf.expand_dims(real_image, axis=0),
                             step=0)
        break

    # Define metrics
    metric_loss_gen_en = tf.keras.metrics.Mean()
    metric_loss_disc = tf.keras.metrics.Mean()

    # Start training
    epoch_tf = tf.Variable(0, trainable=False, dtype=tf.float32)
    for epoch in range(config.num_epochs):
        logging.info(f'Start epoch {epoch + 1} ...')
        epoch_tf.assign(epoch)
        start_time = time.time()
        train_epoch(train_data, gen, disc_f, disc_h, disc_j, model_en,
                    disc_optimizer, gen_en_optimizer, metric_loss_disc,
                    metric_loss_gen_en, config.train_batch_size,
                    config.num_cont_noise, config)
        epoch_time = time.time() - start_time

        # Log results
        logging.info(
            f'Epoch {epoch + 1}: Disc_loss: {metric_loss_disc.result()}, '
            f'Gen_loss: {metric_loss_gen_en.result()}, Time: {epoch_time}')
        with summary_writer.as_default():
            tf.summary.scalar('Generator and Encoder loss',
                              metric_loss_gen_en.result(), step=epoch)
            tf.summary.scalar('Discriminator loss',
                              metric_loss_disc.result(), step=epoch)
        metric_loss_gen_en.reset_states()
        metric_loss_disc.reset_states()

        # Generated images
        gen_image = generate_images(gen, fixed_z, fixed_c, config)
        with summary_writer.as_default():
            tf.summary.image('Generated Images',
                             tf.expand_dims(gen_image, axis=0), step=epoch)

        # Snapshot weights every 25 epochs and at the end of training.
        if epoch % 25 == 0 or epoch == config.num_epochs - 1:
            for model, name in zip([gen, disc_f, disc_h, disc_j, model_en],
                                   ['gen', 'disc_f', 'disc_h', 'disc_j',
                                    'model_en']):
                model.save_weights(f'{basedir}/{name}_{epoch}')
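# Hedged sketch of reloading a snapshot written by the loop above; model
# construction happens elsewhere in this repo, so `models` here is whatever
# (gen, disc_f, disc_h, disc_j, model_en) instances the caller already built.
def load_snapshot_sketch(basedir, epoch, models):
    names = ['gen', 'disc_f', 'disc_h', 'disc_j', 'model_en']
    for model, name in zip(models, names):
        # Keras resolves variables from the prefix written by save_weights.
        model.load_weights(f'{basedir}/{name}_{epoch}')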