def __init__(self, x_y, args):
    """Build an InfoGAN-style graph: generator, discriminator and predictor.

    Three optimizers are created with ``hem.init_optimizer``; the networks
    from ``info_gan`` are built under the 'generator', 'discriminator' and
    'predictor' variable scopes; the three losses are added to the
    'losses' collection and one training op per network is stored on
    ``self``.

    Args:
        x_y: Indexable pair. ``x_y[0]`` conditions the generator and is the
            target of the predictor loss; ``x_y[1]`` is the real sample
            shown to the discriminator.
        args: Parsed arguments. ``args.batch_size`` is read here and the
            object is forwarded to ``hem.init_optimizer``.
    """
    g_opt = hem.init_optimizer(args)
    d_opt = hem.init_optimizer(args)
    q_opt = hem.init_optimizer(args)

    x = hem.rescale(x_y[0], (0, 1), (-1, 1))  # 256x256x3
    y = hem.rescale(x_y[1], (0, 1), (-1, 1))  # 256x256x1
    z = tf.random_uniform((args.batch_size, 1, 256, 256))  # 256x256x1

    with tf.variable_scope('generator'):
        g = info_gan.generator(z, x)
    with tf.variable_scope('discriminator'):
        d_real = info_gan.discriminator(y)
        # Second call shares the variables created by the first one.
        d_fake = info_gan.discriminator(g, reuse=True)
    with tf.variable_scope('predictor'):
        q = info_gan.predictor(g)

    # The 1e-8 offset keeps tf.log away from log(0).
    g_loss = -tf.reduce_mean(tf.log(d_fake + 1e-8))
    d_loss = -tf.reduce_mean(
        tf.log(d_real + 1e-8) + tf.log(1 - d_fake + 1e-8))

    # Sum over axis 1 first, then average what is left. `axis=1` belongs to
    # reduce_sum: passing it to reduce_mean (as before) asked for the mean
    # over axis 1 of a scalar, which is not a valid reduction.
    # NOTE(review): x was rescaled with target range (-1, 1) above, so
    # tf.log(x + 1e-8) may receive non-positive values -- confirm intended.
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(tf.log(q + 1e-8) * x, axis=1))
    entropy = tf.reduce_mean(
        -tf.reduce_sum(tf.log(x + 1e-8) * x, axis=1))
    q_loss = cross_entropy + entropy

    for l in [g_loss, d_loss, q_loss]:
        tf.add_to_collection('losses', l)
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))

    g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               'discriminator')
    q_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'predictor')

    self.g_train_op = g_opt.minimize(g_loss, var_list=g_vars)
    self.d_train_op = d_opt.minimize(d_loss, var_list=d_vars)
    # The predictor loss also updates the generator variables.
    self.q_train_op = q_opt.minimize(q_loss, var_list=q_vars + g_vars)
def _train_cgan(g_apply_grads, d_apply_grads, batchnorm_updates):
    """Generates helper to train a Conditional, Improved Wasserstein GAN.

    Each call of the returned helper runs the discriminator op
    `args.n_disc_train` times and then the generator op once. Batchnorm
    updates are attached to the discriminator op only.

    Args:
        g_apply_grads: Operation, applies gradient updates to the generator.
        d_apply_grads: Operation, applies gradient updates to the
            discriminator.
        batchnorm_updates: List of update ops to run together with every
            discriminator step; may be empty or None.

    Returns:
        Function, a function that trains the model for one iteration per
        call and returns the evaluated `hem.collection_to_dict` view of the
        'losses' collection.
    """
    updates = list(batchnorm_updates or [])
    g_train_op = g_apply_grads
    # tf.control_dependencies only affects ops *created* inside the context,
    # so re-binding an existing op there never ran the batchnorm updates.
    # Grouping builds a new op that actually executes them.
    d_train_op = tf.group(d_apply_grads, *updates)
    all_losses = hem.collection_to_dict(tf.get_collection('losses'))

    def helper(sess, args, handle, training_handle):
        feed = {handle: training_handle}
        for _ in range(args.n_disc_train):
            sess.run(d_train_op, feed_dict=feed)
        _, l = sess.run([g_train_op, all_losses], feed_dict=feed)
        return l

    return helper
def _train_wgan(g_apply_grads, g_params, d_apply_grads, d_params,
                batchnorm_updates):
    """Generates helper to train a WGAN.

    This training method uses weight clipping and runs the discriminator op
    `args.n_disc_train` times before each generator step. Batchnorm updates
    are applied to both discriminator and generator.

    Args:
        g_apply_grads: Operation, applies gradient updates to the generator.
        g_params: List of generator variables to clip to [-0.01, 0.01].
        d_apply_grads: Operation, applies gradient updates to the
            discriminator.
        d_params: List of discriminator variables to clip to [-0.01, 0.01].
        batchnorm_updates: List of update ops to run with every training
            step; may be empty or None.

    Returns:
        Function, a function that trains the model for one iteration per
        call and returns the evaluated losses.
    """
    updates = list(batchnorm_updates or [])

    def clipped_step(apply_op, params):
        # tf.control_dependencies only affects ops created inside the
        # context. The previous code built the clip ops outside and merely
        # re-bound the apply op inside, so clipping never executed. Here
        # the clip ops are created under a dependency on the apply op, so
        # every step is: apply gradients, then clip the weights.
        with tf.control_dependencies([apply_op]):
            # read_value() creates the read inside this context, so it
            # observes the freshly updated variable.
            clip_ops = [
                p.assign(tf.clip_by_value(p.read_value(), -0.01, 0.01))
                for p in params
            ]
        return tf.group(apply_op, *(clip_ops + updates))

    d_train_op = clipped_step(d_apply_grads, d_params)
    g_train_op = clipped_step(g_apply_grads, g_params)
    losses = hem.collection_to_dict(tf.get_collection('losses'))

    def helper(sess, args, train_phase):
        feed = {train_phase: hem.PHASE_TRAIN}
        for _ in range(args.n_disc_train):
            sess.run(d_train_op, feed_dict=feed)
        _, l = sess.run([g_train_op, losses], feed_dict=feed)
        return l

    return helper
def _train_gan(g_apply_grads, d_apply_grads, batchnorm_updates):
    """Generates helper to train a GAN.

    Original GAN implementation. Batchnorm is applied to both discriminator
    and generator. We alternate training of discriminator and generator
    equally: both ops are fetched in a single `sess.run` per call.

    Args:
        g_apply_grads: Operation, to apply gradient updates to generator.
        d_apply_grads: Operation, to apply gradient updates to discriminator.
        batchnorm_updates: List of update ops to run with every training
            step; may be empty or None.

    Returns:
        Helper function to be used by train.py
    """
    updates = list(batchnorm_updates or [])
    # tf.control_dependencies only affects ops created inside the context;
    # re-binding existing ops there (as before) attached nothing. Grouping
    # creates new ops that really run the batchnorm updates.
    g_train_op = tf.group(g_apply_grads, *updates)
    d_train_op = tf.group(d_apply_grads, *updates)
    losses = hem.collection_to_dict(tf.get_collection('losses'))

    def helper(sess, args, train_phase):
        # Use hem.PHASE_TRAIN, consistent with the WGAN helper.
        _, _, l = sess.run([d_train_op, g_train_op, losses],
                           feed_dict={train_phase: hem.PHASE_TRAIN})
        return l

    return helper
def __init__(self, x_y, args):
    """Build a shared encoder with separate x- and y-decoders, one tower per GPU.

    For every tower yielded by ``hem.tower_scope_range`` the inputs are
    rescaled, encoded once and decoded twice (3-channel ``x_hat`` and
    1-channel ``y_hat``). Per-tower gradients are averaged and applied by
    two independent optimizers.

    Args:
        x_y: Inputs handed to ``hem.tower_scope_range``; each tower slice is
            indexed as ``[0]`` (x) and ``[1]`` (y).
        args: Parsed arguments; ``n_gpus`` and ``batch_size`` are read here
            and the object is forwarded to the ``hem``/``artist`` helpers.
    """
    x_opt = hem.init_optimizer(args)
    y_opt = hem.init_optimizer(args)
    x_tower_grads, y_tower_grads = [], []
    global_step = tf.train.get_global_step()
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES

    towers = hem.tower_scope_range(x_y, args.n_gpus, args.batch_size)
    for tower_inputs, scope, gpu_id in towers:
        # Only the first tower creates variables; later towers reuse them.
        share_vars = gpu_id > 0
        x = hem.rescale(tower_inputs[0], (0, 1), (-1, 1))
        y = hem.rescale(tower_inputs[1], (0, 1), (-1, 1))

        with tf.variable_scope('encoder'):
            e = artist.encoder(x, reuse=share_vars)
        with tf.variable_scope('x_decoder'):
            x_hat = artist.decoder(e, args, channel_output=3,
                                   reuse=share_vars)
        with tf.variable_scope('y_decoder'):
            y_hat = artist.decoder(e, args, channel_output=1,
                                   reuse=share_vars)
        x_hat_loss, y_hat_loss = artist.losses(x, x_hat, y, y_hat,
                                               gpu_id == 0)

        encoder_vars = tf.get_collection(trainable, 'encoder')
        x_decoder_vars = tf.get_collection(trainable, 'x_decoder')
        y_decoder_vars = tf.get_collection(trainable, 'y_decoder')

        # Train for y-reconstruction: the encoder is updated through the
        # y loss, while the x loss only reaches the x-decoder. (The
        # x-reconstruction alternative would give the encoder variables to
        # the x optimizer instead.)
        x_tower_grads.append(
            x_opt.compute_gradients(x_hat_loss, var_list=x_decoder_vars))
        y_tower_grads.append(
            y_opt.compute_gradients(
                y_hat_loss, var_list=encoder_vars + y_decoder_vars))

        # Overwritten each iteration, so the last tower's updates win.
        batchnorm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)

    x_grads = hem.average_gradients(x_tower_grads)
    y_grads = hem.average_gradients(y_tower_grads)

    # The apply ops are created inside the context, so they really depend
    # on the batchnorm updates.
    with tf.control_dependencies(batchnorm_updates):
        self.x_train_op = x_opt.apply_gradients(x_grads,
                                                global_step=global_step)
        self.y_train_op = y_opt.apply_gradients(y_grads,
                                                global_step=global_step)

    # Expose the tensors of the last tower built.
    self.x_hat, self.y_hat = x_hat, y_hat
    self.x_hat_loss, self.y_hat_loss = x_hat_loss, y_hat_loss
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))

    artist.summaries(x, y, x_hat, y_hat, x_grads, y_grads, args)
def default_training(train_op):
    """Training function that just runs an op (or list of ops).

    Args:
        train_op: Op (or list of ops) fetched on every call of the helper.

    Returns:
        Function ``helper(sess, args, handle, handle_value)`` that runs
        `train_op` together with the 'losses' collection, feeding
        `handle_value` for `handle`, and returns the evaluated losses.
    """
    loss_fetches = hem.collection_to_dict(tf.get_collection('losses'))

    def helper(sess, args, handle, handle_value):
        fetched = sess.run([train_op, loss_fetches],
                           feed_dict={handle: handle_value})
        # fetched[0] is the result of the train op and is not needed.
        return fetched[1]

    return helper
def __init__(self, x_y, args):
    """Build the multi-tower mean-depth estimator graph and its train op.

    Args:
        x_y: Inputs handed to ``hem.tower_scope_range``; each tower slice is
            indexed as ``[4]`` (model input, reshaped to (-1, 3, 53, 70))
            and ``[5]`` (target).
        args: Parsed arguments; ``n_gpus``, ``batch_size``, ``m_arch`` and
            ``check_numerics`` are read here.

    Raises:
        KeyError: If ``args.m_arch`` is not a known architecture name.
    """
    # init/setup
    m_opt = hem.init_optimizer(args)
    m_tower_grads = []
    global_step = tf.train.get_global_step()
    m_arch = {'E2': mean_depth_estimator.E2}

    # foreach gpu...
    for x_y, scope, gpu_id in hem.tower_scope_range(x_y, args.n_gpus,
                                                    args.batch_size):
        x = x_y[4]
        x = tf.reshape(x, (-1, 3, 53, 70))
        y = x_y[5]

        with tf.variable_scope('model'):
            m_func = m_arch[args.m_arch]
            m = m_func(x, args, reuse=(gpu_id > 0))
        # Overwritten per tower: ends up as the last tower's output.
        self.output_layer = m

        # calculate losses
        m_loss = mean_depth_estimator.loss(m, x, y, args,
                                           reuse=(gpu_id > 0))

        # calculate gradients
        m_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'model')
        m_tower_grads.append(
            m_opt.compute_gradients(m_loss, var_list=m_params))

        # only need one batchnorm update (ends up being updates for last
        # tower)
        batchnorm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)
        # TODO: do we need to do this update for batchrenorm? for instance
        # renorm?

    # average and apply gradients
    m_grads = hem.average_gradients(m_tower_grads,
                                    check_numerics=args.check_numerics)
    m_apply_grads = m_opt.apply_gradients(m_grads, global_step=global_step)

    # add summaries
    hem.summarize_losses()
    hem.summarize_gradients(m_grads, name='m_gradients')
    hem.summarize_layers(
        'm_activations',
        [l for l in tf.get_collection('conv_layers') if 'model' in l.name],
        montage=True)
    mean_depth_estimator.montage_summaries(x, y, m, args)

    # training ops
    # tf.control_dependencies only affects ops created inside the context;
    # assigning the already-built apply op there attached nothing, so the
    # batchnorm updates never ran. Group them explicitly instead.
    self.m_train_op = tf.group(m_apply_grads, *batchnorm_updates)
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))
def __init__(self, x_y, args):
    """Build the multi-tower 'baseline' sampler GAN and its training ops.

    The generator output is treated as a residual around the per-image
    mean of the target: ``y_hat = g + y_bar``. The discriminator sees the
    mean-removed target and prediction.

    Args:
        x_y: Inputs handed to ``hem.tower_scope_range``; each tower slice is
            indexed as ``[0]`` (reshaped to (batch_size, 3, 65, 65)) and
            ``[1]`` (scaled by 10, cropped and reshaped to
            (batch_size, 1, 29, 29)).
        args: Parsed arguments; reads ``g_lr``, ``g_beta1``, ``g_beta2``,
            ``d_lr``, ``d_beta1``, ``d_beta2``, ``n_gpus``, ``batch_size``,
            ``model_version`` and ``check_numerics``.

    Raises:
        ValueError: If ``args.model_version`` is not 'baseline', the only
            version this class builds.
    """
    # Fail early with a clear message; any other version previously died
    # later with an unbound-local error once the graph was half built.
    if args.model_version != 'baseline':
        raise ValueError(
            "unsupported model_version {!r}; only 'baseline' is "
            "implemented".format(args.model_version))

    # init/setup
    g_opt = tf.train.AdamOptimizer(args.g_lr, args.g_beta1, args.g_beta2)
    d_opt = tf.train.AdamOptimizer(args.d_lr, args.d_beta1, args.d_beta2)
    g_tower_grads = []
    d_tower_grads = []
    global_step = tf.train.get_global_step()

    self.mean_image_placeholder = tf.placeholder(dtype=tf.float32,
                                                 shape=(1, 29, 29))

    # foreach gpu...
    for x_y, scope, gpu_id in hem.tower_scope_range(x_y, args.n_gpus,
                                                    args.batch_size):
        with tf.variable_scope('input_preprocess'):
            # split inputs and rescale
            x = x_y[0]
            y = x_y[1]
            # re-attach shape info
            x = tf.reshape(x, (args.batch_size, 3, 65, 65))
            # rescale from [0,1] to actual world depth
            y = y * 10.0
            y = hem.crop_to_bounding_box(y, 17, 17, 29, 29)
            # re-attach shape info
            y = tf.reshape(y, (args.batch_size, 1, 29, 29))
            y_bar = tf.reduce_mean(y, axis=[2, 3], keep_dims=True)
            # first example of the batch, repeated batch_size times
            x_sample = tf.stack([x[0]] * args.batch_size)
            y_sample = tf.stack([y[0]] * args.batch_size)

        # create model
        with tf.variable_scope('generator'):
            g = self.g_baseline(x, args, reuse=(gpu_id > 0))
            # all-zero residual, i.e. predicting just the mean
            g_0 = tf.zeros_like(g)
            y_hat = g + y_bar
            y_0 = g_0 + y_bar

            g_sampler = self.g_baseline(x_sample, args, reuse=True)
            y_sample_bar = tf.reduce_mean(y_sample, axis=[2, 3],
                                          keep_dims=True)
            y_sampler = g_sampler + y_sample_bar

        with tf.variable_scope('discriminator'):
            # this is the 'mean_adjusted' model from
            # paper_baseline_sampler.py
            d_fake, d_fake_logits = self.d_baseline(
                x, y_hat - y_bar, args, reuse=(gpu_id > 0))
            d_real, d_real_logits = self.d_baseline(
                x, y - y_bar, args, reuse=True)

        # calculate losses
        g_loss, d_loss = self.loss(d_real, d_real_logits, d_fake,
                                   d_fake_logits, reuse=(gpu_id > 0))

        # calculate gradients
        g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'generator')
        d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'discriminator')
        g_tower_grads.append(
            g_opt.compute_gradients(g_loss, var_list=g_params))
        d_tower_grads.append(
            d_opt.compute_gradients(d_loss, var_list=d_params))

    # average and apply gradients
    g_grads = hem.average_gradients(g_tower_grads,
                                    check_numerics=args.check_numerics)
    d_grads = hem.average_gradients(d_tower_grads,
                                    check_numerics=args.check_numerics)
    g_apply_grads = g_opt.apply_gradients(g_grads, global_step=global_step)
    d_apply_grads = d_opt.apply_gradients(d_grads, global_step=global_step)

    # add summaries
    hem.summarize_losses()
    hem.summarize_gradients(g_grads, name='g_gradients')
    hem.summarize_gradients(d_grads, name='d_gradients')
    generator_layers = [
        l for l in tf.get_collection('conv_layers') if 'generator' in l.name
    ]
    discriminator_layers = [
        l for l in tf.get_collection('conv_layers')
        if 'discriminator' in l.name
    ]
    hem.summarize_layers('g_activations', generator_layers, montage=True)
    hem.summarize_layers('d_activations', discriminator_layers,
                         montage=True)
    self.montage_summaries(x, y, g, y_hat, args, name='y_hat')
    self.metric_summaries(x, y, g, y_hat, args, name='y_hat')
    self.metric_summaries(x, y, g_0, y_0, args, name='y_0')
    # the fed mean image is scaled by 10, like y above
    self.metric_summaries(x, y, g, self.mean_image_placeholder * 10.0, args,
                          name='y_mean')
    self.metric_summaries(x_sample, y_sample, g_sampler, y_sampler, args,
                          name='y_sampler')
    self.montage_summaries(x_sample, y_sample, g_sampler, y_sampler, args,
                           name='y_sampler')

    # training ops
    self.g_train_op = g_apply_grads
    self.d_train_op = d_apply_grads
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))
def train(model, iterators, handle, sv, args, reset=False):
    """Run the training loop with summaries, checkpoints and evaluation.

    Per epoch: train on every batch, write periodic training summaries,
    save a checkpoint, run validation and, for the epochs listed in
    ``args.test_epochs``, run the test set. Any exception is reported and
    the process exits with status -1.

    Args:
        model: Object exposing ``train(sess, args, feed_dict)``; called once
            per training batch.
        iterators: Dict keyed by split name ('train', 'validate' and
            optionally 'test'). Every entry provides 'x' (with an
            ``initializer``) and 'handle'; 'batches' is read for the splits
            that are iterated.
        handle: Feedable tensor that selects the active dataset.
        sv: Supervisor wrapper providing ``sv`` (managed session and
            saver), ``summary_op``, ``summary_writers``, the global
            step/epoch tensors and their increment/reset ops.
        args: Parsed arguments; reads ``dir``, ``epochs`` ('n' or '+n') and
            ``test_epochs``.
        reset: If True, run the global step and epoch reset ops first.
    """
    import traceback  # only needed on the failure path

    try:
        checkpoint_path = os.path.join(args.dir, 'checkpoint')
        losses = hem.collection_to_dict(tf.get_collection('losses'))

        with sv.sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:

            def write_train_summary(handle_value):
                # Evaluate the merged summaries on the given dataset and
                # record them at the current global step.
                sv.summary_writers['train'].add_summary(
                    sess.run(sv.summary_op,
                             feed_dict={handle: handle_value}),
                    global_step=sess.run(sv.global_step))

            # initialize
            start_time = time.time()
            if reset:
                sess.run(sv.reset_global_step)
                sess.run(sv.reset_global_epoch)
            current_step = int(sess.run(sv.global_step))
            current_epoch = int(sess.run(sv.global_epoch))

            # set max epochs based on +n or n format
            if '+' in args.epochs:
                max_epochs = current_epoch + int(args.epochs[1:])
            else:
                max_epochs = int(args.epochs)

            # initialize datasets
            for entry in iterators.values():
                sess.run(entry['x'].initializer)

            # get handles for datasets
            training_handle = sess.run(iterators['train']['handle'])
            validation_handle = sess.run(iterators['validate']['handle'])
            # Stays None when no test split is configured, so the test step
            # below can skip cleanly instead of hitting an unbound name.
            test_handle = None
            if 'test' in iterators and iterators['test']['handle'] is not None:
                test_handle = sess.run(iterators['test']['handle'])

            # save model params before any training has been done
            if current_step == 0:
                hem.message('Generating baseline summaries and checkpoint...')
                sv.sv.saver.save(sess, save_path=checkpoint_path,
                                 global_step=sv.global_step)
                write_train_summary(validation_handle)

            n_train_batches = iterators['train']['batches']
            # 10 extra summaries per epoch in the first 3 epochs, 3 per
            # epoch afterwards. Clamp to 1 so that fewer than 10 (or 3)
            # batches does not cause a modulo-by-zero.
            early_interval = max(1, int(n_train_batches / 10))
            late_interval = max(1, int(n_train_batches / 3))

            hem.message('Starting training...')
            for epoch in range(current_epoch, max_epochs):
                prog_bar = tqdm(range(n_train_batches),
                                desc='Epoch {:3d}'.format(epoch + 1),
                                unit='batch')
                # NOTE(review): running_total is never reassigned here;
                # confirm hem.update_moving_average does not rely on it.
                running_total = None
                interval = early_interval if epoch < 3 else late_interval

                for i in prog_bar:
                    # train and display status
                    status = model.train(sess, args,
                                         {handle: training_handle})
                    hem.update_moving_average(status, running_total,
                                              prog_bar)
                    if i % interval == 0:
                        write_train_summary(training_handle)
                    sess.run(sv.increment_global_step)

                # update epoch count
                sess.run(sv.increment_global_epoch)
                current_epoch = int(sess.run(sv.global_epoch))

                # generate end-of-epoch summaries
                write_train_summary(training_handle)

                # save checkpoint
                sv.sv.saver.save(sess, save_path=checkpoint_path,
                                 global_step=sv.global_epoch)

                # perform validation
                hem.inference(sess, losses, sv.summary_op,
                              iterators['validate']['batches'], handle,
                              validation_handle, 'Validation',
                              sv.summary_writers['validate'],
                              sv.global_step)

                # perform testing, if asked
                if (epoch + 1) in args.test_epochs:
                    if test_handle is None:
                        hem.message(
                            'Skipping test: no test dataset available.')
                    else:
                        # Same argument layout as the validation call; the
                        # summary op was previously missing here.
                        hem.inference(sess, losses, sv.summary_op,
                                      iterators['test']['batches'], handle,
                                      test_handle, 'Test',
                                      sv.summary_writers['test'],
                                      sv.global_step)

            hem.message('\nTraining complete! Elapsed time: {}s'.format(
                int(time.time() - start_time)))

    except Exception as e:
        # Exceptions have no `.message` attribute on Python 3; reading it
        # raised AttributeError here and hid the real failure.
        print('Caught unexpected exception during training:', e)
        traceback.print_exc()
        sys.exit(-1)
def __init__(self, x_y, args):
    """Build the multi-tower generator-only (no discriminator) sampler.

    Depending on ``args.model_version`` the generator predicts the target
    directly ('baseline') or a residual added to the per-image target mean
    ``y_bar`` ('mean_adjusted', 'mean_provided', 'mean_provided2').
    Per-tower tensors are collected in ``self.x``, ``self.y``,
    ``self.y_hat`` and ``self.g``.

    Args:
        x_y: Inputs handed to ``hem.tower_scope_range``; each tower slice is
            indexed as ``[0]`` (reshaped to (batch_size, 3, 65, 65)) and
            ``[1]`` (scaled by 10, cropped and reshaped to
            (batch_size, 1, 29, 29)).
        args: Parsed arguments; reads ``g_lr``, ``g_beta1``, ``g_beta2``,
            ``n_gpus``, ``batch_size``, ``model_version`` and
            ``check_numerics``.

    Raises:
        ValueError: If ``args.model_version`` is not one of the supported
            versions.
    """
    # Fail early with a clear message; an unknown version previously
    # surfaced as an unbound-local error in the middle of graph building.
    supported_versions = ('baseline', 'mean_adjusted', 'mean_provided',
                          'mean_provided2')
    if args.model_version not in supported_versions:
        raise ValueError('unsupported model_version {!r}; expected one of '
                         '{}'.format(args.model_version, supported_versions))

    # init/setup
    g_opt = tf.train.AdamOptimizer(args.g_lr, args.g_beta1, args.g_beta2)
    g_tower_grads = []
    global_step = tf.train.get_global_step()

    self.mean_image_placeholder = tf.placeholder(dtype=tf.float32,
                                                 shape=(1, 29, 29))
    self.x = []
    self.y = []
    self.y_hat = []
    self.g = []

    # foreach gpu...
    for x_y, scope, gpu_id in hem.tower_scope_range(x_y, args.n_gpus,
                                                    args.batch_size):
        with tf.variable_scope('input_preprocess'):
            # split inputs and rescale
            x = tf.identity(x_y[0], name='tower_{}_x'.format(gpu_id))
            y = tf.identity(x_y[1], name='tower_{}_y'.format(gpu_id))
            # re-attach shape info
            x = tf.reshape(x, (args.batch_size, 3, 65, 65))
            # rescale from [0,1] to actual world depth
            y = y * 10.0
            y = hem.crop_to_bounding_box(y, 17, 17, 29, 29)
            # re-attach shape info
            y = tf.reshape(y, (args.batch_size, 1, 29, 29))
            y_bar = tf.reduce_mean(y, axis=[2, 3], keep_dims=True)
            y_bar = tf.identity(y_bar, name='tower_{}_y_bar'.format(gpu_id))

        # create model
        with tf.variable_scope('generator'):
            if args.model_version == 'baseline':
                g = self.g_baseline(x, args, reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g
                y_0 = g_0
            elif args.model_version == 'mean_adjusted':
                g = self.g_baseline(x, args, reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g + y_bar
                y_0 = g_0 + y_bar
            elif args.model_version == 'mean_provided':
                g = self.g_mean_provided(x, y_bar, args, reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g + y_bar
                y_0 = g_0 + y_bar
            elif args.model_version == 'mean_provided2':
                g = self.g_mean_provided2(x, y_bar, args,
                                          reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g + y_bar
                y_0 = g_0 + y_bar

            # Named identities give each tower's tensors a per-tower name.
            g = tf.identity(g, 'tower_{}_g'.format(gpu_id))
            g_0 = tf.identity(g_0, 'tower_{}_g0'.format(gpu_id))
            y_hat = tf.identity(y_hat, 'tower_{}_y_hat'.format(gpu_id))
            y_0 = tf.identity(y_0, 'tower_{}_y0'.format(gpu_id))

        self.g.append(g)
        self.y_hat.append(y_hat)
        self.y.append(y)
        self.x.append(x)

        # calculate losses
        g_loss = self.loss(x, y, y_hat, args, reuse=(gpu_id > 0))

        # calculate gradients
        g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'generator')
        g_tower_grads.append(
            g_opt.compute_gradients(g_loss, var_list=g_params))

    # average and apply gradients
    g_grads = hem.average_gradients(g_tower_grads,
                                    check_numerics=args.check_numerics)
    g_apply_grads = g_opt.apply_gradients(g_grads, global_step=global_step)

    # add summaries
    hem.summarize_losses()
    hem.summarize_gradients(g_grads, name='g_gradients')
    generator_layers = [
        l for l in tf.get_collection('conv_layers') if 'generator' in l.name
    ]
    hem.summarize_layers('g_activations', generator_layers, montage=True)
    self.montage_summaries(x, y, g, y_hat, args)
    self.metric_summaries(x, y, g, y_hat, args, name='y_hat')
    self.metric_summaries(x, y, g_0, y_0, args, name='y_0')
    # the fed mean image is scaled by 10, like y above
    self.metric_summaries(x, y, g, self.mean_image_placeholder * 10.0, args,
                          name='y_mean')

    # training ops
    self.g_train_op = g_apply_grads
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))
def __init__(self, x_y, estimator, args):
    """Build the multi-tower experimental sampler GAN.

    The generator input is the image concatenated (on axis 1) with the two
    location channels and a constant channel holding the estimator's
    gradient-stopped output.

    Args:
        x_y: Inputs handed to ``hem.tower_scope_range``; each tower slice is
            indexed as ``[0]`` (image), ``[1]`` (target, cropped and
            reshaped to (-1, 1, 32, 32)), ``[2]`` and ``[3]`` (location
            channels).
        estimator: Object whose ``output_layer`` tensor provides the mean
            depth; gradients are stopped at that tensor.
        args: Parsed arguments; reads ``n_gpus``, ``batch_size``,
            ``g_arch``, ``d_arch`` and ``check_numerics``.

    Raises:
        KeyError: If ``args.g_arch`` or ``args.d_arch`` is not a known
            architecture name.
    """
    # init/setup
    g_opt = hem.init_optimizer(args)
    d_opt = hem.init_optimizer(args)
    g_tower_grads = []
    d_tower_grads = []
    global_step = tf.train.get_global_step()

    g_arch = {'E2': experimental_sampler.generatorE2}
    d_arch = {'E2': experimental_sampler.discriminatorE2}

    # foreach gpu...
    for x_y, scope, gpu_id in hem.tower_scope_range(x_y, args.n_gpus,
                                                    args.batch_size):
        with tf.variable_scope('input_preprocess'):
            # split inputs and rescale
            x = hem.rescale(x_y[0], (0, 1), (-1, 1))
            y = hem.rescale(x_y[1], (0, 1), (-1, 1))
            y = hem.crop_to_bounding_box(y, 16, 16, 32, 32)
            y = tf.reshape(y, (-1, 1, 32, 32))
            x_loc = x_y[2]
            y_loc = x_y[3]
            # The estimator is used as a fixed feature: no gradients flow
            # back into it.
            mean_depth = tf.stop_gradient(estimator.output_layer)
            # Repeat the estimate 64 times along axis 2 and then axis 3 so
            # it can be concatenated as an extra input channel.
            mean_depth_channel = tf.stack([mean_depth] * 64, axis=2)
            mean_depth_channel = tf.stack([mean_depth_channel] * 64, axis=3)
            x = tf.concat([x, x_loc, y_loc, mean_depth_channel], axis=1)

            # create repeated image tensors for sampling
            x_sample = tf.stack([x[0]] * args.batch_size)
            y_sample = tf.stack([y[0]] * args.batch_size)
            # shuffled x for variance calculation
            x_shuffled = tf.random_shuffle(x)
            y_shuffled = y
            # noise vector for testing
            x_noise = tf.random_uniform(
                tf.stack([tf.Dimension(args.batch_size), x.shape[1],
                          x.shape[2], x.shape[3]]),
                minval=-1.0, maxval=1.0)

        # create model
        with tf.variable_scope('generator'):
            g_func = g_arch[args.g_arch]
            g = g_func(x, args, reuse=(gpu_id > 0))
            g_sampler = g_func(x_sample, args, reuse=True)
            g_shuffle = g_func(x_shuffled, args, reuse=True)
            g_noise = g_func(x_noise, args, reuse=True)
        with tf.variable_scope('discriminator'):
            d_func = d_arch[args.d_arch]
            d_real, d_real_logits = d_func(x, y, args, reuse=(gpu_id > 0))
            d_fake, d_fake_logits = d_func(x, g, args, reuse=True)

        # calculate losses
        g_loss, d_loss = experimental_sampler.loss(
            d_real, d_real_logits, d_fake, d_fake_logits, x, g, y, None,
            args, reuse=(gpu_id > 0))

        # calculate gradients
        g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'generator')
        d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'discriminator')
        g_tower_grads.append(
            g_opt.compute_gradients(g_loss, var_list=g_params))
        d_tower_grads.append(
            d_opt.compute_gradients(d_loss, var_list=d_params))

        # only need one batchnorm update (ends up being updates for last
        # tower)
        batchnorm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)
        # TODO: do we need to do this update for batchrenorm? for instance
        # renorm?

    # average and apply gradients
    g_grads = hem.average_gradients(g_tower_grads,
                                    check_numerics=args.check_numerics)
    d_grads = hem.average_gradients(d_tower_grads,
                                    check_numerics=args.check_numerics)
    g_apply_grads = g_opt.apply_gradients(g_grads, global_step=global_step)
    d_apply_grads = d_opt.apply_gradients(d_grads, global_step=global_step)

    # add summaries
    hem.summarize_losses()
    hem.summarize_gradients(g_grads, name='g_gradients')
    hem.summarize_gradients(d_grads, name='d_gradients')
    hem.summarize_layers(
        'g_activations',
        [l for l in tf.get_collection('conv_layers')
         if 'generator' in l.name],
        montage=True)
    hem.summarize_layers(
        'd_activations',
        [l for l in tf.get_collection('conv_layers')
         if 'discriminator' in l.name],
        montage=True)
    experimental_sampler.montage_summaries(
        x, y, g, x_sample, y_sample, g_sampler, x_noise, g_noise,
        x_shuffled, y_shuffled, g_shuffle, args)
    experimental_sampler.sampler_summaries(y_sample, g_sampler, g_noise,
                                           y_shuffled, g_shuffle, args)

    # training ops
    # tf.control_dependencies only affects ops created inside the context;
    # assigning the already-built apply ops there attached nothing, so the
    # batchnorm updates never ran. Group them explicitly instead.
    self.g_train_op = tf.group(g_apply_grads, *batchnorm_updates)
    self.d_train_op = tf.group(d_apply_grads, *batchnorm_updates)
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))
def __init__(self, x_y, args):
    """Build the multi-tower sampler GAN, with optional WGAN weight clipping.

    ``args.model_version`` selects how the per-image target mean ``y_bar``
    is used by generator and discriminator; ``args.training_version ==
    'wgan'`` switches the optimizers and adds weight clipping to both
    training ops. Per-tower tensors are collected in ``self.x``,
    ``self.y``, ``self.y_hat`` and ``self.g``.

    Args:
        x_y: Inputs handed to ``hem.tower_scope_range``; each tower slice is
            indexed as ``[0]`` (reshaped to (batch_size, 3, 65, 65)) and
            ``[1]`` (scaled by 10, cropped and reshaped to
            (batch_size, 1, 29, 29)).
        args: Parsed arguments; reads ``training_version``, ``g_lr``,
            ``d_lr``, the Adam betas, ``n_gpus``, ``batch_size``,
            ``model_version`` and ``check_numerics``.

    Raises:
        ValueError: If ``args.model_version`` is not one of the supported
            versions.
    """
    # Fail early with a clear message; an unknown version previously
    # surfaced as an unbound-local error in the middle of graph building.
    supported_versions = ('baseline', 'mean_adjusted', 'mean_provided',
                          'mean_provided2')
    if args.model_version not in supported_versions:
        raise ValueError('unsupported model_version {!r}; expected one of '
                         '{}'.format(args.model_version, supported_versions))

    # init/setup
    if args.training_version == 'wgan':
        # wgan training
        g_opt = tf.train.RMSPropOptimizer(args.g_lr)
        d_opt = tf.train.AdamOptimizer(args.d_lr)
    else:
        g_opt = tf.train.AdamOptimizer(args.g_lr, args.g_beta1, args.g_beta2)
        d_opt = tf.train.AdamOptimizer(args.d_lr, args.d_beta1, args.d_beta2)
    g_tower_grads = []
    d_tower_grads = []
    global_step = tf.train.get_global_step()

    self.x = []
    self.y = []
    self.y_hat = []
    self.g = []
    self.mean_image_placeholder = tf.placeholder(dtype=tf.float32,
                                                 shape=(1, 29, 29))

    # foreach gpu...
    for x_y, scope, gpu_id in hem.tower_scope_range(x_y, args.n_gpus,
                                                    args.batch_size):
        with tf.variable_scope('input_preprocess'):
            # split inputs and rescale
            x = tf.identity(x_y[0], name='tower_{}_x'.format(gpu_id))
            y = tf.identity(x_y[1], name='tower_{}_y'.format(gpu_id))
            # re-attach shape info
            x = tf.reshape(x, (args.batch_size, 3, 65, 65))
            # rescale from [0,1] to actual world depth
            y = y * 10.0
            y = hem.crop_to_bounding_box(y, 17, 17, 29, 29)
            # re-attach shape info
            y = tf.reshape(y, (args.batch_size, 1, 29, 29))
            y_bar = tf.reduce_mean(y, axis=[2, 3], keep_dims=True)
            y_bar = tf.identity(y_bar, name='tower_{}_y_bar'.format(gpu_id))

        # create model
        with tf.variable_scope('generator'):
            if args.model_version == 'baseline':
                g = self.g_baseline(x, args, reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g
                y_0 = g_0
            elif args.model_version == 'mean_adjusted':
                g = self.g_baseline(x, args, reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g + y_bar
                y_0 = g_0 + y_bar
            elif args.model_version == 'mean_provided':
                g = self.g_mean_provided(x, y_bar, args, reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g + y_bar
                y_0 = g_0 + y_bar
            elif args.model_version == 'mean_provided2':
                g = self.g_mean_provided2(x, y_bar, args,
                                          reuse=(gpu_id > 0))
                g_0 = tf.zeros_like(g)
                y_hat = g + y_bar
                y_0 = g_0 + y_bar

            # Named identities give each tower's tensors a per-tower name.
            g = tf.identity(g, 'tower_{}_g'.format(gpu_id))
            g_0 = tf.identity(g_0, 'tower_{}_g0'.format(gpu_id))
            y_hat = tf.identity(y_hat, 'tower_{}_y_hat'.format(gpu_id))
            y_0 = tf.identity(y_0, 'tower_{}_y0'.format(gpu_id))

        with tf.variable_scope('discriminator'):
            if args.model_version == 'baseline':
                d_fake, d_fake_logits = self.d_baseline(
                    x, y_hat, args, reuse=(gpu_id > 0))
                d_real, d_real_logits = self.d_baseline(
                    x, y, args, reuse=True)
            elif args.model_version == 'mean_adjusted':
                d_fake, d_fake_logits = self.d_baseline(
                    x, y_hat - y_bar, args, reuse=(gpu_id > 0))
                d_real, d_real_logits = self.d_baseline(
                    x, y - y_bar, args, reuse=True)
            elif args.model_version == 'mean_provided':
                d_fake, d_fake_logits = self.d_mean_provided(
                    x, y_hat - y_bar, y_bar, args, reuse=(gpu_id > 0))
                d_real, d_real_logits = self.d_mean_provided(
                    x, y - y_bar, y_bar, args, reuse=True)
            elif args.model_version == 'mean_provided2':
                d_fake, d_fake_logits = self.d_mean_provided2(
                    x, y_hat - y_bar, y_bar, args, reuse=(gpu_id > 0))
                d_real, d_real_logits = self.d_mean_provided2(
                    x, y - y_bar, y_bar, args, reuse=True)

            d_fake = tf.identity(d_fake, 'tower_{}_d_fake'.format(gpu_id))
            d_real = tf.identity(d_real, 'tower_{}_d_real'.format(gpu_id))
            d_fake_logits = tf.identity(
                d_fake_logits, 'tower_{}_d_fake_logits'.format(gpu_id))
            d_real_logits = tf.identity(
                d_real_logits, 'tower_{}_d_real_logits'.format(gpu_id))

        self.g.append(g)
        self.y_hat.append(y_hat)
        self.y.append(y)
        self.x.append(x)

        # calculate losses
        g_loss, d_loss = self.loss(d_real, d_real_logits, d_fake,
                                   d_fake_logits, args, reuse=(gpu_id > 0))

        # calculate gradients
        g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'generator')
        d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'discriminator')
        g_tower_grads.append(
            g_opt.compute_gradients(g_loss, var_list=g_params))
        d_tower_grads.append(
            d_opt.compute_gradients(d_loss, var_list=d_params))

    # average and apply gradients
    g_grads = hem.average_gradients(g_tower_grads,
                                    check_numerics=args.check_numerics)
    d_grads = hem.average_gradients(d_tower_grads,
                                    check_numerics=args.check_numerics)
    g_apply_grads = g_opt.apply_gradients(g_grads, global_step=global_step)
    d_apply_grads = d_opt.apply_gradients(d_grads, global_step=global_step)

    # add summaries
    hem.summarize_losses()
    hem.summarize_gradients(g_grads, name='g_gradients')
    hem.summarize_gradients(d_grads, name='d_gradients')
    generator_layers = [
        l for l in tf.get_collection('conv_layers') if 'generator' in l.name
    ]
    discriminator_layers = [
        l for l in tf.get_collection('conv_layers')
        if 'discriminator' in l.name
    ]
    hem.summarize_layers('g_activations', generator_layers, montage=True)
    hem.summarize_layers('d_activations', discriminator_layers,
                         montage=True)
    self.montage_summaries(x, y, g, y_hat, args)
    self.metric_summaries(x, y, g, y_hat, args, name='y_hat')
    self.metric_summaries(x, y, g_0, y_0, args, name='y_0')
    # the fed mean image is scaled by 10, like y above
    self.metric_summaries(x, y, g, self.mean_image_placeholder * 10.0, args,
                          name='y_mean')

    # training ops
    if args.training_version == 'wgan':
        # tf.control_dependencies only affects ops created inside the
        # context. The clip ops used to be built outside and the apply ops
        # merely re-bound inside, so clipping never executed. Build the
        # clip ops under a dependency on the matching apply op instead:
        # every step applies gradients, then clips the weights.
        # read_value() creates the read inside the context so it observes
        # the updated variable.
        with tf.control_dependencies([d_apply_grads]):
            clip_D = [
                p.assign(tf.clip_by_value(p.read_value(), -0.01, 0.01))
                for p in d_params
            ]
        with tf.control_dependencies([g_apply_grads]):
            clip_G = [
                p.assign(tf.clip_by_value(p.read_value(), -0.01, 0.01))
                for p in g_params
            ]
        self.d_train_op = tf.group(d_apply_grads, *clip_D)
        self.g_train_op = tf.group(g_apply_grads, *clip_G)
    else:
        self.g_train_op = g_apply_grads
        self.d_train_op = d_apply_grads
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))
def __init__(self, x_y, args):
    """Create conditional GAN ('pix2pix') model on the graph.

    Builds one generator/discriminator tower per GPU, averages the tower
    gradients and stores the training ops and losses on ``self``
    (``g_train_op``, ``d_train_op``, ``all_losses``).

    Args:
        x_y: Inputs handed to ``hem.tower_scope_range``; each tower slice is
            indexed as ``[0]`` (x, the conditioning input) and ``[1]``
            (y, the real target shown to the discriminator).
        args: Argparse structure; reads ``n_gpus``, ``batch_size``,
            ``examples`` and ``check_numerics``.
    """
    # init/setup
    g_opt = hem.init_optimizer(args)
    d_opt = hem.init_optimizer(args)
    g_tower_grads = []
    d_tower_grads = []
    global_step = tf.train.get_global_step()

    # foreach gpu...
    for x_y, scope, gpu_id in hem.tower_scope_range(x_y, args.n_gpus,
                                                    args.batch_size):
        # split inputs and scale to [-1, 1]
        x = hem.rescale(x_y[0], (0, 1), (-1, 1))
        y = hem.rescale(x_y[1], (0, 1), (-1, 1))

        # repeated image tensor for sampling
        x_sample = tf.stack([x[0]] * args.examples)
        y_sample = tf.stack([y[0]] * args.examples)

        # create model
        with tf.variable_scope('generator'):
            g = pix2pix.generator(x, args, reuse=(gpu_id > 0))
            g_sampler = pix2pix.generator(x_sample, args, reuse=True)
        with tf.variable_scope('discriminator'):
            d_real, d_real_logits = pix2pix.discriminator(
                x, y, args, reuse=(gpu_id > 0))
            d_fake, d_fake_logits = pix2pix.discriminator(
                x, g, args, reuse=True)

        # losses
        g_loss, d_loss = pix2pix.loss(d_real, d_real_logits, d_fake,
                                      d_fake_logits, g, y, args,
                                      reuse=(gpu_id > 0))

        # gradients
        g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'generator')
        d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     'discriminator')
        g_tower_grads.append(
            g_opt.compute_gradients(g_loss, var_list=g_params))
        d_tower_grads.append(
            d_opt.compute_gradients(d_loss, var_list=d_params))

        # only need one batchnorm update (ends up being updates for last
        # tower)
        batchnorm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)

    # average and apply gradients
    g_grads = hem.average_gradients(g_tower_grads,
                                    check_numerics=args.check_numerics)
    d_grads = hem.average_gradients(d_tower_grads,
                                    check_numerics=args.check_numerics)
    g_apply_grads = g_opt.apply_gradients(g_grads, global_step=global_step)
    d_apply_grads = d_opt.apply_gradients(d_grads, global_step=global_step)

    # add summaries
    pix2pix.montage_summaries(x, y, g, args, x_sample, y_sample, g_sampler,
                              d_real, d_fake)
    pix2pix.activation_summaries()
    pix2pix.loss_summaries()
    pix2pix.gradient_summaries(g_grads, 'generator_gradients')
    pix2pix.gradient_summaries(d_grads, 'discriminator_gradients')
    pix2pix.sampler_summaries(y_sample, g_sampler, args)

    # training ops
    # tf.control_dependencies only affects ops created inside the context;
    # assigning the already-built apply ops there attached nothing, so the
    # batchnorm updates never ran. Group them explicitly instead.
    self.d_train_op = tf.group(d_apply_grads, *batchnorm_updates)
    self.g_train_op = tf.group(g_apply_grads, *batchnorm_updates)
    self.all_losses = hem.collection_to_dict(tf.get_collection('losses'))