def main():
    """Train the MNIST model for ``NUM_STEPS`` steps with feed-dict batches.

    The tensors returned by ``inputs()`` are used both to build the model
    (after reshaping the images) and as the keys of every ``feed_dict``.
    The loss is printed after each step; every tenth step the accuracy on one
    batch drawn from ``mnist_data.validation`` is printed as well.
    """
    images, labels = inputs()
    reshaped_images = tf.reshape(images, [
        mnist.BATCH_SIZE, mnist.IMAGE_HEIGHT, mnist.IMAGE_WIDTH,
        mnist.IMAGE_DEPTH
    ])

    logits = mnist.inference(reshaped_images)
    loss = mnist.loss(logits, labels)
    accuracy = mnist.accuracy(logits, labels)
    train_op = mnist.train(loss)

    # tf.initialize_all_variables() is deprecated; use the replacement that
    # the other training routines in this file already rely on.
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for index in range(NUM_STEPS):
            batch_x, batch_y = mnist_data.train.next_batch(mnist.BATCH_SIZE)
            _, loss_value = sess.run([train_op, loss], feed_dict={
                images: batch_x,
                labels: batch_y
            })
            print("step:" + str(index + 1) + " loss: " + str(loss_value))

            # Report accuracy on a single validation batch every 10 steps.
            if (index + 1) % 10 == 0:
                validation_x, validation_y = mnist_data.validation.next_batch(
                    mnist.BATCH_SIZE)
                accuracy_score = sess.run(accuracy, feed_dict={
                    images: validation_x,
                    labels: validation_y
                })
                print("accuracy : " + str(accuracy_score))
def inputs(eval_data):
    """Build the input tensors by delegating to ``mnist_input.inputs``.

    Args:
        eval_data: forwarded unchanged as the ``eval_data`` keyword.

    Returns:
        The ``(images, labels)`` pair produced by ``mnist_input.inputs``.

    Raises:
        ValueError: if ``FLAGS.data_dir`` is empty or unset.
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')

    # The binary batches live in a fixed sub-directory of data_dir.
    batches_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    batch = mnist_input.inputs(
        eval_data=eval_data,
        data_dir=batches_dir,
        batch_size=FLAGS.batch_size)
    images, labels = batch
    return images, labels
def inputs(eval_data):
    """Build the input tensors, optionally cast to half precision.

    Args:
        eval_data: forwarded unchanged as the ``eval_data`` keyword of
            ``mnist_input.inputs``.

    Returns:
        ``(images, labels)``; both are cast to ``tf.float16`` when
        ``FLAGS.use_fp16`` is set.

    Raises:
        ValueError: if ``FLAGS.data_dir`` is empty or unset.
    """
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')

    batches_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    images, labels = mnist_input.inputs(
        eval_data=eval_data,
        data_dir=batches_dir,
        batch_size=FLAGS.batch_size)

    if not FLAGS.use_fp16:
        return images, labels

    # Half-precision mode: cast images first, then labels.
    return tf.cast(images, tf.float16), tf.cast(labels, tf.float16)
def train():
    """Train MNIST for a number of steps.

    Builds the model in a fresh default graph and runs ``train_op`` inside a
    ``MonitoredTrainingSession`` until the session asks to stop. A session
    hook prints the loss and the average time per batch every 100 steps.
    """
    log_every = 100  # number of steps between two log lines

    # Everything below is built in a new graph used as the default graph.
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Force the input pipeline onto CPU:0 so its ops do not sometimes end
        # up on the GPU and slow training down.
        with tf.device('/cpu:0'):
            labels, images = mnist_input.inputs(
                [FILE_NAMES], batchSize=100, shuffle=True)

        # Inference -> loss -> one-batch training op.
        logits = mnist_model.inference(images)
        loss = mnist_model.loss(logits, labels)
        train_op = mnist_model.train(loss, 0.001, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss value.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % log_every != 0:
                    return

                now = time.time()
                elapsed = now - self._start_time
                self._start_time = now

                per_batch = float(elapsed / log_every)
                message = '%s: step %d, loss = %.2f (%.3f sec/batch)'
                print(message % (datetime.now(), self._step,
                                 run_values.results, per_batch))

        session = tf.train.MonitoredTrainingSession(hooks=[_LoggerHook()])
        with session as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def train():
    """Train the generator/discriminator pair and dump samples periodically.

    The graph is built in a fresh default graph. Real images come from the
    ``mnistip`` training queue and are fed through the ``images`` placeholder;
    noise vectors are drawn uniformly from [-1, 1) and fed through ``zin``.

    Side effects:
        * writes ``results/real.jpg`` once, and ``results/<step>.jpg`` every
          200 steps (the ``results`` directory must already exist);
        * prints both losses every 100 steps;
        * saves a checkpoint under ``FLAGS.train_dir`` every 1000 steps;
        * restores trainable variables from ``restoreFileName`` first when
          ``isModelFT`` is set.
    """
    with tf.Graph().as_default():
        z_dim = 100

        # ---------------- MODEL + OPTIMIZER ----------------
        # Get images and labels
        images_tr, labels_tr = mnistip.distorted_inputs(randFlip=True)
        images_ev, labels_ev = mnistip.inputs(eval_data=True, numPrThread=1)

        # Placeholders for real images and for the generator noise.
        images = tf.placeholder(
            dtype=tf.float32,
            shape=[FLAGS.batch_size, imHeight, imWidth, numCh])
        zin = tf.placeholder(tf.float32, [FLAGS.batch_size, z_dim], name="z")

        # Generator
        G = gnm.generator(zin)

        # Discriminators: the second call reuses the first call's variables.
        D_prob_real, D_logit_real = gnm.discriminator(images)
        # Capture the current variable scope so the optimizers' apply ops can
        # be created under it further down.
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            pass
        D_prob_fake, D_logit_fake = gnm.discriminator(G, reuse=True)

        # Losses
        dloss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=D_logit_real, labels=tf.ones_like(D_logit_real)))
        dloss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))
        dloss = dloss_real + dloss_fake
        gloss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))

        # Optimizer: variables are split between the two networks by the
        # 'd_' / 'g_' substring in their names.
        t_vars = tf.trainable_variables()
        d_vars = [var for var in t_vars if 'd_' in var.name]
        g_vars = [var for var in t_vars if 'g_' in var.name]
        d_optim = tf.train.AdamOptimizer(learningrate, beta1=beta1)
        g_optim = tf.train.AdamOptimizer(learningrate, beta1=beta1)
        grads_d = tf.gradients(dloss, d_vars)
        grads_g = tf.gradients(gloss, g_vars)
        with tf.variable_scope(scope):
            train_d = d_optim.apply_gradients(zip(grads_d, d_vars))
            train_g = g_optim.apply_gradients(zip(grads_g, g_vars))

        # ---------------- TensorFlow training ----------------
        # Create a saver.
        saver = tf.train.Saver()
        if isModelFT:
            saver1 = tf.train.Saver(tf.trainable_variables())

        # tf.initialize_all_variables() is deprecated; use its replacement.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)
        if isModelFT:
            saver1.restore(sess, restoreFileName)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # ---------------- TRAIN ----------------
        # Save a 7x7 grid of real evaluation images for visual reference.
        visualization, _ = sess.run([images_ev, labels_ev])
        reshaped_vis = np.squeeze(visualization)
        ims("results/real.jpg", merge_gs(reshaped_vis[:49], [7, 7]))

        # Fixed noise so the periodically saved sample grids are comparable.
        display_z = np.random.uniform(
            -1, 1, [FLAGS.batch_size, z_dim]).astype(np.float32)

        for epoch in xrange(1):
            for steps_ in xrange(200001):
                batch_images, _ = sess.run([images_tr, labels_tr])
                batch_z = np.random.uniform(
                    -1, 1, [FLAGS.batch_size, z_dim]).astype(np.float32)

                # Discriminator update(s), then generator update(s), both on
                # the same noise batch.
                for k in xrange(1):
                    _, lossD = sess.run([train_d, dloss], feed_dict={
                        images: batch_images,
                        zin: batch_z
                    })
                for k in xrange(1):
                    _, lossG = sess.run([train_g, gloss],
                                        feed_dict={zin: batch_z})

                if steps_ % 100 == 0:
                    format_str = (
                        '%s: Step %d, D-LOSS = %.2f, G-loss = %.2f\n')
                    print(format_str %
                          (datetime.now(), steps_, lossD, lossG))

                if steps_ % 200 == 0:
                    imgGen = sess.run([G], feed_dict={zin: display_z})
                    imgGen = np.squeeze(imgGen)
                    ims("results/" + str(steps_) + ".jpg",
                        merge_gs(imgGen[0:49], [7, 7]))

                if steps_ % 1000 == 0:
                    checkpoint_path = os.path.join(
                        FLAGS.train_dir, 'model-' + netName + '.ckpt')
                    saver.save(sess, checkpoint_path, global_step=steps_)
def train():
    """Run distributed training on this worker until the session stops.

    The worker whose ``FLAGS.task_id`` is 0 is the chief: it additionally
    creates the monitor and summary writer, registers the summary ops and
    installs ``_LoggerHook`` to write train/test summaries.

    Every worker builds the model under a replica device setter, then runs
    ``train_op`` in a ``MonitoredTrainingSession`` that stops at
    ``tf_parameter_mgr.getMaxSteps()`` or when the loss becomes NaN.
    """
    cluster, server = setup_distribute()
    is_chief = (FLAGS.task_id == 0)
    if is_chief:
        # Only the chief owns the monitor and the summary writer.
        log_dir = os.path.join(FLAGS.train_dir, 'log')
        monitor = monitor_cb.CMonitor(log_dir,
                                      tf_parameter_mgr.getTestInterval(),
                                      tf_parameter_mgr.getMaxSteps())
        summaryWriter = tf.summary.FileWriter(log_dir)

    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % FLAGS.task_id,
                cluster=cluster)):
        global_step = tf.train.get_or_create_global_step()
        images, labels = mnist_input.inputs(eval_data=True,
                                            batch_size=batch_size)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = mnist_input.inference(images)

        # Calculate loss.
        loss = mnist_input.loss(logits, labels)
        accuracy = mnist_input.accuracy(logits, labels)
        train_op = mnist_input.train(loss, global_step)

        if is_chief:
            graph = tf.get_default_graph()
            # Per-layer histograms/norms; tensors are looked up by name.
            for layer in ['conv1', 'conv2', 'local3', 'local4']:
                monitor.SummaryHist(
                    "weight", graph.get_tensor_by_name(layer + '/weights:0'),
                    layer)
                monitor.SummaryHist(
                    "bias", graph.get_tensor_by_name(layer + '/biases:0'),
                    layer)
                monitor.SummaryHist(
                    "activation",
                    graph.get_tensor_by_name(layer + '/' + layer + ':0'),
                    layer)
                monitor.SummaryNorm2(
                    "weight", graph.get_tensor_by_name(layer + '/weights:0'),
                    layer)
            monitor.SummaryGradient("weight", loss)
            monitor.SummaryGWRatio()
            # NOTE(review): the "test" scalars are registered on the same
            # `loss` / `accuracy` tensors as the "train" scalars.
            monitor.SummaryScalar("train loss", loss)
            monitor.SummaryScalar("train accuracy", accuracy)
            monitor.SummaryScalar("test loss", loss)
            monitor.SummaryScalar("test accuracy", accuracy)
            train_summaries = tf.summary.merge_all(
                monitor_cb.DLMAO_TRAIN_SUMMARIES)
            test_summaries = tf.summary.merge_all(
                monitor_cb.DLMAO_TEST_SUMMARIES)

    class _LoggerHook(tf.train.SessionRunHook):
        """Writes train and test summaries every ``test_interval`` steps."""

        def begin(self):
            self._next_trigger_step = test_interval
            self._trigger = False

        def before_run(self, run_context):
            # global_step is always fetched; the train summaries are fetched
            # only on the run that follows an armed trigger.
            args = {'global_step': global_step}
            if self._trigger:
                self._trigger = False
                args['summary'] = train_summaries
            return tf.train.SessionRunArgs(args)  # Asks for loss value.

        def after_run(self, run_context, run_values):
            gs = run_values.results['global_step']
            # Arm the trigger for the next run once the threshold is reached.
            if gs >= self._next_trigger_step:
                self._trigger = True
                self._next_trigger_step += test_interval
            summary = run_values.results.get('summary', None)
            if summary is not None:
                summaryWriter.add_summary(summary, gs)
                # Test summaries need their own, additional session run.
                summary = run_context.session.run(test_summaries)
                summaryWriter.add_summary(summary, gs)

    hooks = [
        tf.train.StopAtStepHook(last_step=tf_parameter_mgr.getMaxSteps()),
        tf.train.NanTensorHook(loss)
    ]
    if is_chief:
        hooks.append(_LoggerHook())

    with tf.train.MonitoredTrainingSession(
            master=server.target,
            is_chief=is_chief,
            checkpoint_dir=FLAGS.train_dir,
            hooks=hooks,
            config=tf.ConfigProto(
                log_device_placement=FLAGS.log_device_placement)) as mon_sess:
        steps = 0  # steps run by this worker (not the global step)
        while not mon_sess.should_stop():
            print("training")
            print(steps)
            mon_sess.run(train_op)
            steps += 1
            if steps % 100 == 0:
                print('%d steps executed on worker %d.' %
                      (steps, FLAGS.task_id))
        # Final tally once the session has requested a stop.
        print('%d steps executed on worker %d.' % (steps, FLAGS.task_id))
        if is_chief:
            summaryWriter.flush()
# Length of the latent code and geometry of the input images.
CODE_LEN = 20
imHeight = 28
imWidth = 28
numCh = 1

FLAGS = tf.app.flags.FLAGS

# Checkpoint the trainable variables are restored from.
# NOTE(review): machine-specific absolute path; confirm before running
# anywhere else.
restoreFileName = "/home/hasnat/Desktop/mnist_verify/vae_mnist/trlogs/model-mn_j_cl_2.ckpt-2000"

# Placeholders for an image batch, its labels and an externally supplied code.
images = tf.placeholder(dtype=tf.float32,
                        shape=[FLAGS.batch_size, imHeight, imWidth, numCh])
labels = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size])
guessed_z = tf.placeholder(dtype=tf.float32,
                           shape=[FLAGS.batch_size, CODE_LEN])

images_ev, labels_ev = mnistip.inputs(eval_data=True, numPrThread=1)

# CL ZONE
_, encoded_, kappaVal, _, t_num_dim = cvn.inference_cl_cnn(
    images, CODE_LEN, NUM_CLASSES)
im_gen = cvn.generate_cl_cnn_simp(guessed_z, t_num_dim)

# Trainable variables whose (lower-cased) name contains
# 'rec_softmax_linear/'.
wts = [
    v for v in tf.trainable_variables()
    if 'rec_softmax_linear/' in v.name.lower()
]

saver1 = tf.train.Saver(tf.trainable_variables())
# tf.initialize_all_variables() is deprecated; use its replacement.
init = tf.global_variables_initializer()
def train():
    """Train the recognition + generation networks on the reconstruction loss.

    The optimised objective is the mean of ``generation_loss``; the softmax
    and combined losses are only fetched for logging. Variables whose name
    contains ``rec_`` are updated with a learning rate 1000x smaller than
    those whose name contains ``gen_``.

    Side effects:
        * writes ``results/base.jpg`` once and a reconstruction grid every
          100 steps (the ``results`` directory must already exist);
        * prints losses every 100 steps and ID accuracy every 500 steps;
        * saves a checkpoint under ``FLAGS.train_dir`` every 1000 steps;
        * restores trainable variables from ``restoreFileName`` first when
          ``isModelFT`` is set.
    """
    with tf.Graph().as_default():
        # Not passed to any optimizer, but kept: it is a graph variable and
        # therefore part of what `saver` writes to checkpoints.
        global_step = tf.Variable(0, trainable=False)

        # Placeholders used for training the CNN.
        images = tf.placeholder(
            dtype=tf.float32,
            shape=[FLAGS.batch_size, imHeight, imWidth, numCh])
        labels = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size])

        # Get images and labels
        images_tr, labels_tr = mnistip.distorted_inputs(randFlip=True)
        images_ev, labels_ev = mnistip.inputs(eval_data=True, numPrThread=1)

        # CL ZONE
        logits_id, local, kappaVal, wts, t_num_dim = cvn.inference_rec_net(
            images, CODE_LEN, NUM_CLASSES)

        # Losses - CL
        loss_softmax_id = cvn.loss_softmax(logits_id, labels)
        loss_combined = cvn.loss_total()

        # The code fed to the generator is the recognition network output.
        guessed_z = local

        # Losses - Generation
        im_gen = cvn.generation(guessed_z, t_num_dim)

        # Per-example cross-entropy between input and reconstruction; the
        # 1e-8 terms keep the logs finite.
        generation_loss = -tf.reduce_sum(
            images * tf.log(1e-8 + im_gen) +
            (1 - images) * tf.log(1e-8 + 1 - im_gen), [1, 2, 3])
        total_loss = tf.reduce_mean(generation_loss)

        # Apply a variable-specific learning rate for optimization.
        var_rec = [v for v in tf.trainable_variables()
                   if 'rec_' in v.name.lower()]
        var_gen = [v for v in tf.trainable_variables()
                   if 'gen_' in v.name.lower()]
        opt_rec = tf.train.AdamOptimizer(INITIAL_LEARNING_RATE * 0.001)
        opt_gen = tf.train.AdamOptimizer(INITIAL_LEARNING_RATE)
        # One gradient pass over both variable groups, split afterwards.
        grads = tf.gradients(total_loss, var_rec + var_gen)
        grads_rec = grads[:len(var_rec)]
        grads_gen = grads[len(var_rec):]
        train_rec = opt_rec.apply_gradients(zip(grads_rec, var_rec))
        train_gen = opt_gen.apply_gradients(zip(grads_gen, var_gen))
        train_op = tf.group(train_rec, train_gen)

        # Create a saver.
        saver = tf.train.Saver()
        if isModelFT:
            saver1 = tf.train.Saver(tf.trainable_variables())

        # tf.initialize_all_variables() is deprecated; use its replacement.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)
        if isModelFT:
            saver1.restore(sess, restoreFileName)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # Fixed evaluation batch used for every reconstruction snapshot.
        visualization, ev_labels = sess.run([images_ev, labels_ev])
        reshaped_vis = np.squeeze(visualization)
        ims("results/base.jpg", merge(reshaped_vis[:64], [8, 8]))

        # Only whole batches are evaluated, so size everything by the number
        # of examples actually visited rather than by the nominal set size.
        numTestStep = int(mnistip.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL /
                          FLAGS.batch_size)
        numEvaluated = numTestStep * FLAGS.batch_size

        for step in xrange(FLAGS.max_steps):
            _images, _labels = sess.run([images_tr, labels_tr])
            _, lossSM, lossTot, lossGen = sess.run(
                [train_op, loss_softmax_id, loss_combined, generation_loss],
                feed_dict={images: _images, labels: _labels})

            if step % 100 == 0:
                format_str = ('%s: Step %d, GEN-LOSS = %.2f, SM-loss = %.2f, '
                              'T-loss = %.2f\n')
                print(format_str % (datetime.now(), step, np.mean(lossGen),
                                    lossSM, lossTot))

                # save intermediate results
                generated_test = sess.run(
                    im_gen,
                    feed_dict={images: visualization, labels: ev_labels})
                generated_test = np.squeeze(generated_test)
                ims("results/" + str(step) + '_' + saveImPrefix + ".jpg",
                    merge(generated_test[:64], [8, 8]))

            # ---------------- Evaluate test set ----------------
            if step % 500 == 0:
                # Buffers are allocated only when an evaluation runs, and
                # hold exactly the rows that get filled, so no uninitialised
                # memory can leak into the accuracy.
                predictions_id = np.ndarray(
                    shape=(numEvaluated, mnistip.NUM_CLASSES),
                    dtype=np.float64)
                ftVec = np.ndarray(shape=(numEvaluated, FT_DIM),
                                   dtype=np.float64)
                tLabels = np.ndarray(shape=(numEvaluated), dtype=np.float64)

                print("====== Evaluating ID classification ========\n")
                for step_ev in xrange(numTestStep):
                    _images, _labels = sess.run([images_ev, labels_ev])
                    stIndx = step_ev * FLAGS.batch_size
                    edIndx = (step_ev + 1) * FLAGS.batch_size
                    _fts, tpred_id = sess.run([local, logits_id],
                                              feed_dict={images: _images})
                    predictions_id[stIndx:edIndx, :] = np.asarray(tpred_id)
                    ftVec[stIndx:edIndx, :] = np.asarray(_fts)
                    tLabels[stIndx:edIndx] = np.asarray(_labels)

                obs_labels = np.argmax(predictions_id, axis=1)
                sum_ = np.sum(tLabels == obs_labels)
                # max(..., 1) avoids a division by zero when the batch size
                # exceeds the evaluation set size.
                acc_id = sum_ / float(max(numEvaluated, 1))
                print('================================')
                format_str = ('%s: Step %d, ID_Acc = %.5f\n')
                print(format_str % (datetime.now(), step, acc_id))
                print('================================')

            if step % 1000 == 0:
                checkpoint_path = os.path.join(
                    FLAGS.train_dir, 'model-' + netName + '.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def train(num_iters=20000,
          batch_size=100,
          checkpoint_step=10000,
          initial_temp=1.,
          min_temp=0.5,
          anneal_rate=0.00003,
          learning_rate=1e-3,
          checkpoint_dir='checkpoint',
          sample_step=5000,
          sample_dir='sample',
          cont_dim=2,
          discrete_dim=10,
          data_dir=None,
          dataset='mnist'):
    """Train a ``ConcreteVae`` on MNIST or CIFAR-10.

    Args:
        num_iters: number of training iterations.
        batch_size: batch size passed to the dataset input function.
        checkpoint_step: a checkpoint is saved whenever ``i`` is a multiple
            of this value.
        initial_temp: starting value fed to ``vae.tau``.
        min_temp: lower bound of the annealed temperature.
        anneal_rate: rate of the exponential temperature decay.
        learning_rate: initial value fed to ``vae.learning_rate``; it is
            multiplied by 0.9 every 1000 iterations.
        checkpoint_dir: directory the checkpoint is written to.
        sample_step: ``plot_2d`` is called whenever ``i`` is a multiple of
            this value.
        sample_dir: directory passed to ``plot_2d``.
        cont_dim: passed to ``ConcreteVae`` and ``plot_2d``.
        discrete_dim: passed to ``ConcreteVae`` and ``plot_2d``.
        data_dir: directory containing the training data (required).
        dataset: ``'mnist'`` or ``'cifar10'``.

    Raises:
        ValueError: if ``data_dir`` is ``None`` or ``dataset`` is not one of
            the two supported names.
    """
    if data_dir is None:
        # The two literals are concatenated, so the first one must end with
        # a space.
        raise ValueError('The directory which contains the training data '
                         'must be passed in using --data_dir')

    if dataset == 'mnist':
        input_, _ = mnist_input.inputs(True, data_dir, batch_size)
        input_ = tf.reshape(input_, [-1, 28, 28, 1])
        data_shape = input_.get_shape().as_list()[1:]
    elif dataset == 'cifar10':
        input_, _ = cifar10_input.inputs(True, data_dir, batch_size)
        data_shape = input_.get_shape().as_list()[1:]
    else:
        raise ValueError('The dataset must be "mnist" or "cifar10"')

    vae = ConcreteVae(input_=input_,
                      cont_dim=cont_dim,
                      discrete_dim=discrete_dim)

    sess = tf.Session()
    try:
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        sess.run(init)

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            temp = initial_temp
            for i in range(num_iters):
                _, loss, cat_kl, cont_kl, recon = sess.run(
                    [vae.optimizer, vae.loss, vae.discrete_kl, vae.normal_kl,
                     vae.reconstruction],
                    {vae.tau: temp, vae.learning_rate: learning_rate})

                if i % checkpoint_step == 0:
                    path = saver.save(sess, checkpoint_dir + '/model.ckpt')
                    print('Model saved at iteration {} in checkpoint {}'
                          .format(i, path))

                if i % sample_step == 0:
                    plot_2d(sess,
                            sample_dir=sample_dir,
                            step=i,
                            vae=vae,
                            shape=data_shape,
                            discrete_dim=discrete_dim,
                            cont_dim=cont_dim)
                    print('Sample generated at step {}'.format(i))

                if i % 1000 == 0:
                    # Anneal the temperature and decay the learning rate,
                    # then report the metrics of the step just executed.
                    temp = np.maximum(
                        initial_temp * np.exp(-anneal_rate * i), min_temp)
                    learning_rate *= 0.9
                    print('Temperature updated to {}\n'.format(temp) +
                          'Learning rate updated to {}'.format(learning_rate))
                    print('Iteration {}\nLoss: {}\n'.format(i, loss) +
                          'Categorical KL: {}\n'.format(np.mean(cat_kl)) +
                          'Continuous KL: {}\n'.format(np.mean(cont_kl)) +
                          'Recon: {}'.format(np.mean(recon)))
        finally:
            # Stop the input threads even when a training step raised.
            coord.request_stop()
            coord.join(threads)
    finally:
        sess.close()
def inputs():
    """Return whatever ``mnist_input.inputs`` builds for ``FLAGS.data_dir``.

    Returns:
        The result of ``mnist_input.inputs`` called with the configured data
        directory and ``FLAGS.batch_size``.

    Raises:
        ValueError: if ``FLAGS.data_dir`` is empty or unset.
    """
    if FLAGS.data_dir:
        source_dir = FLAGS.data_dir
        return mnist_input.inputs(data_dir=source_dir,
                                  batch_size=FLAGS.batch_size)
    raise ValueError('Please supply a data_dir')
def train():
    """Train MNIST for a number of steps on ``NUM_GPU`` towers.

    One tower is built per GPU; every tower dequeues its own batch from a
    shared prefetch queue, the per-tower gradients are averaged and applied
    once to the shared variables. Progress is printed every 10 steps.

    Raises:
        AssertionError: if the fetched loss becomes NaN.
    """
    batch_size = 100  # batchSize requested from mnist_input.inputs
    num_steps = 50000

    # Create a new graph and use it as the default graph; the context is
    # placed on the host (CPU) side.
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): tf.train.get_or_create_global_step() would probably
        # do the same job - confirm before switching.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(0.001)

        # Create the data batches on the host side.
        labels, images = mnist_input.inputs([FILE_NAMES],
                                            batchSize=batch_size,
                                            shuffle=True)
        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [labels, images], capacity=2 * NUM_GPU)

        # Compute gradients on each device.
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in xrange(NUM_GPU):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                        # Dequeue one batch for this GPU.
                        label_batch, image_batch = batch_queue.dequeue()
                        # Compute the local loss for the batch.
                        loss = tower_loss(scope, image_batch, label_batch)
                        # Later towers reuse the variables of the first one.
                        tf.get_variable_scope().reuse_variables()
                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(
                            tf.GraphKeys.SUMMARIES, scope)
                        # Gradients for the batch of data on this device.
                        grads = opt.compute_gradients(loss)
                        tower_grads.append(grads)

        # Average each gradient across towers; this is the synchronization
        # point across all towers.
        grads = average_gradients(tower_grads)

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads,
                                                global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.9, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver to save all variables.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # allow_soft_placement must be True to build towers on GPU, as some
        # of the ops do not have GPU implementations.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        # Initialize the model variables, then start the queue runners.
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)

        for step in xrange(num_steps):
            start_time = time.time()
            # `loss` is the loss of the last tower built above.
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                # Each tower dequeues its own batch, so one step consumes
                # NUM_GPU batches (assumes batchSize is examples per batch).
                # The previous code hard-coded "100 / 2" and "duration / 2".
                num_examples_per_step = batch_size * NUM_GPU
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / NUM_GPU

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                )
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
def inputs(eval_data):
    """Build the input tensors, optionally cast to half precision.

    Args:
        eval_data: accepted for signature compatibility with the other
            ``inputs`` helpers; it is not forwarded to ``mnist_input.inputs``.

    Returns:
        ``(images, labels)``; both are cast to ``tf.float16`` when
        ``FLAGS.use_fp16`` is set.

    Raises:
        ValueError: if ``FLAGS.data_dir`` is empty or unset.
    """
    # Same guard as the sibling inputs() helpers in this file.
    if not FLAGS.data_dir:
        raise ValueError('Please supply a data_dir')

    images, labels = mnist_input.inputs(data_dir=FLAGS.data_dir,
                                        batch_size=FLAGS.batch_size)
    if FLAGS.use_fp16:
        images = tf.cast(images, tf.float16)
        # Cast the labels themselves; the previous code cast `images` here,
        # which replaced the labels with a copy of the image tensor.
        labels = tf.cast(labels, tf.float16)
    # The tensors were previously built but never returned.
    return images, labels
def train():
    """Train the ID-classification CNN with a multi-step learning rate.

    The combined loss is minimised with a momentum optimizer whose learning
    rate is fed per step and multiplied by 0.8 each time training reaches one
    of the epoch milestones in ``mulsteplr``.

    Side effects:
        * prints (and, when ``saveLogFile`` is set, appends to ``fname``) the
          losses every 100 steps;
        * every 1000 steps and on the last step: saves a checkpoint when
          ``isSaveModel`` is set, evaluates on the eval queue, and writes the
          features and labels to ``<netName>_<step>.npz``;
        * restores trainable variables from ``restoreFileName`` first when
          ``isModelFT`` is set.

    Raises:
        AssertionError: if the combined loss becomes NaN.
    """
    with tf.Graph().as_default():
        # Not passed to the optimizer, but kept: it is a graph variable and
        # therefore part of what `saver` writes to checkpoints.
        global_step = tf.Variable(0, trainable=False)

        # Placeholders used for training the CNN.
        images = tf.placeholder(
            dtype=tf.float32,
            shape=[FLAGS.batch_size, imHeight, imWidth, numCh])
        labels = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size])
        learning_rate = tf.placeholder(dtype=tf.float32, shape=[])

        # Get images and labels
        images_tr, labels_tr = mnistip.distorted_inputs(randFlip=True)
        images_ev, labels_ev = mnistip.inputs(eval_data=True)

        # Build a Graph that computes the logits predictions.
        logits_id, local, kappaVal = mncnn.inference(images)

        # Calculate losses.
        loss_softmax_id = mncnn.loss_softmax(logits_id, labels)
        loss_combined = mncnn.loss_total()

        train_op = tf.train.MomentumOptimizer(learning_rate,
                                              0.9).minimize(loss_combined)

        # Create a saver.
        saver = tf.train.Saver()
        if isModelFT:
            saver1 = tf.train.Saver(tf.trainable_variables())

        # tf.initialize_all_variables() is deprecated; use its replacement.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)
        if isModelFT:
            saver1.restore(sess, restoreFileName)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # Start from a fresh log file on every run.
        f_handle = None
        if saveLogFile:
            if os.path.isfile(fname):
                os.remove(fname)
            f_handle = open(fname, 'a')

        try:
            lr_ = INITIAL_LEARNING_RATE
            mulsteplr = np.array([6, 8, 10])  # epochs at which lr decays
            stEpoch = 1
            currEpoch = int(stEpoch * FLAGS.batch_size /
                            mnistip.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
            mulstepCnt = np.where(currEpoch < mulsteplr)[0][0]
            # NOTE(review): this raises lr_ to a power, whereas the loop
            # below decays by a factor of 0.8. It is a no-op while
            # mulstepCnt is 0 - confirm the intent before resuming from a
            # later step.
            lr_ = lr_**(mulstepCnt + 1)

            # Only whole batches are evaluated, so size everything by the
            # number of examples actually visited.
            numTestStep = int(mnistip.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL /
                              FLAGS.batch_size)
            numEvaluated = numTestStep * FLAGS.batch_size

            for step in xrange(stEpoch, FLAGS.max_steps):
                # Learning rate decrease policy. The bounds check stops the
                # lookup from running past the last milestone, which used to
                # raise IndexError after the final decay.
                currEpoch = int(step * FLAGS.batch_size /
                                mnistip.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
                if (mulstepCnt < len(mulsteplr)
                        and currEpoch >= mulsteplr[mulstepCnt]):
                    print(str(currEpoch) + ':: Decreasing learning rate')
                    lr_ = 0.8 * lr_
                    mulstepCnt = mulstepCnt + 1

                _images, _labels = sess.run([images_tr, labels_tr])
                _, lossID, lossComb, kappaVal_ = sess.run(
                    [train_op, loss_softmax_id, loss_combined, kappaVal],
                    feed_dict={
                        images: _images,
                        labels: _labels,
                        learning_rate: lr_
                    })

                if step % 100 == 0:
                    format_str = (
                        '%s: Step %d, LR=%.4f, ID-loss = %.2f, T-loss = %.2f, Kappa = %.2f\n'
                    )
                    print(format_str % (datetime.now(), step, lr_, lossID,
                                        lossComb, kappaVal_))
                    if saveLogFile:
                        f_handle.write(format_str %
                                       (datetime.now(), step, lr_, lossID,
                                        lossComb, kappaVal_))

                assert not np.isnan(lossComb), \
                    'Model diverged with loss = NaN'

                # Save the model checkpoint periodically, then evaluate.
                if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                    if isSaveModel:
                        if isModelFT:
                            checkpoint_path = os.path.join(
                                FLAGS.train_dir,
                                'model-' + netName + '-ft.ckpt')
                        else:
                            checkpoint_path = os.path.join(
                                FLAGS.train_dir,
                                'model-' + netName + '.ckpt')
                        print('saving model ...')
                        saver.save(sess, checkpoint_path, global_step=step)

                    # ---------------- Evaluate test set ----------------
                    # Buffers hold exactly the rows that get filled, so no
                    # uninitialised memory reaches the accuracy or the
                    # saved .npz file.
                    predictions_id = np.ndarray(
                        shape=(numEvaluated, mnistip.NUM_CLASSES),
                        dtype=np.float64)
                    ftVec = np.ndarray(shape=(numEvaluated, FT_DIM),
                                       dtype=np.float64)
                    tLabels = np.ndarray(shape=(numEvaluated),
                                         dtype=np.float64)

                    print("====== Evaluating ID classification ========\n")
                    for step_ev in xrange(numTestStep):
                        _images, _labels = sess.run([images_ev, labels_ev])
                        stIndx = step_ev * FLAGS.batch_size
                        edIndx = (step_ev + 1) * FLAGS.batch_size
                        _fts, tpred_id = sess.run(
                            [local, logits_id], feed_dict={images: _images})
                        predictions_id[stIndx:edIndx, :] = np.asarray(
                            tpred_id)
                        ftVec[stIndx:edIndx, :] = np.asarray(_fts)
                        tLabels[stIndx:edIndx] = np.asarray(_labels)

                    obs_labels = np.argmax(predictions_id, axis=1)
                    sum_ = np.sum(tLabels == obs_labels)
                    # max(..., 1) avoids a division by zero when the batch
                    # size exceeds the evaluation set size.
                    acc_id = sum_ / float(max(numEvaluated, 1))
                    print('================================')
                    format_str = ('%s: Step %d, ID_Acc = %.5f\n')
                    np.savez(netName + '_' + str(step), X=ftVec, y=tLabels)
                    print(format_str % (datetime.now(), step, acc_id))
                    if saveLogFile:
                        f_handle.write(
                            '==================================================\n'
                        )
                        f_handle.write(format_str %
                                       (datetime.now(), step, acc_id))
                        f_handle.write(
                            '==================================================\n'
                        )
        finally:
            # Make sure buffered log lines reach disk even if training fails.
            if f_handle is not None:
                f_handle.close()
def train():
    """Train the variational auto-encoder and dump reconstructions.

    The encoder (``ved.recognition``) produces a mean and a standard
    deviation; a code is sampled from them and decoded by ``ved.generation``.
    The objective is the mean of the reconstruction loss plus the latent loss.

    Side effects:
        * writes ``results/base.jpg`` once and ``results/<step>.jpg`` every
          20 steps (the ``results`` directory must already exist);
        * prints the mean of both losses every 20 steps.
    """
    with tf.Graph().as_default():
        # Not used by the optimizer below; kept so the set of graph
        # variables stays as it was.
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels
        images_tr, labels_tr = mnistip.distorted_inputs(randFlip=False)
        images_ev, labels_ev = mnistip.inputs(eval_data=True)

        # VAE ZONE
        images = tf.placeholder(
            dtype=tf.float32,
            shape=[FLAGS.batch_size, imHeight, imWidth, numCh])

        # Define Encoder
        z_mean, z_stddev, t_num_dim = ved.recognition(images, CODE_LEN)

        # Draw a new sample: z = mean + stddev * eps with eps ~ N(0, 1).
        samples = tf.random_normal([FLAGS.batch_size, CODE_LEN], 0, 1,
                                   dtype=tf.float32)
        guessed_z = z_mean + (z_stddev * samples)

        # Define Decoder
        im_gen = ved.generation(guessed_z, t_num_dim)

        # Per-example cross-entropy between input and reconstruction; the
        # 1e-8 terms keep the logs finite.
        generation_loss = -tf.reduce_sum(
            images * tf.log(1e-8 + im_gen) +
            (1 - images) * tf.log(1e-8 + 1 - im_gen), [1, 2, 3])
        # NOTE(review): unlike generation_loss, this log has no epsilon, so
        # a zero z_stddev would yield -inf - confirm whether a guard is
        # wanted.
        latent_loss = 0.5 * tf.reduce_sum(
            tf.square(z_mean) + tf.square(z_stddev) -
            tf.log(tf.square(z_stddev)) - 1, 1)
        total_loss = tf.reduce_mean(generation_loss + latent_loss)

        # Optimize now
        train_op = tf.train.AdamOptimizer(0.001).minimize(total_loss)

        # tf.initialize_all_variables() is deprecated; use its replacement.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # Fixed evaluation batch used for every reconstruction snapshot.
        visualization, _ = sess.run([images_ev, labels_ev])
        reshaped_vis = np.squeeze(visualization)
        ims("results/base.jpg", merge(reshaped_vis[:64], [8, 8]))

        for step in xrange(FLAGS.max_steps):
            # Labels are dequeued together with the images but not used.
            _images, _ = sess.run([images_tr, labels_tr])
            _, lossGen, lossLat = sess.run(
                [train_op, generation_loss, latent_loss],
                feed_dict={images: _images})

            if step % 20 == 0:
                format_str = (
                    '%s: Step %d, GEN-loss = %.2f, LAT-loss = %.2f\n')
                print(format_str % (datetime.now(), step, np.mean(lossGen),
                                    np.mean(lossLat)))

                # save intermediate results
                generated_test = sess.run(im_gen,
                                          feed_dict={images: visualization})
                generated_test = np.squeeze(generated_test)
                ims("results/" + str(step) + ".jpg",
                    merge(generated_test[:64], [8, 8]))