def train():
    """Run the input pipeline only and count how often each image name is drawn."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images, labels and file names from the input pipeline.
        images, labels, names = drd.distorted_inputs()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        names_list = []
        start_time = time.time()
        for i in range(20000):
            if i % 1000 == 0:
                print(i)
            names_im = sess.run([names])
            names_list.append(names_im)

        # Stack all fetched batches and count how often each file name occurred.
        names = np.vstack(names_list).flatten()
        print(type(names))
        print(names.shape)
        uniq, counts = np.unique(names, return_counts=True)
        print(uniq, counts)
        print(uniq.shape)
        print(counts.shape)
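# Hedged follow-up sketch (not part of the original script): with a shuffled
# queue, each file name should be drawn roughly the same number of times,
# i.e. about counts.sum() / uniq.shape[0] occurrences per image. The helper
# name check_name_spread is hypothetical; it expects the uniq/counts arrays
# produced by np.unique above.
def check_name_spread(uniq, counts):
    expected = counts.sum() / float(uniq.shape[0])
    print("expected occurrences per image: %.1f" % expected)
    print("observed min/max occurrences: %d / %d" % (counts.min(), counts.max()))

# Usage, after the counting loop:
# check_name_spread(uniq, counts)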
def train():
    """Train the ResNet-50 model for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get training images, labels and file names.
        images, labels, names = drd.distorted_inputs()
        # Get validation data.
        val_images, val_labels = drd.inputs(True)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        #logits1 = drd.inference(images, FLAGS.n_residual_blocks)
        logits = drd.resnet_v1_50(images)
        val_logits = drd.resnet_v1_50(val_images)

        # Calculate predictions.
        predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
        val_predictions = tf.cast(tf.argmax(val_logits, axis=1), tf.int32)

        # Ops for batch accuracy calculation (validation accuracy must be
        # measured against the validation labels, not the training labels).
        correct_prediction = tf.equal(predictions, labels)
        val_correct_prediction = tf.equal(val_predictions, val_labels)
        batch_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_batch_accuracy = tf.reduce_mean(
            tf.cast(val_correct_prediction, tf.float32))

        # Calculate loss.
        loss = drd.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = drd.train(loss, global_step)

        # Create a saver for the full model.
        saver = tf.train.Saver(tf.global_variables())

        # Saver for the pre-trained weights only: keep the weight variables
        # and drop the last two entries (the logits layer).
        variables = slim.get_variables_to_restore()
        variables_to_restore = [
            v for v in variables if v.name.split('/')[-1] == 'weights:0'
        ]
        saver_pre = tf.train.Saver(variables_to_restore[0:-2])  # exclude logits layer

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.save_dir, sess.graph)

        step_start = 0
        try:
            # Try to find the last checkpoint of the full model, if one exists.
            print("Trying to restore last checkpoint ...")
            save_dir = FLAGS.save_dir
            # Use TensorFlow to find the latest checkpoint - if any.
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
            # Try to load the data in the checkpoint.
            saver.restore(sess, save_path=last_chk_path)
            # If we get to this point, the checkpoint was successfully loaded.
            print("Restored checkpoint from:", last_chk_path)
            # Checkpoints are written as 'model.ckpt-<step>', so the step to
            # resume from is the suffix after the last dash.
            step_start = int(last_chk_path.split('-')[-1])
        except Exception:
            # If the above failed for some reason, simply initialize all the
            # variables for the TensorFlow graph.
            print("Failed to restore any checkpoints. "
                  "Initializing variables instead.")
            sess.run(init)

        accuracy_dev = []
        val_accuracy_dev = []
        for step in range(step_start, FLAGS.max_steps):
            start_time = time.time()
            _, loss_value, accuracy = sess.run(
                [train_op, loss, batch_accuracy])
            # Append the batch accuracy to the running list.
            accuracy_dev.append(accuracy)
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                im_id, val_acc = sess.run([names, val_batch_accuracy])
                val_accuracy_dev.append(val_acc)
                print("the image being trained on is {}".format(im_id))
                print("The average validation accuracy is: {}".format(
                    np.mean(val_accuracy_dev)))

                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f, avg_batch_accuracy = %.2f, '
                              '(%.1f examples/sec; %.3f sec/batch)')
                # Report the average of all batch accuracies so far.
                print(format_str % (datetime.now(), step, loss_value,
                                    np.mean(accuracy_dev), examples_per_sec,
                                    sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.save_dir, 'model.ckpt')
                #pre_trained_path = os.path.join(FLAGS.pre_trained_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
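# Hedged sketch (an assumption, not part of the original script): the two
# drd.resnet_v1_50 calls above only evaluate the same network if the model
# function shares its variables between calls. If it does not handle reuse
# internally, one generic way to guarantee sharing is a reusable variable
# scope; the wrapper name build_shared_model is hypothetical.
def build_shared_model(inputs):
    with tf.variable_scope('resnet_model', reuse=tf.AUTO_REUSE):
        return drd.resnet_v1_50(inputs)

# logits = build_shared_model(images)
# val_logits = build_shared_model(val_images)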
def train():
    """Train the residual network (model_2) for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get training images and labels.
        images, labels = drd.distorted_inputs()
        # Get validation data.
        val_images, val_labels = drd.inputs(False)
        print(images.get_shape(), val_images.get_shape())

        # Build the training and validation towers; both calls reuse the same
        # variables, and the validation tower runs on the validation images
        # with is_training=False.
        #logits1 = drd.inference(images, FLAGS.n_residual_blocks)
        logits = model_2.inference(images, n=4, reuse=tf.AUTO_REUSE,
                                   is_training=True)
        val_logits = model_2.inference(val_images, n=4, reuse=tf.AUTO_REUSE,
                                       is_training=False)
        #logits = drd.resnet_v1_50(images, training=True)
        #val_logits = drd.resnet_v1_50(val_images, training=False)

        # Softmax over the logits.
        soft_max_logits = tf.nn.softmax(logits)
        soft_max_logits_val = tf.nn.softmax(val_logits)

        # Calculate predictions.
        predictions = tf.cast(tf.argmax(soft_max_logits, axis=1), tf.int32)
        val_predictions = tf.cast(tf.argmax(soft_max_logits_val, axis=1),
                                  tf.int32)

        # Ops for batch accuracy calculation (validation accuracy is measured
        # against the validation labels).
        correct_prediction = tf.equal(predictions, labels)
        val_correct_prediction = tf.equal(val_predictions, val_labels)
        batch_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_batch_accuracy = tf.reduce_mean(
            tf.cast(val_correct_prediction, tf.float32))

        # Calculate loss, which includes softmax cross entropy and L2
        # regularization.
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                               labels=labels)
        # Create a tensor named cross_entropy for logging purposes.
        tf.identity(cross_entropy, name='cross_entropy')
        tf.summary.scalar('cross_entropy', cross_entropy)

        # Batch-normalization variables are excluded from weight decay.
        def exclude_batch_norm(name):
            return 'batch_normalization' not in name

        loss_filter_fn = exclude_batch_norm

        # Add weight decay to the loss.
        l2_loss = weight_decay * tf.add_n(
            # loss is computed using fp32 for numerical stability.
            [tf.nn.l2_loss(tf.cast(v, tf.float32))
             for v in tf.trainable_variables()
             if loss_filter_fn(v.name)])
        tf.summary.scalar('l2_loss', l2_loss)
        loss = cross_entropy + l2_loss

        # List of learning-rate decay factors applied at epochs 30/60/90/120.
        lr_decay_factors = [1, 0.1, 0.01, 0.001, 0.0001]
        learning_rate = 0.001

        # Create a tensor named learning_rate for logging purposes.
        tf.identity(learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', learning_rate)

        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum)
        minimize_op = optimizer.minimize(loss, global_step)
        # Batch-norm statistics are updated through the UPDATE_OPS collection.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        train_op = tf.group(minimize_op, update_ops)

        #loss = drd.loss(logits, labels)
        #train_op = drd.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        #variables = slim.get_variables_to_restore()
        #variables_to_restore = [v for v in variables if v.name.split('/')[-1] == 'weights:0']
        #saver_pre = tf.train.Saver(variables_to_restore[0:-2])  # exclude logits layer

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.save_dir, sess.graph)

        step_start = 0
        try:
            # Try to find the last checkpoint of the full model, if one exists.
            print("Trying to restore last checkpoint ...")
            save_dir = FLAGS.save_dir
            # Use TensorFlow to find the latest checkpoint - if any.
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
            # Try to load the data in the checkpoint.
            saver.restore(sess, save_path=last_chk_path)
            # If we get to this point, the checkpoint was successfully loaded.
            print("Restored checkpoint from:", last_chk_path)

            # Any variables that were not in the checkpoint still need to be
            # initialized explicitly.
            uninitialized_vars = []
            for var in tf.global_variables():
                try:
                    sess.run(var)
                except tf.errors.FailedPreconditionError:
                    print("not initialized:", var)
                    uninitialized_vars.append(var)
            init_new_vars_op = tf.variables_initializer(uninitialized_vars)
            sess.run(init_new_vars_op)
        except Exception:
            # If the above failed for some reason, simply initialize all the
            # variables for the TensorFlow graph.
            print("Failed to restore any checkpoints. "
                  "Initializing variables instead.")
            sess.run(init)

        accuracy_dev = []
        val_accuracy_dev = []
        for step in range(step_start, FLAGS.max_steps):
            start_time = time.time()
            # Run the train op.
            _, loss_value, accuracy, gs = sess.run(
                [train_op, loss, batch_accuracy, global_step])

            # Learning-rate decay scheme, applied at epochs 30/60/90/120.
            # Note: reassigning this Python float has no effect on the
            # optimizer built above; see the sketch after this function for
            # a graph-based schedule.
            if ((gs * FLAGS.batch_size) / NUM_IMAGES) == 30:
                learning_rate = learning_rate * lr_decay_factors[1]
            if ((gs * FLAGS.batch_size) / NUM_IMAGES) == 60:
                learning_rate = learning_rate * lr_decay_factors[2]
            if ((gs * FLAGS.batch_size) / NUM_IMAGES) == 90:
                learning_rate = learning_rate * lr_decay_factors[3]
            if ((gs * FLAGS.batch_size) / NUM_IMAGES) == 120:
                learning_rate = learning_rate * lr_decay_factors[4]

            accuracy_dev.append(accuracy)
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                val_acc = sess.run(val_batch_accuracy)
                val_accuracy_dev.append(val_acc)

                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f, avg_batch_accuracy = %.2f, '
                              '(%.1f examples/sec; %.3f sec/batch), '
                              'validation accuracy %.2f')
                # Report the average of all batch accuracies so far.
                print(format_str % (datetime.now(), step, loss_value,
                                    np.mean(accuracy_dev), examples_per_sec,
                                    sec_per_batch, np.mean(val_accuracy_dev)))

            if step % 10 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.save_dir, 'model.ckpt')
                #pre_trained_path = os.path.join(FLAGS.pre_trained_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
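# Hedged sketch (an assumption, not part of the original script): the Python
# float `learning_rate` above is baked into the MomentumOptimizer when the
# graph is built, so reassigning it inside the training loop does not change
# the effective learning rate. One way to get the intended epoch-based
# schedule (boundaries at epochs 30/60/90/120 with the factors from
# lr_decay_factors) is to derive the rate from global_step inside the graph,
# replacing the optimizer construction above:
boundaries = [int(epoch * NUM_IMAGES / FLAGS.batch_size)
              for epoch in (30, 60, 90, 120)]
values = [0.001 * factor for factor in [1.0, 0.1, 0.01, 0.001, 0.0001]]
learning_rate = tf.train.piecewise_constant(
    tf.cast(global_step, tf.int32), boundaries, values)
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=momentum)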
def train():
    """Train the shallow Oxford-style network for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get training images, labels and file names.
        images, labels, names = drd.distorted_inputs()
        # Get validation data.
        val_images, val_labels = drd.inputs(True)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        #logits1 = drd.inference(images, FLAGS.n_residual_blocks)
        logits = drd.shallow_oxford_net_C(images)
        val_logits = drd.shallow_oxford_net_C(val_images)

        # Calculate predictions.
        predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
        val_predictions = tf.cast(tf.argmax(val_logits, axis=1), tf.int32)

        # Ops for batch accuracy calculation (validation accuracy is measured
        # against the validation labels).
        correct_prediction = tf.equal(predictions, labels)
        val_correct_prediction = tf.equal(val_predictions, val_labels)
        batch_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        val_batch_accuracy = tf.reduce_mean(
            tf.cast(val_correct_prediction, tf.float32))
        tf.summary.scalar("Training Accuracy", batch_accuracy)

        # Calculate loss.
        loss = drd.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = drd.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())
        sub_network = 'oxford_net'
        #saver_30 = tf.train.Saver(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=sub_network))

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        step_start = 0
        try:
            # Try to find the last checkpoint of the full model, if one exists.
            print("Trying to restore last checkpoint ...")
            save_dir = FLAGS.save_dir
            # Use TensorFlow to find the latest checkpoint - if any.
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
            # Try to load the data in the checkpoint.
            saver.restore(sess, save_path=last_chk_path)
            # If we get to this point, the checkpoint was successfully loaded.
            print("Restored checkpoint from:", last_chk_path)

            # Any variables that were not in the checkpoint still need to be
            # initialized explicitly.
            uninitialized_vars = []
            for var in tf.global_variables():
                try:
                    sess.run(var)
                except tf.errors.FailedPreconditionError:
                    uninitialized_vars.append(var)
            init_new_vars_op = tf.variables_initializer(uninitialized_vars)
            sess.run(init_new_vars_op)

            # Checkpoints are written as 'model.ckpt-<step>', so the step to
            # resume from is the suffix after the last dash.
            step_start = int(last_chk_path.split('-')[-1])
        except Exception:
            # If the above failed for some reason, simply initialize all the
            # variables for the TensorFlow graph.
            print("Failed to restore any checkpoints. "
                  "Initializing variables instead.")
            sess.run(init)

        names_iterated = []
        accuracy_dev = []
        val_accuracy_dev = []
        for step in range(step_start, FLAGS.max_steps):
            start_time = time.time()
            _, loss_value, accuracy, names_strings = sess.run(
                [train_op, loss, batch_accuracy, names])
            # Append the batch accuracy and file names to the running lists.
            accuracy_dev.append(accuracy)
            names_iterated.append(names_strings)
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                im_id, val_acc = sess.run([names, val_batch_accuracy])
                val_accuracy_dev.append(val_acc)

                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f, avg_batch_accuracy = %.2f, '
                              '(%.1f examples/sec; %.3f sec/batch), '
                              'validation accuracy %.2f, image_name: %s')
                # Report the average of all batch accuracies so far.
                print(format_str % (datetime.now(), step, loss_value,
                                    np.mean(accuracy_dev), examples_per_sec,
                                    sec_per_batch, np.mean(val_accuracy_dev),
                                    im_id))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                pre_trained_path = os.path.join(FLAGS.pre_trained_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                #saver_30.save(sess, pre_trained_path, global_step=step)

                # Write the file names seen since the last checkpoint to disk
                # to verify the input pipeline is correct.
                with open("file_names_" + str(step), "w") as f:
                    f.write("\n".join(str(x) for x in names_iterated))
                names_iterated = []
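# Hedged sketch (an assumption, not part of the original script): the
# commented-out saver_30 above suggests checkpointing only the sub-network's
# trainable variables (scope 'oxford_net') to FLAGS.pre_trained_dir so they
# can later be restored as a pre-trained backbone. Assuming the
# shallow_oxford_net_C layers live under that variable scope, a
# scope-restricted saver would look like this:
sub_net_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope='oxford_net')
saver_30 = tf.train.Saver(var_list=sub_net_vars)
# ...and inside the periodic-checkpoint branch of the training loop:
# saver_30.save(sess, pre_trained_path, global_step=step)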