def tower_loss(scope, images, labels):
    """Build a single dogcat tower and return the sum of its losses.

    Args:
      scope: unique prefix string identifying the dogcat tower, e.g.
        'tower_0'. Only entries of the 'losses' collection under this scope
        are summed.
      images: Images. 4D tensor of shape [batch_size, height, width, 3].
      labels: Labels. 1D tensor of shape [batch_size].

    Returns:
      Scalar tensor (shape []) named 'total_loss': the `tf.add_n` of every
      tensor found in the 'losses' collection for `scope`.
    """
    # Forward pass for this tower.
    tower_logits = dogcat.inference(images)

    # Called only for its effect on the graph; the returned tensor is ignored
    # and the individual terms are read back from the 'losses' collection
    # (presumably dogcat.loss registers them there -- confirm in dogcat).
    dogcat.loss(tower_logits, labels)

    # Restrict the collection to this tower, then add everything up.
    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')

    # One scalar summary per individual loss plus one for the total. The
    # '<TOWER_NAME>_<n>/' prefix is stripped from the op name so that every
    # tower reports under the same tag on TensorBoard.
    tower_prefix = '%s_[0-9]*/' % dogcat.TOWER_NAME
    for tensor in tower_losses + [total_loss]:
        summary_tag = re.sub(tower_prefix, '', tensor.op.name)
        tf.summary.scalar(summary_tag, tensor)

    return total_loss
def train():
    """Train dogcat until the step limit in FLAGS.max_steps is reached.

    Builds the input pipeline, model, loss and train op in a fresh graph, then
    drives `train_op` inside a `MonitoredTrainingSession` that checkpoints to
    FLAGS.train_dir, aborts on a NaN loss, and prints the loss and throughput
    every FLAGS.log_frequency steps.
    """
    with tf.Graph().as_default():
        step_counter = tf.train.get_or_create_global_step()

        # Pin the input pipeline to CPU:0; letting its ops land on the GPU
        # can slow training down.
        with tf.device('/cpu:0'):
            images, labels = dogcat.distorted_inputs()

        # Model -> loss -> optimizer step.
        batch_loss = dogcat.loss(dogcat.inference(images), labels)
        train_op = dogcat.train(batch_loss, step_counter)

        class _LoggerHook(tf.train.SessionRunHook):
            """Prints the loss and timing every FLAGS.log_frequency steps."""

            def begin(self):
                # Start at -1 so the first before_run() yields step 0.
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Fetch the loss together with whatever the session runs.
                return tf.train.SessionRunArgs(batch_loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency != 0:
                    return

                # Time elapsed since the previous report; this reading also
                # becomes the start of the next interval.
                now = time.time()
                elapsed = now - self._start_time
                self._start_time = now

                fetched_loss = run_values.results
                examples_per_sec = (
                    FLAGS.log_frequency * FLAGS.batch_size / elapsed)
                sec_per_batch = float(elapsed / FLAGS.log_frequency)

                template = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                            '%.3f sec/batch)')
                print(template % (datetime.now(), self._step, fetched_loss,
                                  examples_per_sec, sec_per_batch))

        checkpoint_dir = FLAGS.train_dir
        session_hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(batch_loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=checkpoint_dir,
                hooks=session_hooks,
                config=session_config) as mon_sess:
            # StopAtStepHook flips should_stop() once max_steps is reached.
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def predict(path):
    """Build the dogcat inference graph for one image and run a prediction.

    Args:
      path: passed unchanged to `dogcat.predict_input_get_resized_image`,
        which supplies the image tensor.

    Returns:
      Whatever `predict_once(saver, logits)` returns.
    """
    with tf.Graph().as_default():
        resized = dogcat.predict_input_get_resized_image(path)
        # Insert a new axis at position 0 before feeding the model
        # (presumably so the single image looks like a batch of one).
        batch = tf.expand_dims(resized, 0)

        logits = dogcat.inference(batch)

        # Restore the moving-average versions of the learned variables rather
        # than the raw ones.
        ema = tf.train.ExponentialMovingAverage(dogcat.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        return predict_once(saver, logits)
def train():
    """Train dogcat until FLAGS.max_steps, using at most half of GPU memory.

    Builds the input pipeline, model, loss and train op in a fresh graph, then
    repeatedly runs `train_op` inside a `MonitoredTrainingSession` that
    checkpoints to FLAGS.train_dir, stops at FLAGS.max_steps, aborts when the
    loss becomes NaN, and prints the loss and throughput every
    FLAGS.log_frequency steps.
    """
    with tf.Graph().as_default():
        # tf.train.get_or_create_global_step replaces the deprecated
        # tf.contrib.framework alias of the same function.
        global_step = tf.train.get_or_create_global_step()

        # Keep the input pipeline on the CPU.
        with tf.device('/cpu:0'):
            images, labels = dogcat.distorted_inputs()

        logits = dogcat.inference(images)
        loss = dogcat.loss(logits, labels)
        train_op = dogcat.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Prints the loss and timing every FLAGS.log_frequency steps."""

            def begin(self):
                # Start at -1 so the first before_run() yields step 0.
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss value.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    # Reuse the same reading as the start of the next
                    # interval; a second time.time() call would drop the gap
                    # between the two reads from every measurement.
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = (
                        FLAGS.log_frequency * FLAGS.batch_size / duration)
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step %d, loss = %.4f (%.1f '
                                  'examples/sec; %.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step,
                                        loss_value, examples_per_sec,
                                        sec_per_batch))

        # Limit this process to 50% of the memory of each visible GPU.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook(),
                ],
                config=tf.ConfigProto(gpu_options=gpu_options)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def evaluate():
    """Evaluate dogcat once, or repeatedly every FLAGS.eval_interval_secs.

    Builds the evaluation graph, then calls `eval_once` in a loop, feeding
    each call's return value back in as the next call's `ckpt_path` argument
    (the first call receives an empty string). The loop ends after the first
    pass when FLAGS.run_once is set.
    """
    with tf.Graph().as_default():
        images, labels = dogcat.inputs(eval_data=FLAGS.eval_data)
        logits = dogcat.inference(images)

        # Per-example check: is the label the top-1 prediction?
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving-average versions of the variables.
        ema = tf.train.ExponentialMovingAverage(dogcat.MOVING_AVERAGE)
        saver = tf.train.Saver(ema.variables_to_restore())

        last_ckpt = eval_once(saver, top_k_op, '')
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            last_ckpt = eval_once(saver, top_k_op, last_ckpt)
def evaluate():
    """Evaluate dogcat once, or repeatedly every FLAGS.eval_interval_secs.

    Builds the evaluation graph and a summary writer for FLAGS.eval_dir, then
    calls `eval_all` in a loop. The loop ends after the first pass when
    FLAGS.run_once is set.
    """
    with tf.Graph().as_default() as graph:
        images, labels = dogcat.inputs(eval_data=FLAGS.eval_data)
        logits = dogcat.inference(images)

        # Per-example check: is the label the top-1 prediction?
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving-average versions of the variables.
        ema = tf.train.ExponentialMovingAverage(dogcat.MOVING_AVERAGE)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Merge every summary registered so far; write events (and the graph)
        # under FLAGS.eval_dir.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        eval_all(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_all(saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Evaluate dogcat once, or repeatedly every FLAGS.eval_interval_secs.

    Builds the evaluation graph and a summary writer for FLAGS.eval_dir, then
    calls `eval_once` in a loop. The loop ends after the first pass when
    FLAGS.run_once is set.
    """
    with tf.Graph().as_default() as graph:
        # dogcat.inputs receives a boolean: True only when FLAGS.eval_data is
        # exactly 'test'.
        use_test_set = FLAGS.eval_data == 'test'
        images, labels = dogcat.inputs(eval_data=use_test_set)

        # Logits from the inference model.
        logits = dogcat.inference(images)

        # Per-example check: is the label the top-1 prediction?
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving-average versions of the learned variables.
        ema = tf.train.ExponentialMovingAverage(dogcat.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Merge every summary registered so far; write events (and the graph)
        # under FLAGS.eval_dir.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        eval_once(saver, summary_writer, top_k_op, logits, labels, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, top_k_op, logits, labels,
                      summary_op)