def __init__(self, model_def_file, class_labels_file):
        logging.info('Loading net and associated files...')

        with tf.Graph().as_default(), tf.device('cpu:0'):
            self.sess = tf.Session()
            self.image_buffer = tf.placeholder(tf.string)
            image = tf.image.decode_jpeg(self.image_buffer, channels=3)
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
            image = self.eval_image(image, 299, 299)
            image = tf.subtract(image, 0.5)
            image = tf.multiply(image, 2.0)
            images = tf.expand_dims(image, 0)

            # Run inference.
            logits, predictions = inception_model.inference(
                images, NUM_CLASSES + 1)

            # Transform output to topK result.
            self.values, self.indices = tf.nn.top_k(
                predictions, NUM_TOP_CLASSES)

            variable_averages = tf.train.ExponentialMovingAverage(
                inception_model.MOVING_AVERAGE_DECAY)
            variables_to_restore = variable_averages.variables_to_restore()
            self.sess.run(tf.global_variables_initializer())
            self.sess.run(tf.local_variables_initializer())
            saver = tf.train.Saver(variables_to_restore)
            saver.restore(self.sess, model_def_file)
            # Required to get the filename matching to run.

            self.label_names = ['none']
            with open(class_labels_file, encoding='utf-8') as f:
                for line in f.read().splitlines():
                    self.label_names.append(line)
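
A usage sketch, not part of the original example: a hypothetical `classify` method for the class above would feed raw JPEG bytes into the placeholder and map the top-K indices back to label names.

def classify(self, jpeg_bytes):
    # Hypothetical helper; assumes the attributes built in __init__ above.
    values, indices = self.sess.run(
        [self.values, self.indices],
        feed_dict={self.image_buffer: jpeg_bytes})
    # top_k ran on a single-image batch, so read row 0.
    return [(self.label_names[i], v)
            for i, v in zip(indices[0], values[0])]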
Example #2
def _tower_loss(images, labels, num_classes, scope, reuse_variables=None):
  """Calculate the total loss on a single tower running the ImageNet model.

  We perform 'batch splitting'. This means that we cut up a batch across
  multiple GPUs. For instance, if the batch size = 32 and num_gpus = 2,
  then each tower will operate on a batch of 16 images.

  Args:
    images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
                                       FLAGS.image_size, 3].
    labels: 1-D integer Tensor of [batch_size].
    num_classes: number of classes
    scope: unique prefix string identifying the ImageNet tower, e.g.
      'tower_0'.

  Returns:
     Tensor of shape [] containing the total loss for a batch of data
  """
  # When fine-tuning a model, we do not restore the logits but instead we
  # randomly initialize the logits. The number of classes in the output of the
  # logit is the number of classes in specified Dataset.
  restore_logits = not FLAGS.fine_tune

  # Build inference Graph.
  with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
    logits = inception.inference(images, num_classes, for_training=True,
                                 restore_logits=restore_logits,
                                 scope=scope)

  # Build the portion of the Graph calculating the losses. Note that we will
  # assemble the total_loss using a custom function below.
  split_batch_size = images.get_shape().as_list()[0]
  inception.loss(logits, labels, batch_size=split_batch_size)

  # Assemble all of the losses for the current tower only.
  losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope)

  # Calculate the total loss for the current tower.
  regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
  total_loss = tf.add_n(losses + regularization_losses, name='total_loss')

  # Compute the moving average of all individual losses and the total loss.
  loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
  loss_averages_op = loss_averages.apply(losses + [total_loss])

  # Attach a scalar summary to all individual losses and the total loss; do
  # the same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
    # session. This helps the clarity of presentation on TensorBoard.
    loss_name = re.sub('%s_[0-9]*/' % inception.TOWER_NAME, '', l.op.name)
    # Name each loss as '(raw)' and name the moving average version of the loss
    # as the original loss name.
    tf.summary.scalar(loss_name + ' (raw)', l)
    tf.summary.scalar(loss_name, loss_averages.average(l))

  with tf.control_dependencies([loss_averages_op]):
    total_loss = tf.identity(total_loss)
  return total_loss
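
A usage sketch, adapted from the commented-out tower loop in Example #13 rather than taken from this example: _tower_loss is typically driven by splitting the input batch across GPUs and sharing variables between towers.

def build_towers(images, labels, num_classes):
    # Hypothetical driver; assumes FLAGS.num_gpus and the imports above.
    images_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=images)
    labels_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=labels)
    reuse_variables = None
    tower_losses = []
    for i in range(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope:
                tower_losses.append(_tower_loss(images_splits[i], labels_splits[i],
                                                num_classes, scope, reuse_variables))
                reuse_variables = True  # share weights with later towers
    return tower_losses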
Example #3
def build_graph(self, for_training=True):
    print("Building graph...")
    self.add_image_pre_processing()
    self.add_image_distortion()
    extra = self.add_meta_nn()
    self.logits = inception.inference(self.inception_input, self.class_count,
                                      extra_to_last_layer=extra,
                                      for_training=for_training,
                                      restore_logits=not for_training)
    self.add_train_step()
    self.add_result_ops()
Example #4
def evaluate(dataset):
  """Evaluate model on Dataset for a number of steps."""
  with tf.Graph().as_default():
    # Get images and labels from the dataset.
    images, labels, all_filenames, filename_queue = image_processing.inputs(dataset)

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class
    num_classes = dataset.num_classes() + 1
    print("there are %d classes!" % dataset.num_classes())

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _, end_points, net2048, sel_end_points = inception.inference(images, num_classes)

    # Calculate predictions.
    #max_percent =  tf.argmax(logits,1)
    #max_percent = tf.reduce_max(logits, reduction_indices=[1]) / tf.add_n(logits)
    max_percent = end_points['predictions']
    # max_percent = len(end_points)
    #for kk in range(len(labels)):
    #   #max_percent.append(end_points['predictions'][kk][labels[kk]])
    #   max_percent.append(labels[kk])
    if FLAGS.mode == '0_softmax':
      top_1_op = tf.nn.in_top_k(logits, labels, 1)
      top_5_op = tf.nn.in_top_k(logits, labels, 5)
    elif FLAGS.mode == '1_sigmoid':
      top_1_op = None
      top_5_op = None
    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    graph_def = tf.get_default_graph().as_graph_def()
    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph_def=graph_def)

    while True:
      precision_at_1, current_score = _eval_once(saver, summary_writer, top_1_op, top_5_op, summary_op, max_percent, all_filenames, filename_queue, net2048, sel_end_points, logits, labels)
      print("%s: Precision: %.4f " % (datetime.now(), precision_at_1) )
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
    return precision_at_1, current_score
Example #5
def export():
    with tf.Graph().as_default(), tf.Session() as sess:
        if FLAGS.export_type == 'mobile':
            input_, image_raw = mobile_input()
        elif FLAGS.export_type == 'inference':
            input_, image_raw = inference_input()
        else:
            print('export_type must be mobile or inference, currently %s' %
                    (FLAGS.export_type))
            return

        logits, _ = inception_model.inference(input_, FLAGS.num_classes + 1)
        softmax = tf.nn.softmax(logits, name='softmax')

        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Load checkpoint
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            if os.path.isabs(ckpt.model_checkpoint_path):
                # Restores from checkpoint with absolute path.
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                # Restores from checkpoint with relative path.
                saver.restore(sess, os.path.join(FLAGS.checkpoint_dir,
                                                 ckpt.model_checkpoint_path))

            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/imagenet_train/model.ckpt-0,
            # extract global_step from it.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print('Successfully loaded model from %s at step=%s.' %
                (ckpt.model_checkpoint_path, global_step))

            # Write out graph def
            graph_def = sess.graph.as_graph_def()
            tf.train.write_graph(graph_def, os.path.dirname(FLAGS.export_graph),
                                 os.path.basename(FLAGS.export_graph))

            print('Successfully converted checkpoint:\n %s/%s\n into proto\n %s\n with inputs of size %d' %
                 (FLAGS.checkpoint_dir, ckpt.model_checkpoint_path, FLAGS.export_graph, FLAGS.image_size))
        else:
            print('No checkpoint file found')
Example #6
def evaluate_op(dataset):
    # Get images and labels from the dataset.
    images, labels, _ = image_processing.inputs(dataset)

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class.
    #num_classes = dataset.num_classes() + 1
    num_classes = dataset.num_classes()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _ = inception.inference(images, num_classes)

    # Calculate predictions.
    top_1_op = tf.nn.in_top_k(logits, labels, 1)
    top_5_op = tf.nn.in_top_k(logits, labels, 5)

    return top_1_op, top_5_op
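
A minimal sketch, not part of the original example, of turning the returned ops into precision figures; the session, variable restore, and queue-runner scaffolding are assumed, and `np` is numpy.

def run_eval(sess, top_1_op, top_5_op, num_batches):
    # Each sess.run returns a boolean vector with one entry per image.
    correct_1, correct_5, total = 0, 0, 0
    for _ in range(num_batches):
        top_1, top_5 = sess.run([top_1_op, top_5_op])
        correct_1 += np.sum(top_1)
        correct_5 += np.sum(top_5)
        total += top_1.shape[0]
    return correct_1 / float(total), correct_5 / float(total)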
Example #7
def predict(dataset):
  """Evaluate model on Dataset for a number of steps."""
  with tf.Graph().as_default():
    # Get images and labels from the dataset.
    images, labels, filenames = image_processing.inputs(dataset)

    # Number of classes in the Dataset label set. Unlike the other examples,
    # no background class is added here.
    num_classes = dataset.num_classes()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _ = inception.inference(images, num_classes)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
      inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    _predict_once(saver, filenames, logits)
Example #8
def test(dataset):
  """Evaluate model on Dataset for a number of steps."""
  with tf.Graph().as_default():
    # Get images and labels from the dataset.
    images, _, filenames = image_processing.inputs(dataset)

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class.
    num_classes = dataset.num_classes() + 1

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _ = inception.inference(images, num_classes)
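    # Note: the tf.slice below drops the reserved background-class column
    # (index 0) before the softmax.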

    output = tf.nn.softmax(tf.slice(logits, [0,1], [-1,-1]), name='output')

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)


    results = _test(saver, filenames, output)

    current_time = datetime.now().strftime('%Y-%m-%d-%Hh%Mm%Ss')
    csvfilename = os.path.join(FLAGS.test_dir, 'submission-{}.csv'.format(current_time))
    zipfilename = '{}.zip'.format(csvfilename)

    with open(csvfilename, 'w', newline='') as csvfile:
      writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
      writer.writerow(['img', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'])
      for batch_result in results:
        for filename, result in batch_result:
          writer.writerow([filename] + result.tolist())

    with zipfile.ZipFile(zipfilename, 'w') as myzip:
      myzip.write(csvfilename)

    print('Submission available at: %s' % (zipfilename))
Example #9
def evaluate(dataset):
  """Evaluate model on Dataset for a number of steps."""
  with tf.Graph().as_default():
    # Get images and labels from the dataset.
    images, labels = image_processing.inputs(dataset)

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class.
    num_classes = dataset.num_classes() + 1

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _ = inception.inference(images, num_classes)
    # Bug in the original: `end_points` is never defined in this example, so
    # the following line would raise a NameError. Left commented out.
    # max_percent = end_points['predictions']

    # Calculate predictions.
    # top_1_op = tf.nn.in_top_k(logits, labels, 1)
    # top_5_op = tf.nn.in_top_k(logits, labels, 5)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    graph_def = tf.get_default_graph().as_graph_def()
    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                            graph_def=graph_def)

    while True:
      _eval_once(saver, summary_writer, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
Example #10
def export():
  # Create index->synset mapping
  synsets = []
  with open(SYNSET_FILE) as f:
    synsets = f.read().splitlines()
  # Create synset->metadata mapping
  texts = {}
  with open(METADATA_FILE) as f:
    for line in f.read().splitlines():
      parts = line.split('\t')
      assert len(parts) == 2
      texts[parts[0]] = parts[1]

  with tf.Graph().as_default():
    # Build inference model.
    # Please refer to Tensorflow inception model for details.

    # Input transformation.
    jpegs = tf.placeholder(tf.string)
    images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

    # Run inference.
    logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

    # Transform output to topK result.
    values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

    # Create a constant string Tensor where the i'th element is
    # the human readable class description for the i'th index.
    # Note that the 0th index is an unused background class
    # (see inception model definition code).
    class_descriptions = ['unused background']
    for s in synsets:
      class_descriptions.append(texts[s])
    class_tensor = tf.constant(class_descriptions)

    classes = tf.contrib.lookup.index_to_string(tf.to_int64(indices),
                                                mapping=class_tensor)

    # Restore variables from training checkpoint.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception_model.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    with tf.Session() as sess:
      # Restore variables from training checkpoints.
      ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Successfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))
      else:
        print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
        return

      # Export inference model.
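      # (`exporter` here is the legacy tf.contrib.session_bundle Exporter,
      # the pre-SavedModel export API.)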
      init_op = tf.group(tf.tables_initializer(), name='init_op')
      model_exporter = exporter.Exporter(saver)
      model_exporter.init(init_op=init_op, named_graph_signatures={
          'inputs': exporter.generic_signature({'images': jpegs}),
          'outputs': exporter.generic_signature({'classes': classes,
                                                 'scores': values})})
      model_exporter.export(FLAGS.export_dir, tf.constant(global_step), sess)
      print('Successfully exported model to %s' % FLAGS.export_dir)
Example #11
def main(img_dir):
    filelist = []
    for file in os.listdir(img_dir):
        name, ext = os.path.splitext(file)
        if ext == '.png' or ext == '.jpg':
            filelist.append(os.path.join(img_dir, file))

    # List of all images to process
    print("Running inference on images", filelist)
    global BATCH_SIZE

    if len(filelist) < BATCH_SIZE:
        BATCH_SIZE = len(filelist)

    # build the tensorflow graph.
    with tf.Graph().as_default() as g:
        input_shape = [IMAGE_SIZE, IMAGE_SIZE, 3]
        final_shape = [1, IMAGE_SIZE, IMAGE_SIZE, 3]
        img_placeholder = tf.placeholder(
            tf.uint8, shape=input_shape)
        print(img_placeholder.shape)
        # reshape to add batch size dimension
        img = tf.reshape(img_placeholder, final_shape)
        # cast to float
        img = tf.dtypes.cast(img, dtype=tf.float32)
        # normalize input to values in range [0,1]
        img = img / 255.0
        print('Image shape {}'.format(img.shape))
        logits, _ = inception.inference(img, NUM_CLASSES)
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_dir)
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=True))

        with sess:
            if ckpt and ckpt.model_checkpoint_path:
                # Restores from checkpoint
                print('Restoring trained model from checkpoint')
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Checkpoint loaded')
            else:
                print('No checkpoint file found')

            for files_batch in chunks(filelist, BATCH_SIZE):

                start_time = time.time()

                image_list = [load_image(file) for file in files_batch]
                image_batch = np.array(image_list)
                print(image_batch.shape)
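                # NOTE: img_placeholder above holds a single image, so the
                # reshape below only succeeds when BATCH_SIZE == 1.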
                image_batch = np.reshape(
                    image_batch, [IMAGE_SIZE, IMAGE_SIZE, 3])

                score = sess.run(logits, feed_dict={
                                 img_placeholder: image_batch})

                pos_score = np.exp(
                    score[:, 1])/(np.exp(score[:, 1])+np.exp(score[:, 0]))

                for i in range(BATCH_SIZE):
                    print("Score %s : %f" % (files_batch[i], pos_score[i]))

                duration = time.time() - start_time

                print("Batch done Duration: " + str(duration))
            if SAVE_MODEL:
                save_dir = './saved_models'
                print(
                    'Saving model for deployment in directory {}'.format(save_dir))
                tf.saved_model.simple_save(sess,
                                           save_dir,
                                           inputs={
                                               'image': img_placeholder},
                                           outputs={'predictions': logits})
Example #12
def inference(images):
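    # 1001 = ImageNet's 1000 classes plus the reserved background class 0.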
    logits, _ = inception_model.inference(images, 1001)
    return logits
Example #13
def evaluate(dataset):
    """Evaluate model on Dataset for a number of steps."""
    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        images, labels, all_filenames, filename_queue = image_processing.inputs(
            dataset)

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _, end_points, net2048, sel_end_points = inception.inference(
            images, num_classes)

        # Calculate predictions.
        #max_percent =  tf.argmax(logits,1)
        #max_percent = tf.reduce_max(logits, reduction_indices=[1]) / tf.add_n(logits)
        max_percent = end_points['predictions']
        # max_percent = len(end_points)
        #for kk in range(len(labels)):
        #   #max_percent.append(end_points['predictions'][kk][labels[kk]])
        #   max_percent.append(labels[kk])
        #top_1_op =  tf.nn.in_top_k(logits, labels, 1)
        #top_5_op =  tf.nn.in_top_k(logits, labels, 5)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                               graph_def=graph_def)

        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1
        '''
     # Split the batch of images and labels for towers.
    images_splits = tf.split(axis=0, num_or_size_splits=1, value=images)
    labels_splits = tf.split(axis=0, num_or_size_splits=1, value=labels)

    # Calculate the gradients for each model tower.
    tower_grads = []
    reuse_variables = None

    for i in range(1):
      with tf.device('/gpu:%d' % i):
        with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope:
          # Force all Variables to reside on the CPU.
          with slim.arg_scope([slim.variables.variable], device='/cpu:0'):
            # Calculate the loss for one tower of the ImageNet model. This
            # function constructs the entire ImageNet model but shares the
            # variables across all towers.
            loss = _tower_loss(images_splits[i], labels_splits[i], num_classes,
                               scope, reuse_variables)

            # Reuse variables for the next tower.
            reuse_variables = True
    '''
        loss = False

        while True:
            precision_at_1, current_score = _eval_once(
                saver, summary_writer, summary_op, max_percent, all_filenames,
                filename_queue, net2048, sel_end_points, logits, labels, loss)
            print("%s: Precision: %.4f --------------------" %
                  (datetime.now(), precision_at_1))
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
        return precision_at_1, current_score
Example #14
def train(target, dataset, cluster_spec):
  """Train Inception on a dataset for a number of steps."""
  # Number of workers and parameter servers are inferred from the workers and ps
  # hosts string.
  num_workers = len(cluster_spec.as_dict()['worker'])
  num_parameter_servers = len(cluster_spec.as_dict()['ps'])
  # If no value is given, num_replicas_to_aggregate defaults to be the number of
  # workers.
  if FLAGS.num_replicas_to_aggregate == -1:
    num_replicas_to_aggregate = num_workers
  else:
    num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

  # Both should be greater than 0 in a distributed training.
  assert num_workers > 0 and num_parameter_servers > 0, (' num_workers and '
                                                         'num_parameter_servers'
                                                         ' must be > 0.')

  # Choose worker 0 as the chief. Note that any worker could be the chief
  # but there should be only one chief.
  is_chief = (FLAGS.task_id == 0)

  # Ops are assigned to worker by default.
  with tf.device('/job:worker/task:%d' % FLAGS.task_id):
    # Variables and its related init/assign ops are assigned to ps.
    with slim.scopes.arg_scope(
        [slim.variables.variable, slim.variables.global_step],
        device=slim.variables.VariableDeviceChooser(num_parameter_servers)):
      # Create a variable to count the number of train() calls. This equals the
      # number of updates applied to the variables.
      global_step = slim.variables.global_step()

      # Calculate the learning rate schedule.
      num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                               FLAGS.batch_size)
      # Decay steps need to be divided by the number of replicas to aggregate.
      decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay /
                        num_replicas_to_aggregate)

      # Decay the learning rate exponentially based on the number of steps.
      lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                      global_step,
                                      decay_steps,
                                      FLAGS.learning_rate_decay_factor,
                                      staircase=True)
      # Add a summary to track the learning rate.
      tf.summary.scalar('learning_rate', lr)

      # Create an optimizer that performs gradient descent.
      opt = tf.train.RMSPropOptimizer(lr,
                                      RMSPROP_DECAY,
                                      momentum=RMSPROP_MOMENTUM,
                                      epsilon=RMSPROP_EPSILON)

      images, labels = image_processing.distorted_inputs(
          dataset,
          batch_size=FLAGS.batch_size,
          num_preprocess_threads=FLAGS.num_preprocess_threads)

      # Number of classes in the Dataset label set plus 1.
      # Label 0 is reserved for an (unused) background class.
      num_classes = dataset.num_classes() + 1
      logits = inception.inference(images, num_classes, for_training=True)
      # Add classification loss.
      inception.loss(logits, labels)

      # Gather all of the losses including regularization losses.
      losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
      losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

      total_loss = tf.add_n(losses, name='total_loss')

      if is_chief:
        # Compute the moving average of all individual losses and the
        # total loss.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        # Attach a scalar summary to all individual losses and the total loss;
        # do the same for the averaged version of the losses.
        for l in losses + [total_loss]:
          loss_name = l.op.name
          # Name each loss as '(raw)' and name the moving average version of the
          # loss as the original loss name.
          tf.summary.scalar(loss_name + ' (raw)', l)
          tf.summary.scalar(loss_name, loss_averages.average(l))

        # Add dependency to compute loss_averages.
        with tf.control_dependencies([loss_averages_op]):
          total_loss = tf.identity(total_loss)

      # Track the moving averages of all trainable variables.
      # Note that we maintain a 'double-average' of the BatchNormalization
      # global statistics.
      # This is not needed when the number of replicas is small but is
      # important for synchronous distributed training with tens of
      # workers/replicas.
      exp_moving_averager = tf.train.ExponentialMovingAverage(
          inception.MOVING_AVERAGE_DECAY, global_step)

      variables_to_average = (
          tf.trainable_variables() + tf.moving_average_variables())

      # Add histograms for model variables.
      for var in variables_to_average:
        tf.summary.histogram(var.op.name, var)

      # Create synchronous replica optimizer.
      opt = tf.train.SyncReplicasOptimizer(
          opt,
          replicas_to_aggregate=num_replicas_to_aggregate,
          total_num_replicas=num_workers,
          variable_averages=exp_moving_averager,
          variables_to_average=variables_to_average)

      batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)
      assert batchnorm_updates, 'Batchnorm updates are missing'
      batchnorm_updates_op = tf.group(*batchnorm_updates)
      # Add dependency to compute batchnorm_updates.
      with tf.control_dependencies([batchnorm_updates_op]):
        total_loss = tf.identity(total_loss)

      # Compute gradients with respect to the loss.
      grads = opt.compute_gradients(total_loss)

      # Add histograms for gradients.
      for grad, var in grads:
        if grad is not None:
          tf.summary.histogram(var.op.name + '/gradients', grad)

      apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)

      with tf.control_dependencies([apply_gradients_op]):
        train_op = tf.identity(total_loss, name='train_op')

      # Get chief queue_runners and init_tokens, which is used to synchronize
      # replicas. More details can be found in SyncReplicasOptimizer.
      chief_queue_runners = [opt.get_chief_queue_runner()]
      init_tokens_op = opt.get_init_tokens_op()

      # Create a saver.
      saver = tf.train.Saver()

      # Build the summary operation based on the TF collection of Summaries.
      summary_op = tf.summary.merge_all()

      # Build an initialization operation to run below.
      init_op = tf.global_variables_initializer()

      # We run the summaries in the same thread as the training operations by
      # passing in None for summary_op to avoid a summary_thread being started.
      # Running summaries and training operations in parallel could run out of
      # GPU memory.
      sv = tf.train.Supervisor(is_chief=is_chief,
                               logdir=FLAGS.train_dir,
                               init_op=init_op,
                               summary_op=None,
                               global_step=global_step,
                               saver=saver,
                               save_model_secs=FLAGS.save_interval_secs)

      tf.logging.info('%s Supervisor' % datetime.now())

      sess_config = tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=FLAGS.log_device_placement)

      # Get a session.
      sess = sv.prepare_or_wait_for_session(target, config=sess_config)

      # Start the queue runners.
      queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
      sv.start_queue_runners(sess, queue_runners)
      tf.logging.info('Started %d queues for processing input data.',
                      len(queue_runners))

      if is_chief:
        sv.start_queue_runners(sess, chief_queue_runners)
        sess.run(init_tokens_op)

      # Train, checking for Nans. Concurrently run the summary operation at a
      # specified interval. Note that the summary_op and train_op never run
      # simultaneously in order to prevent running out of GPU memory.
      next_summary_time = time.time() + FLAGS.save_summaries_secs
      while not sv.should_stop():
        try:
          run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
          run_metadata = tf.RunMetadata()
          start_time = time.time()
          loss_value, step = sess.run([train_op, global_step], options=run_options, run_metadata=run_metadata)
          assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
          if step > FLAGS.max_steps:
            break
          duration = time.time() - start_time

          tl = timeline.Timeline(run_metadata.step_stats)

          examples_per_sec = FLAGS.batch_size / float(duration)
          format_str = ('Worker %d: %s: step %d, loss = %.2f '
                        '(%.1f examples/sec; %.3f sec/batch)')
          tf.logging.info(format_str %
                          (FLAGS.task_id, datetime.now(), step, loss_value,
                           examples_per_sec, duration))

          # Terminate the job on the 100th iteration
          if step == 100:
            exit()

          # Determine if the summary_op should be run on the chief worker.
          if is_chief and next_summary_time < time.time():
            tf.logging.info('Running Summary operation on the chief.')
            summary_str = sess.run(summary_op)
            sv.summary_computed(sess, summary_str)
            tf.logging.info('Finished running Summary operation.')

            # Determine the next time for running the summary.
            next_summary_time += FLAGS.save_summaries_secs
        except:
          if is_chief:
            tf.logging.info('Chief got exception while running!')
          raise

      return
Example #15
def _tower_loss(images, labels, num_classes, scope, reuse_variables=None):
    """Calculate the total loss on a single tower running the ImageNet model.

  We perform 'batch splitting'. This means that we cut up a batch across
  multiple GPUs. For instance, if the batch size = 32 and num_gpus = 2,
  then each tower will operate on a batch of 16 images.

  Args:
    images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
                                       FLAGS.image_size, 3].
    labels: 1-D integer Tensor of [batch_size].
    num_classes: number of classes
    scope: unique prefix string identifying the ImageNet tower, e.g.
      'tower_0'.

  Returns:
     Tensor of shape [] containing the total loss for a batch of data
  """
    # When fine-tuning a model, we do not restore the logits but instead we
    # randomly initialize the logits. The number of classes in the output of the
    # logit is the number of classes in specified Dataset.
    restore_logits = not FLAGS.fine_tune

    # Build inference Graph.
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        logits = inception.inference(images,
                                     num_classes,
                                     for_training=True,
                                     restore_logits=restore_logits,
                                     scope=scope)

    # Build the portion of the Graph calculating the losses. Note that we will
    # assemble the total_loss using a custom function below.
    split_batch_size = images.get_shape().as_list()[0]
    inception.loss(logits, labels, batch_size=split_batch_size)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope)

    # Calculate the total loss for the current tower.
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n(losses + regularization_losses, name='total_loss')

    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do
    # the same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
        # session. This helps the clarity of presentation on TensorBoard.
        loss_name = re.sub('%s_[0-9]*/' % inception.TOWER_NAME, '', l.op.name)
        # Name each loss as '(raw)' and name the moving average version of the loss
        # as the original loss name.
        tf.summary.scalar(loss_name + ' (raw)', l)
        tf.summary.scalar(loss_name, loss_averages.average(l))

    with tf.control_dependencies([loss_averages_op]):
        total_loss = tf.identity(total_loss)
    return total_loss
Example #16
def train(target, dataset, cluster_spec, ctx):
    """Train Inception on a dataset for a number of steps."""
    # Number of workers and parameter servers are inferred from the workers and ps
    # hosts string.
    num_workers = len(cluster_spec.as_dict()['worker'])
    num_parameter_servers = len(cluster_spec.as_dict()['ps'])
    # If no value is given, num_replicas_to_aggregate defaults to be the number of
    # workers.
    if FLAGS.num_replicas_to_aggregate == -1:
        num_replicas_to_aggregate = num_workers
    else:
        num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

    # Both should be greater than 0 in a distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (
        ' num_workers and '
        'num_parameter_servers'
        ' must be > 0.')

    # Choose worker 0 as the chief. Note that any worker could be the chief
    # but there should be only one chief.
    is_chief = (FLAGS.task_id == 0)

    # Ops are assigned to worker by default.
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        # Variables and its related init/assign ops are assigned to ps.
        with slim.scopes.arg_scope(
            [slim.variables.variable, slim.variables.global_step],
                device=slim.variables.VariableDeviceChooser(
                    num_parameter_servers)):
            # Create a variable to count the number of train() calls. This equals the
            # number of updates applied to the variables.
            global_step = slim.variables.global_step()

            # Calculate the learning rate schedule.
            num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                     FLAGS.batch_size)
            # Decay steps need to be divided by the number of replicas to aggregate.
            decay_steps = int(num_batches_per_epoch *
                              FLAGS.num_epochs_per_decay /
                              num_replicas_to_aggregate)

            # Decay the learning rate exponentially based on the number of steps.
            lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                            global_step,
                                            decay_steps,
                                            FLAGS.learning_rate_decay_factor,
                                            staircase=True)
            # Add a summary to track the learning rate.
            tf.summary.scalar('learning_rate', lr)

            # Create an optimizer that performs gradient descent.
            opt = tf.train.RMSPropOptimizer(lr,
                                            RMSPROP_DECAY,
                                            momentum=RMSPROP_MOMENTUM,
                                            epsilon=RMSPROP_EPSILON)

            if FLAGS.input_mode == 'spark':

                def feed_dict(feed_batch):
                    # extract TFRecords, since feed_batch is [(TFRecord, None)]
                    tfrecords = []
                    for elem in feed_batch:
                        tfrecords.append(str(elem[0]))
                    return tfrecords

                batch = tf.placeholder(
                    tf.string,
                    [FLAGS.batch_size // FLAGS.num_preprocess_threads])

                # The following is adapted from image_processing.py to remove Readers/QueueRunners.
                # Note: this removes the RandomShuffledQueue, so the incoming data is not shuffled.
                # Presumably, this could be done on the Spark side or done in additional TF code.
                examples = tf.unstack(batch)
                images, labels = [], []
                for example_serialized in examples:
                    for thread_id in range(FLAGS.num_preprocess_threads):
                        # Parse a serialized Example proto to extract the image and metadata.
                        image_buffer, label_index, bbox, _ = image_processing.parse_example_proto(
                            example_serialized)
                        # The original passed the enclosing `train` function
                        # object here (truthy, so it behaved as train=True).
                        image = image_processing.image_preprocessing(
                            image_buffer, bbox, True, thread_id)
                        images.append(image)
                        labels.append(label_index)
                height = FLAGS.image_size
                width = FLAGS.image_size
                depth = 3
                images = tf.cast(images, tf.float32)
                images = tf.reshape(
                    images, shape=[FLAGS.batch_size, height, width, depth])
                tf.summary.image('images', images)
                labels = tf.reshape(labels, [FLAGS.batch_size])
            else:
                images, labels = image_processing.distorted_inputs(
                    dataset,
                    batch_size=FLAGS.batch_size,
                    num_preprocess_threads=FLAGS.num_preprocess_threads)

            # Number of classes in the Dataset label set plus 1.
            # Label 0 is reserved for an (unused) background class.
            num_classes = dataset.num_classes() + 1
            logits = inception.inference(images,
                                         num_classes,
                                         for_training=True)
            # Add classification loss.
            inception.loss(logits, labels)

            # Gather all of the losses including regularization losses.
            losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
            losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

            total_loss = tf.add_n(losses, name='total_loss')

            if is_chief:
                # Compute the moving average of all individual losses and the
                # total loss.
                loss_averages = tf.train.ExponentialMovingAverage(0.9,
                                                                  name='avg')
                loss_averages_op = loss_averages.apply(losses + [total_loss])

                # Attach a scalar summary to all individual losses and the
                # total loss; do the same for the averaged version of the
                # losses.
                for l in losses + [total_loss]:
                    loss_name = l.op.name
                    # Name each loss as '(raw)' and name the moving average version of the
                    # loss as the original loss name.
                    tf.summary.scalar(loss_name + ' (raw)', l)
                    tf.summary.scalar(loss_name, loss_averages.average(l))

                # Add dependency to compute loss_averages.
                with tf.control_dependencies([loss_averages_op]):
                    total_loss = tf.identity(total_loss)

            # Track the moving averages of all trainable variables.
            # Note that we maintain a 'double-average' of the BatchNormalization
            # global statistics.
            # This is not needed when the number of replicas is small but is
            # important for synchronous distributed training with tens of
            # workers/replicas.
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                inception.MOVING_AVERAGE_DECAY, global_step)

            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())

            # Add histograms for model variables.
            for var in variables_to_average:
                tf.summary.histogram(var.op.name, var)

            # Create synchronous replica optimizer.
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=num_replicas_to_aggregate,
                total_num_replicas=num_workers,
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)

            batchnorm_updates = tf.get_collection(
                slim.ops.UPDATE_OPS_COLLECTION)
            assert batchnorm_updates, 'Batchnorm updates are missing'
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            # Add dependency to compute batchnorm_updates.
            with tf.control_dependencies([batchnorm_updates_op]):
                total_loss = tf.identity(total_loss)

            # Compute gradients with respect to the loss.
            grads = opt.compute_gradients(total_loss)

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    tf.summary.histogram(var.op.name + '/gradients', grad)

            apply_gradients_op = opt.apply_gradients(grads,
                                                     global_step=global_step)

            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(total_loss, name='train_op')

            # Get chief queue_runners, init_tokens and clean_up_op, which is used to
            # synchronize replicas.
            # More details can be found in sync_replicas_optimizer.
            chief_queue_runners = [opt.get_chief_queue_runner()]
            init_tokens_op = opt.get_init_tokens_op()

            # Create a saver.
            saver = tf.train.Saver()

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.summary.merge_all()

            # Build an initialization operation to run below.
            init_op = tf.global_variables_initializer()

            # We run the summaries in the same thread as the training operations by
            # passing in None for summary_op to avoid a summary_thread being started.
            # Running summaries and training operations in parallel could run out of
            # GPU memory.
            summary_writer = tf.summary.FileWriter(
                "tensorboard_%d" % ctx.worker_num,
                graph=tf.get_default_graph())
            sv = tf.train.Supervisor(is_chief=is_chief,
                                     logdir=FLAGS.train_dir,
                                     init_op=init_op,
                                     summary_op=None,
                                     global_step=global_step,
                                     summary_writer=summary_writer,
                                     saver=saver,
                                     save_model_secs=FLAGS.save_interval_secs)

            tf.logging.info('%s Supervisor' % datetime.now())

            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)

            # Get a session.
            sess = sv.prepare_or_wait_for_session(target, config=sess_config)

            # Start the queue runners.
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            sv.start_queue_runners(sess, queue_runners)
            tf.logging.info('Started %d queues for processing input data.',
                            len(queue_runners))

            if is_chief:
                sv.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)

            # Train, checking for Nans. Concurrently run the summary operation at a
            # specified interval. Note that the summary_op and train_op never run
            # simultaneously in order to prevent running out of GPU memory.
            next_summary_time = time.time() + FLAGS.save_summaries_secs
            tf_feed = TFNode.DataFeed(ctx.mgr)
            while not sv.should_stop():
                try:
                    start_time = time.time()
                    if FLAGS.input_mode == 'spark':
                        tmp = feed_dict(
                            tf_feed.next_batch(FLAGS.batch_size //
                                               FLAGS.num_preprocess_threads))
                        feed = {batch: tmp}
                        loss_value, step = sess.run([train_op, global_step],
                                                    feed_dict=feed)
                    else:
                        loss_value, step = sess.run([train_op, global_step])
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    if step > FLAGS.max_steps:
                        break
                    duration = time.time() - start_time

                    if step % 30 == 0:
                        examples_per_sec = FLAGS.batch_size / float(duration)
                        format_str = ('Worker %d: %s: step %d, loss = %.2f '
                                      '(%.1f examples/sec; %.3f sec/batch)')
                        tf.logging.info(
                            format_str %
                            (FLAGS.task_id, datetime.now(), step, loss_value,
                             examples_per_sec, duration))

                    # Determine if the summary_op should be run on the chief worker.
                    if (FLAGS.input_mode == 'tf' and is_chief and
                            next_summary_time < time.time()):
                        tf.logging.info(
                            'Running Summary operation on the chief.')
                        summary_str = sess.run(summary_op)
                        sv.summary_computed(sess, summary_str)
                        tf.logging.info('Finished running Summary operation.')

                        # Determine the next time for running the summary.
                        next_summary_time += FLAGS.save_summaries_secs
                except:
                    if is_chief:
                        tf.logging.info('About to execute sync_clean_up_op!')
                    raise

            # Stop the TFNode data feed
            if FLAGS.input_mode == 'spark':
                tf_feed.terminate()

            # Stop the supervisor.  This also waits for service threads to finish.
            sv.stop()

            # Save after the training ends.
            if is_chief:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=global_step)
Example #17
def retrieve(dataset):
  """Evaluate model on Dataset for a number of steps."""
  with tf.Graph().as_default(), tf.Session() as sess:
    # Get images and labels from the dataset.
    images, labels, filenames_tensor = image_processing.inputs(dataset, return_filenames=True)


    # Build a Graph that computes the features.
    num_classes = dataset.num_classes() + 1
    _, _ = inception.inference(images, num_classes, restore_logits=False)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Restore checkpoint.
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      if os.path.isabs(ckpt.model_checkpoint_path):
        # Restores from checkpoint with absolute path.
        saver.restore(sess, ckpt.model_checkpoint_path)
      else:
        # Restores from checkpoint with relative path.
        saver.restore(sess, os.path.join(FLAGS.checkpoint_dir,
                                         ckpt.model_checkpoint_path))

      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/imagenet_train/model.ckpt-0,
      # extract global_step from it.
      global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
      print('Successfully loaded model from %s at step=%s.' %
            (ckpt.model_checkpoint_path, global_step))
    else:
      print('No checkpoint file found')
      return

    # Start the queue runners.
    coord = tf.train.Coordinator()
    try:
      threads = []
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))

      num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))

      print('%s: starting evaluation on (%s).' % (datetime.now(), FLAGS.subset))
      start_time = time.time()
      features_tensor = tf.get_default_graph().get_tensor_by_name(FLAGS.features_tensor_name)
      features = []
      filenames = []
      step = 0
      while step < num_iter and not coord.should_stop():
        features_batch, filenames_batch = sess.run([features_tensor, filenames_tensor])
        features.append(features_batch)
        filenames.extend(filenames_batch)

        step += 1
        if step % 20 == 0:
          duration = time.time() - start_time
          sec_per_batch = duration / 20.0
          examples_per_sec = FLAGS.batch_size / sec_per_batch
          print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f '
                'sec/batch)' % (datetime.now(), step, num_iter,
                                examples_per_sec, sec_per_batch))
          start_time = time.time()
      # Trim padding from the final partial batch after stacking the
      # per-batch feature arrays.
      features = np.vstack(features)[:FLAGS.num_examples]
      filenames = filenames[:FLAGS.num_examples]

    except Exception as e:  # pylint: disable=broad-except
      coord.request_stop(e)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=10)

    return features, filenames
Example #18
        # (Fragment: the head of this preprocessing function was lost in the
        # original listing; the surviving tail rescales pixels from [0, 1)
        # to [-1, 1].)
        image = tf.subtract(image, 0.5)
        image = tf.multiply(image, 2.0)
        return image


####################################################  Load the trained model  ################################################
sess = tf.Session()
# The first checkpoint path below is immediately overwritten; only the
# flower_model checkpoint is actually used.
# ckpt = tf.train.get_checkpoint_state(
#     '/home/recsys/hzwangjian1/tensorflow/models/inception/darthvader_model')
ckpt = tf.train.get_checkpoint_state(
    '/home/recsys/hzwangjian1/tensorflow/models/inception/inception/flower_model'
)
print(ckpt.model_checkpoint_path)

images_input = tf.placeholder(tf.float32, shape=(1, 299, 299, 3))
logits, _ = inception.inference(images_input, 6)
# Restore the moving average version of the learned variables for eval.
variable_averages = tf.train.ExponentialMovingAverage(
    inception.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, ckpt.model_checkpoint_path)

####################################################  Load an image to test the model  ################################################
from scipy import misc  # import needed for misc.imread below
img = misc.imread(
    "/home/recsys/hzwangjian1/tensorflow/models/inception/inception/data/raw-data/validation/dandelion/2465442759_d4532a57a3.jpg"
)
import matplotlib.pyplot as plt
fig = plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(img)
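
A hedged continuation sketch, not in the original script: resize the loaded image to the 299x299 input expected by `images_input` above, rescale to (-1, 1), and run the restored model (assumes `import numpy as np`; `misc.imresize` is from older scipy versions).

import numpy as np
img_resized = misc.imresize(img, (299, 299)).astype(np.float32) / 255.0
img_resized = (img_resized - 0.5) * 2.0  # match the (-1, 1) training range
scores = sess.run(tf.nn.softmax(logits),
                  feed_dict={images_input: img_resized[np.newaxis, ...]})
print('predicted class index: %d' % np.argmax(scores[0]))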
Example #19
    def build_graph(self, filenames, labels, subset, feed_hypes=None):

        hypes = self.hypes.copy()

        if feed_hypes:
            with tf.name_scope(None):
                for i in feed_hypes:
                    hypes[i] = tf.placeholder("float32", name=i)
                    hypes[i].set_shape([])

        with tf.name_scope("inputs"):

            filenames, labels = tf.train.slice_input_producer(
                tensor_list=[filenames, labels], capacity=hypes["batch_size"] * 2, shuffle=(subset == "train")
            )

            filenames, labels = tf.train.batch(
                tensor_list=[filenames, labels], capacity=hypes["batch_size"] * 2, batch_size=hypes["batch_size"]
            )

            images0 = [
                tf.image.decode_jpeg(tf.read_file(i[0]), channels=3)
                for i in tf.split(filenames, hypes["batch_size"], axis=0)
            ]

            images0 = [skin.util.square_pad(i) for i in images0]

            if subset == "train":
                images0 = [tf.image.random_flip_left_right(i) for i in images0]
                images0 = [tf.image.random_flip_up_down(i) for i in images0]

            if hypes["spatial_transformer"]:
                images = skin.util.spatial_tranform(
                    images0, hypes["batch_size"], subset, hypes["loc_net"], hypes["xform_reg"]
                )
            else:
                images = tf.stack([tf.image.resize_images(i, [299, 299]) for i in images0])

            with tf.name_scope(None):
                images = tf.identity(images, name="input")

        logits, logits_aux = inception_model.inference(
            images=(images - 128) / 128.0,
            num_classes=len(self.labels),
            for_training=(subset == "train"),
            restore_logits=(subset != "train"),
        )

        with tf.name_scope(None):
            logits = tf.identity(logits, name="logits")
        tf.summary.histogram("logits", logits)

        with tf.name_scope("loss"):

            batch_size, num_classes = logits.get_shape().as_list()

            labels_sparse = tf.sparse_to_dense(
                sparse_indices=tf.transpose(tf.pack([tf.range(batch_size), labels])),
                output_shape=[batch_size, num_classes],
                sparse_values=np.ones(batch_size, dtype="float32"),
            )

            loss = tf.nn.softmax_cross_entropy_with_logits(logits, labels_sparse)
            loss = tf.reduce_mean(loss, name="loss")

            loss_aux = tf.nn.softmax_cross_entropy_with_logits(logits_aux, labels_sparse)
            loss_aux = tf.reduce_mean(loss_aux, name="loss_aux")

            loss = 0.7 * loss + 0.3 * loss_aux

            tf.scalar_summary("loss", loss)

        fetches = {"loss": loss, "filenames": filenames, "logits": logits}

        def print_graph_ops():
            with open("/tmp/graph_ops.txt", "w") as f:
                for op in tf.get_default_graph().get_operations():
                    f.write(op.type.ljust(35) + "\t" + op.name + "\n")

        if subset == "train":

            reg_losses = tf.get_collection("regularization_losses")

            for i, j in enumerate(reg_losses):
                if "loc_net" in j.name:
                    reg_losses[i] *= hypes["loc_net_reg"]

            reg_loss = tf.add_n(reg_losses)
            tf.scalar_summary("reg_loss", reg_loss)

            with tf.variable_scope("reg_loss"):
                loss += reg_loss

            print_graph_ops()

            global_step = tf.Variable(0, name="global_step", trainable=False)

            # Equivalent to the original
            # eval("tf.train.{}Optimizer".format("Adam")) indirection,
            # written directly for clarity.
            opt = tf.train.AdamOptimizer(
                learning_rate=hypes["learning_rate"],
                epsilon=hypes["epsilon"],
                beta1=hypes["beta1"],
                beta2=hypes["beta2"],
            )

            grads = opt.compute_gradients(loss)
            apply_grads = opt.apply_gradients(grads, global_step)

            variable_averages = tf.train.ExponentialMovingAverage(hypes["variable_averages_decay"], global_step)
            variables_to_average = tf.trainable_variables() + tf.moving_average_variables()
            variables_averages_op = variable_averages.apply(variables_to_average)

            batchnorm_updates_op = tf.group(*tf.get_collection("_update_ops_"))

            train_op = tf.group(apply_grads, variables_averages_op, batchnorm_updates_op)

            for grad, var in grads:
                tf.histogram_summary(var.op.name, var)
                try:
                    tf.histogram_summary(var.op.name + "/gradients", grad)
                except Exception:
                    print(var.op.name)

            fetches.update({"reg_loss": reg_loss, "train_op": train_op, "global_step": global_step})

        else:

            print_graph_ops()

        return fetches
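
# Aside: the sparse_to_dense/transpose/pack dance in build_graph predates
# tf.one_hot. A minimal sketch of the equivalent one-hot construction on
# TF 1.x (the names below are illustrative, not from the original project):
import tensorflow as tf

example_labels = tf.constant([2, 0, 1])
example_one_hot = tf.one_hot(example_labels, depth=4, dtype=tf.float32)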
def export(args):
    """Export the trained model for serving."""
    FLAGS = tf.app.flags.FLAGS
    #with tf.Graph().as_default():
    tf.reset_default_graph()

    def preprocess_image(image_buffer):
        """Preprocess JPEG encoded bytes to 3D float Tensor."""

        # Decode the string as an RGB JPEG.
        # Note that the resulting image contains an unknown height and width
        # that is set dynamically by decode_jpeg. In other words, the height
        # and width of image is unknown at compile-time.
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        # After this point, all image pixels reside in [0,1)
        # until the very end, when they're rescaled to (-1, 1).  The various
        # adjust_* ops all require this range for dtype float.
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Crop the central region of the image with an area containing 87.5% of
        # the original image.
        image = tf.image.central_crop(image, central_fraction=0.875)
        # Resize the image to the original height and width.
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(image,
                                         [FLAGS.image_size, FLAGS.image_size],
                                         align_corners=False)
        image = tf.squeeze(image, [0])
        # Finally, rescale to [-1,1] instead of [0, 1)
        image = tf.subtract(image, 0.5)
        image = tf.multiply(image, 2.0)
        return image

    # Get images and labels from the dataset.
    jpegs = tf.placeholder(tf.string, [None], name='jpegs')
    images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
    labels = tf.placeholder(tf.int32, [None], name='labels')

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class.
    dataset = ImagenetData(subset=FLAGS.subset)

    num_classes = dataset.num_classes() + 1

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _ = inception.inference(images, num_classes)

    # Calculate predictions.
    top_1_op = tf.nn.in_top_k(logits, labels, 1)
    top_5_op = tf.nn.in_top_k(logits, labels, 5)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if not ckpt or not ckpt.model_checkpoint_path:
            raise Exception("No checkpoint file found at: {}".format(
                FLAGS.train_dir))
        print("ckpt.model_checkpoint_path: {0}".format(
            ckpt.model_checkpoint_path))

        saver.restore(sess, ckpt.model_checkpoint_path)

        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Successfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))

        print("Exporting saved_model to: {}".format(args.export_dir))
        # exported signatures defined in code
        signatures = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            {
                'inputs': {
                    'jpegs': jpegs,
                    'labels': labels
                },
                'outputs': {
                    'top_5_acc': top_5_op
                },
                'method_name':
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME
            }
        }
        TFNode.export_saved_model(sess, args.export_dir,
                                  tf.saved_model.tag_constants.SERVING,
                                  signatures)
        print("Exported saved_model")
Example #21
0

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    return np.exp(x) / np.sum(np.exp(x), axis=0)
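
# The softmax above overflows for large scores (np.exp(1000.) is inf). A
# numerically stable variant, not in the original example, shifts by the max
# first; the result is identical wherever both are defined:
def softmax_stable(x):
    z = np.exp(x - np.max(x, axis=0))
    return z / np.sum(z, axis=0)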


NUM_CLASSES = 7
NUM_TOP_CLASSES = 8

MODEL_CHECKPOINT_PATH = 'dk-finetune/model.ckpt-45000'
#with tf.Graph().as_default():
jpegs = tf.placeholder(tf.string)
images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
# Run inference.
logits, _ = inception_model.inference(images, NUM_CLASSES + 1)
# Transform output to topK result.
values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)
# Restore the moving average version of the learned variables for eval.
variable_averages = tf.train.ExponentialMovingAverage(
    inception_model.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)

sess = tf.Session()
# Restore variables from training checkpoints.
saver.restore(sess, MODEL_CHECKPOINT_PATH)
# Assuming model_checkpoint_path looks something like:
#   /my-favorite-path/imagenet_train/model.ckpt-0,
# extract global_step from it.
global_step = MODEL_CHECKPOINT_PATH.split('/')[-1].split('-')[-1]
Example #22
0
def test():
    eval_set_queue = generate_eval_set()

    with tf.Graph().as_default() as g:
        img_placeholder = tf.placeholder(tf.float32,
                                         shape=[1, IMAGE_SIZE, IMAGE_SIZE, 3])

        logits, _, feature_map = inception.inference(img_placeholder,
                                                     NUM_CLASSES)

        with tf.name_scope('conv_aux_1') as scope:
            kernel1 = tf.Variable(tf.truncated_normal([3, 3, 288, 512],
                                                      dtype=tf.float32,
                                                      stddev=1e-4),
                                  name='weights')
            conv = tf.nn.conv2d(feature_map,
                                kernel1, [1, 1, 1, 1],
                                padding='SAME')
            biases1 = tf.Variable(tf.constant(0.1,
                                              shape=[512],
                                              dtype=tf.float32),
                                  trainable=True,
                                  name='biases')
            bias = tf.nn.bias_add(conv, biases1)
            conv_aux = tf.nn.relu(bias, name=scope)

        with tf.name_scope('conv_aux_2') as scope:
            kernel2 = tf.Variable(tf.truncated_normal([3, 3, 512, 512],
                                                      dtype=tf.float32,
                                                      stddev=1e-4),
                                  name='weights')
            conv = tf.nn.conv2d(conv_aux,
                                kernel2, [1, 1, 1, 1],
                                padding='SAME')
            biases2 = tf.Variable(tf.constant(0.1,
                                              shape=[512],
                                              dtype=tf.float32),
                                  trainable=True,
                                  name='biases')
            bias = tf.nn.bias_add(conv, biases2)
            conv_aux = tf.nn.relu(bias, name=scope)

        GAP = tf.reduce_mean(conv_aux, [1, 2])

        W = tf.get_variable(name='W',
                            shape=[512, 2],
                            initializer=tf.random_normal_initializer(0., 0.01))

        conv_map_resized = tf.image.resize_bilinear(conv_aux, [100, 100])

        # get weights connected to definite class.
        W_c = tf.gather(tf.transpose(W), 1)
        W_c = tf.reshape(W_c, [-1, 512, 1])
        conv_map_resized = tf.reshape(conv_map_resized, [-1, 100 * 100, 512])
        # tf.batch_matmul was merged into tf.matmul, which now handles
        # batched inputs, so use tf.matmul here.
        CAM = tf.matmul(conv_map_resized, W_c)
        CAM = tf.reshape(CAM, [-1, 100, 100])

        # Construct saver
        variables_to_restore = tf.get_collection(
            slim.variables.VARIABLES_TO_RESTORE)
        print(variables_to_restore)
        saver1 = tf.train.Saver(variables_to_restore)
        saver2 = tf.train.Saver(
            var_list=[W, kernel2, biases2, kernel1, biases1])

        with tf.Session() as sess:
            # restore model parameters.
            checkpoint1 = tf.train.get_checkpoint_state(
                FLAGS.classification_ckpt_restore_dir)
            if checkpoint1 and checkpoint1.model_checkpoint_path:
                saver1.restore(sess, checkpoint1.model_checkpoint_path)
                print("Successfully loaded:",
                      checkpoint1.model_checkpoint_path)
            else:
                print("Could not find old network weights")

            checkpoint2 = tf.train.get_checkpoint_state(
                FLAGS.segmentation_ckpt_restore_dir)
            if checkpoint2 and checkpoint2.model_checkpoint_path:
                saver2.restore(sess, checkpoint2.model_checkpoint_path)
                print("Successfully loaded:",
                      checkpoint2.model_checkpoint_path)
            else:
                print("Could not find old network weights")

            stats = {}
            stats['r'] = [0, 0, 0]  # [TP, FP, FN] for residential.
            stats['d'] = [0, 0, 0]  # [TP, FP, FN] for downtown/commercial.
            area_error = {}
            area_error['r'] = []
            area_error['d'] = []

            # store both true and estimate total pixel areas for each region
            true_total_area = {}
            for i in xrange(1, 66):
                true_total_area[i] = 0.0
            estimate_total_area = {}
            for i in xrange(1, 66):
                estimate_total_area[i] = 0.0

            for step in xrange(1, len(eval_set_queue) + 1):
                print('Processing ' + str(step) + '/' +
                      str(len(eval_set_queue)) + '...')
                img_path, label, region_index, img_index, region_type = \
                    eval_set_queue.pop()
                img = load_image(img_path)
                img_batch = np.reshape(img, [1, IMAGE_SIZE, IMAGE_SIZE, 3])
                score = sess.run(logits,
                                 feed_dict={img_placeholder: img_batch})
                pos_prob = np.exp(
                    score[0, 1]) / (np.exp(score[0, 1]) + np.exp(score[0, 0]))

                if pos_prob >= 0.5:
                    # generate CAM for that sample
                    CAM_val = sess.run(CAM,
                                       feed_dict={img_placeholder: img_batch})
                    CAM_val = rescale_CAM(CAM_val)
                    pred_pixel_area = np.sum(
                        CAM_val > SEGMENTATION_THRES
                    )  # predicted or estimated pixel area
                    estimate_total_area[region_index] += pred_pixel_area

                    if label == [0]:  # FP
                        stats[region_type][1] += 1
                        # save original image and CAM.
                        skimage.io.imsave(
                            os.path.join(
                                RESULT_DIR, 'FP',
                                str(region_index) + '_' + str(img_index) +
                                '_original.png'), img)
                        skimage.io.imsave(
                            os.path.join(
                                RESULT_DIR, 'FP',
                                str(region_index) + '_' + str(img_index) +
                                '_CAM.png'), CAM_val)

                    else:  # TP
                        stats[region_type][0] += 1
                        # save original image and CAM.
                        skimage.io.imsave(
                            os.path.join(
                                RESULT_DIR, 'TP',
                                str(region_index) + '_' + str(img_index) +
                                '_original.png'), img)
                        skimage.io.imsave(
                            os.path.join(
                                RESULT_DIR, 'TP',
                                str(region_index) + '_' + str(img_index) +
                                '_CAM.png'), CAM_val)
                        # compare with ground truth segmentation.
                        true_seg_img = skimage.io.imread(
                            os.path.join(FLAGS.eval_set_dir, str(region_index),
                                         str(img_index) + '_true_seg.png'))
                        # imread returns an integer array; avoid in-place
                        # true division on an int dtype.
                        true_seg_img = true_seg_img / 255.0
                        true_pixel_area = np.sum(true_seg_img)
                        true_pixel_area = true_pixel_area * (100 *
                                                             100) / (320 * 320)
                        true_total_area[region_index] += true_pixel_area
                        area_error[region_type].append(true_pixel_area -
                                                       pred_pixel_area)

                else:
                    if label == [1]:  # FN
                        stats[region_type][2] += 1
                        true_seg_img = skimage.io.imread(
                            os.path.join(FLAGS.eval_set_dir, str(region_index),
                                         str(img_index) + '_true_seg.png'))
                        # As above: avoid in-place true division on ints.
                        true_seg_img = true_seg_img / 255.0
                        true_pixel_area = np.sum(true_seg_img)
                        true_pixel_area = true_pixel_area * (100 *
                                                             100) / (320 * 320)
                        true_total_area[region_index] += true_pixel_area

            # report precision and recall and absolute error rate.
            abs_error_sum_r = 0
            for e in area_error['r']:
                abs_error_sum_r += abs(e)
            abs_error_rate_r = float(abs_error_sum_r) / float(
                len(area_error['r']))

            abs_error_sum_d = 0
            for e in area_error['d']:
                abs_error_sum_d += abs(e)
            abs_error_rate_d = float(abs_error_sum_d) / float(
                len(area_error['d']))

            precision_r = float(
                stats['r'][0]) / float(stats['r'][0] + stats['r'][1] +
                                       0.00000001)
            recall_r = float(
                stats['r'][0]) / float(stats['r'][0] + stats['r'][2] +
                                       0.00000001)

            precision_d = float(
                stats['d'][0]) / float(stats['d'][0] + stats['d'][1] +
                                       0.00000001)
            recall_d = float(
                stats['d'][0]) / float(stats['d'][0] + stats['d'][2] +
                                       0.00000001)

            print('############ RESULTS ############')
            print('Residential: precision: ' + str(precision_r) + ' recall: ' +
                  str(recall_r) + ' average absolute error rate: ' +
                  str(abs_error_rate_r))
            print('Commercial: precision: ' + str(precision_d) + ' recall: ' +
                  str(recall_d) + ' average absolute error rate: ' +
                  str(abs_error_rate_d))

            # save csv for region-level comparison of true total area and estimated total area.
            result_list = []
            for i in xrange(1, 66):
                result_list.append([
                    i, true_total_area[i], estimate_total_area[i],
                    float(estimate_total_area[i] - true_total_area[i]) /
                    float(true_total_area[i])
                ])
            with open("region_level_area_estimation.csv", 'wb') as f:
                writer = csv.writer(f)
                writer.writerow([
                    'region', 'true pixel area', 'estimated pixel area',
                    'relative difference'
                ])
                writer.writerows(result_list)
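
# For reference, the class activation map built in the graph above is just a
# per-pixel dot product between the (resized) feature maps and the GAP->class
# weights for one class. A standalone NumPy sketch with the same shapes as
# the code (100x100 maps, 512 channels, 2 classes); the values are made up:
import numpy as np

feature_map = np.random.rand(100, 100, 512)   # resized conv_aux activations
W_example = np.random.rand(512, 2)            # GAP-to-class weights
w_c = W_example[:, 1]                         # weights of the positive class
cam = feature_map.reshape(-1, 512).dot(w_c).reshape(100, 100)
cam = (cam - cam.min()) / (cam.max() - cam.min())   # rescale to [0, 1]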
Example #23
0
def export():
    # Create index->synset mapping
    synsets = []
    with open(SYNSET_FILE) as f:
        synsets = f.read().splitlines()
    # Create synset->metadata mapping
    texts = {}
    with open(METADATA_FILE) as f:
        for line in f.read().splitlines():
            parts = line.split('\t')
            assert len(parts) == 2
            texts[parts[0]] = parts[1]

    with tf.Graph().as_default():
        # Build inference model.
        # Please refer to Tensorflow inception model for details.

        # Input transformation.
        jpegs = tf.placeholder(tf.string)
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference.
        logits, _, endpoints = inception_model.inference(
            images, NUM_CLASSES + 1)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)
        #TODO change values-->features [flattened]
        #(?,8,8,2048)
        features = endpoints['mixed_8x8x2048b']
        features = tf.reduce_mean(features, 1)
        #(?,2048)
        features = tf.reduce_mean(features, 1)

        # sys.exit()
        # Create a constant string Tensor where the i'th element is
        # the human readable class description for the i'th index.
        # Note that the 0th index is an unused background class
        # (see inception model definition code).
        class_descriptions = ['unused background']
        for s in synsets:
            class_descriptions.append(texts[s])
        class_tensor = tf.constant(class_descriptions)

        classes = tf.contrib.lookup.index_to_string(tf.to_int64(indices),
                                                    mapping=class_tensor)

        # Restore variables from training checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        with tf.Session() as sess:
            # Restore variables from training checkpoints.
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                # Assuming model_checkpoint_path looks something like:
                #   /my-favorite-path/imagenet_train/model.ckpt-0,
                # extract global_step from it.
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                print('Successfully loaded model from %s at step=%s.' %
                      (ckpt.model_checkpoint_path, global_step))
            else:
                print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                return

            # Export inference model.
            init_op = tf.group(tf.initialize_all_tables(), name='init_op')
            model_exporter = exporter.Exporter(saver)
            # change scores values to features ?
            signature = exporter.classification_signature(
                input_tensor=jpegs,
                classes_tensor=classes,
                scores_tensor=features)
            model_exporter.init(default_graph_signature=signature,
                                init_op=init_op)
            model_exporter.export(FLAGS.export_dir, tf.constant(global_step),
                                  sess)
            print('Successfully exported model to %s' % FLAGS.export_dir)
def train(target, dataset, cluster_spec):
  """Train Inception on a dataset for a number of steps."""
  # The numbers of workers and parameter servers are inferred from the worker
  # and ps host strings.
  num_workers = len(cluster_spec.as_dict()['worker'])
  num_parameter_servers = len(cluster_spec.as_dict()['ps'])
  # If no value is given, num_replicas_to_aggregate defaults to the number of
  # workers.
  if FLAGS.num_replicas_to_aggregate == -1:
    num_replicas_to_aggregate = num_workers
  else:
    num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

  # Both should be greater than 0 in a distributed training.
  assert num_workers > 0 and num_parameter_servers > 0, (' num_workers and '
                                                         'num_parameter_servers'
                                                         ' must be > 0.')

  # Choose worker 0 as the chief. Note that any worker could be the chief
  # but there should be only one chief.
  is_chief = (FLAGS.task_id == 0)

  # Ops are assigned to worker by default.
  with tf.device('/job:worker/task:%d' % FLAGS.task_id):
    # Variables and its related init/assign ops are assigned to ps.
    with slim.scopes.arg_scope(
        [slim.variables.variable, slim.variables.global_step],
        device=slim.variables.VariableDeviceChooser(num_parameter_servers)):
      # Create a variable to count the number of train() calls. This equals the
      # number of updates applied to the variables.
      global_step = slim.variables.global_step()

      # Calculate the learning rate schedule.
      num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                               FLAGS.batch_size)
      # Decay steps need to be divided by the number of replicas to aggregate.
      decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay /
                        num_replicas_to_aggregate)

      # Decay the learning rate exponentially based on the number of steps.
      lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                      global_step,
                                      decay_steps,
                                      FLAGS.learning_rate_decay_factor,
                                      staircase=True)
      # Add a summary to track the learning rate.
      tf.scalar_summary('learning_rate', lr)

      # Create an optimizer that performs gradient descent.
      opt = tf.train.RMSPropOptimizer(lr,
                                      RMSPROP_DECAY,
                                      momentum=RMSPROP_MOMENTUM,
                                      epsilon=RMSPROP_EPSILON)

      images, labels = image_processing.distorted_inputs(
          dataset,
          batch_size=FLAGS.batch_size,
          num_preprocess_threads=FLAGS.num_preprocess_threads)

      # Number of classes in the Dataset label set plus 1.
      # Label 0 is reserved for an (unused) background class.
      num_classes = dataset.num_classes() + 1
      logits = inception.inference(images, num_classes, for_training=True)
      # Add classification loss.
      inception.loss(logits, labels)

      # Gather all of the losses including regularization losses.
      losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
      losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

      total_loss = tf.add_n(losses, name='total_loss')

      if is_chief:
        # Compute the moving average of all individual losses and the
        # total loss.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        # Attach a scalar summary to all individual losses and the total loss;
        # do the same for the averaged version of the losses.
        for l in losses + [total_loss]:
          loss_name = l.op.name
          # Name each loss as '(raw)' and name the moving average version of the
          # loss as the original loss name.
          tf.scalar_summary(loss_name + ' (raw)', l)
          tf.scalar_summary(loss_name, loss_averages.average(l))

        # Add dependency to compute loss_averages.
        with tf.control_dependencies([loss_averages_op]):
          total_loss = tf.identity(total_loss)

      # Track the moving averages of all trainable variables.
      # Note that we maintain a 'double-average' of the BatchNormalization
      # global statistics.
      # This is not needed when the number of replicas are small but important
      # for synchronous distributed training with tens of workers/replicas.
      exp_moving_averager = tf.train.ExponentialMovingAverage(
          inception.MOVING_AVERAGE_DECAY, global_step)

      variables_to_average = (
          tf.trainable_variables() + tf.moving_average_variables())

      # Add histograms for model variables.
      for var in variables_to_average:
        tf.histogram_summary(var.op.name, var)

      # Create synchronous replica optimizer.
      opt = tf.train.SyncReplicasOptimizer(
          opt,
          replicas_to_aggregate=num_replicas_to_aggregate,
          replica_id=FLAGS.task_id,
          total_num_replicas=num_workers,
          variable_averages=exp_moving_averager,
          variables_to_average=variables_to_average)

      batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)
      assert batchnorm_updates, 'Batchnorm updates are missing'
      batchnorm_updates_op = tf.group(*batchnorm_updates)
      # Add dependency to compute batchnorm_updates.
      with tf.control_dependencies([batchnorm_updates_op]):
        total_loss = tf.identity(total_loss)

      # Compute gradients with respect to the loss.
      grads = opt.compute_gradients(total_loss)

      # Add histograms for gradients.
      for grad, var in grads:
        if grad is not None:
          tf.histogram_summary(var.op.name + '/gradients', grad)

      apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)

      with tf.control_dependencies([apply_gradients_op]):
        train_op = tf.identity(total_loss, name='train_op')

      # Get chief queue_runners, init_tokens and clean_up_op, which is used to
      # synchronize replicas.
      # More details can be found in sync_replicas_optimizer.
      chief_queue_runners = [opt.get_chief_queue_runner()]
      init_tokens_op = opt.get_init_tokens_op()
      clean_up_op = opt.get_clean_up_op()

      # Create a saver.
      saver = tf.train.Saver()

      # Build the summary operation based on the TF collection of Summaries.
      summary_op = tf.merge_all_summaries()

      # Build an initialization operation to run below.
      init_op = tf.initialize_all_variables()

      # We run the summaries in the same thread as the training operations by
      # passing in None for summary_op to avoid a summary_thread being started.
      # Running summaries and training operations in parallel could run out of
      # GPU memory.
      sv = tf.train.Supervisor(is_chief=is_chief,
                               logdir=FLAGS.train_dir,
                               init_op=init_op,
                               summary_op=None,
                               global_step=global_step,
                               saver=saver,
                               save_model_secs=FLAGS.save_interval_secs)

      tf.logging.info('%s Supervisor' % datetime.now())

      sess_config = tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=FLAGS.log_device_placement)

      # Get a session.
      sess = sv.prepare_or_wait_for_session(target, config=sess_config)

      # Start the queue runners.
      queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
      sv.start_queue_runners(sess, queue_runners)
      tf.logging.info('Started %d queues for processing input data.',
                      len(queue_runners))

      if is_chief:
        sv.start_queue_runners(sess, chief_queue_runners)
        sess.run(init_tokens_op)

      # Train, checking for Nans. Concurrently run the summary operation at a
      # specified interval. Note that the summary_op and train_op never run
      # simultaneously in order to prevent running out of GPU memory.
      next_summary_time = time.time() + FLAGS.save_summaries_secs
      while not sv.should_stop():
        try:
          start_time = time.time()
          loss_value, step = sess.run([train_op, global_step])
          assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
          if step > FLAGS.max_steps:
            break
          duration = time.time() - start_time

          if step % 30 == 0:
            examples_per_sec = FLAGS.batch_size / float(duration)
            format_str = ('Worker %d: %s: step %d, loss = %.2f '
                          '(%.1f examples/sec; %.3f sec/batch)')
            tf.logging.info(format_str %
                            (FLAGS.task_id, datetime.now(), step, loss_value,
                             examples_per_sec, duration))

          # Determine if the summary_op should be run on the chief worker.
          if is_chief and next_summary_time < time.time():
            tf.logging.info('Running Summary operation on the chief.')
            summary_str = sess.run(summary_op)
            sv.summary_computed(sess, summary_str)
            tf.logging.info('Finished running Summary operation.')

            # Determine the next time for running the summary.
            next_summary_time += FLAGS.save_summaries_secs
        except:
          if is_chief:
            tf.logging.info('About to execute sync_clean_up_op!')
            sess.run(clean_up_op)
          raise

      # Stop the supervisor.  This also waits for service threads to finish.
      sv.stop()

      # Save after the training ends.
      if is_chief:
        saver.save(sess,
                   os.path.join(FLAGS.train_dir, 'model.ckpt'),
                   global_step=global_step)
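
# For reference, tf.train.exponential_decay(..., staircase=True) as used
# above computes lr = initial_lr * decay_factor ** floor(step / decay_steps).
# A pure-Python sketch of the schedule (the numbers below are illustrative):
def staircase_lr(initial_lr, step, decay_steps, decay_factor):
    return initial_lr * decay_factor ** (step // decay_steps)

# e.g. staircase_lr(0.1, 0, 10000, 0.16) == 0.1
#      staircase_lr(0.1, 10000, 10000, 0.16) == 0.016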
Example #25
0
def train():
    # load train set list and transform it to queue.
    try:
        with open('train_set_list.pickle', 'rb') as f:
            train_set_list = pickle.load(f)
    except IOError:
        raise EnvironmentError(
            'Data list does not exist. Please run generate_data_list.py first.')
    random.shuffle(train_set_list)
    train_set_queue = deque(train_set_list)
    train_set_size = len(train_set_list)
    del train_set_list
    print('Training set built. Size: ' + str(train_set_size))

    # build the tensorflow graph.
    with tf.Graph().as_default() as g:

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        num_batches_per_epoch = train_set_size / BATCH_SIZE
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)
        tf.summary.scalar('learning_rate', lr)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.RMSPropOptimizer(lr,
                                        RMSPROP_DECAY,
                                        momentum=RMSPROP_MOMENTUM,
                                        epsilon=RMSPROP_EPSILON)

        images = tf.placeholder(tf.float32,
                                shape=[BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3])

        labels = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

        logits = inception.inference(images,
                                     NUM_CLASSES,
                                     for_training=True,
                                     restore_logits=FLAGS.fine_tune,
                                     scope=None)

        inception.loss(logits, labels, batch_size=BATCH_SIZE)

        # Assemble all of the losses for the current tower only.
        losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope=None)

        # Calculate the total loss for the current tower.
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n(losses + regularization_losses,
                              name='total_loss')

        # Compute the moving average of all individual losses and the total loss.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        # same for the averaged version of the losses.
        for l in losses + [total_loss]:
            # Name each loss as '(raw)' and name the moving average version of the loss
            # as the original loss name.
            tf.summary.scalar(l.op.name + ' (raw)', l)
            tf.summary.scalar(l.op.name, loss_averages.average(l))

        with tf.control_dependencies([loss_averages_op]):
            total_loss = tf.identity(total_loss)

        batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION,
                                              scope=None)

        # Calculate the gradients for the batch of data on this ImageNet
        # tower.
        grads = opt.compute_gradients(total_loss)

        # Apply gradients.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY, global_step)

        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Group all updates to into a single train op.
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            batchnorm_updates_op)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # open session and initialize
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        sess.run(init)

        # restore old checkpoint
        if FLAGS.fine_tune:
            checkpoint = tf.train.get_checkpoint_state(FLAGS.ckpt_restore_dir)
            if checkpoint and checkpoint.model_checkpoint_path:
                saver.restore(sess, checkpoint.model_checkpoint_path)
                print("Successfully loaded:", checkpoint.model_checkpoint_path)
            else:
                print("Could not find old network weights")
        else:
            variables_to_restore = tf.get_collection(
                slim.variables.VARIABLES_TO_RESTORE)
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        summary_writer = tf.summary.FileWriter(
            FLAGS.ckpt_save_dir,
            graph_def=sess.graph.as_graph_def(add_shapes=True))

        step = 1
        while step <= FLAGS.max_steps:
            start_time = time.time()
            # construct image batch and label batch for one step train
            minibatch = []
            for count in xrange(0, BATCH_SIZE):
                element = train_set_queue.pop()
                minibatch.append(element)
                train_set_queue.appendleft(element)

            image_list = [load_image(d[0]) for d in minibatch]
            label_list = [d[1] for d in minibatch]

            image_batch = np.array(image_list)
            label_batch = np.array(label_list)

            image_batch = np.reshape(image_batch,
                                     [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3])
            label_batch = np.reshape(label_batch, [BATCH_SIZE])

            _, loss_value = sess.run([train_op, total_loss],
                                     feed_dict={
                                         images: image_batch,
                                         labels: label_batch
                                     })

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step == 1 or step % 10 == 0:
                num_examples_per_step = BATCH_SIZE
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')

                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            # shuffle the image list once per epoch
            if step % num_batches_per_epoch == 0:
                random.shuffle(train_set_queue)

            # write summary periodically
            if step == 1 or step % 100 == 0:
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           images: image_batch,
                                           labels: label_batch
                                       })
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0:
                checkpoint_path = os.path.join(FLAGS.ckpt_save_dir,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            step += 1
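
# The pop()/appendleft() pair above cycles the deque like a ring buffer:
# each step consumes BATCH_SIZE elements from the right and recycles them on
# the left, so every example is visited once per epoch. A stripped-down
# sketch of the idiom:
from collections import deque

q = deque([0, 1, 2, 3, 4, 5])
minibatch = []
for _ in range(2):
    element = q.pop()          # take from the right
    minibatch.append(element)
    q.appendleft(element)      # recycle on the left
# minibatch == [5, 4]; q is now deque([4, 5, 0, 1, 2, 3])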
Example #26
0
def export():
  # Create index->synset mapping
  synsets = []
  with open(SYNSET_FILE) as f:
    synsets = f.read().splitlines()
  # Create synset->metadata mapping
  texts = {}
  with open(METADATA_FILE) as f:
    for line in f.read().splitlines():
      parts = line.split('\t')
      assert len(parts) == 2
      texts[parts[0]] = parts[1]

  with tf.Graph().as_default():
    # Build inference model.
    # Please refer to Tensorflow inception model for details.

    # Input transformation.
    serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
    feature_configs = {
        'image/encoded': tf.FixedLenFeature(
            shape=[], dtype=tf.string),
    }
    tf_example = tf.parse_example(serialized_tf_example, feature_configs)
    jpegs = tf_example['image/encoded']
    images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

    # Run inference.
    logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

    # Transform output to topK result.
    values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

    # Create a constant string Tensor where the i'th element is
    # the human readable class description for the i'th index.
    # Note that the 0th index is an unused background class
    # (see inception model definition code).
    class_descriptions = ['unused background']
    for s in synsets:
      class_descriptions.append(texts[s])
    class_tensor = tf.constant(class_descriptions)

    table = tf.contrib.lookup.index_to_string_table_from_tensor(class_tensor)
    classes = table.lookup(tf.to_int64(indices))

    # Restore variables from training checkpoint.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception_model.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    with tf.Session() as sess:
      # Restore variables from training checkpoints.
      ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Successfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))
      else:
        print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
        return

      # Export inference model.
      output_path = os.path.join(
          tf.compat.as_bytes(FLAGS.output_dir),
          tf.compat.as_bytes(str(FLAGS.model_version)))
      print('Exporting trained model to %s' % output_path)
      builder = tf.saved_model.builder.SavedModelBuilder(output_path)

      # Build the signature_def_map.
      classify_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
          serialized_tf_example)
      classes_output_tensor_info = tf.saved_model.utils.build_tensor_info(
          classes)
      scores_output_tensor_info = tf.saved_model.utils.build_tensor_info(values)

      classification_signature = (
          tf.saved_model.signature_def_utils.build_signature_def(
              inputs={
                  tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                      classify_inputs_tensor_info
              },
              outputs={
                  tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES:
                      classes_output_tensor_info,
                  tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                      scores_output_tensor_info
              },
              method_name=tf.saved_model.signature_constants.
              CLASSIFY_METHOD_NAME))

      predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(jpegs)
      prediction_signature = (
          tf.saved_model.signature_def_utils.build_signature_def(
              inputs={'images': predict_inputs_tensor_info},
              outputs={
                  'classes': classes_output_tensor_info,
                  'scores': scores_output_tensor_info
              },
              method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
          ))

      legacy_init_op = tf.group(
          tf.tables_initializer(), name='legacy_init_op')
      builder.add_meta_graph_and_variables(
          sess, [tf.saved_model.tag_constants.SERVING],
          signature_def_map={
              'predict_images':
                  prediction_signature,
              tf.saved_model.signature_constants.
              DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                  classification_signature,
          },
          legacy_init_op=legacy_init_op)

      builder.save()
      print('Successfully exported model to %s' % FLAGS.output_dir)
def evaluate(dataset):
    """Evaluate model on Dataset for a number of steps."""
    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        images, labels = image_processing.inputs(dataset)

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _ = inception.inference(images, num_classes)
        pred = tf.nn.softmax(logits)

        top_1_op = tf.nn.in_top_k(logits, labels, 1)

        # Calculate predictions.
        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                                graph_def=graph_def)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                if os.path.isabs(ckpt.model_checkpoint_path):
                    # Restores from checkpoint with absolute path.
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    # Restores from checkpoint with relative path.
                    saver.restore(
                        sess,
                        os.path.join(FLAGS.checkpoint_dir,
                                     ckpt.model_checkpoint_path))

                # Assuming model_checkpoint_path looks something like:
                #   /my-favorite-path/imagenet_train/model.ckpt-0,
                # extract global_step from it.
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                print('Successfully loaded model from %s at step=%s.' %
                      (ckpt.model_checkpoint_path, global_step))
            else:
                print('No checkpoint file found')
                return

            # Start the queue runners.
            coord = tf.train.Coordinator()
            try:
                threads = []
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(
                        qr.create_threads(sess,
                                          coord=coord,
                                          daemon=True,
                                          start=True))
                num_iter = int(math.ceil(FLAGS.num_examples /
                                         FLAGS.batch_size))
                # Counts the number of correct predictions.
                test_acc = 0.0
                count_top_1 = 0
                confusion_m_all = []
                total_sample_count = num_iter * FLAGS.batch_size
                step = 0

                print('%s: starting evaluation on (%s).' %
                      (datetime.now(), FLAGS.subset))
                start_time = time.time()
                while step < num_iter and not coord.should_stop():
                    # Rebinding the pred/labels tensors to numpy arrays would
                    # break the next sess.run call, so use new names here.
                    pred_val, labels_val, top_1 = sess.run(
                        [pred, labels, top_1_op])
                    print(pred_val.shape)
                    print(labels_val.shape)
                    correct_pred = np.equal(np.argmax(pred_val, 1), labels_val)
                    test_acc += np.sum(correct_pred.astype(float))

                    confu_m = confusion_matrix(labels_val,
                                               np.argmax(pred_val, 1))
                    confusion_m_all.append(confu_m)
                    #top_1, top_5 = sess.run([top_1_op, top_5_op])
                    count_top_1 += np.sum(top_1)
                    #count_top_5 += np.sum(top_5)
                    step += 1
                    # if step % 20 == 0:
                    #     duration = time.time() - start_time
                    #     sec_per_batch = duration / 20.0
                    #     examples_per_sec = FLAGS.batch_size / sec_per_batch
                    #     print('%s: [%d batches out of %d] (%.1f examples/sec;'
                    #           ' %.3f sec/batch)' % (datetime.now(), step,
                    #                                 num_iter, examples_per_sec,
                    #                                 sec_per_batch))
                    #     start_time = time.time()

                # Compute precision @ 1.
                precision_at_1 = count_top_1 / float(total_sample_count)
                print('%s: precision @ 1 = %.4f  [%d examples]' %
                      (datetime.now(), precision_at_1, total_sample_count))

                confusion_m_all = np.array(confusion_m_all)
                print(confusion_m_all.shape)
                confusion_m_average = np.sum(confusion_m_all, axis=0)
                print(confusion_m_average)

                test_acc = float(test_acc) / float(total_sample_count)
                print("Test Accuracy: {} \n".format(test_acc))

                summary = tf.Summary()
                summary.ParseFromString(sess.run(summary_op))
                summary.value.add(tag='Precision @ 1',
                                  simple_value=precision_at_1)
                #summary.value.add(tag='Recall @ 5', simple_value=recall_at_5)
                summary_writer.add_summary(summary, global_step)

            except Exception as e:  # pylint: disable=broad-except
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
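
# Once the per-batch confusion matrices are summed, per-class precision and
# recall fall out of the column/row sums (rows are true labels, columns are
# predictions, matching sklearn's confusion_matrix). A standalone sketch
# with made-up counts:
import numpy as np

cm = np.array([[50, 5],
               [8, 37]])
tp = np.diag(cm).astype(float)
precision = tp / cm.sum(axis=0)   # column sums: everything predicted as c
recall = tp / cm.sum(axis=1)      # row sums: everything truly labeled c
print(precision, recall)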
Example #28
0
def export():
  # Create index->synset mapping
  synsets = []
  with open(SYNSET_FILE) as f:
    synsets = f.read().splitlines()
  # Create synset->metadata mapping
  texts = {}
  with open(METADATA_FILE) as f:
    for line in f.read().splitlines():
      parts = line.split('\t')
      assert len(parts) == 2
      texts[parts[0]] = parts[1]

  with tf.Graph().as_default():
    # Build inference model.
    # Please refer to Tensorflow inception model for details.

    # Input transformation.
    # TODO(b/27776734): Add batching support.
    jpegs = tf.placeholder(tf.string, shape=(1))
    image_buffer = tf.squeeze(jpegs, [0])
    # Decode the string as an RGB JPEG.
    # Note that the resulting image contains an unknown height and width
    # that is set dynamically by decode_jpeg. In other words, the height
    # and width of image is unknown at compile-time.
    image = tf.image.decode_jpeg(image_buffer, channels=3)
    # After this point, all image pixels reside in [0,1)
    # until the very end, when they're rescaled to (-1, 1).  The various
    # adjust_* ops all require this range for dtype float.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Crop the central region of the image with an area containing 87.5% of
    # the original image.
    image = tf.image.central_crop(image, central_fraction=0.875)
    # Resize the image to the original height and width.
    image = tf.expand_dims(image, 0)
    image = tf.image.resize_bilinear(image,
                                     [FLAGS.image_size, FLAGS.image_size],
                                     align_corners=False)
    image = tf.squeeze(image, [0])
    # Finally, rescale to [-1,1] instead of [0, 1)
    image = tf.sub(image, 0.5)
    image = tf.mul(image, 2.0)
    images = tf.expand_dims(image, 0)

    # Run inference.
    logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

    # Transform output to topK result.
    values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

    # Create a constant string Tensor where the i'th element is
    # the human readable class description for the i'th index.
    class_tensor = tf.constant([texts[s] for s in synsets])

    classes = tf.contrib.lookup.index_to_string(tf.to_int64(indices),
                                                mapping=class_tensor)

    # Restore variables from training checkpoint.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception_model.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    with tf.Session() as sess:
      # Restore variables from training checkpoints.
      ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Successfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))
      else:
        print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
        return

      # Export inference model.
      init_op = tf.group(tf.initialize_all_tables(), name='init_op')
      model_exporter = exporter.Exporter(saver)
      signature = exporter.classification_signature(
          input_tensor=jpegs, classes_tensor=classes, scores_tensor=values)
      model_exporter.init(default_graph_signature=signature, init_op=init_op)
      model_exporter.export(FLAGS.export_dir, tf.constant(global_step), sess)
      print('Successfully exported model to %s' % FLAGS.export_dir)
def export():
    # Create index->synset mapping
    synsets = []
    with open(SYNSET_FILE) as f:
        synsets = f.read().splitlines()
    # Create synset->metadata mapping
    texts = {}
    with open(METADATA_FILE) as f:
        for line in f.read().splitlines():
            parts = line.split('\t')
            assert len(parts) == 2
            texts[parts[0]] = parts[1]

    with tf.Graph().as_default():
        # Build inference model.
        # Please refer to Tensorflow inception model for details.

        # Input transformation.
        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_configs = {
            'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string),
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        jpegs = tf_example['image/encoded']
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference.
        logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # Create a constant string Tensor where the i'th element is
        # the human readable class description for the i'th index.
        # Note that the 0th index is an unused background class
        # (see inception model definition code).
        class_descriptions = ['unused background']
        for s in synsets:
            class_descriptions.append(texts[s])
        class_tensor = tf.constant(class_descriptions)

        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        # Restore variables from training checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        with tf.Session() as sess:
            # Restore variables from training checkpoints.
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                # Assuming model_checkpoint_path looks something like:
                #   /my-favorite-path/imagenet_train/model.ckpt-0,
                # extract global_step from it.
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                print('Successfully loaded model from %s at step=%s.' %
                      (ckpt.model_checkpoint_path, global_step))
            else:
                print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                return

            # Export inference model.
            init_op = tf.group(tf.tables_initializer(), name='init_op')
            classification_signature = exporter.classification_signature(
                input_tensor=serialized_tf_example,
                classes_tensor=classes,
                scores_tensor=values)
            named_graph_signature = {
                'inputs':
                exporter.generic_signature({'images': jpegs}),
                'outputs':
                exporter.generic_signature({
                    'classes': classes,
                    'scores': values
                })
            }
            model_exporter = exporter.Exporter(saver)
            model_exporter.init(
                init_op=init_op,
                default_graph_signature=classification_signature,
                named_graph_signatures=named_graph_signature)
            model_exporter.export(FLAGS.export_dir, tf.constant(global_step),
                                  sess)
            print('Successfully exported model to %s' % FLAGS.export_dir)
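The `preprocess_image` helper mapped over the incoming JPEG strings is not shown in this snippet. A minimal sketch consistent with the preprocessing at the top of this section (decode the JPEG, resize to the 299x299 Inception input size, rescale to [-1, 1]) could look like this; it is an assumption, not the project's exact implementation:

def preprocess_image(image_buffer):
    # Hypothetical helper matching the preprocessing used above.
    image = tf.image.decode_jpeg(image_buffer, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)  # -> [0, 1)
    image = tf.image.resize_images(image, [299, 299])  # Inception input size
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)  # -> [-1, 1]
    return image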
Example #30
File: util.py Project: kuprel/skin
def spatial_tranform(images0, batch_size, subset, loc_net, xform_reg):
    """Apply a learned spatial transform (zoom + pan) to a batch of images.

    A localization network predicts theta = (zoom, pan_horizontal,
    pan_vertical); a spatial transformer then resamples each image and
    returns the transformed 299x299 batch.
    """

    images1 = tf.pack([
        tf.image.resize_images(i, 299, 299)
        for i in images0
    ])

    with tf.name_scope(None):
        images1 = tf.identity(images1, name='input_stn')

    with tf.variable_scope('loc_net') as scope:

        if loc_net == 'fc':
            print('using fully connected localization network')
            theta = loc_net_fc(images1, batch_size)

        if loc_net == 'conv':
            print('using convolutional localization network')
            theta = loc_net_conv(images1, batch_size)

        if loc_net == 'inception':
            print('using inception localization network')
            theta, _ = inception_model.inference(
                images = (images1-128)/128.,
                num_classes = 3,
                for_training = (subset == 'train'),
                restore_logits = (subset != 'train')
            )
            theta = tf.nn.tanh(theta)

    with tf.name_scope(None):
        theta = tf.identity(theta, name='theta')
    tf.histogram_summary('theta/zoom', theta[:,0])
    tf.histogram_summary('theta/pan_horizontal', theta[:,1])
    tf.histogram_summary('theta/pan_vertical', theta[:,2])

    if subset == 'train':
        with tf.name_scope(None):
            theta_loss = tf.nn.l2_loss(theta, name='theta_loss')
        tf.scalar_summary('theta_loss', theta_loss)
        tf.add_to_collection('regularization_losses', xform_reg*theta_loss)

    images2 = []
    for i in range(batch_size):
        s, dx, dy = (theta[i,0]+1)/2, theta[i,1], theta[i,2]
        th = tf.pack([s, 0, dx,
                      0, s, dy])
        u = images0[i]
        u, th = tf.expand_dims(u, 0), tf.expand_dims(th, 0)
        dsf = tf.cast(tf.shape(u)[1], 'float32') / 299
        v = spatial_transformer.transformer(u, th, dsf)
        v = tf.image.resize_images(v[0,:,:,:], 299, 299)
        v.set_shape([299, 299, 3])
        images2.append(v)
    images2 = tf.pack(images2)

    images12 = tf.concat(2, [images1, images2])
    # Use integer division so the tensor shapes stay integral under Python 3.
    blkbar = tf.zeros([batch_size, 299 // 2, 299 * 2, 3])
    whtbar = 255 * tf.ones([batch_size, 299 // 2, 299 * 2, 3])
    images12 = tf.concat(1, [whtbar, images12, whtbar])
    images12 = tf.clip_by_value(images12, 0, 255)
    tf.image_summary('xform_pairs', images12, max_images=batch_size)

    return images2
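The 2x3 matrix packed from theta above is the standard spatial-transformer affine parameterization: with zero off-diagonals it applies a uniform zoom s (derived from the tanh output so it stays in (0, 1)) plus a translation (dx, dy), mapping normalized output coordinates to input sampling coordinates. A small NumPy check of that mapping (illustrative only, not from the project):

import numpy as np

def affine_map(theta, xy):
    # theta = (s, dx, dy); maps normalized output coords to input coords.
    s, dx, dy = theta
    A = np.array([[s, 0.0, dx],
                  [0.0, s, dy]])
    return A @ np.append(xy, 1.0)

print(affine_map((0.5, 0.1, -0.2), np.array([1.0, 1.0])))  # [0.6, 0.3]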
Example #31
def export():
    # Create index->synset mapping. (This block was commented out in the
    # original snippet, but synsets and texts are used below, so it is
    # restored here from the identical code in the previous example.)
    synsets = []
    with open(SYNSET_FILE) as f:
        synsets = f.read().splitlines()
    # Create synset->metadata mapping
    texts = {}
    with open(METADATA_FILE) as f:
        for line in f.read().splitlines():
            parts = line.split('\t')
            assert len(parts) == 2
            texts[parts[0]] = parts[1]

    with tf.Graph().as_default():
        # Build inference model.
        # Please refer to Tensorflow inception model for details.

        # Input transformation.
        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_configs = {
            'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string),
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        jpegs = tf_example['image/encoded']
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference.
        logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # Create a constant string Tensor where the i'th element is
        # the human readable class description for the i'th index.
        # Note that the 0th index is an unused background class
        # (see inception model definition code).
        class_descriptions = ['unused background']
        for s in synsets:
            class_descriptions.append(texts[s])
        class_tensor = tf.constant(class_descriptions)

        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        # Restore variables from training checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        with tf.Session() as sess:
            # Restore variables from training checkpoints.
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                # Assuming model_checkpoint_path looks something like:
                #   /my-favorite-path/imagenet_train/model.ckpt-0,
                # extract global_step from it.
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                print('Successfully loaded model from %s at step=%s.' %
                      (ckpt.model_checkpoint_path, global_step))
            else:
                print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                return

            # Export inference model.
            output_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            print('Exporting trained model to', output_path)
            builder = tf.saved_model.builder.SavedModelBuilder(output_path)

            # Build the signature_def_map.
            classify_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
                serialized_tf_example)
            classes_output_tensor_info = tf.saved_model.utils.build_tensor_info(
                classes)
            scores_output_tensor_info = tf.saved_model.utils.build_tensor_info(
                values)

            classification_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={
                        tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                        classify_inputs_tensor_info
                    },
                    outputs={
                        tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES:
                        classes_output_tensor_info,
                        tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                        scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.
                    CLASSIFY_METHOD_NAME))

            predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
                jpegs)
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_inputs_tensor_info},
                    outputs={
                        'classes': classes_output_tensor_info,
                        'scores': scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.
                    PREDICT_METHOD_NAME))

            legacy_init_op = tf.group(tf.tables_initializer(),
                                      name='legacy_init_op')
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images':
                    prediction_signature,
                    tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    classification_signature,
                },
                legacy_init_op=legacy_init_op)

            builder.save()
            print('Successfully exported model to %s' % FLAGS.output_dir)
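Once the SavedModel has been written, it can be reloaded to verify that both the default classification signature and the 'predict_images' signature were exported. A minimal check, assuming the illustrative /tmp/imagenet_export/1 path from the hypothetical setup above:

# Illustrative: reload the exported SavedModel and list its signatures.
with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], '/tmp/imagenet_export/1')
    print(list(meta_graph.signature_def.keys()))
    # Expected: ['predict_images', 'serving_default']

The `saved_model_cli show --dir <export_dir> --all` tool that ships with TensorFlow prints the same information, including each signature's input and output tensor shapes.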