Example #1
def main(_):
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    if tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.DeleteRecursively(FLAGS.train_dir)
    tf.gfile.MakeDirs(FLAGS.train_dir)
    inception_train.train(dataset)
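All of these main functions follow the same TF 1.x entry-point pattern: flags are defined at module level and tf.app.run() parses them before dispatching to main. A minimal sketch of that scaffolding, assuming illustrative defaults (the flag names come from the snippets; the default values here are made up):

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('subset', 'train',
                           'Dataset subset to read: train or validation.')
tf.app.flags.DEFINE_string('train_dir', '/tmp/imagenet_train',
                           'Directory for checkpoints and event files.')

if __name__ == '__main__':
    tf.app.run()  # parses flags, then calls main(argv)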
Example #2
def main(unused_argv=None):
  dataset = ImagenetData(subset=FLAGS.subset)
  assert dataset.data_files()
  if tf.gfile.Exists(FLAGS.eval_dir):
    tf.gfile.DeleteRecursively(FLAGS.eval_dir)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  inception_eval.evaluate(dataset)
Example #3
def main(unused_args):
  assert FLAGS.job_name in ['ps', 'worker'], 'job_name must be ps or worker'

  # Extract all the hostnames for the ps and worker jobs to construct the
  # cluster spec.
  ps_hosts = FLAGS.ps_hosts.split(',')
  worker_hosts = FLAGS.worker_hosts.split(',')
  tf.logging.info('PS hosts are: %s' % ps_hosts)
  tf.logging.info('Worker hosts are: %s' % worker_hosts)

  cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts,
                                       'worker': worker_hosts})
  server = tf.train.Server(
      cluster_spec,
      job_name=FLAGS.job_name,
      task_index=FLAGS.task_id,
      protocol=FLAGS.protocol)

  if FLAGS.job_name == 'ps':
    # `ps` jobs wait for incoming connections from the workers.
    server.join()
  else:
    # `worker` jobs will actually do the work.
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    # Only the chief checks for or creates train_dir.
    if FLAGS.task_id == 0:
      if not tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.MakeDirs(FLAGS.train_dir)
    inception_distributed_train.train(server.target, dataset, cluster_spec)
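The distributed variants additionally assume ps_hosts, worker_hosts, job_name, task_id, and protocol flags. A hedged sketch of what those definitions might look like; the defaults are illustrative, not taken from the original code:

tf.app.flags.DEFINE_string('ps_hosts', 'ps0:2222',
                           'Comma-separated parameter server host:port pairs.')
tf.app.flags.DEFINE_string('worker_hosts', 'worker0:2222,worker1:2222',
                           'Comma-separated worker host:port pairs.')
tf.app.flags.DEFINE_string('job_name', 'worker', 'Either ps or worker.')
tf.app.flags.DEFINE_integer('task_id', 0, 'Index of this task within its job.')
tf.app.flags.DEFINE_string('protocol', 'grpc', 'Server protocol, e.g. grpc.')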
Example #4
def main(unused_args):
  assert FLAGS.job_name in ['ps', 'worker'], 'job_name must be ps or worker'

  # Extract all the hostnames for the ps and worker jobs to construct the
  # cluster spec.
  ps_hosts = FLAGS.ps_hosts.split(',')
  worker_hosts = FLAGS.worker_hosts.split(',')
  tf.logging.info('PS hosts are: %s' % ps_hosts)
  tf.logging.info('Worker hosts are: %s' % worker_hosts)

  cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts,
                                       'worker': worker_hosts})
  server = tf.train.Server(
      cluster_spec,
      job_name=FLAGS.job_name,
      task_index=FLAGS.task_id)

  if FLAGS.job_name == 'ps':
    # `ps` jobs wait for incoming connections from the workers.
    server.join()
  else:
    # `worker` jobs will actually do the work.
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    # Only the chief checks for or creates train_dir.
    if FLAGS.task_id == 0:
      if not tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.MakeDirs(FLAGS.train_dir)
    inception_distributed_train.train(server.target, dataset, cluster_spec)
Example #5
def main(unused_argv=None):
  dataset = ImagenetData(subset=FLAGS.subset)
  assert dataset.data_files()
  if tf.gfile.Exists(FLAGS.eval_dir):
    tf.gfile.DeleteRecursively(FLAGS.eval_dir)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  inception_eval.evaluate(dataset)
Example #6
def main(unused_argv):
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")
    cluster_spec = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster_spec,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.worker_index)

    if FLAGS.job_name == "ps":
        server.join()
        sys.exit(0)

    num_workers = len(worker_hosts)
    worker_grpc_url = 'grpc://' + worker_hosts[0]
    print("Worker GRPC URL: %s" % worker_grpc_url)
    print("Worker index = %d" % FLAGS.worker_index)
    print("Number of workers = %d" % num_workers)

    # `worker` jobs will actually do the work.
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    # Only the chief (worker 0) creates train_dir; every worker trains.
    if FLAGS.worker_index == 0 and not tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.MakeDirs(FLAGS.train_dir)
    inception_distributed_train.train(server.target, dataset, cluster_spec)
Example #7
def main(_):
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    if tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.DeleteRecursively(FLAGS.train_dir)
    tf.gfile.MakeDirs(FLAGS.train_dir)
    inception_train.train(dataset)
Example #8
def main(unused_argv=None):
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    if tf.gfile.Exists(FLAGS.eval_dir):
        tf.gfile.DeleteRecursively(FLAGS.eval_dir)
    tf.gfile.MakeDirs(FLAGS.eval_dir)
    FLAGS.dataset_name = 'imagenet'
    FLAGS.num_examples = dataset.num_examples_per_epoch()
    inception_eval.evaluate(dataset)
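Example #8 copies num_examples_per_epoch() into FLAGS so the evaluator knows how many batches cover one pass over the data. As a worked example, assuming the standard 50,000-image ImageNet validation split and an illustrative batch size of 100:

import math

num_examples = 50000   # dataset.num_examples_per_epoch() for 'validation'
eval_batch_size = 100  # illustrative value
num_batches = int(math.ceil(num_examples / float(eval_batch_size)))  # 500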
Example #9
def main(_):
    # Load dataset
    tf.app.flags.FLAGS.data_dir = '/work/haeusser/data/imagenet/shards'
    dataset = ImagenetData(subset='validation')
    assert dataset.data_files()

    num_labels = dataset.num_classes() + 1
    image_shape = [FLAGS.image_size, FLAGS.image_size, 3]

    graph = tf.Graph()
    with graph.as_default():

        images, labels = image_processing.batch_inputs(
            dataset,
            FLAGS.eval_batch_size,
            train=True,
            num_preprocess_threads=16,
            num_readers=FLAGS.num_readers)

        # Set up semisup model.
        model = semisup.SemisupModel(semisup.architectures.inception_model,
                                     num_labels,
                                     image_shape,
                                     test_in=images)

        # Add moving average variables.
        for var in tf.get_collection('moving_vars'):
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
        for var in slim.get_model_variables():
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)

        # Get prediction tensor from semisup model.
        predictions = tf.argmax(model.test_logit, 1)

        # Accuracy metric for summaries.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
        })
        for name, value in names_to_values.items():
            tf.summary.scalar(name, value)

        # Run the actual evaluation loop.
        num_batches = int(math.ceil(dataset.num_examples_per_epoch() /
                                    float(FLAGS.eval_batch_size)))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.logdir,
            logdir=FLAGS.logdir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            eval_interval_secs=FLAGS.eval_interval_secs,
            session_config=config)
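The metric map above relies on slim's streaming metrics, which return a (value, update_op) pair: evaluation_loop runs the update ops once per batch, while the value tensor carries the running aggregate that the scalar summaries report. A self-contained sketch of that contract:

import tensorflow as tf
slim = tf.contrib.slim

predictions = tf.constant([0, 1, 1])
labels = tf.constant([0, 1, 0])
accuracy, update_op = slim.metrics.streaming_accuracy(predictions, labels)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric state lives in local vars
    sess.run(update_op)                         # accumulate one batch
    print(sess.run(accuracy))                   # ~0.667 (2 of 3 correct)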
Example #10
def main_fun(argv, ctx):
    import tensorflow as tf
    from inception import inception_eval
    from inception.imagenet_data import ImagenetData

    print("argv:", argv)
    sys.argv = argv

    FLAGS = tf.app.flags.FLAGS
    FLAGS._parse_flags()
    print("FLAGS:", FLAGS.__dict__['__flags'])

    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    if tf.gfile.Exists(FLAGS.eval_dir):
        tf.gfile.DeleteRecursively(FLAGS.eval_dir)
    tf.gfile.MakeDirs(FLAGS.eval_dir)

    cluster_spec, server = TFNode.start_cluster_server(ctx, 1, FLAGS.rdma)

    inception_eval.evaluate(dataset)
Example #11
def main_fun(argv, ctx):
  import tensorflow as tf
  from inception import inception_eval
  from inception.imagenet_data import ImagenetData

  print("argv:", argv)
  sys.argv = argv

  FLAGS = tf.app.flags.FLAGS
  FLAGS._parse_flags()
  print("FLAGS:", FLAGS.__dict__['__flags'])

  dataset = ImagenetData(subset=FLAGS.subset)
  assert dataset.data_files()
  if tf.gfile.Exists(FLAGS.eval_dir):
    tf.gfile.DeleteRecursively(FLAGS.eval_dir)
  tf.gfile.MakeDirs(FLAGS.eval_dir)

  cluster_spec, server = TFNode.start_cluster_server(ctx)

  inception_eval.evaluate(dataset)
Example #12
def main_fun(argv, ctx):

    # extract node metadata from ctx
    worker_num = ctx.worker_num
    job_name = ctx.job_name
    task_index = ctx.task_index

    assert job_name in ['ps', 'worker'], 'job_name must be ps or worker'

    from inception import inception_distributed_train
    from inception.imagenet_data import ImagenetData
    import tensorflow as tf

    # instantiate FLAGS on workers using argv from driver and add job_name and task_id
    print("argv:", argv)
    sys.argv = argv

    FLAGS = tf.app.flags.FLAGS
    FLAGS.job_name = job_name
    FLAGS.task_id = task_index
    print("FLAGS:", FLAGS.__dict__['__flags'])

    # Get TF cluster and server instances
    cluster_spec, server = TFNode.start_cluster_server(ctx, FLAGS.num_gpus,
                                                       FLAGS.rdma)

    if FLAGS.job_name == 'ps':
        # `ps` jobs wait for incoming connections from the workers.
        server.join()
    else:
        # `worker` jobs will actually do the work.
        dataset = ImagenetData(subset=FLAGS.subset)
        assert dataset.data_files()
        # Only the chief checks for or creates train_dir.
        if FLAGS.task_id == 0:
            if not tf.gfile.Exists(FLAGS.train_dir):
                tf.gfile.MakeDirs(FLAGS.train_dir)
        inception_distributed_train.train(server.target, dataset, cluster_spec,
                                          ctx)
Example #13
def main_fun(argv, ctx):

  # extract node metadata from ctx
  worker_num = ctx.worker_num
  job_name = ctx.job_name
  task_index = ctx.task_index

  assert job_name in ['ps', 'worker'], 'job_name must be ps or worker'

  from inception import inception_distributed_train
  from inception.imagenet_data import ImagenetData
  import tensorflow as tf

  # instantiate FLAGS on workers using argv from driver and add job_name and task_id
  print("argv:", argv)
  sys.argv = argv

  FLAGS = tf.app.flags.FLAGS
  FLAGS.job_name = job_name
  FLAGS.task_id = task_index
  print("FLAGS:", FLAGS.__dict__['__flags'])

  # Get TF cluster and server instances
  cluster_spec, server = TFNode.start_cluster_server(ctx, FLAGS.num_gpus, FLAGS.rdma)

  if FLAGS.job_name == 'ps':
    # `ps` jobs wait for incoming connections from the workers.
    server.join()
  else:
    # `worker` jobs will actually do the work.
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    # Only the chief checks for or creates train_dir.
    if FLAGS.task_id == 0:
      if not tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.MakeDirs(FLAGS.train_dir)
    inception_distributed_train.train(server.target, dataset, cluster_spec, ctx)
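Examples #10 through #13 are written for TensorFlowOnSpark, which calls main_fun(argv, ctx) on each Spark executor. A hedged sketch of the driver side that would launch them, assuming the TFCluster API from TensorFlowOnSpark; the executor counts are illustrative:

import sys
from pyspark.context import SparkContext
from tensorflowonspark import TFCluster

sc = SparkContext()
cluster = TFCluster.run(sc, main_fun, sys.argv, num_executors=4, num_ps=1,
                        tensorboard=False,
                        input_mode=TFCluster.InputMode.TENSORFLOW)
cluster.shutdown()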
Example #14
def build_input(dataset, data_path, batch_size, standardize_images, mode):
    if dataset == 'mnist':
        from datasets import mnist
        return mnist.build_input(data_path, batch_size, standardize_images, mode)
    elif dataset == 'svhn':
        from datasets import svhn
        return svhn.build_input(data_path, batch_size, standardize_images, mode)
    elif dataset in ('cifar10', 'cifar100'):
        from datasets import cifar
        return cifar.build_input(dataset, data_path, batch_size, standardize_images, mode)
    elif dataset == 'imagenet':
        import tensorflow as tf
        from inception import image_processing
        from inception.imagenet_data import ImagenetData
        images, labels = image_processing.inputs(ImagenetData('validation'),
                                                 batch_size=batch_size)
        # 1000 ImageNet classes plus the reserved background class at index 0.
        labels = tf.one_hot(labels, 1001)
        return images, labels
    else:
        raise ValueError("Dataset {} not supported".format(dataset))
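A hypothetical call into the dispatcher above; the data path and batch size are placeholders:

images, labels = build_input('cifar10', '/tmp/cifar10_data', 128,
                             standardize_images=True, mode='train')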
Example #15
def export(args):
    """Restore the latest training checkpoint and export it as a saved_model."""
    FLAGS = tf.app.flags.FLAGS
    tf.reset_default_graph()

    def preprocess_image(image_buffer):
        """Preprocess JPEG encoded bytes to 3D float Tensor."""

        # Decode the string as an RGB JPEG.
        # Note that the resulting image contains an unknown height and width
        # that is set dynamically by decode_jpeg. In other words, the height
        # and width of image is unknown at compile-time.
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        # After this point, all image pixels reside in [0,1)
        # until the very end, when they're rescaled to (-1, 1).  The various
        # adjust_* ops all require this range for dtype float.
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Crop the central region of the image with an area containing 87.5% of
        # the original image.
        image = tf.image.central_crop(image, central_fraction=0.875)
        # Resize the image to the original height and width.
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(image,
                                         [FLAGS.image_size, FLAGS.image_size],
                                         align_corners=False)
        image = tf.squeeze(image, [0])
        # Finally, rescale to [-1,1] instead of [0, 1)
        image = tf.subtract(image, 0.5)
        image = tf.multiply(image, 2.0)
        return image

    # Get images and labels from the dataset.
    jpegs = tf.placeholder(tf.string, [None], name='jpegs')
    images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
    labels = tf.placeholder(tf.int32, [None], name='labels')

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class.
    dataset = ImagenetData(subset=FLAGS.subset)

    num_classes = dataset.num_classes() + 1

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _ = inception.inference(images, num_classes)

    # Calculate predictions.
    top_1_op = tf.nn.in_top_k(logits, labels, 1)
    top_5_op = tf.nn.in_top_k(logits, labels, 5)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if not ckpt or not ckpt.model_checkpoint_path:
            raise Exception("No checkpoint file found at: {}".format(
                FLAGS.train_dir))
        print("ckpt.model_checkpoint_path: {0}".format(
            ckpt.model_checkpoint_path))

        saver.restore(sess, ckpt.model_checkpoint_path)

        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Successfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))

        print("Exporting saved_model to: {}".format(args.export_dir))
        # exported signatures defined in code
        signatures = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            {
                'inputs': {
                    'jpegs': jpegs
                },
                'outputs': {
                    'logits': logits
                },
                'method_name':
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME
            }
        }
        TFNode.export_saved_model(sess, args.export_dir,
                                  tf.saved_model.tag_constants.SERVING,
                                  signatures)
        print("Exported saved_model")
Example #16
def main(_):
    from inception.imagenet_data import ImagenetData
    from inception import image_processing
    dataset = ImagenetData(subset='train')
    assert dataset.data_files()
    NUM_LABELS = dataset.num_classes() + 1
    IMAGE_SHAPE = [FLAGS.image_size, FLAGS.image_size, 3]
    graph = tf.Graph()
    with graph.as_default():
        model = semisup.SemisupModel(inception_model, NUM_LABELS,
                                     IMAGE_SHAPE)

        # t_sup_images, t_sup_labels = tools.get_data('train')
        # t_unsup_images, _ = tools.get_data('unlabeled')

        images, labels = image_processing.batch_inputs(
            dataset, 32, train=True,
            num_preprocess_threads=FLAGS.num_readers,
            num_readers=FLAGS.num_readers)

        t_sup_images, t_sup_labels = tf.train.batch(
            [images, labels],
            batch_size=FLAGS.sup_batch_size,
            enqueue_many=True,
            num_threads=FLAGS.num_readers,
            capacity=1000 + 3 * FLAGS.sup_batch_size,
        )

        # The unlabeled batch reuses the same input queue and batch size as
        # the supervised batch; its labels are unused.
        t_unsup_images, _ = tf.train.batch(
            [images, labels],
            batch_size=FLAGS.sup_batch_size,
            enqueue_many=True,
            num_threads=FLAGS.num_readers,
            capacity=1000 + 3 * FLAGS.sup_batch_size,
        )

        # Compute embeddings and logits.
        t_sup_emb = model.image_to_embedding(t_sup_images)
        t_unsup_emb = model.image_to_embedding(t_unsup_images)
        t_sup_logit = model.embedding_to_logit(t_sup_emb)

        # Add losses.
        model.add_semisup_loss(
            t_sup_emb, t_unsup_emb, t_sup_labels, visit_weight=FLAGS.visit_weight)

        model.add_logit_loss(t_sup_logit, t_sup_labels)


        t_learning_rate = tf.maximum(
                tf.train.exponential_decay(
                    FLAGS.learning_rate,
                    model.step,
                    FLAGS.decay_steps,
                    FLAGS.decay_factor,
                    staircase=True),
                FLAGS.minimum_learning_rate)

        # Create training operation and start the actual training loop.
        train_op = model.create_train_op(t_learning_rate)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        slim.learning.train(
            train_op,
            logdir=FLAGS.logdir,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            startup_delay_steps=(FLAGS.task * 20),
            log_every_n_steps=FLAGS.log_every_n_steps,
            session_config=config)
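The schedule above is staircase exponential decay with a floor: the rate is multiplied by decay_factor once every decay_steps steps and is never allowed below minimum_learning_rate. A worked example with illustrative values:

base_lr, decay_factor, decay_steps, min_lr = 0.1, 0.5, 1000, 0.001
step = 2500
lr = max(base_lr * decay_factor ** (step // decay_steps), min_lr)  # 0.025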