def main(_):
    """Run `inception_train.train` on the ImageNet subset named by FLAGS.subset.

    Any existing FLAGS.train_dir is deleted and recreated empty before
    training starts.
    """
    imagenet = ImagenetData(subset=FLAGS.subset)
    # Stop early if the dataset reports nothing from data_files().
    assert imagenet.data_files()

    # Always start from a fresh training directory.
    train_dir = FLAGS.train_dir
    if tf.gfile.Exists(train_dir):
        tf.gfile.DeleteRecursively(train_dir)
    tf.gfile.MakeDirs(train_dir)

    inception_train.train(imagenet)
def main(unused_argv=None):
    """Run `inception_eval.evaluate` on the ImageNet subset named by FLAGS.subset.

    FLAGS.eval_dir is removed (when it already exists) and recreated empty
    before the evaluation is started.
    """
    imagenet = ImagenetData(subset=FLAGS.subset)
    # Stop early if the dataset reports nothing from data_files().
    assert imagenet.data_files()

    # Evaluation output always goes into a freshly created directory.
    eval_dir = FLAGS.eval_dir
    if tf.gfile.Exists(eval_dir):
        tf.gfile.DeleteRecursively(eval_dir)
    tf.gfile.MakeDirs(eval_dir)

    inception_eval.evaluate(imagenet)
def main(unused_args):
    """Start this task's TensorFlow server and, for workers, run training.

    The cluster layout comes from the comma-separated FLAGS.ps_hosts and
    FLAGS.worker_hosts. A 'ps' task only joins its server; a 'worker' task
    loads the ImageNet subset and calls `inception_distributed_train.train`.
    """
    assert FLAGS.job_name in ('ps', 'worker'), 'job_name must be ps or worker'

    # Host lists for both jobs, used to describe the cluster.
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    tf.logging.info('PS hosts are: %s' % ps_hosts)
    tf.logging.info('Worker hosts are: %s' % worker_hosts)

    hosts_by_job = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(hosts_by_job)
    server = tf.train.Server(
        hosts_by_job,
        job_name=FLAGS.job_name,
        task_index=FLAGS.task_id,
        protocol=FLAGS.protocol)

    if FLAGS.job_name == 'ps':
        # `ps` jobs wait for incoming connections from the workers.
        server.join()
        return

    # `worker` jobs will actually do the work.
    imagenet = ImagenetData(subset=FLAGS.subset)
    assert imagenet.data_files()

    # Only the chief (task 0) checks for or creates train_dir.
    if FLAGS.task_id == 0 and not tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.MakeDirs(FLAGS.train_dir)

    inception_distributed_train.train(server.target, imagenet, cluster_spec)
def main(unused_args):
    """Start this task's TensorFlow server and, for workers, run training.

    The cluster layout comes from the comma-separated FLAGS.ps_hosts and
    FLAGS.worker_hosts. A 'ps' task only joins its server; a 'worker' task
    loads the ImageNet subset and calls `inception_distributed_train.train`.
    """
    assert FLAGS.job_name in ('ps', 'worker'), 'job_name must be ps or worker'

    # Host lists for both jobs, used to describe the cluster.
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    tf.logging.info('PS hosts are: %s' % ps_hosts)
    tf.logging.info('Worker hosts are: %s' % worker_hosts)

    hosts_by_job = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(hosts_by_job)
    server = tf.train.Server(
        hosts_by_job,
        job_name=FLAGS.job_name,
        task_index=FLAGS.task_id)

    if FLAGS.job_name == 'ps':
        # `ps` jobs wait for incoming connections from the workers.
        server.join()
        return

    # `worker` jobs will actually do the work.
    imagenet = ImagenetData(subset=FLAGS.subset)
    assert imagenet.data_files()

    # Only the chief (task 0) checks for or creates train_dir.
    if FLAGS.task_id == 0 and not tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.MakeDirs(FLAGS.train_dir)

    inception_distributed_train.train(server.target, imagenet, cluster_spec)
def main(unused_argv):
    """Start this task's TensorFlow server and, for workers, run training.

    The cluster is described by the comma-separated FLAGS.ps_hosts and
    FLAGS.worker_hosts. A "ps" task joins its server and then exits the
    process; any other task loads the ImageNet subset, calls
    `inception_distributed_train.train`, and finally prints a short summary
    of the worker configuration.

    Args:
        unused_argv: Ignored.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.worker_index)

    if FLAGS.job_name == "ps":
        server.join()
        sys.exit(0)

    # `worker` jobs will actually do the work.
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()

    # Only the chief checks for or creates train_dir.
    # NOTE(review): the server is indexed by FLAGS.worker_index but the chief
    # check reads FLAGS.task_id -- confirm both flags are kept in sync.
    if FLAGS.task_id == 0:
        if not tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.MakeDirs(FLAGS.train_dir)

    # Pass the ClusterSpec built above; it is the only cluster object this
    # function defines.
    inception_distributed_train.train(server.target, dataset, cluster)

    num_workers = len(worker_hosts)
    worker_grpc_url = 'grpc://' + worker_hosts[0]
    print("Worker GRPC URL: %s" % worker_grpc_url)
    print("Worker index = %d" % FLAGS.worker_index)
    print("Number of workers = %d" % num_workers)
def main(_):
    """Run `inception_train.train` on the ImageNet subset named by FLAGS.subset.

    Any existing FLAGS.train_dir is deleted and recreated empty before
    training starts.
    """
    imagenet = ImagenetData(subset=FLAGS.subset)
    # Stop early if the dataset reports nothing from data_files().
    assert imagenet.data_files()

    # Always start from a fresh training directory.
    train_dir = FLAGS.train_dir
    if tf.gfile.Exists(train_dir):
        tf.gfile.DeleteRecursively(train_dir)
    tf.gfile.MakeDirs(train_dir)

    inception_train.train(imagenet)
def main(unused_argv=None):
    """Run `inception_eval.evaluate` on the ImageNet subset named by FLAGS.subset.

    FLAGS.eval_dir is removed (when it already exists) and recreated empty.
    FLAGS.dataset_name and FLAGS.num_examples are then overwritten before
    the evaluation is started.
    """
    imagenet = ImagenetData(subset=FLAGS.subset)
    # Stop early if the dataset reports nothing from data_files().
    assert imagenet.data_files()

    # Evaluation output always goes into a freshly created directory.
    eval_dir = FLAGS.eval_dir
    if tf.gfile.Exists(eval_dir):
        tf.gfile.DeleteRecursively(eval_dir)
    tf.gfile.MakeDirs(eval_dir)

    # Publish dataset details through FLAGS before evaluating.
    FLAGS.dataset_name = 'imagenet'
    FLAGS.num_examples = imagenet.num_examples_per_epoch()

    inception_eval.evaluate(imagenet)
def main(_):
    """Run the TF-Slim evaluation loop for a semisup Inception model.

    Builds an input pipeline over the ImageNet 'validation' subset, wraps it
    in a `semisup.SemisupModel`, registers a streaming accuracy metric and
    summary, and then calls `slim.evaluation.evaluation_loop` against the
    checkpoints in FLAGS.logdir.
    """
    # Load dataset
    tf.app.flags.FLAGS.data_dir = '/work/haeusser/data/imagenet/shards'
    dataset = ImagenetData(subset='validation')
    assert dataset.data_files()

    num_labels = dataset.num_classes() + 1
    image_shape = [FLAGS.image_size, FLAGS.image_size, 3]

    graph = tf.Graph()
    with graph.as_default():
        # NOTE(review): the pipeline uses a fixed batch size of 32 and
        # train=True, while num_batches below is derived from
        # FLAGS.eval_batch_size -- confirm this is intended.
        images, labels = image_processing.batch_inputs(
            dataset, 32, train=True,
            num_preprocess_threads=16,
            num_readers=FLAGS.num_readers)

        # Set up semisup model.
        model = semisup.SemisupModel(semisup.architectures.inception_model,
                                     num_labels, image_shape, test_in=images)

        # Add moving average variables.
        for var in tf.get_collection('moving_vars'):
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
        for var in slim.get_model_variables():
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)

        # Get prediction tensor from semisup model.
        predictions = tf.argmax(model.test_logit, 1)

        # Accuracy metric for summaries.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        })
        # `.items()` works on both Python 2 and 3 (`iteritems` is Python 2 only).
        for name, value in names_to_values.items():
            tf.summary.scalar(name, value)

        # Run the actual evaluation loop.
        # math.ceil returns a float on Python 2, so make the count an int.
        num_batches = int(math.ceil(
            dataset.num_examples_per_epoch() / float(FLAGS.eval_batch_size)))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.logdir,
            logdir=FLAGS.logdir,
            num_evals=num_batches,
            # Materialize the update ops: a Python 3 dict view is not a list.
            eval_op=list(names_to_updates.values()),
            eval_interval_secs=FLAGS.eval_interval_secs,
            session_config=config)
def main_fun(argv, ctx):
    """Per-node entry point that evaluates Inception on ImageNet.

    Args:
        argv: Argument list installed as `sys.argv` before flags are parsed.
        ctx: Node context handed to `TFNode.start_cluster_server`.
    """
    import tensorflow as tf
    from inception import inception_eval
    from inception.imagenet_data import ImagenetData

    # Rebuild the flag values on this node from the forwarded arguments.
    print("argv:", argv)
    sys.argv = argv
    flags = tf.app.flags.FLAGS
    flags._parse_flags()
    print("FLAGS:", flags.__dict__['__flags'])

    imagenet = ImagenetData(subset=flags.subset)
    assert imagenet.data_files()

    # Evaluation output always goes into a freshly created directory.
    eval_dir = flags.eval_dir
    if tf.gfile.Exists(eval_dir):
        tf.gfile.DeleteRecursively(eval_dir)
    tf.gfile.MakeDirs(eval_dir)

    # The returned cluster spec and server are not used by this function.
    _cluster_spec, _server = TFNode.start_cluster_server(ctx, 1, flags.rdma)

    inception_eval.evaluate(imagenet)
def main_fun(argv, ctx):
    """Per-node entry point that evaluates Inception on ImageNet.

    Args:
        argv: Argument list installed as `sys.argv` before flags are parsed.
        ctx: Node context handed to `TFNode.start_cluster_server`.
    """
    import tensorflow as tf
    from inception import inception_eval
    from inception.imagenet_data import ImagenetData

    # Rebuild the flag values on this node from the forwarded arguments.
    print("argv:", argv)
    sys.argv = argv
    flags = tf.app.flags.FLAGS
    flags._parse_flags()
    print("FLAGS:", flags.__dict__['__flags'])

    imagenet = ImagenetData(subset=flags.subset)
    assert imagenet.data_files()

    # Evaluation output always goes into a freshly created directory.
    eval_dir = flags.eval_dir
    if tf.gfile.Exists(eval_dir):
        tf.gfile.DeleteRecursively(eval_dir)
    tf.gfile.MakeDirs(eval_dir)

    # The returned cluster spec and server are not used by this function.
    _cluster_spec, _server = TFNode.start_cluster_server(ctx)

    inception_eval.evaluate(imagenet)
def main_fun(argv, ctx):
    """Per-node entry point for distributed Inception training.

    A 'ps' node only joins its server. A 'worker' node loads the ImageNet
    subset and calls `inception_distributed_train.train`; the worker with
    task index 0 first creates FLAGS.train_dir when it does not exist.

    Args:
        argv: Argument list installed as `sys.argv` on this node.
        ctx: Node context providing `worker_num`, `job_name` and
            `task_index`; also passed on to the cluster/server helper and
            to the training function.
    """
    # Node metadata supplied through ctx.
    worker_num = ctx.worker_num  # read but not used further in this function
    node_job = ctx.job_name
    node_task = ctx.task_index
    assert node_job in ('ps', 'worker'), 'job_name must be ps or worker'

    from inception import inception_distributed_train
    from inception.imagenet_data import ImagenetData
    import tensorflow as tf

    # Instantiate FLAGS on this node from the forwarded argv, then record
    # the job name and task id taken from ctx.
    print("argv:", argv)
    sys.argv = argv
    flags = tf.app.flags.FLAGS
    flags.job_name = node_job
    flags.task_id = node_task
    print("FLAGS:", flags.__dict__['__flags'])

    # Get TF cluster and server instances.
    cluster_spec, server = TFNode.start_cluster_server(
        ctx, flags.num_gpus, flags.rdma)

    if flags.job_name == 'ps':
        # `ps` jobs wait for incoming connections from the workers.
        server.join()
        return

    # `worker` jobs will actually do the work.
    imagenet = ImagenetData(subset=flags.subset)
    assert imagenet.data_files()

    # Only the chief (task 0) checks for or creates train_dir.
    if flags.task_id == 0 and not tf.gfile.Exists(flags.train_dir):
        tf.gfile.MakeDirs(flags.train_dir)

    inception_distributed_train.train(
        server.target, imagenet, cluster_spec, ctx)
def build_input(dataset, data_path, batch_size, standardize_images, mode):
    """Dispatch input-pipeline construction to the module for `dataset`.

    Dataset modules are imported lazily, so only the selected backend is
    loaded.

    Args:
        dataset: One of 'mnist', 'svhn', 'cifar10', 'cifar100', 'imagenet'.
        data_path: Forwarded to the dataset module (not used for 'imagenet').
        batch_size: Batch size forwarded to the dataset module.
        standardize_images: Forwarded to the dataset module (not used for
            'imagenet').
        mode: Forwarded to the dataset module (not used for 'imagenet').

    Returns:
        Whatever the selected module's `build_input` returns; for 'imagenet',
        an `(images, labels)` pair with the labels one-hot encoded to depth
        1001.

    Raises:
        ValueError: If `dataset` is not one of the supported names.
    """
    if dataset == 'mnist':
        from datasets import mnist
        return mnist.build_input(data_path, batch_size, standardize_images,
                                 mode)

    if dataset == 'svhn':
        from datasets import svhn
        return svhn.build_input(data_path, batch_size, standardize_images,
                                mode)

    if dataset in ('cifar10', 'cifar100'):
        # Both CIFAR variants share one module, which takes the dataset name.
        from datasets import cifar
        return cifar.build_input(dataset, data_path, batch_size,
                                 standardize_images, mode)

    if dataset == 'imagenet':
        from inception import image_processing
        from inception.imagenet_data import ImagenetData
        # The ImageNet pipeline always reads the 'validation' subset.
        images, labels = image_processing.inputs(
            ImagenetData('validation'), batch_size=batch_size)
        import tensorflow as tf
        return images, tf.one_hot(labels, 1001)

    raise ValueError("Dataset {} not supported".format(dataset))
def export(args):
    """Restore the latest checkpoint and export the model as a saved_model.

    Builds an inference graph that takes JPEG-encoded strings, restores the
    moving-average variables from the checkpoint found in FLAGS.train_dir,
    and writes a saved_model to `args.export_dir`.

    Args:
        args: Object whose `export_dir` attribute names the output location.

    Raises:
        Exception: If no checkpoint is found in FLAGS.train_dir.
    """
    flags = tf.app.flags.FLAGS
    tf.reset_default_graph()

    def preprocess_image(image_buffer):
        """Preprocess JPEG encoded bytes to 3D float Tensor."""
        # Decode as RGB. Height and width are only known at run time.
        decoded = tf.image.decode_jpeg(image_buffer, channels=3)
        # Pixels live in [0, 1) from here until the final rescale.
        as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
        # Keep the central region covering 87.5% of the image.
        cropped = tf.image.central_crop(as_float, central_fraction=0.875)
        # resize_bilinear wants a batch dimension; add it, resize, drop it.
        batched = tf.expand_dims(cropped, 0)
        resized = tf.image.resize_bilinear(
            batched, [flags.image_size, flags.image_size],
            align_corners=False)
        single = tf.squeeze(resized, [0])
        # Finally, rescale to [-1, 1] instead of [0, 1).
        return tf.multiply(tf.subtract(single, 0.5), 2.0)

    # Graph inputs: a batch of JPEG strings and a batch of integer labels.
    jpegs = tf.placeholder(tf.string, [None], name='jpegs')
    images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
    labels = tf.placeholder(tf.int32, [None], name='labels')

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class.
    num_classes = ImagenetData(subset=flags.subset).num_classes() + 1

    # Logits from the inference model.
    logits, _ = inception.inference(images, num_classes)

    # Top-1 / top-5 prediction ops; they are added to the graph but not
    # referenced again in this function.
    top_1_op = tf.nn.in_top_k(logits, labels, 1)
    top_5_op = tf.nn.in_top_k(logits, labels, 5)

    # Restore the moving average version of the learned variables.
    ema = tf.train.ExponentialMovingAverage(inception.MOVING_AVERAGE_DECAY)
    saver = tf.train.Saver(ema.variables_to_restore())

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(flags.train_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            raise Exception("No checkpoint file found at: {}".format(
                flags.train_dir))

        ckpt_path = ckpt.model_checkpoint_path
        print("ckpt.model_checkpoint_path: {0}".format(ckpt_path))
        saver.restore(sess, ckpt_path)

        # Assuming the path looks something like
        # /my-favorite-path/imagenet_train/model.ckpt-0,
        # the global step is the text after the last '-' of the last
        # path component.
        global_step = ckpt_path.rsplit('/', 1)[-1].rsplit('-', 1)[-1]
        print('Successfully loaded model from %s at step=%s.' %
              (ckpt_path, global_step))

        print("Exporting saved_model to: {}".format(args.export_dir))
        # Exported signatures defined in code.
        signature_key = (
            tf.saved_model.signature_constants
            .DEFAULT_SERVING_SIGNATURE_DEF_KEY)
        signatures = {
            signature_key: {
                'inputs': {'jpegs': jpegs},
                'outputs': {'logits': logits},
                'method_name':
                    tf.saved_model.signature_constants.PREDICT_METHOD_NAME,
            }
        }
        TFNode.export_saved_model(sess,
                                  args.export_dir,
                                  tf.saved_model.tag_constants.SERVING,
                                  signatures)
        print("Exported saved_model")
def main(_):
    """Train a semisup Inception model on the ImageNet 'train' subset.

    Builds the input queues, embeddings, losses and learning-rate schedule
    inside a fresh graph, then hands the train op to `slim.learning.train`.
    """
    from inception.imagenet_data import ImagenetData
    from inception import image_processing

    imagenet = ImagenetData(subset='train')
    assert imagenet.data_files()

    num_labels = imagenet.num_classes() + 1
    image_shape = [FLAGS.image_size, FLAGS.image_size, 3]

    graph = tf.Graph()
    with graph.as_default():
        model = semisup.SemisupModel(inception_model, num_labels, image_shape)

        images, labels = image_processing.batch_inputs(
            imagenet, 32, train=True,
            num_preprocess_threads=FLAGS.num_readers,
            num_readers=FLAGS.num_readers)

        # Both batch queues are built with identical settings.
        # NOTE(review): the unsupervised queue also uses FLAGS.sup_batch_size
        # -- confirm that is intended.
        batch_kwargs = dict(
            batch_size=FLAGS.sup_batch_size,
            enqueue_many=True,
            num_threads=FLAGS.num_readers,
            capacity=1000 + 3 * FLAGS.sup_batch_size,
        )
        t_sup_images, t_sup_labels = tf.train.batch(
            [images, labels], **batch_kwargs)
        # The labels from the second queue are not used below.
        t_unsup_images, t_unsup_labels = tf.train.batch(
            [images, labels], **batch_kwargs)

        # Compute embeddings and logits.
        t_sup_emb = model.image_to_embedding(t_sup_images)
        t_unsup_emb = model.image_to_embedding(t_unsup_images)
        t_sup_logit = model.embedding_to_logit(t_sup_emb)

        # Add losses.
        model.add_semisup_loss(
            t_sup_emb, t_unsup_emb, t_sup_labels,
            visit_weight=FLAGS.visit_weight)
        model.add_logit_loss(t_sup_logit, t_sup_labels)

        # Staircase exponential decay, floored at the minimum learning rate.
        decayed_rate = tf.train.exponential_decay(
            FLAGS.learning_rate,
            model.step,
            FLAGS.decay_steps,
            FLAGS.decay_factor,
            staircase=True)
        t_learning_rate = tf.maximum(decayed_rate,
                                     FLAGS.minimum_learning_rate)

        # Create training operation and start the actual training loop.
        train_op = model.create_train_op(t_learning_rate)

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        slim.learning.train(
            train_op,
            logdir=FLAGS.logdir,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            startup_delay_steps=(FLAGS.task * 20),
            log_every_n_steps=FLAGS.log_every_n_steps,
            session_config=session_config)