def main(_):
  """Evaluates an exported TF-Hub module or exports a checkpoint as one.

  The action is selected by the `tf_hub_mode` flag:
    * 'eval_from_hub': calls `eval_from_hub` on the export path.
    * 'export_to_hub': calls `export_to_hub` with the latest checkpoint found
      in the model directory (or with no checkpoint when
      `dryrun_with_untrained_weights` is set).

  Args:
    _: Unused positional argument supplied by the app runner.

  Raises:
    IOError: If exporting without the dry-run flag and no checkpoint is found.
    ValueError: If `tf_hub_mode` is neither of the supported values.
  """
  hub_mode = FLAGS.tf_hub_mode
  data_dir = amoeba_net.FLAGS.data_dir
  model_dir = amoeba_net.FLAGS.model_dir

  hparams = amoeba_net.build_hparams()
  hparams.add_hparam('drop_path_burn_in_steps', 0)
  hparams.set_hparam('use_tpu', False)
  # So far, there is no standardized way of exposing aux heads for
  # fine-tuning Hub image modules. Disable aux heads to avoid putting unused
  # variables and ops into the module.
  hparams.set_hparam('use_aux_head', False)

  eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

  # Fall back to a sub-directory of the model dir when no export path is set.
  export_path = FLAGS.export_path
  if not export_path:
    export_path = model_dir + '/export'

  input_pipeline = model_lib.InputPipeline(
      is_training=False,
      data_dir=data_dir,
      hparams=hparams,
      eval_from_hub=True)

  if hub_mode == 'eval_from_hub':
    eval_from_hub(export_path, input_pipeline.input_fn, eval_steps=eval_steps)
    return

  if hub_mode != 'export_to_hub':
    raise ValueError('Unsupported tf_hub_mode = {}'.format(hub_mode))

  # A feature-vector export is requested by passing num_classes=None.
  if FLAGS.export_feature_vector:
    num_classes = None
  else:
    num_classes = input_pipeline.num_classes

  # A dry run exports without restoring any checkpoint.
  checkpoint_path = None
  if not FLAGS.dryrun_with_untrained_weights:
    checkpoint_path = tf.train.latest_checkpoint(model_dir)
    if not checkpoint_path:
      raise IOError('No checkpoint found.')

  export_to_hub(checkpoint_path, export_path, num_classes, hparams)
def main(_):
  """Trains and/or evaluates AmoebaNet according to `FLAGS.mode`.

  Modes:
    * 'eval': evaluates every checkpoint yielded by `_get_next_checkpoint()`.
    * 'train_and_eval': alternates training for `num_epochs_per_eval` epochs
      with an evaluation until `num_epochs` epochs have been trained; if
      training is already finished, runs a single final evaluation.
    * anything else: trains for the steps remaining up to `num_epochs`.

  Args:
    _: Unused positional argument supplied by the app runner.
  """
  mode = FLAGS.mode
  data_dir = FLAGS.data_dir
  model_dir = FLAGS.model_dir
  hparams = build_hparams()

  estimator_params = {}

  train_steps_per_epoch = int(
      math.ceil(model_lib.NUM_TRAIN_IMAGES / float(hparams.train_batch_size)))
  eval_steps = model_lib.NUM_EVAL_IMAGES // hparams.eval_batch_size
  # No eval batch size is passed to the estimator in pure training mode.
  eval_batch_size = (None if mode == 'train' else hparams.eval_batch_size)

  run_config = build_run_config()
  model = model_lib.AmoebaNetEstimatorModel(hparams, model_dir)

  if hparams.use_tpu:
    image_classifier = tpu_estimator.TPUEstimator(
        model_fn=model.model_fn,
        use_tpu=True,
        config=run_config,
        params=estimator_params,
        train_batch_size=hparams.train_batch_size,
        eval_batch_size=eval_batch_size)
  else:
    image_classifier = tf.estimator.Estimator(
        model_fn=model.model_fn,
        config=run_config,
        params=estimator_params)

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  imagenet_train = model_lib.InputPipeline(
      is_training=True, data_dir=data_dir, hparams=hparams)
  imagenet_eval = model_lib.InputPipeline(
      is_training=False, data_dir=data_dir, hparams=hparams)

  # The EMA hook is only installed when a moving-average decay < 1 is set.
  if hparams.moving_average_decay < 1:
    eval_hooks = [
        model_lib.LoadEMAHook(model_dir, hparams.moving_average_decay)
    ]
  else:
    eval_hooks = []

  if mode == 'eval':
    for checkpoint in _get_next_checkpoint():
      tf.logging.info('Starting to evaluate.')
      try:
        eval_results = image_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            hooks=eval_hooks,
            checkpoint_path=checkpoint)
        tf.logging.info('Evaluation results: %s' % eval_results)
      except tf.errors.NotFoundError:
        # Skip the checkpoint if it gets deleted prior to evaluation.
        tf.logging.info(
            'Checkpoint %s no longer exists ... skipping' % checkpoint)
  elif mode == 'train_and_eval':
    current_step = _load_global_step_from_checkpoint_dir(model_dir)
    tf.logging.info('Starting training at step=%d.' % current_step)
    total_steps = hparams.num_epochs * train_steps_per_epoch
    # Coerce to int so a fractional `num_epochs_per_eval` still produces a
    # whole number of training steps per round.
    train_steps_per_eval = int(
        hparams.num_epochs_per_eval * train_steps_per_epoch)
    # Final Evaluation if training is finished.
    if current_step >= total_steps:
      eval_results = image_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks)
      tf.logging.info('Evaluation results: %s' % eval_results)
    while current_step < total_steps:
      image_classifier.train(
          input_fn=imagenet_train.input_fn, steps=train_steps_per_eval)
      current_step += train_steps_per_eval
      tf.logging.info('Starting evaluation at step=%d.' % current_step)
      eval_results = image_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks)
      tf.logging.info('Evaluation results: %s' % eval_results)
  else:
    # Any other mode is treated as plain training.
    current_step = _load_global_step_from_checkpoint_dir(model_dir)
    total_step = int(hparams.num_epochs * train_steps_per_epoch)
    if current_step < total_step:
      tf.logging.info('Starting training ...')
      image_classifier.train(
          input_fn=imagenet_train.input_fn, steps=total_step - current_step)
def main(_):
  """Runs AmoebaNet in the mode given by `FLAGS.mode`, then optionally exports.

  Modes:
    * 'eval': evaluates every checkpoint yielded by `_get_next_checkpoint()`.
    * 'train_and_eval': alternates training for `num_epochs_per_eval` epochs
      with an evaluation until `num_epochs` epochs have been trained; if
      training is already finished, runs a single final evaluation.
    * 'predict': runs timed inference on every checkpoint yielded by
      `_get_next_checkpoint()` and logs images per second.
    * 'train': trains for the steps remaining up to `num_epochs`.
    * anything else: logs 'Mode not found.' and does no training or eval.

  After the mode-specific work, a SavedModel is exported when
  `FLAGS.export_dir` is set, and warmup requests are written when
  `FLAGS.add_warmup_requests` is set.

  Args:
    _: Unused positional argument supplied by the app runner.
  """
  mode = FLAGS.mode
  data_dir = FLAGS.data_dir
  model_dir = FLAGS.model_dir
  hparams = build_hparams()

  estimator_params = {}

  train_steps_per_epoch = int(
      math.ceil(hparams.num_train_images / float(hparams.train_batch_size)))
  eval_steps = hparams.num_eval_images // hparams.eval_batch_size
  # No eval/predict batch size is passed to the estimator in training mode.
  eval_batch_size = (None if mode == 'train' else hparams.eval_batch_size)

  model = model_lib.AmoebaNetEstimatorModel(hparams, model_dir)

  if hparams.use_tpu:
    run_config = build_run_config()
    # Temporary treatment until flags are released.
    image_classifier = contrib_tpu.TPUEstimator(
        model_fn=model.model_fn,
        use_tpu=True,
        config=run_config,
        params=estimator_params,
        predict_batch_size=eval_batch_size,
        train_batch_size=hparams.train_batch_size,
        eval_batch_size=eval_batch_size,
        export_to_tpu=FLAGS.export_to_tpu)
  else:
    # Without an explicit checkpoint interval, fall back to the loop size.
    save_checkpoints_steps = (FLAGS.save_checkpoints_steps or
                              FLAGS.iterations_per_loop)
    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=save_checkpoints_steps)
    image_classifier = tf.estimator.Estimator(
        model_fn=model.model_fn,
        config=run_config,
        params=estimator_params)

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  imagenet_train = model_lib.InputPipeline(
      is_training=True, data_dir=data_dir, hparams=hparams)
  imagenet_eval = model_lib.InputPipeline(
      is_training=False, data_dir=data_dir, hparams=hparams)

  # The EMA hook is only installed when a moving-average decay < 1 is set.
  if hparams.moving_average_decay < 1:
    eval_hooks = [
        model_lib.LoadEMAHook(model_dir, hparams.moving_average_decay)
    ]
  else:
    eval_hooks = []

  if mode == 'eval':
    for checkpoint in _get_next_checkpoint():
      tf.logging.info('Starting to evaluate.')
      try:
        eval_results = image_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            hooks=eval_hooks,
            checkpoint_path=checkpoint)
        tf.logging.info('Evaluation results: %s' % eval_results)
      except tf.errors.NotFoundError:
        # Skip the checkpoint if it gets deleted prior to evaluation.
        tf.logging.info(
            'Checkpoint %s no longer exists ... skipping' % checkpoint)
  elif mode == 'train_and_eval':
    current_step = _load_global_step_from_checkpoint_dir(model_dir)
    tf.logging.info('Starting training at step=%d.' % current_step)
    total_steps = hparams.num_epochs * train_steps_per_epoch
    train_steps_per_eval = int(
        hparams.num_epochs_per_eval * train_steps_per_epoch)
    # Final Evaluation if training is finished.
    if current_step >= total_steps:
      eval_results = image_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks)
      tf.logging.info('Evaluation results: %s' % eval_results)
    while current_step < total_steps:
      image_classifier.train(
          input_fn=imagenet_train.input_fn, steps=train_steps_per_eval)
      current_step += train_steps_per_eval
      tf.logging.info('Starting evaluation at step=%d.' % current_step)
      eval_results = image_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks)
      tf.logging.info('Evaluation results: %s' % eval_results)
  elif mode == 'predict':
    for checkpoint in _get_next_checkpoint():
      tf.logging.info('Starting prediction ...')
      time_hook = model_lib.SessionTimingHook()
      # Build a fresh hook list per checkpoint so timing hooks created for
      # earlier checkpoints are not re-run alongside the current one.
      predict_hooks = eval_hooks + [time_hook]
      result_iter = image_classifier.predict(
          input_fn=imagenet_eval.input_fn,
          hooks=predict_hooks,
          checkpoint_path=checkpoint,
          yield_single_examples=False)
      # Each yielded result is a whole batch (yield_single_examples=False).
      results = list(itertools.islice(result_iter, eval_steps))
      tf.logging.info('Inference speed = {} images per second.'.format(
          time_hook.compute_speed(len(results) * eval_batch_size)))
  elif mode == 'train':
    current_step = _load_global_step_from_checkpoint_dir(model_dir)
    total_step = int(hparams.num_epochs * train_steps_per_epoch)
    if current_step < total_step:
      tf.logging.info('Starting training ...')
      image_classifier.train(
          input_fn=imagenet_train.input_fn, steps=total_step - current_step)
  else:
    tf.logging.info('Mode not found.')

  if FLAGS.export_dir is not None:
    tf.logging.info('Starting exporting saved model ...')
    serving_shape = [hparams.image_size, hparams.image_size, 3]
    export_path = image_classifier.export_saved_model(
        export_dir_base=FLAGS.export_dir,
        serving_input_receiver_fn=build_image_serving_input_receiver_fn(
            serving_shape),
        as_text=True)
    if FLAGS.add_warmup_requests:
      inference_warmup.write_warmup_requests(
          export_path,
          FLAGS.model_name,
          hparams.image_size,
          batch_sizes=FLAGS.inference_batch_sizes)
def main(_):
  """Runs the sliced AmoebaNet model in the mode given by `FLAGS.mode`.

  Modes:
    * 'eval': evaluates every checkpoint yielded by `_get_next_checkpoint()`.
    * 'train_and_eval': alternates training for `num_epochs_per_eval` epochs
      with an evaluation until `num_epochs` epochs have been trained; if
      training is already finished, runs a single final evaluation.
    * 'predict': runs timed inference on every checkpoint yielded by
      `_get_next_checkpoint()` and logs images per second.
    * 'train': trains for the remaining steps, capped by `FLAGS.max_steps`.
    * anything else: logs 'Mode not found.' and does nothing further.

  Args:
    _: Unused positional argument supplied by the app runner.
  """
  mode = FLAGS.mode
  data_dir = FLAGS.data_dir
  model_dir = FLAGS.model_dir
  hparams = build_hparams()

  estimator_params = {}

  train_steps_per_epoch = int(
      math.ceil(hparams.num_train_images / float(hparams.train_batch_size)))
  eval_steps = hparams.num_eval_images // hparams.eval_batch_size
  eval_batch_size = (None if mode == 'train' else hparams.eval_batch_size)

  model = slice_model_lib.AmoebaNetEstimatorModel(hparams, model_dir)

  # Without an explicit checkpoint interval, fall back to the loop size.
  save_checkpoints_steps = (FLAGS.save_checkpoints_steps or
                            FLAGS.iterations_per_loop)

  prepare_tf_config()
  # rewrite_options = rewriter_config_pb2.RewriterConfig(
  #     layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF)
  # graph_options = config_pb2.GraphOptions(rewrite_options=rewrite_options)
  session_config = tf.ConfigProto(
      # graph_options=graph_options,
      allow_soft_placement=True,
      log_device_placement=False,
      gpu_options=tf.GPUOptions(allow_growth=True))

  if FLAGS.cross_pipeline:
    # NOTE(review): the returned manager is never read below; the call is
    # presumably made for its side effects and the binding kept to hold a
    # reference -- confirm before removing.
    cluster_manager = cluster_utils.get_cluster_manager(
        config_proto=session_config)

  run_config = tf.estimator.RunConfig(
      log_step_count_steps=100,
      session_config=session_config,
      save_checkpoints_steps=save_checkpoints_steps)

  image_classifier = tf.estimator.Estimator(
      model_fn=model.model_fn,
      config=run_config,
      params=estimator_params)

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  imagenet_train = model_lib.InputPipeline(
      is_training=True, data_dir=data_dir, hparams=hparams)
  imagenet_eval = model_lib.InputPipeline(
      is_training=False, data_dir=data_dir, hparams=hparams)

  # The EMA hook is only installed when a moving-average decay < 1 is set.
  if hparams.moving_average_decay < 1:
    eval_hooks = [
        model_lib.LoadEMAHook(model_dir, hparams.moving_average_decay)
    ]
  else:
    eval_hooks = []

  if mode == 'eval':
    for checkpoint in _get_next_checkpoint():
      tf.logging.info('Starting to evaluate.')
      try:
        eval_results = image_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            hooks=eval_hooks,
            checkpoint_path=checkpoint)
        tf.logging.info('Evaluation results: %s' % eval_results)
      except tf.errors.NotFoundError:
        # Skip the checkpoint if it gets deleted prior to evaluation.
        tf.logging.info(
            'Checkpoint %s no longer exists ... skipping' % checkpoint)
  elif mode == 'train_and_eval':
    current_step = _load_global_step_from_checkpoint_dir(model_dir)
    tf.logging.info('Starting training at step=%d.' % current_step)
    total_steps = hparams.num_epochs * train_steps_per_epoch
    train_steps_per_eval = int(
        hparams.num_epochs_per_eval * train_steps_per_epoch)
    # Final Evaluation if training is finished.
    if current_step >= total_steps:
      eval_results = image_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks)
      tf.logging.info('Evaluation results: %s' % eval_results)
    while current_step < total_steps:
      image_classifier.train(
          input_fn=imagenet_train.input_fn, steps=train_steps_per_eval)
      current_step += train_steps_per_eval
      tf.logging.info('Starting evaluation at step=%d.' % current_step)
      eval_results = image_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks)
      tf.logging.info('Evaluation results: %s' % eval_results)
  elif mode == 'predict':
    for checkpoint in _get_next_checkpoint():
      tf.logging.info('Starting prediction ...')
      time_hook = model_lib.SessionTimingHook()
      # Build a fresh hook list per checkpoint so timing hooks created for
      # earlier checkpoints are not re-run alongside the current one.
      predict_hooks = eval_hooks + [time_hook]
      result_iter = image_classifier.predict(
          input_fn=imagenet_eval.input_fn,
          hooks=predict_hooks,
          checkpoint_path=checkpoint,
          yield_single_examples=False)
      # Each yielded result is a whole batch (yield_single_examples=False).
      results = list(itertools.islice(result_iter, eval_steps))
      tf.logging.info('Inference speed = {} images per second.'.format(
          time_hook.compute_speed(len(results) * eval_batch_size)))
  elif mode == 'train':
    current_step = _load_global_step_from_checkpoint_dir(model_dir)
    total_step = int(hparams.num_epochs * train_steps_per_epoch)
    if current_step < total_step:
      tf.logging.info('Starting training ...')
      remaining_steps = total_step - current_step
      # Cap the run at FLAGS.max_steps; tolerate the flag being unset (None),
      # which would otherwise make min() fail on Python 3.
      if FLAGS.max_steps is not None:
        remaining_steps = min(remaining_steps, FLAGS.max_steps)
      image_classifier.train(
          input_fn=imagenet_train.input_fn, steps=remaining_steps)
  else:
    tf.logging.info('Mode not found.')