Esempio n. 1
0
def main(argv):
  """Train the estimator from `build_estimator`, evaluating between cycles.

  Args:
    argv: Command-line argument list. The first entry is skipped and the
      remainder is parsed with `WideDeepArgParser`.
  """
  parser = WideDeepArgParser()
  flags = parser.parse_args(args=argv[1:])

  # Clean up the model directory if present
  shutil.rmtree(flags.model_dir, ignore_errors=True)
  model = build_estimator(flags.model_dir, flags.model_type)

  train_file = os.path.join(flags.data_dir, 'adult.data')
  test_file = os.path.join(flags.data_dir, 'adult.test')

  # Each train() call must consume exactly as many epochs as one loop cycle
  # represents, so the same flag (`epochs_between_evals`) is used here, in the
  # loop bound and in the reported epoch number.
  def train_input_fn():
    return input_fn(
        train_file, flags.epochs_between_evals, True, flags.batch_size)

  def eval_input_fn():
    return input_fn(test_file, 1, False, flags.batch_size)

  train_hooks = hooks_helper.get_train_hooks(
      flags.hooks, batch_size=flags.batch_size,
      tensors_to_log={'average_loss': 'head/truediv',
                      'loss': 'head/weighted_loss/Sum'})

  # Train and evaluate the model every `flags.epochs_between_evals` epochs.
  for n in range(flags.train_epochs // flags.epochs_between_evals):
    model.train(input_fn=train_input_fn, hooks=train_hooks)
    results = model.evaluate(input_fn=eval_input_fn)

    # Display evaluation metrics
    print('Results at epoch', (n + 1) * flags.epochs_between_evals)
    print('-' * 60)

    for key in sorted(results):
      print('%s: %s' % (key, results[key]))
def dan_main(flags, model_function, input_function):
    """Build an estimator and run it in the mode selected by `flags.mode`.

    Args:
        flags: Parsed flags object. The attributes read here are `multi_gpu`,
            `batch_size`, `inter_op_parallelism_threads`,
            `intra_op_parallelism_threads`, `model_dir`, `dan_stage`,
            `num_lmark`, `data_format`, `mode`, `data_dir`, `data_dir_test`,
            `num_parallel_calls`, `epochs_per_eval`, `train_epochs` and
            `max_train_steps`.
        model_function: Model function given to `tf.estimator.Estimator`. It
            is wrapped with `replicate_model_fn` when `flags.multi_gpu` is set.
        input_function: In PREDICT mode it is handed to `estimator.predict`
            unchanged. In the other modes it is called positionally with
            (True/False, data directory, batch size, epoch count,
            `num_parallel_calls`, `multi_gpu`).
    """
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    estimator = tf.estimator.Estimator(
        model_fn=model_function, model_dir=flags.model_dir, config=run_config,
        params={
            'dan_stage': flags.dan_stage,
            'num_lmark': flags.num_lmark,
            'data_format': flags.data_format,
            'batch_size': flags.batch_size,
            'multi_gpu': flags.multi_gpu,
        })

    if flags.mode == tf.estimator.ModeKeys.PREDICT:
        import cv2
        # Display every predicted image in a window named 't', 30 ms each.
        for prediction in estimator.predict(input_function):
            cv2.imshow('t', prediction['img'])
            cv2.waitKey(30)
        return

    def input_fn_eval():
        # Use the dedicated test directory when one was given; otherwise fall
        # back to the training data directory. (The branches used to be
        # swapped, which passed `None` whenever no test directory was set.)
        eval_data_dir = (flags.data_dir_test
                         if flags.data_dir_test is not None
                         else flags.data_dir)
        return input_function(False, eval_data_dir, flags.batch_size,
                              1, flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval, flags.num_parallel_calls,
                              flags.multi_gpu)

    if flags.mode == tf.estimator.ModeKeys.EVAL:
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

    if flags.mode == tf.estimator.ModeKeys.TRAIN:
        for _ in range(flags.train_epochs // flags.epochs_per_eval):
            # NOTE(review): these hooks are built but never passed to
            # `estimator.train` below -- confirm whether that is intended
            # before wiring them in or removing the call.
            train_hooks = hooks_helper.get_train_hooks(
                ["LoggingTensorHook"], batch_size=flags.batch_size)

            print('Starting a training cycle.')
            estimator.train(input_fn=input_fn_train,
                            max_steps=flags.max_train_steps)

            print('Starting to evaluate.')
            eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                              steps=flags.max_train_steps)
            print(eval_results)
Esempio n. 3
0
def main(argv):
  """Parse flags, then alternate between training and evaluating the model.

  Args:
    argv: Command-line argument list; everything after the first entry is
      parsed with `WideDeepArgParser`.
  """
  flags = WideDeepArgParser().parse_args(args=argv[1:])

  # Start from an empty model directory; a missing directory is not an error.
  shutil.rmtree(flags.model_dir, ignore_errors=True)
  model = build_estimator(flags.model_dir, flags.model_type)

  data_dir = flags.data_dir
  train_file = os.path.join(data_dir, 'adult.data')
  test_file = os.path.join(data_dir, 'adult.test')

  logged_tensors = {
      'average_loss': 'head/truediv',
      'loss': 'head/weighted_loss/Sum',
  }
  train_hooks = hooks_helper.get_train_hooks(
      flags.hooks, batch_size=flags.batch_size, tensors_to_log=logged_tensors)

  def train_input_fn():
    # One train() call covers `epochs_between_evals` epochs of training data.
    return input_fn(
        train_file, flags.epochs_between_evals, True, flags.batch_size)

  def eval_input_fn():
    # Evaluation makes a single pass over the test file.
    return input_fn(test_file, 1, False, flags.batch_size)

  # One cycle = train for `epochs_between_evals` epochs, then evaluate once.
  num_cycles = flags.train_epochs // flags.epochs_between_evals
  for cycle in range(1, num_cycles + 1):
    model.train(input_fn=train_input_fn, hooks=train_hooks)
    results = model.evaluate(input_fn=eval_input_fn)

    # Report the metrics in key order.
    print('Results at epoch', cycle * flags.epochs_between_evals)
    print('-' * 60)

    for metric_name in sorted(results):
      print('%s: %s' % (metric_name, results[metric_name]))
Esempio n. 4
0
def main(_):
    """Train, evaluate and optionally export the estimator built on `model_fn`.

    All configuration is read from the module-level `FLAGS` object; the single
    positional argument is ignored.
    """
    if FLAGS.multi_gpu:
        validate_batch_size_for_multi_gpu(FLAGS.batch_size)

        # Multi-GPU needs two things: the model_fn wrapped (done here) and the
        # optimizer wrapped (done inside the model_fn where it is defined).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_fn, loss_reduction=tf.losses.Reduction.MEAN)
    else:
        model_function = model_fn

    data_format = FLAGS.data_format
    if data_format is None:
        # No explicit format: choose based on whether TF was built with CUDA.
        if tf.test.is_built_with_cuda():
            data_format = 'channels_first'
        else:
            data_format = 'channels_last'

    estimator_params = {
        'data_format': data_format,
        'multi_gpu': FLAGS.multi_gpu,
    }
    mnist_classifier = tf.estimator.Estimator(model_fn=model_function,
                                              model_dir=FLAGS.model_dir,
                                              params=estimator_params)

    def train_input_fn():
        """Return the cached, shuffled, batched and repeated training set."""
        # The shuffle buffer trades randomness for memory; 50000 is the size
        # the original author chose for shuffling a full epoch.
        return (dataset.train(FLAGS.data_dir)
                .cache()
                .shuffle(buffer_size=50000)
                .batch(FLAGS.batch_size)
                .repeat(FLAGS.epochs_between_evals))

    def eval_input_fn():
        """Return the next-element op of a one-shot pass over the test set."""
        test_batches = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)
        return test_batches.make_one_shot_iterator().get_next()

    # Hooks selected by FLAGS.hooks are attached to every train() call.
    train_hooks = hooks_helper.get_train_hooks(FLAGS.hooks,
                                               batch_size=FLAGS.batch_size)

    # Alternate training and evaluation.
    num_cycles = FLAGS.train_epochs // FLAGS.epochs_between_evals
    for _ in range(num_cycles):
        mnist_classifier.train(input_fn=train_input_fn, hooks=train_hooks)
        eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
        print('\nEvaluation results:\n\t%s\n' % eval_results)

    # Export only when an export directory was requested.
    if FLAGS.export_dir is None:
        return

    image = tf.placeholder(tf.float32, [None, 28, 28])
    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        {'image': image})
    mnist_classifier.export_savedmodel(FLAGS.export_dir, serving_input_fn)
Esempio n. 5
0
def main(unused_argv):
    """Alternate training and evaluation, printing metrics after each cycle.

    Configuration comes from the module-level `FLAGS`; the argument is unused.
    """
    # Start from an empty model directory; a missing directory is ignored.
    shutil.rmtree(FLAGS.model_dir, ignore_errors=True)
    model = build_estimator(FLAGS.model_dir, FLAGS.model_type)

    train_file = os.path.join(FLAGS.data_dir, 'adult.data')
    test_file = os.path.join(FLAGS.data_dir, 'adult.test')

    logged_tensors = {
        'average_loss': 'head/truediv',
        'loss': 'head/weighted_loss/Sum',
    }
    train_hooks = hooks_helper.get_train_hooks(
        FLAGS.hooks,
        batch_size=FLAGS.batch_size,
        tensors_to_log=logged_tensors)

    def train_input_fn():
        # One train() call covers `epochs_between_evals` epochs.
        return input_fn(train_file, FLAGS.epochs_between_evals, True,
                        FLAGS.batch_size)

    def eval_input_fn():
        # Evaluation makes a single pass over the test file.
        return input_fn(test_file, 1, False, FLAGS.batch_size)

    num_cycles = FLAGS.train_epochs // FLAGS.epochs_between_evals
    for cycle in range(1, num_cycles + 1):
        model.train(input_fn=train_input_fn, hooks=train_hooks)
        results = model.evaluate(input_fn=eval_input_fn)

        # Report the metrics in key order.
        print('Results at epoch', cycle * FLAGS.epochs_between_evals)
        print('-' * 60)

        for metric_name in sorted(results):
            print('%s: %s' % (metric_name, results[metric_name]))
Esempio n. 6
0
 def validate_train_hook_name(self, test_hook_name, expected_hook_name,
                              **kwargs):
     """Assert that requesting one hook name yields one hook of that class.

     Args:
       test_hook_name: Hook name passed (as a one-element list) to
         `hooks_helper.get_train_hooks`.
       expected_hook_name: Expected lower-cased class name of the result.
       **kwargs: Forwarded unchanged to `hooks_helper.get_train_hooks`.
     """
     hooks = hooks_helper.get_train_hooks([test_hook_name], **kwargs)
     self.assertEqual(len(hooks), 1)

     only_hook = hooks[0]
     self.assertIsInstance(only_hook, tf.train.SessionRunHook)

     actual_name = only_hook.__class__.__name__.lower()
     self.assertEqual(actual_name, expected_hook_name)
Esempio n. 7
0
def resnet_main(flags, model_function, input_function):
    """Run repeated train/evaluate cycles for an estimator.

    Args:
        flags: Parsed flags object supplying the run configuration.
        model_function: Model function given to `tf.estimator.Estimator`; it
            is wrapped with `replicate_model_fn` when `flags.multi_gpu` is set.
        input_function: Callable invoked positionally with (True/False, data
            directory, batch size, epoch count, `num_parallel_calls`,
            `multi_gpu`) to produce the estimator input.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs the model_fn wrapped (here) and the optimizer
        # wrapped (inside the model_fn where the optimizer is defined).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # allow_soft_placement=True is required for multi-GPU and not harmful for
    # the other modes, so it is always enabled.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # RunConfig carries the checkpoint interval and the session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
    }
    classifier = tf.estimator.Estimator(model_fn=model_function,
                                        model_dir=flags.model_dir,
                                        config=run_config,
                                        params=model_params)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_per_eval
    for _ in range(num_cycles):
        # A fresh set of hooks is requested for every cycle.
        train_hooks = hooks_helper.get_train_hooks(flags.hooks,
                                                   batch_size=flags.batch_size)

        print('Starting a training cycle.')
        classifier.train(input_fn=input_fn_train, hooks=train_hooks)

        print('Starting to evaluate.')
        eval_results = classifier.evaluate(input_fn=input_fn_eval)
        print(eval_results)
Esempio n. 8
0
def dan_main(flags, model_function, input_function, file_path=None):
    """Build an estimator and run it in the mode selected by `flags.mode`.

    Args:
        flags: Parsed flags object. The attributes read here are `multi_gpu`,
            `batch_size`, `inter_op_parallelism_threads`,
            `intra_op_parallelism_threads`, `model_dir`, `dan_stage`,
            `num_lmark`, `data_format`, `mode`, `data_dir`, `data_dir_test`,
            `num_parallel_calls`, `epochs_per_eval`, `train_epochs` and
            `max_train_steps`.
        model_function: Model function given to `tf.estimator.Estimator`. It
            is wrapped with `replicate_model_fn` when `flags.multi_gpu` is set.
        input_function: In PREDICT mode it is handed to `estimator.predict`
            unchanged. In the other modes it is called positionally with
            (True/False, data directory, batch size, epoch count,
            `num_parallel_calls`, `multi_gpu`).
        file_path: Path of the image being predicted. Only used, and then
            required, in PREDICT mode, where it names the output files.

    Raises:
        ValueError: If `flags.mode` is PREDICT and `file_path` is None.
    """
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    estimator = tf.estimator.Estimator(model_fn=model_function,
                                       model_dir=flags.model_dir,
                                       config=run_config,
                                       params={
                                           'dan_stage': flags.dan_stage,
                                           'num_lmark': flags.num_lmark,
                                           'data_format': flags.data_format,
                                           'batch_size': flags.batch_size,
                                           'multi_gpu': flags.multi_gpu,
                                       })

    if flags.mode == tf.estimator.ModeKeys.PREDICT:
        # Fail with a clear message instead of an AttributeError on None.
        if file_path is None:
            raise ValueError(
                'file_path is required when flags.mode is PREDICT.')

        print('**********************')
        # Output name: the text after the last backslash, up to the last
        # ".png". When there is no ".png", keep the whole remainder (rfind
        # returns -1, which would otherwise chop off the final character).
        ind_backslash = file_path.rfind("\\")
        ind_png = file_path.rfind(".png")
        if ind_png == -1:
            ind_png = len(file_path)
        file_name = file_path[ind_backslash + 1:ind_png]
        print(file_name)

        import cv2
        predict_results = estimator.predict(input_function)
        for x in predict_results:
            landmark = x['s2_ret']
            img = x['img']
            # Convert to 3 channels so the landmark dots can be drawn in
            # colour on top of the image.
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            for lm in landmark:
                cv2.circle(img, (lm[0], lm[1]), 1, (0, 0, 255), -1)

            print(file_path)
            # NOTE(review): './results/' is assumed to exist already.
            cv2.imwrite('./results/' + file_name + '_pred.png', img)

            np.savetxt('./results/' + file_name + '_pred.pts',
                       landmark,
                       delimiter=" ",
                       fmt='%i')
        return

    def input_fn_eval():
        # Use the dedicated test directory when one was given; otherwise fall
        # back to the training data directory. (The branches used to be
        # swapped, which passed `None` whenever no test directory was set.)
        eval_data_dir = (flags.data_dir_test
                         if flags.data_dir_test is not None
                         else flags.data_dir)
        return input_function(
            False, eval_data_dir,
            flags.batch_size, 1, flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval, flags.num_parallel_calls,
                              flags.multi_gpu)

    if flags.mode == tf.estimator.ModeKeys.EVAL:
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

    if flags.mode == tf.estimator.ModeKeys.TRAIN:
        for _ in range(flags.train_epochs // flags.epochs_per_eval):
            # NOTE(review): these hooks are built but never passed to
            # `estimator.train` below -- confirm whether that is intended
            # before wiring them in or removing the call.
            train_hooks = hooks_helper.get_train_hooks(
                ["LoggingTensorHook"], batch_size=flags.batch_size)

            print('************** Starting a training cycle.')
            estimator.train(input_fn=input_fn_train,
                            max_steps=flags.max_train_steps)

            print('************** Starting to evaluate.')
            eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                              steps=flags.max_train_steps)
            print(eval_results)
Esempio n. 9
0
def resnet_main(flags, model_function, input_function, shape=None):
  """Run the train/evaluate loop for a ResNet estimator, then maybe export.

  Args:
    flags: FLAGS object holding the run parameters (see ResnetArgParser for
      the flags that are created).
    model_function: Function that instantiates the model and builds the
      train/eval ops; it is passed straight to the estimator.
    input_function: Function that processes the dataset and returns what the
      estimator trains on; it is wrapped here with the relevant flags.
    shape: List of ints giving the shape of the training images. Only used
      when `flags.export_dir` is set.
  """
  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  if flags.multi_gpu:
    validate_batch_size_for_multi_gpu(flags.batch_size)

    # Multi-GPU needs the model_fn wrapped (here) and the optimizer wrapped
    # (inside the model_fn where the optimizer is defined).
    model_function = tf.contrib.estimator.replicate_model_fn(
        model_function, loss_reduction=tf.losses.Reduction.MEAN)

  # allow_soft_placement=True is required for multi-GPU and not harmful for
  # the other modes, so it is always enabled.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
      allow_soft_placement=True)

  # RunConfig carries the checkpoint interval and the session config.
  run_config = tf.estimator.RunConfig().replace(
      save_checkpoints_secs=1e9, session_config=session_config)

  model_params = {
      'resnet_size': flags.resnet_size,
      'data_format': flags.data_format,
      'batch_size': flags.batch_size,
      'multi_gpu': flags.multi_gpu,
      'version': flags.version,
  }
  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags.model_dir, config=run_config,
      params=model_params)

  # Benchmark logging is enabled only when a log directory was given.
  benchmark_logger = None
  if flags.benchmark_log_dir is not None:
    benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
    benchmark_logger.log_run_info("resnet")

  def input_fn_train():
    return input_function(True, flags.data_dir, flags.batch_size,
                          flags.epochs_between_evals,
                          flags.num_parallel_calls, flags.multi_gpu)

  def input_fn_eval():
    return input_function(False, flags.data_dir, flags.batch_size,
                          1, flags.num_parallel_calls, flags.multi_gpu)

  num_cycles = flags.train_epochs // flags.epochs_between_evals
  for _ in range(num_cycles):
    # A fresh set of hooks is requested for every cycle.
    train_hooks = hooks_helper.get_train_hooks(
        flags.hooks,
        batch_size=flags.batch_size,
        benchmark_log_dir=flags.benchmark_log_dir)

    print('Starting a training cycle.')
    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags.max_train_steps)

    print('Starting to evaluate.')
    # max_train_steps is mostly used for testing and profiling, often with
    # synthetic data that never ends. Passing it as `steps` lets evaluation
    # terminate; note that eval then runs max_train_steps every cycle,
    # regardless of the global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags.max_train_steps)
    print(eval_results)

    if benchmark_logger:
      benchmark_logger.log_estimator_evaluation_result(eval_results)

  # Nothing left to do unless an export directory was requested.
  if flags.export_dir is None:
    return

  warn_on_multi_gpu_export(flags.multi_gpu)

  # Export a saved model for the trained classifier.
  input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
      shape, batch_size=flags.batch_size)
  classifier.export_savedmodel(flags.export_dir, input_receiver_fn)
Esempio n. 10
0
def resnet_main(flags, model_function, input_function, shape=None):
    """Run the train/evaluate loop for a ResNet estimator.

    When `flags.export_dir` is set, a saved model is exported at the end of
    every train/evaluate cycle.

    Args:
        flags: FLAGS object holding the run parameters (see ResnetArgParser
            for the flags that are created).
        model_function: Function that instantiates the model and builds the
            train/eval ops; it is passed straight to the estimator.
        input_function: Function that processes the dataset and returns what
            the estimator trains on; it is wrapped here with the relevant
            flags.
        shape: List of ints giving the shape of the training images. Only
            used when `flags.export_dir` is set.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement=True is required for multi-GPU and not harmful for
    # the other modes, so it is always enabled.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    if ALLOW_MULTIPLE_MODELS:
        session_config.gpu_options.allow_growth = True

    # RunConfig carries checkpoint/summary cadence and the session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=5 * 60,  # Checkpoint every five minutes.
        keep_checkpoint_max=1000,  # Keep the 1000 most recent checkpoints.
        save_summary_steps=10000,  # Steps between summaries.
        session_config=session_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
        'ncmmethod': flags.ncmmethod,
        'ncmparam': flags.ncmparam,
        'initial_learning_scale': flags.initial_learning_scale,
    }
    classifier = tf.estimator.Estimator(model_fn=model_function,
                                        model_dir=flags.model_dir,
                                        config=run_config,
                                        params=model_params)

    # Benchmark logging is enabled only when a log directory was given.
    benchmark_logger = None
    if flags.benchmark_log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
        benchmark_logger.log_run_info("resnet")

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _ in range(num_cycles):
        # A fresh set of hooks is requested for every cycle.
        train_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        classifier.train(input_fn=input_fn_train,
                         hooks=train_hooks,
                         max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # max_train_steps is mostly used for testing and profiling, often
        # with synthetic data that never ends. Passing it as `steps` lets
        # evaluation terminate; note that eval then runs max_train_steps
        # every cycle, regardless of the global_step count.
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags.max_train_steps)
        print(eval_results)

        if benchmark_logger:
            benchmark_logger.log_estimator_evaluation_result(eval_results)

        if flags.export_dir is not None:
            # Export a saved model for the classifier after this cycle.
            input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
                shape, batch_size=flags.batch_size)
            classifier.export_savedmodel(flags.export_dir, input_receiver_fn)
Esempio n. 11
0
def main(argv):
  """Train, evaluate and optionally export the estimator built on `model_fn`.

  Args:
    argv: Command-line argument list; everything after the first entry is
      parsed with `MNISTArgParser`.
  """
  flags = MNISTArgParser().parse_args(args=argv[1:])

  if flags.multi_gpu:
    validate_batch_size_for_multi_gpu(flags.batch_size)

    # Multi-GPU needs two things: the model_fn wrapped (done here) and the
    # optimizer wrapped (done inside the model_fn where it is defined).
    model_function = tf.contrib.estimator.replicate_model_fn(
        model_fn, loss_reduction=tf.losses.Reduction.MEAN)
  else:
    model_function = model_fn

  data_format = flags.data_format
  if data_format is None:
    # No explicit format: choose based on whether TF was built with CUDA.
    if tf.test.is_built_with_cuda():
      data_format = 'channels_first'
    else:
      data_format = 'channels_last'

  estimator_params = {
      'data_format': data_format,
      'multi_gpu': flags.multi_gpu,
  }
  mnist_classifier = tf.estimator.Estimator(
      model_fn=model_function,
      model_dir=flags.model_dir,
      params=estimator_params)

  def train_input_fn():
    """Return the cached, shuffled, batched and repeated training set."""
    # The shuffle buffer trades randomness for memory; 50000 is the size the
    # original author chose for shuffling a full epoch.
    return (dataset.train(flags.data_dir)
            .cache()
            .shuffle(buffer_size=50000)
            .batch(flags.batch_size)
            .repeat(flags.epochs_between_evals))

  def eval_input_fn():
    """Return the next-element op of a one-shot pass over the test set."""
    test_batches = dataset.test(flags.data_dir).batch(flags.batch_size)
    return test_batches.make_one_shot_iterator().get_next()

  # Hooks selected by flags.hooks are attached to every train() call.
  train_hooks = hooks_helper.get_train_hooks(
      flags.hooks, batch_size=flags.batch_size)

  # Alternate training and evaluation.
  num_cycles = flags.train_epochs // flags.epochs_between_evals
  for _ in range(num_cycles):
    mnist_classifier.train(input_fn=train_input_fn, hooks=train_hooks)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    print('\nEvaluation results:\n\t%s\n' % eval_results)

  # Export only when an export directory was requested.
  if flags.export_dir is None:
    return

  image = tf.placeholder(tf.float32, [None, 28, 28])
  serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
      {'image': image})
  mnist_classifier.export_savedmodel(flags.export_dir, serving_input_fn)
Esempio n. 12
0
 def test_raise_in_invalid_names(self):
     """Expect ValueError when the requested hook names are not accepted."""
     rejected_names = ['StepCounterHook', 'StopAtStepHook']
     self.assertRaises(ValueError,
                       hooks_helper.get_train_hooks,
                       rejected_names,
                       batch_size=256)
Esempio n. 13
0
 def test_raise_in_non_list_names(self):
     """Expect ValueError when hook names are given as one string."""
     names_as_string = 'LoggingTensorHook, ProfilerHook'
     self.assertRaises(ValueError,
                       hooks_helper.get_train_hooks,
                       names_as_string,
                       batch_size=256)
Esempio n. 14
0
def resnet_main(flags, model_function, input_function):
  """Run repeated train/evaluate cycles for a ResNet estimator.

  Args:
    flags: Parsed flags object supplying the run configuration.
    model_function: Model function given to `tf.estimator.Estimator`; it is
      wrapped with `replicate_model_fn` when `flags.multi_gpu` is set.
    input_function: Callable invoked positionally with (True/False, data
      directory, batch size, epoch count, `num_parallel_calls`, `multi_gpu`)
      to produce the estimator input.
  """
  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  if flags.multi_gpu:
    validate_batch_size_for_multi_gpu(flags.batch_size)

    # Multi-GPU needs the model_fn wrapped (here) and the optimizer wrapped
    # (inside the model_fn where the optimizer is defined).
    model_function = tf.contrib.estimator.replicate_model_fn(
        model_function, loss_reduction=tf.losses.Reduction.MEAN)

  # allow_soft_placement=True is required for multi-GPU and not harmful for
  # the other modes, so it is always enabled.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
      allow_soft_placement=True)

  # RunConfig carries the checkpoint interval and the session config.
  run_config = tf.estimator.RunConfig().replace(
      save_checkpoints_secs=1e9, session_config=session_config)

  model_params = {
      'resnet_size': flags.resnet_size,
      'data_format': flags.data_format,
      'batch_size': flags.batch_size,
      'multi_gpu': flags.multi_gpu,
      'version': flags.version,
  }
  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags.model_dir, config=run_config,
      params=model_params)

  def input_fn_train():
    return input_function(True, flags.data_dir, flags.batch_size,
                          flags.epochs_between_evals,
                          flags.num_parallel_calls, flags.multi_gpu)

  def input_fn_eval():
    return input_function(False, flags.data_dir, flags.batch_size,
                          1, flags.num_parallel_calls, flags.multi_gpu)

  num_cycles = flags.train_epochs // flags.epochs_between_evals
  for _ in range(num_cycles):
    # A fresh set of hooks is requested for every cycle.
    train_hooks = hooks_helper.get_train_hooks(
        flags.hooks, batch_size=flags.batch_size)

    print('Starting a training cycle.')
    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags.max_train_steps)

    print('Starting to evaluate.')
    # max_train_steps is mostly used for testing and profiling, often with
    # synthetic data that never ends. Passing it as `steps` lets evaluation
    # terminate; note that eval then runs max_train_steps every cycle,
    # regardless of the global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags.max_train_steps)
    print(eval_results)
Esempio n. 15
0
def resnet_main(flags, model_function, input_function, shape=None):
    """Run the train/evaluate cycles (and optional export) for a ResNet model.

    Args:
        flags: FLAGS object holding the parameters for the run. See
            ResnetArgParser for the flags that are created.
        model_function: function that instantiates the model and builds the
            train/eval ops. It is handed to the estimator as its model_fn.
        input_function: function that processes the dataset and returns a
            dataset the estimator can train on. It is wrapped here with the
            relevant flag values before being passed to the estimator.
        shape: list of ints giving the shape of the training images. Only
            used when flags.export_dir is set.
    """
    # The Winograd non-fused algorithms give a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs two wrappers: the model_fn (done here) and the
        # optimizer (done inside the model_fn where it is defined).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # Session config driven by the two parallelism flags.
    # allow_soft_placement is always on: multi-GPU requires it and it is not
    # harmful in the other modes.
    sess_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # RunConfig carrying the checkpoint setting and the session config.
    estimator_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=sess_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
    }
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=estimator_config,
        params=model_params)

    # Benchmark logging is optional; it is enabled by benchmark_log_dir.
    benchmark_logger = None
    if flags.benchmark_log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
        benchmark_logger.log_run_info("resnet")

    def train_input():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def eval_input():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _ in range(num_cycles):
        cycle_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        estimator.train(input_fn=train_input,
                        hooks=cycle_hooks,
                        max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is generally associated with testing and
        # profiling, which frequently run on synthetic data that iterates
        # forever. Passing it as `steps` lets the evaluation terminate in
        # those runs. Note that evaluation therefore runs for max_train_steps
        # on every cycle, regardless of the global_step count.
        metrics = estimator.evaluate(input_fn=eval_input,
                                     steps=flags.max_train_steps)
        print(metrics)

        if benchmark_logger:
            benchmark_logger.log_estimator_evaluation_result(metrics)

    if flags.export_dir is None:
        return

    warn_on_multi_gpu_export(flags.multi_gpu)

    # Export a saved model for the trained estimator.
    receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    estimator.export_savedmodel(flags.export_dir, receiver_fn)
Esempio n. 16
0
def dan_main(flags, model_function, input_function):
    """Run the DAN estimator in predict, eval, or train mode.

    The mode is selected by comparing flags.mode against
    tf.estimator.ModeKeys:

    * PREDICT: runs estimator.predict and writes the 's2_ret' entry of every
      prediction to './prep/predict/<image basename>.ptv' as comma-separated
      text, then returns.
    * EVAL: runs a single evaluation and prints the result.
    * TRAIN: alternates training and evaluation for
      flags.train_epochs // flags.epochs_per_eval cycles, printing the
      evaluation result after each cycle.

    Args:
        flags: parsed flags object. This function reads multi_gpu,
            batch_size, inter_op_parallelism_threads,
            intra_op_parallelism_threads, model_dir, dan_stage, num_lmark,
            data_format, mode, data_dir, data_dir_test, num_parallel_calls,
            epochs_per_eval, train_epochs and max_train_steps from it.
        model_function: model_fn handed to tf.estimator.Estimator (wrapped
            with replicate_model_fn first when flags.multi_gpu is set).
        input_function: in train/eval mode it is called positionally with
            (is-training bool, data directory, batch size, epoch count,
            num_parallel_calls, multi_gpu). In predict mode it is passed
            unwrapped to estimator.predict.

    Returns:
        None.
    """
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    estimator = tf.estimator.Estimator(model_fn=model_function,
                                       model_dir=flags.model_dir,
                                       config=run_config,
                                       params={
                                           'dan_stage': flags.dan_stage,
                                           'num_lmark': flags.num_lmark,
                                           'data_format': flags.data_format,
                                           'batch_size': flags.batch_size,
                                           'multi_gpu': flags.multi_gpu,
                                       })

    if flags.mode == tf.estimator.ModeKeys.PREDICT:
        import glob
        import numpy as np

        def get_filenames(data_dir):
            """Return the paths of all *.png and *.jpg files in data_dir."""
            imagelist = []
            for pattern in ('*.png', '*.jpg'):
                imagelist.extend(glob.glob(os.path.join(data_dir, pattern)))
            return imagelist

        def make_safely_folder(directory):
            """Create directory if missing; report (not raise) on OSError."""
            try:
                if not os.path.exists(directory):
                    os.makedirs(directory)
            except OSError:
                print('Error: Creating directory. ' + directory)

        # NOTE(review): input_function is passed unwrapped here, unlike the
        # train/eval paths which bind its arguments -- confirm it can be
        # called by the estimator without them.
        predict_results = estimator.predict(input_function)

        save_path = './prep/predict'
        make_safely_folder(save_path)
        img_path_list = get_filenames(flags.data_dir)

        # Pair each prediction with an image path. zip stops at the shorter
        # sequence, so surplus predictions no longer surface as a bare
        # StopIteration from next().
        # NOTE(review): assumes predictions come back in the same order as
        # the glob listing -- TODO confirm against input_function.
        for img_path, prediction in zip(img_path_list, predict_results):
            filename, _ = os.path.splitext(os.path.basename(img_path))
            np.savetxt(os.path.join(save_path, filename + '.ptv'),
                       prediction['s2_ret'],
                       delimiter=',')
        return

    def input_fn_eval():
        # Prefer the dedicated test directory and fall back to the training
        # directory only when no test directory was supplied.
        if flags.data_dir_test is not None:
            eval_dir = flags.data_dir_test
        else:
            eval_dir = flags.data_dir
        return input_function(False, eval_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval, flags.num_parallel_calls,
                              flags.multi_gpu)

    if flags.mode == tf.estimator.ModeKeys.EVAL:
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

    if flags.mode == tf.estimator.ModeKeys.TRAIN:
        for _ in range(flags.train_epochs // flags.epochs_per_eval):
            # NOTE(review): these hooks are built but not passed to
            # estimator.train below. Left unwired on purpose: attaching the
            # logging hook requires the model to expose the tensors it looks
            # up -- confirm before adding hooks=train_hooks.
            train_hooks = hooks_helper.get_train_hooks(
                ["LoggingTensorHook"], batch_size=flags.batch_size)

            print('Starting a training cycle.')
            estimator.train(input_fn=input_fn_train,
                            max_steps=flags.max_train_steps)

            print('Starting to evaluate.')
            eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                              steps=flags.max_train_steps)
            print(eval_results)