Exemple #1
0
def define_resnet_flags(resnet_size_choices=None):
  """Register the command-line flags, plus one validator, used by ResNet.

  Args:
    resnet_size_choices: Optional collection of allowed values for
      --resnet_size. When None the flag is defined as a free-form string;
      otherwise it is defined as an enum restricted to these values.
  """
  wrap = flags_core.help_wrap

  # Shared flag groups supplied by flags_core.
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version',
      short_name='rv',
      default='2',
      enum_values=['1', '2'],
      help=wrap('Version of ResNet. (1 or 2) See README.md for details.'))

  size_flag = {
      'name': 'resnet_size',
      'short_name': 'rs',
      'default': '50',
      'help': wrap('The size of the ResNet model to use.'),
  }
  if resnet_size_choices is not None:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
  else:
    flags.DEFINE_string(**size_flag)

  # ResNet v1 as currently implemented is numerically unstable under fp16 and
  # yields NaN errors shortly after training starts, so reject that pairing.
  @flags.multi_flags_validator(
      ['dtype', 'resnet_version'],
      message=('ResNet version 1 is not currently supported with fp16. '
               'Please use version 2 instead.'))
  def _forbid_v1_fp16(flag_values):  # pylint: disable=unused-variable
    # Any dtype other than fp16 is acceptable regardless of the version.
    if flags_core.DTYPE_MAP[flag_values['dtype']][0] != tf.float16:
      return True
    return flag_values['resnet_version'] != '1'
Exemple #2
0
def define_mnist_eager_flags():
  """Register the flags and default values for the eager-mode MNIST example."""
  wrap = flags_core.help_wrap

  # Shared flag groups supplied by flags_core.
  flags_core.define_base_eager()
  flags_core.define_image()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_integer(
      name='log_interval',
      short_name='li',
      default=10,
      help=wrap('batches between logging training status'))
  flags.DEFINE_string(
      name='output_dir',
      short_name='od',
      default=None,
      help=wrap('Directory to write TensorBoard summaries'))
  flags.DEFINE_float(
      name='learning_rate',
      short_name='lr',
      default=0.01,
      help=wrap('Learning rate.'))
  flags.DEFINE_float(
      name='momentum',
      short_name='m',
      default=0.5,
      help=wrap('SGD momentum.'))
  flags.DEFINE_bool(
      name='no_gpu',
      short_name='nogpu',
      default=False,
      help=wrap('disables GPU usage even if a GPU is available'))

  # MNIST-specific defaults; the flags themselves are presumably declared by
  # the flags_core helpers called above.
  mnist_defaults = {
      'data_dir': '/tmp/tensorflow/mnist/input_data',
      'model_dir': '/tmp/tensorflow/mnist/checkpoints/',
      'batch_size': 100,
      'train_epochs': 10,
  }
  flags_core.set_defaults(**mnist_defaults)
Exemple #3
0
def define_keras_benchmark_flags():
  """Register the flags for benchmarking Keras built-in application models.

  Besides the flags, a validator is registered that rejects setting both
  --eager and --dist_strat.
  """
  wrap = flags_core.help_wrap

  # Shared flag groups supplied by flags_core.
  flags_core.define_base(hooks=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  benchmark_defaults = {
      "data_format": "channels_last",
      "use_synthetic_data": True,
      "batch_size": 32,
      "train_epochs": 2,
  }
  flags_core.set_defaults(**benchmark_defaults)

  flags.DEFINE_enum(
      name="model",
      default=None,
      enum_values=MODELS.keys(),
      case_sensitive=False,
      help=wrap("Model to be benchmarked."))

  flags.DEFINE_integer(
      name="num_train_images",
      default=1000,
      help=wrap(
          "The number of synthetic images for training. The default value is "
          "1000."))

  flags.DEFINE_integer(
      name="num_eval_images",
      default=50,
      help=wrap(
          "The number of synthetic images for evaluation. The default value is "
          "50."))

  flags.DEFINE_boolean(
      name="eager",
      default=False,
      help=wrap(
          "To enable eager execution. Note that if eager execution is enabled, "
          "only one GPU is utilized even if multiple GPUs are provided and "
          "multi_gpu_model is used."))

  flags.DEFINE_boolean(
      name="dist_strat",
      default=False,
      help=wrap(
          "To enable distribution strategy for model training and evaluation. "
          "Number of GPUs used for distribution strategy can be set by the "
          "argument --num_gpus."))

  flags.DEFINE_list(
      name="callbacks",
      default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
      help=wrap(
          "A list of (case insensitive) strings to specify the names of "
          "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
          "LoggingMetricCallback`"))

  # --eager and --dist_strat are mutually exclusive.
  @flags.multi_flags_validator(
      ["eager", "dist_strat"],
      message="Both --eager and --dist_strat were set. Only one can be "
              "defined, as DistributionStrategy is not supported in Eager "
              "execution currently.")
  def _check_eager_dist_strat(flag_dict):  # pylint: disable=unused-variable
    # Valid as long as at least one of the two flags is off.
    return not flag_dict["eager"] or not flag_dict["dist_strat"]
def define_keras_benchmark_flags():
  """Register the flags for benchmarking Keras built-in application models."""
  wrap = flags_core.help_wrap

  # Shared flag groups supplied by flags_core.
  flags_core.define_base(hooks=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  benchmark_defaults = {
      "data_format": "channels_last",
      "use_synthetic_data": True,
      "batch_size": 32,
      "train_epochs": 2,
  }
  flags_core.set_defaults(**benchmark_defaults)

  flags.DEFINE_enum(
      name="model",
      default=None,
      enum_values=MODELS.keys(),
      case_sensitive=False,
      help=wrap("Model to be benchmarked."))

  flags.DEFINE_list(
      name="callbacks",
      default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
      help=wrap(
          "A list of (case insensitive) strings to specify the names of "
          "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
          "LoggingMetricCallback`"))
def define_resnet_flags(resnet_size_choices=None):
  """Register the command-line flags used by ResNet.

  Args:
    resnet_size_choices: Optional collection of allowed values for
      --resnet_size. When None the flag is defined as a free-form string;
      otherwise it is defined as an enum restricted to these values.
  """
  wrap = flags_core.help_wrap

  # Shared flag groups supplied by flags_core.
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version',
      short_name='rv',
      default='2',
      enum_values=['1', '2'],
      help=wrap('Version of ResNet. (1 or 2) See README.md for details.'))
  flags.DEFINE_bool(
      name='fine_tune',
      short_name='ft',
      default=False,
      help=wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path',
      short_name='pmcp',
      default=None,
      help=wrap(
          'If not None initialize all the network except the final layer with '
          'these values'))
  flags.DEFINE_boolean(
      name='eval_only',
      default=False,
      help=wrap('Skip training and only perform evaluation on '
                'the latest checkpoint.'))

  size_flag = {
      'name': 'resnet_size',
      'short_name': 'rs',
      'default': '50',
      'help': wrap('The size of the ResNet model to use.'),
  }
  if resnet_size_choices is not None:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
  else:
    flags.DEFINE_string(**size_flag)
Exemple #6
0
def define_mnist_flags():
  """Register the MNIST flags and set their MNIST-specific defaults."""
  flags_core.define_base(multi_gpu=True, num_gpu=False)
  flags_core.define_image()
  flags.adopt_module_key_flags(flags_core)

  mnist_defaults = {
      'data_dir': '/tmp/mnist_data',
      'model_dir': '/tmp/mnist_model',
      'batch_size': 100,
      'train_epochs': 40,
  }
  flags_core.set_defaults(**mnist_defaults)
Exemple #7
0
def define_mnist_flags():
  """Register the MNIST flags and set their MNIST-specific defaults."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False)
  flags_core.define_image()
  flags.adopt_module_key_flags(flags_core)

  mnist_defaults = {
      'data_dir': '/tmp/mnist_data',
      'model_dir': '/tmp/mnist_model',
      'batch_size': 100,
      'train_epochs': 40,
  }
  flags_core.set_defaults(**mnist_defaults)
def define_resnet_flags(resnet_size_choices=None):
  """Register the command-line flags used by ResNet.

  Args:
    resnet_size_choices: Optional collection of allowed values for
      --resnet_size. When None the flag is defined as a free-form string;
      otherwise it is defined as an enum restricted to these values.
  """
  wrap = flags_core.help_wrap

  # Shared flag groups supplied by flags_core.
  flags_core.define_base()
  performance_options = {
      'num_parallel_calls': False,
      'tf_gpu_thread_mode': True,
      'datasets_num_private_threads': True,
      'datasets_num_parallel_batches': True,
  }
  flags_core.define_performance(**performance_options)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version',
      short_name='rv',
      default='1',
      enum_values=['1', '2'],
      help=wrap('Version of ResNet. (1 or 2) See README.md for details.'))
  flags.DEFINE_bool(
      name='fine_tune',
      short_name='ft',
      default=False,
      help=wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path',
      short_name='pmcp',
      default=None,
      help=wrap(
          'If not None initialize all the network except the final layer with '
          'these values'))
  flags.DEFINE_boolean(
      name='eval_only',
      default=False,
      help=wrap('Skip training and only perform evaluation on '
                'the latest checkpoint.'))
  flags.DEFINE_boolean(
      name='image_bytes_as_serving_input',
      default=False,
      help=wrap(
          'If True exports savedmodel with serving signature that accepts '
          'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
          'represents the image. The former is easier to use for serving at '
          'the expense of image resize/cropping being done as part of model '
          'inference. Note, this flag only applies to ImageNet and cannot '
          'be used for CIFAR.'))
  flags.DEFINE_boolean(
      name='turn_off_distribution_strategy',
      default=False,
      help=wrap('Set to True to not use distribution '
                'strategies.'))

  size_flag = {
      'name': 'resnet_size',
      'short_name': 'rs',
      'default': '50',
      'help': wrap('The size of the ResNet model to use.'),
  }
  if resnet_size_choices is not None:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
  else:
    flags.DEFINE_string(**size_flag)
Exemple #9
0
def define_keras_benchmark_flags():
  """Register the flags for benchmarking Keras built-in application models."""
  wrap = flags_core.help_wrap

  # Shared flag groups supplied by flags_core.
  flags_core.define_base(hooks=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  benchmark_defaults = {
      "data_format": "channels_last",
      "use_synthetic_data": True,
      "batch_size": 32,
      "train_epochs": 2,
  }
  flags_core.set_defaults(**benchmark_defaults)

  flags.DEFINE_enum(
      name="model",
      default=None,
      enum_values=MODELS.keys(),
      case_sensitive=False,
      help=wrap("Model to be benchmarked."))

  flags.DEFINE_integer(
      name="num_images",
      default=1000,
      help=wrap(
          "The number of synthetic images for training and evaluation. The "
          "default value is 1000."))

  flags.DEFINE_boolean(
      name="eager",
      default=False,
      help=wrap(
          "To enable eager execution. Note that if eager execution is enabled, "
          "only one GPU is utilized even if multiple GPUs are provided and "
          "multi_gpu_model is used."))

  flags.DEFINE_list(
      name="callbacks",
      default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
      help=wrap(
          "A list of (case insensitive) strings to specify the names of "
          "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
          "LoggingMetricCallback`"))
Exemple #10
0
def define_flags():
  """Register the base, performance, image and benchmark flag groups."""
  flags_core.define_base(num_gpu=False)
  # The remaining groups are registered with their default options.
  remaining_groups = (
      flags_core.define_performance,
      flags_core.define_image,
      flags_core.define_benchmark,
  )
  for define_group in remaining_groups:
    define_group()
Exemple #11
0
def define_flags():
  """Register the base, performance, image and benchmark flag groups.

  The performance group is registered with both dynamic_loss_scale and
  loss_scale enabled.
  """
  flags_core.define_base(num_gpu=False)
  performance_options = {'dynamic_loss_scale': True, 'loss_scale': True}
  flags_core.define_performance(**performance_options)
  # The remaining groups are registered with their default options.
  for define_group in (flags_core.define_image, flags_core.define_benchmark):
    define_group()
Exemple #12
0
def define_flags():
  """Register the base, performance, image and benchmark flag groups.

  The performance group is registered with dynamic_loss_scale enabled.
  """
  flags_core.define_base(num_gpu=False)
  flags_core.define_performance(dynamic_loss_scale=True)
  # The remaining groups are registered with their default options.
  for define_group in (flags_core.define_image, flags_core.define_benchmark):
    define_group()
Exemple #13
0
def define_resnet_flags(resnet_size_choices=None):
    """Register the command-line flags used by ResNet.

    Args:
        resnet_size_choices: Optional collection of allowed values for
            --resnet_size. When None the flag is defined as a free-form
            string; otherwise it is defined as an enum restricted to these
            values.
    """
    # Shared flag groups supplied by flags_core.
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  datasets_num_parallel_batches=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='1',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=flags_core.help_wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))
    # Help text fix: the backtick after `classifier.train` was never closed.
    flags.DEFINE_boolean(
        name='use_train_and_evaluate',
        default=False,
        help=flags_core.help_wrap(
            'If True, uses `tf.estimator.train_and_evaluate` for the training '
            'and evaluation loop, instead of separate calls to '
            '`classifier.train` and `classifier.evaluate`, which is the '
            'default behavior.'))
    flags.DEFINE_string(
        name='worker_hosts',
        default=None,
        help=flags_core.help_wrap(
            'Comma-separated list of worker ip:port pairs for running '
            'multi-worker models with DistributionStrategy.  The user would '
            'start the program on each host with identical value for this flag.'
        ))
    flags.DEFINE_integer(name='task_index',
                         default=-1,
                         help=flags_core.help_wrap(
                             'If multi-worker training, the task_index of '
                             'this worker.'))
    flags.DEFINE_bool(name='enable_lars',
                      default=False,
                      help=flags_core.help_wrap(
                          'Enable LARS optimizer for large batch training.'))
    flags.DEFINE_float(
        name='label_smoothing',
        default=0.0,
        help=flags_core.help_wrap(
            'Label smoothing parameter used in the softmax_cross_entropy'))
    # Help text fix: "coefficiant" -> "coefficient".
    flags.DEFINE_float(name='weight_decay',
                       default=1e-4,
                       help=flags_core.help_wrap(
                           'Weight decay coefficient for l2 regularization.'))

    # --resnet_size shares these arguments whether it is a string or an enum.
    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
def define_mnist_flags():
    """Register the base and image flag groups and adopt flags_core's key flags."""
    for define_group in (flags_core.define_base, flags_core.define_image):
        define_group()
    flags.adopt_module_key_flags(flags_core)
Exemple #15
0
def define_resnet_flags(resnet_size_choices=None,
                        dynamic_loss_scale=False,
                        fp16_implementation=False):
    """Register the command-line flags used by ResNet.

    Args:
        resnet_size_choices: Optional collection of allowed values for
            --resnet_size. When None the flag is defined as a free-form
            string; otherwise it is defined as an enum restricted to these
            values.
        dynamic_loss_scale: Forwarded to flags_core.define_performance as the
            keyword argument of the same name.
        fp16_implementation: Forwarded to flags_core.define_performance as the
            keyword argument of the same name.
    """
    # Shared flag groups supplied by flags_core.
    flags_core.define_base(clean=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           stop_threshold=True,
                           num_gpu=True,
                           hooks=True,
                           export_dir=True,
                           distribution_strategy=True)
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=True,
                                  intra_op=True,
                                  synthetic_data=True,
                                  dtype=True,
                                  all_reduce_alg=True,
                                  num_packs=True,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  dynamic_loss_scale=dynamic_loss_scale,
                                  fp16_implementation=fp16_implementation,
                                  loss_scale=True,
                                  tf_data_experimental_slack=True,
                                  max_train_steps=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags_core.define_distribution()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='1',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=flags_core.help_wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))
    # Help text fix: the backtick after `classifier.train` was never closed.
    flags.DEFINE_boolean(
        name='use_train_and_evaluate',
        default=False,
        help=flags_core.help_wrap(
            'If True, uses `tf.estimator.train_and_evaluate` for the training '
            'and evaluation loop, instead of separate calls to '
            '`classifier.train` and `classifier.evaluate`, which is the '
            'default behavior.'))
    flags.DEFINE_bool(name='enable_lars',
                      default=False,
                      help=flags_core.help_wrap(
                          'Enable LARS optimizer for large batch training.'))
    flags.DEFINE_float(
        name='label_smoothing',
        default=0.0,
        help=flags_core.help_wrap(
            'Label smoothing parameter used in the softmax_cross_entropy'))
    # Help text fix: "coefficiant" -> "coefficient".
    flags.DEFINE_float(name='weight_decay',
                       default=1e-4,
                       help=flags_core.help_wrap(
                           'Weight decay coefficient for l2 regularization.'))
    flags.DEFINE_float(name='percent',
                       default=0,
                       help=flags_core.help_wrap('percent of data to poison'))
    flags.DEFINE_bool(
        name='adv_train',
        default=False,
        help=flags_core.help_wrap('whether adversarial training'))

    # --resnet_size shares these arguments whether it is a string or an enum.
    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Exemple #16
0
def define_resnet_flags(resnet_size_choices=None, dynamic_loss_scale=False,
                        fp16_implementation=False):
  """Register the command-line flags used by ResNet.

  Args:
    resnet_size_choices: Optional collection of allowed values for
      --resnet_size. When None the flag is defined as a free-form string;
      otherwise it is defined as an enum restricted to these values.
    dynamic_loss_scale: Forwarded to flags_core.define_performance as the
      keyword argument of the same name.
    fp16_implementation: Forwarded to flags_core.define_performance as the
      keyword argument of the same name.
  """
  # Shared flag groups supplied by flags_core.
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                datasets_num_parallel_batches=True,
                                dynamic_loss_scale=dynamic_loss_scale,
                                fp16_implementation=fp16_implementation,
                                loss_scale=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version', short_name='rv', default='1',
      enum_values=['1', '2'],
      help=flags_core.help_wrap(
          'Version of ResNet. (1 or 2) See README.md for details.'))
  flags.DEFINE_bool(
      name='fine_tune', short_name='ft', default=False,
      help=flags_core.help_wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
      help=flags_core.help_wrap(
          'If not None initialize all the network except the final layer with '
          'these values'))
  flags.DEFINE_boolean(
      name='eval_only', default=False,
      help=flags_core.help_wrap('Skip training and only perform evaluation on '
                                'the latest checkpoint.'))
  flags.DEFINE_boolean(
      name='image_bytes_as_serving_input', default=False,
      help=flags_core.help_wrap(
          'If True exports savedmodel with serving signature that accepts '
          'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
          'represents the image. The former is easier to use for serving at '
          'the expense of image resize/cropping being done as part of model '
          'inference. Note, this flag only applies to ImageNet and cannot '
          'be used for CIFAR.'))
  # Help text fix: the backtick after `classifier.train` was never closed.
  flags.DEFINE_boolean(
      name='use_train_and_evaluate', default=False,
      help=flags_core.help_wrap(
          'If True, uses `tf.estimator.train_and_evaluate` for the training '
          'and evaluation loop, instead of separate calls to '
          '`classifier.train` and `classifier.evaluate`, which is the '
          'default behavior.'))
  flags.DEFINE_string(
      name='worker_hosts', default=None,
      help=flags_core.help_wrap(
          'Comma-separated list of worker ip:port pairs for running '
          'multi-worker models with DistributionStrategy.  The user would '
          'start the program on each host with identical value for this flag.'))
  flags.DEFINE_integer(
      name='task_index', default=-1,
      help=flags_core.help_wrap('If multi-worker training, the task_index of '
                                'this worker.'))
  flags.DEFINE_bool(
      name='enable_lars', default=False,
      help=flags_core.help_wrap(
          'Enable LARS optimizer for large batch training.'))
  flags.DEFINE_float(
      name='label_smoothing', default=0.0,
      help=flags_core.help_wrap(
          'Label smoothing parameter used in the softmax_cross_entropy'))
  # Help text fix: "coefficiant" -> "coefficient".
  flags.DEFINE_float(
      name='weight_decay', default=1e-4,
      help=flags_core.help_wrap(
          'Weight decay coefficient for l2 regularization.'))

  # --resnet_size shares these arguments whether it is a string or an enum.
  choice_kwargs = dict(
      name='resnet_size', short_name='rs', default='50',
      help=flags_core.help_wrap('The size of the ResNet model to use.'))

  if resnet_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Exemple #17
0
def define_keras_flags(dynamic_loss_scale=True):
    """Register the command-line flags used by the Keras models.

    Args:
        dynamic_loss_scale: Forwarded to flags_core.define_performance as the
            keyword argument of the same name.
    """
    # Shared flag groups supplied by flags_core.
    flags_core.define_base(clean=True,
                           num_gpu=True,
                           run_eagerly=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           distribution_strategy=True)
    flags_core.define_performance(num_parallel_calls=False,
                                  synthetic_data=True,
                                  dtype=True,
                                  all_reduce_alg=True,
                                  num_packs=True,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  dynamic_loss_scale=dynamic_loss_scale,
                                  loss_scale=True,
                                  fp16_implementation=True,
                                  tf_data_experimental_slack=True,
                                  enable_xla=True,
                                  force_v2_in_keras_compile=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags_core.define_distribution()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_boolean(name='enable_eager',
                         default=False,
                         help='Enable eager?')
    flags.DEFINE_boolean(name='skip_eval',
                         default=False,
                         help='Skip evaluation?')
    # TODO(b/135607288): Remove this flag once we understand the root cause of
    # slowdown when setting the learning phase in Keras backend.
    flags.DEFINE_boolean(
        name='set_learning_phase_to_train',
        default=True,
        help='If skip eval, also set Keras learning phase to 1 (training).')
    flags.DEFINE_boolean(
        name='explicit_gpu_placement',
        default=False,
        help='If not using distribution strategy, explicitly set device scope '
        'for the Keras training loop.')
    flags.DEFINE_boolean(name='use_trivial_model',
                         default=False,
                         help='Whether to use a trivial Keras model.')
    flags.DEFINE_boolean(name='report_accuracy_metrics',
                         default=True,
                         help='Report metrics during training and evaluation.')
    flags.DEFINE_boolean(
        name='use_tensor_lr',
        default=False,
        help='Use learning rate tensor instead of a callback.')
    flags.DEFINE_boolean(name='enable_tensorboard',
                         default=False,
                         help='Whether to enable Tensorboard callback.')
    flags.DEFINE_integer(
        name='train_steps',
        default=None,
        help='The number of steps to run for training. If it is larger than '
        '# batches per epoch, then use # batches per epoch. When this flag is '
        'set, only one epoch is going to run for training.')
    flags.DEFINE_string(
        name='profile_steps',
        default=None,
        help='Save profiling data to model dir at given range of steps. The '
        'value must be a comma separated pair of positive integers, specifying '
        'the first and last step to profile. For example, "--profile_steps=2,4" '
        'triggers the profiler to process 3 steps, starting from the 2nd step. '
        'Note that profiler has a non-trivial performance overhead, and the '
        'output file can be gigantic if profiling many steps.')
    flags.DEFINE_boolean(
        name='data_delay_prefetch',
        default=False,
        help=
        'Add a small delay in tf.data prefetch to prioritize memory copy of '
        'other tensors over the data minibatch for the (T+1)th step. It should '
        'help improve performance using EagerIterator and function. The codepath '
        'when enabling this feature is experimental and will be removed once the '
        'corresponding performance features are fully supported in TensorFlow.'
    )
    # Help text fix: "spacial" -> "spatial", matching the flag name.
    flags.DEFINE_boolean(
        name='batchnorm_spatial_persistent',
        default=True,
        help='Enable the spatial persistent mode for CuDNN batch norm kernel.')
    flags.DEFINE_boolean(
        name='enable_get_next_as_optional',
        default=False,
        help='Enable get_next_as_optional behavior in DistributedIterator.')
    flags.DEFINE_boolean(
        name='enable_checkpoint_and_export',
        default=False,
        help=
        'Whether to enable a checkpoint callback and export the savedmodel.')
    flags.DEFINE_string(name='tpu',
                        default='',
                        help='TPU address to connect to.')
    flags.DEFINE_integer(
        name='steps_per_loop',
        default=1,
        help='Number of steps per graph-mode loop. Only training step happens '
        'inside the loop. Callbacks will not be called inside. Will be capped at '
        'steps per epoch.')
Exemple #18
0
def define_resnet_flags(resnet_size_choices=None):
    """Register the command-line flags used by ResNet.

    Args:
        resnet_size_choices: Optional collection of allowed values for
            --resnet_size. When None the flag is defined as a free-form
            string; otherwise it is defined as an enum restricted to these
            values.
    """
    wrap = flags_core.help_wrap

    # Shared flag groups supplied by flags_core.
    flags_core.define_base()
    performance_options = {
        'num_parallel_calls': False,
        'tf_gpu_thread_mode': True,
        'datasets_num_private_threads': True,
        'datasets_num_parallel_batches': True,
    }
    flags_core.define_performance(**performance_options)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version', short_name='rv', default='1',
        enum_values=['1', '2'],
        help=wrap('Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune', short_name='ft', default=False,
        help=wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path', short_name='pmcp',
        default=None,
        help=wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(
        name='eval_only', default=False,
        help=wrap('Skip training and only perform evaluation on '
                  'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input', default=False,
        help=wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))
    flags.DEFINE_boolean(
        name='turn_off_distribution_strategy', default=False,
        help=wrap('Set to True to not use distribution '
                  'strategies.'))

    size_flag = {
        'name': 'resnet_size',
        'short_name': 'rs',
        'default': '50',
        'help': wrap('The size of the ResNet model to use.'),
    }
    if resnet_size_choices is not None:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
    else:
        flags.DEFINE_string(**size_flag)
Exemple #19
0
def define_keras_flags(model=False,
                       optimizer=False,
                       pretrained_filepath=False):
    """Register the command-line flags used by the Keras model scripts.

    First defines the base, performance, image, benchmark and distribution
    flag groups through `flags_core` and adopts them as key flags, then
    defines the Keras-specific flags listed in the table below, in order.

    Args:
      model: If truthy, additionally define the `model` preset flag.
      optimizer: If truthy, additionally define the `optimizer` preset flag
        and the learning-rate flags described as being for
        `mobilenet_default`.
      pretrained_filepath: If truthy, additionally define the
        `pretrained_filepath` flag.
    """
    # Shared flag groups supplied by flags_core.
    flags_core.define_base(
        clean=True, num_gpu=True, run_eagerly=True, train_epochs=True,
        epochs_between_evals=True, distribution_strategy=True)
    flags_core.define_performance(
        num_parallel_calls=False, synthetic_data=True, dtype=True,
        all_reduce_alg=True, num_packs=True, tf_gpu_thread_mode=True,
        datasets_num_private_threads=True, loss_scale=True,
        fp16_implementation=True, tf_data_experimental_slack=True,
        enable_xla=True, training_dataset_cache=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags_core.define_distribution()
    flags.adopt_module_key_flags(flags_core)

    # Each row is (definer, flag name, default value, help text). Rows are
    # registered top to bottom, so the table order is the definition order.
    keras_flag_table = (
        (flags.DEFINE_boolean, 'enable_eager', False, 'Enable eager?'),
        (flags.DEFINE_boolean, 'skip_eval', False, 'Skip evaluation?'),
        # TODO(b/135607288): Remove this flag once we understand the root
        # cause of slowdown when setting the learning phase in Keras backend.
        (flags.DEFINE_boolean, 'set_learning_phase_to_train', True,
         'If skip eval, also set Keras learning phase to 1 (training).'),
        (flags.DEFINE_boolean, 'explicit_gpu_placement', False,
         ('If not using distribution strategy, explicitly set device scope '
          'for the Keras training loop.')),
        (flags.DEFINE_boolean, 'use_trivial_model', False,
         'Whether to use a trivial Keras model.'),
        (flags.DEFINE_boolean, 'report_accuracy_metrics', True,
         'Report metrics during training and evaluation.'),
        (flags.DEFINE_boolean, 'use_tensor_lr', True,
         'Use learning rate tensor instead of a callback.'),
        (flags.DEFINE_boolean, 'enable_tensorboard', False,
         'Whether to enable TensorBoard callback.'),
        (flags.DEFINE_string, 'profile_steps', None,
         ('Save profiling data to model dir at given range of global steps. The '
          'value must be a comma separated pair of positive integers, specifying '
          'the first and last step to profile. For example, "--profile_steps=2,4" '
          'triggers the profiler to process 3 steps, starting from the 2nd step. '
          'Note that profiler has a non-trivial performance overhead, and the '
          'output file can be gigantic if profiling many steps.')),
        (flags.DEFINE_integer, 'train_steps', None,
         ('The number of steps to run for training. If it is larger than '
          '# batches per epoch, then use # batches per epoch. This flag will be '
          'ignored if train_epochs is set to be larger than 1. ')),
        (flags.DEFINE_boolean, 'batchnorm_spatial_persistent', True,
         'Enable the spacial persistent mode for CuDNN batch norm kernel.'),
        (flags.DEFINE_boolean, 'enable_get_next_as_optional', False,
         'Enable get_next_as_optional behavior in DistributedIterator.'),
        (flags.DEFINE_boolean, 'enable_checkpoint_and_export', False,
         'Whether to enable a checkpoint callback and export the savedmodel.'),
        (flags.DEFINE_string, 'tpu', '', 'TPU address to connect to.'),
        (flags.DEFINE_integer, 'steps_per_loop', None,
         ('Number of steps per training loop. Only training step happens '
          'inside the loop. Callbacks will not be called inside. Will be capped at '
          'steps per epoch.')),
        (flags.DEFINE_boolean, 'use_tf_while_loop', True,
         ('Whether to build a tf.while_loop inside the training loop on the '
          'host. Setting it to True is critical to have peak performance on '
          'TPU.')),
    )
    for definer, flag_name, flag_default, flag_help in keras_flag_table:
        definer(name=flag_name, default=flag_default, help=flag_help)

    # Optional flags, registered only when the caller asks for them.
    if model:
        flags.DEFINE_string(
            name='model',
            default='resnet50_v1.5',
            help='Name of model preset. (mobilenet, resnet50_v1.5)')

    if optimizer:
        flags.DEFINE_string(
            name='optimizer',
            default='resnet50_default',
            help=('Name of optimizer preset. '
                  '(mobilenet_default, resnet50_default)'))
        # TODO(kimjaehong): Replace as general hyper-params not only for
        # mobilenet.
        mobilenet_lr_flags = (
            ('initial_learning_rate_per_sample', 0.00007,
             ('Initial value of learning rate per sample for '
              'mobilenet_default.')),
            ('lr_decay_factor', 0.94,
             'Learning rate decay factor for mobilenet_default.'),
            ('num_epochs_per_decay', 2.5,
             'Number of epochs per decay for mobilenet_default.'),
        )
        for flag_name, flag_default, flag_help in mobilenet_lr_flags:
            flags.DEFINE_float(
                name=flag_name, default=flag_default, help=flag_help)

    if pretrained_filepath:
        flags.DEFINE_string(
            name='pretrained_filepath',
            default='',
            help='Pretrained file path.')