Beispiel #1
0
def define_resnet_flags(resnet_size_choices=None):
  """Add flags and validators for ResNet."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version', short_name='rv', default='2',
      enum_values=['1', '2'],
      help=flags_core.help_wrap(
          'Version of ResNet. (1 or 2) See README.md for details.'))

  choice_kwargs = dict(
      name='resnet_size', short_name='rs', default='50',
      help=flags_core.help_wrap('The size of the ResNet model to use.'))

  if resnet_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)

  # The current implementation of ResNet v1 is numerically unstable when run
  # with fp16 and will produce NaN errors soon after training begins.
  msg = ('ResNet version 1 is not currently supported with fp16. '
         'Please use version 2 instead.')
  @flags.multi_flags_validator(['dtype', 'resnet_version'], message=msg)
  def _forbid_v1_fp16(flag_values):  # pylint: disable=unused-variable
    return (flags_core.DTYPE_MAP[flag_values['dtype']][0] != tf.float16 or
            flag_values['resnet_version'] != '1')
Beispiel #2
0
def define_resnet_flags(resnet_size_choices=None):
  """Add flags and validators for ResNet."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version', short_name='rv', default='2',
      enum_values=['1', '2'],
      help=flags_core.help_wrap(
          'Version of ResNet. (1 or 2) See README.md for details.'))

  choice_kwargs = dict(
      name='resnet_size', short_name='rs', default='50',
      help=flags_core.help_wrap('The size of the ResNet model to use.'))

  if resnet_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)

  # The current implementation of ResNet v1 is numerically unstable when run
  # with fp16 and will produce NaN errors soon after training begins.
  msg = ('ResNet version 1 is not currently supported with fp16. '
         'Please use version 2 instead.')
  @flags.multi_flags_validator(['dtype', 'resnet_version'], message=msg)
  def _forbid_v1_fp16(flag_values):  # pylint: disable=unused-variable
    return (flags_core.DTYPE_MAP[flag_values['dtype']][0] != tf.float16 or
            flag_values['resnet_version'] != '1')
Beispiel #3
0
def define_resnet_flags(resnet_size_choices=None):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='2',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Beispiel #4
0
def define_resnet_flags(resnet_size_choices=None):
  """Add flags and validators for ResNet."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version', short_name='rv', default='2',
      enum_values=['1', '2'],
      help=flags_core.help_wrap(
          'Version of ResNet. (1 or 2) See README.md for details.'))
  flags.DEFINE_bool(
      name='fine_tune', short_name='ft', default=False,
      help=flags_core.help_wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
      help=flags_core.help_wrap(
          'If not None initialize all the network except the final layer with '
          'these values'))
  flags.DEFINE_boolean(
      name="eval_only", default=False,
      help=flags_core.help_wrap('Skip training and only perform evaluation on '
                                'the latest checkpoint.'))

  choice_kwargs = dict(
      name='resnet_size', short_name='rs', default='50',
      help=flags_core.help_wrap('The size of the ResNet model to use.'))

  if resnet_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
def define_keras_benchmark_flags():
  """Add flags for keras built-in application models."""
  flags_core.define_base(hooks=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      data_format="channels_last",
      use_synthetic_data=True,
      batch_size=32,
      train_epochs=2)

  flags.DEFINE_enum(
      name="model", default=None,
      enum_values=MODELS.keys(), case_sensitive=False,
      help=flags_core.help_wrap(
          "Model to be benchmarked."))

  flags.DEFINE_list(
      name="callbacks",
      default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
      help=flags_core.help_wrap(
          "A list of (case insensitive) strings to specify the names of "
          "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
          "LoggingMetricCallback`"))
Beispiel #6
0
def define_wide_deep_flags():
    """Add supervised learning flags, as well as wide-deep model type."""
    flags_core.define_base(clean=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           stop_threshold=True,
                           hooks=True,
                           export_dir=True)
    flags_core.define_benchmark()
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=True,
                                  intra_op=True,
                                  synthetic_data=False,
                                  max_train_steps=False,
                                  dtype=False,
                                  all_reduce_alg=False)

    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(name="model_type",
                      short_name="mt",
                      default="wide_deep",
                      enum_values=['wide', 'deep', 'wide_deep'],
                      help="Select model topology.")
    flags.DEFINE_boolean(
        name="download_if_missing",
        default=True,
        help=flags_core.help_wrap(
            "Download data to data_dir if it is not already present."))
Beispiel #7
0
def define_net_flags():
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    #flags.DEFINE_enum(
    #    name='resnet_version', short_name='rv', default='1',
    #    enum_values=['1', '2'],
    #    help=flags_core.help_wrap(
    #        'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
Beispiel #8
0
def define_keras_benchmark_flags():
  """Add flags for keras built-in application models."""
  flags_core.define_base(hooks=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      data_format="channels_last",
      use_synthetic_data=True,
      batch_size=32,
      train_epochs=2)

  flags.DEFINE_enum(
      name="model", default=None,
      enum_values=MODELS.keys(), case_sensitive=False,
      help=flags_core.help_wrap(
          "Model to be benchmarked."))

  flags.DEFINE_integer(
      name="num_train_images", default=1000,
      help=flags_core.help_wrap(
          "The number of synthetic images for training. The default value is "
          "1000."))

  flags.DEFINE_integer(
      name="num_eval_images", default=50,
      help=flags_core.help_wrap(
          "The number of synthetic images for evaluation. The default value is "
          "50."))

  flags.DEFINE_boolean(
      name="eager", default=False, help=flags_core.help_wrap(
          "To enable eager execution. Note that if eager execution is enabled, "
          "only one GPU is utilized even if multiple GPUs are provided and "
          "multi_gpu_model is used."))

  flags.DEFINE_boolean(
      name="dist_strat", default=False, help=flags_core.help_wrap(
          "To enable distribution strategy for model training and evaluation. "
          "Number of GPUs used for distribution strategy can be set by the "
          "argument --num_gpus."))

  flags.DEFINE_list(
      name="callbacks",
      default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
      help=flags_core.help_wrap(
          "A list of (case insensitive) strings to specify the names of "
          "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
          "LoggingMetricCallback`"))

  @flags.multi_flags_validator(
      ["eager", "dist_strat"],
      message="Both --eager and --dist_strat were set. Only one can be "
              "defined, as DistributionStrategy is not supported in Eager "
              "execution currently.")
  # pylint: disable=unused-variable
  def _check_eager_dist_strat(flag_dict):
    return not(flag_dict["eager"] and flag_dict["dist_strat"])
Beispiel #9
0
def define_flags():
  flags_core.define_base(num_gpu=False)
  flags_core.define_performance(
      num_parallel_calls=True, inter_op=True,  intra_op=True,
      dynamic_loss_scale=True, loss_scale=True)
  flags_core.define_image()
  flags_core.define_benchmark()
Beispiel #10
0
def define_wide_deep_flags():
    """Add supervised learning flags, as well as wide-deep model type."""
    flags_core.define_base()
    flags_core.define_benchmark()

    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(name="model_type",
                      short_name="mt",
                      default="wide_deep",
                      enum_values=['wide', 'deep', 'wide_deep'],
                      help="Select model topology.")
    flags.DEFINE_boolean(
        name="download_if_missing",
        default=True,
        help=flags_core.help_wrap(
            "Download data to data_dir if it is not already present."))
    flags.DEFINE_integer(
        name="inter_op_parallelism_threads",
        short_name="inter",
        default=0,
        help="Number of threads to use for inter-op parallelism. "
        "If left as default value of 0, the system will pick an appropriate number."
    )
    flags.DEFINE_integer(
        name="intra_op_parallelism_threads",
        short_name="intra",
        default=0,
        help="Number of threads to use for intra-op parallelism. "
        "If left as default value of 0, the system will pick an appropriate number."
    )
Beispiel #11
0
def define_keras_benchmark_flags():
    """Add flags for keras built-in application models."""
    flags_core.define_base(hooks=False)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(data_format="channels_last",
                            use_synthetic_data=True,
                            batch_size=32,
                            train_epochs=2)

    flags.DEFINE_enum(name="model",
                      default=None,
                      enum_values=MODELS.keys(),
                      case_sensitive=False,
                      help=flags_core.help_wrap("Model to be benchmarked."))

    flags.DEFINE_list(
        name="callbacks",
        default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
        help=flags_core.help_wrap(
            "A list of (case insensitive) strings to specify the names of "
            "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
            "LoggingMetricCallback`"))
Beispiel #12
0
def define_keras_benchmark_flags():
    """Add flags for keras built-in application models."""
    flags_core.define_base(hooks=False)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(
        data_format="channels_last",
        use_synthetic_data=True,
        batch_size=32,
        train_epochs=2)

    flags.DEFINE_enum(
        name="model", default=None,
        enum_values=MODELS.keys(), case_sensitive=False,
        help=flags_core.help_wrap(
            "Model to be benchmarked."))

    flags.DEFINE_integer(
        name="num_train_images", default=1000,
        help=flags_core.help_wrap(
            "The number of synthetic images for training. The default value is "
            "1000."))

    flags.DEFINE_integer(
        name="num_eval_images", default=50,
        help=flags_core.help_wrap(
            "The number of synthetic images for evaluation. The default value is "
            "50."))

    flags.DEFINE_boolean(
        name="eager", default=False, help=flags_core.help_wrap(
            "To enable eager execution. Note that if eager execution is enabled, "
            "only one GPU is utilized even if multiple GPUs are provided and "
            "multi_gpu_model is used."))

    flags.DEFINE_boolean(
        name="dist_strat", default=False, help=flags_core.help_wrap(
            "To enable distribution strategy for model training and evaluation. "
            "Number of GPUs used for distribution strategy can be set by the "
            "argument --num_gpus."))

    flags.DEFINE_list(
        name="callbacks",
        default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
        help=flags_core.help_wrap(
            "A list of (case insensitive) strings to specify the names of "
            "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
            "LoggingMetricCallback`"))

    @flags.multi_flags_validator(
        ["eager", "dist_strat"],
        message="Both --eager and --dist_strat were set. Only one can be "
                "defined, as DistributionStrategy is not supported in Eager "
                "execution currently.")
    # pylint: disable=unused-variable
    def _check_eager_dist_strat(flag_dict):
        return not (flag_dict["eager"] and flag_dict["dist_strat"])
def define_flags():
    """Add supervised learning flags, as well as wide-deep model type."""
    flags_core.define_base()
    flags_core.define_benchmark()

    flags.adopt_module_key_flags(flags_core)

    model_dir = os.path.join(DIRPROJECT, "patients_model")
    export_dir = os.path.join(model_dir, "export_model")

    flags_core.set_defaults(data_dir=os.path.join(DIRPROJECT, 'data'),
                            model_dir=model_dir,
                            export_dir=export_dir,
                            hidden_units=[60, 40, 40, 20],
                            train_epochs=1000,
                            epochs_between_evals=1,
                            batch_size=64,
                            learningrate=0.001)

    flags.DEFINE_bool('enable_dp', False, 'Enable Differential Privacy')
    flags.DEFINE_float('dp_eps', 10, 'Differential Privacy Epsilon')
    flags.DEFINE_float('dp_delta', 1e-5, 'Differential Privacy Delta')
    flags.DEFINE_float('dp_sigma', 0.5, 'Differential Privacy Noise Amount')
    flags.DEFINE_float('dp_c', 1, 'Differential Privacy Norm Clipping Amount')
    flags.DEFINE_integer('dp_num_microbatches', 64,
                         'Number of microbatches to use in DP optimizer')
    flags.DEFINE_bool('force_cpu', False, 'Force CPU usage')
def define_train_higgs_flags():
  """Add tree related flags as well as training/eval configuration."""
  flags_core.define_base(clean=False, stop_threshold=False, batch_size=False,
                         num_gpu=False)
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_integer(
      name="train_start", default=0,
      help=help_wrap("Start index of train examples within the data."))
  flags.DEFINE_integer(
      name="train_count", default=1000000,
      help=help_wrap("Number of train examples within the data."))
  flags.DEFINE_integer(
      name="eval_start", default=10000000,
      help=help_wrap("Start index of eval examples within the data."))
  flags.DEFINE_integer(
      name="eval_count", default=1000000,
      help=help_wrap("Number of eval examples within the data."))

  flags.DEFINE_integer(
      "n_trees", default=100, help=help_wrap("Number of trees to build."))
  flags.DEFINE_integer(
      "max_depth", default=6, help=help_wrap("Maximum depths of each tree."))
  flags.DEFINE_float(
      "learning_rate", default=0.1,
      help=help_wrap("The learning rate."))

  flags_core.set_defaults(data_dir="/tmp/higgs_data",
                          model_dir="/tmp/higgs_model")
def define_resnet_flags(resnet_size_choices=None):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  datasets_num_parallel_batches=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='1',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=flags_core.help_wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
def define_flags():
    """Add flags to run the main function."""

    # Add common flags
    flags_core.define_base(export_dir=False)
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=False,
                                  max_train_steps=False,
                                  dtype=False)
    flags_core.define_benchmark()

    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(model_dir=None,
                            train_epochs=30,
                            batch_size=30,
                            hooks="")

    # Add domain-specific flags
    flags.DEFINE_enum(
        name="dataset",
        default=dataset.DATASET_IMDB,
        enum_values=[dataset.DATASET_IMDB],
        case_sensitive=False,
        help=flags_core.help_wrap("Dataset to be trained and evaluated."))

    flags.DEFINE_integer(name="vocabulary_size",
                         default=6000,
                         help=flags_core.help_wrap(
                             "The number of the most frequent tokens"
                             "to be used from the corpus."))

    flags.DEFINE_integer(
        name="sentence_length",
        default=200,
        help=flags_core.help_wrap(
            "The number of words in each sentence. Longer sentences get cut,"
            "shorter ones padded."))

    flags.DEFINE_integer(
        name="embedding_dim",
        default=256,
        help=flags_core.help_wrap("The dimension of the Embedding layer."))

    flags.DEFINE_integer(
        name="cnn_filters",
        default=512,
        help=flags_core.help_wrap("The number of the CNN layer filters."))

    flags.DEFINE_float(
        name="dropout_rate",
        default=0.7,
        help=flags_core.help_wrap("The rate for the Dropout layer."))
Beispiel #17
0
def define_vgg_flags(vgg_size_choices=None):
  """Add flags and validators for VGG."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                datasets_num_parallel_batches=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_bool(
      name='fine_tune', short_name='ft', default=False,
      help=flags_core.help_wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
      help=flags_core.help_wrap(
          'If not None initialize all the network except the final layer with '
          'these values'))
  flags.DEFINE_boolean(
      name='eval_only', default=False,
      help=flags_core.help_wrap('Skip training and only perform evaluation on '
                                'the latest checkpoint.'))
  flags.DEFINE_boolean(
      name='image_bytes_as_serving_input', default=False,
      help=flags_core.help_wrap(
          'If True exports savedmodel with serving signature that accepts '
          'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
          'represents the image. The former is easier to use for serving at '
          'the expense of image resize/cropping being done as part of model '
          'inference. Note, this flag only applies to ImageNet and cannot '
          'be used for CIFAR.'))
  flags.DEFINE_boolean(
      name='turn_off_distribution_strategy', default=False,
      help=flags_core.help_wrap('Set to True to not use distribution '
                                'strategies.'))

  lottery.add_flags(flags)

  choice_kwargs = dict(
      name='vgg_size', short_name='vs', default='16',
      help=flags_core.help_wrap('The size of the VGG model to use.'))

  if vgg_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=vgg_size_choices, **choice_kwargs)

  flags.DEFINE_string('lth_prediction_result_dir', '', 'Prediction result dir.')
  flags.DEFINE_boolean('lth_generate_predictions', False, 'generate_predictions')
  flags.DEFINE_boolean('lth_no_pruning', False, 'no pruning')
def define_flags():
    """Add supervised learning flags, as well as wide-deep model type."""
    flags_core.define_base()
    flags_core.define_benchmark()

    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(data_dir=DIRPROJECT + 'data/',
                            model_dir='/tmp/patients_model',
                            export_dir='/tmp/patients_model/export_model',
                            train_epochs=250,
                            epochs_between_evals=1,
                            batch_size=160)
Beispiel #19
0
def define_wide_deep_flags():
  """Add supervised learning flags, as well as wide-deep model type."""
  flags_core.define_base()
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name="model_type", short_name="mt", default="wide_deep",
      enum_values=['wide', 'deep', 'wide_deep'],
      help="Select model topology.")
  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))
Beispiel #20
0
def define_wide_deep_flags():
  """Add supervised learning flags, as well as wide-deep model type."""
  flags_core.define_base()
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name="model_type", short_name="mt", default="wide_deep",
      enum_values=['wide', 'deep', 'wide_deep'],
      help="Select model topology.")
  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))
def define_flags():
    flags_core.define_base(clean=True,
                           num_gpu=False,
                           train_epochs=True,
                           epochs_between_evals=True)
    flags_core.define_performance(num_parallel_calls=True,
                                  inter_op=True,
                                  intra_op=True,
                                  dynamic_loss_scale=True,
                                  loss_scale=True,
                                  synthetic_data=True,
                                  dtype=True)
    flags_core.define_image()
    flags_core.define_benchmark()
Beispiel #22
0
def define_resnet_flags(resnet_size_choices=None):
  """Add flags and validators for ResNet."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                datasets_num_parallel_batches=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version', short_name='rv', default='1',
      enum_values=['1', '2'],
      help=flags_core.help_wrap(
          'Version of ResNet. (1 or 2) See README.md for details.'))
  flags.DEFINE_bool(
      name='fine_tune', short_name='ft', default=False,
      help=flags_core.help_wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
      help=flags_core.help_wrap(
          'If not None initialize all the network except the final layer with '
          'these values'))
  flags.DEFINE_boolean(
      name='eval_only', default=False,
      help=flags_core.help_wrap('Skip training and only perform evaluation on '
                                'the latest checkpoint.'))
  flags.DEFINE_boolean(
      name='image_bytes_as_serving_input', default=False,
      help=flags_core.help_wrap(
          'If True exports savedmodel with serving signature that accepts '
          'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
          'represents the image. The former is easier to use for serving at '
          'the expense of image resize/cropping being done as part of model '
          'inference. Note, this flag only applies to ImageNet and cannot '
          'be used for CIFAR.'))
  flags.DEFINE_boolean(
      name='turn_off_distribution_strategy', default=False,
      help=flags_core.help_wrap('Set to True to not use distribution '
                                'strategies.'))
  choice_kwargs = dict(
      name='resnet_size', short_name='rs', default='50',
      help=flags_core.help_wrap('The size of the ResNet model to use.'))

  if resnet_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Beispiel #23
0
def define_wide_deep_flags():
    flags_core.define_base()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name="model_type", short_name="mt", default="wide_deep",
        enum_values=['wide', 'deep', 'wide_deep'],
        help="Select model type.")

    flags_core.set_defaults(data_dir='data',
                            model_dir='model',
                            train_epochs=60,
                            epochs_between_evals=2,
                            batch_size=300)
Beispiel #24
0
def define_wide_deep_flags():
  """Add supervised learning flags, as well as wide-deep model type."""
  flags_core.define_base()
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name="model_type", short_name="mt", default="wide_deep",
      enum_values=['wide', 'deep', 'wide_deep'],
      help="Select model topology.")

  flags_core.set_defaults(data_dir='/tmp/census_data',
                          model_dir='/tmp/census_model',
                          train_epochs=40,
                          epochs_between_evals=2,
                          batch_size=40)
Beispiel #25
0
def define_wide_deep_flags():
  """Add supervised learning flags, as well as wide-deep model type."""
  flags_core.define_base()
  flags_core.define_benchmark()
  flags_core.define_performance(
      num_parallel_calls=False, inter_op=True, intra_op=True,
      synthetic_data=False, max_train_steps=False, dtype=False,
      all_reduce_alg=False)

  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name="model_type", short_name="mt", default="wide_deep",
      enum_values=['wide', 'deep', 'wide_deep'],
      help="Select model topology.")
  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))
Beispiel #26
0
def define_densenet_flags():
  """Add flags and validators for DenseNet."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  # print('==============================')
  # 这一部分定义的相当于 params

  flags.DEFINE_integer('d', 40, 'Depth of model')
  flags.DEFINE_integer('k', 12, 'Densenet k')
  flags.DEFINE_integer('compressionRate', 2, 'Densenet transition layer compression rate')
  flags.DEFINE_integer('expansion', 4, 'DenseNet bottleneck structure expansion rate')
  flags.DEFINE_boolean('bottleneck', False, 'Densenet whether to use bottleneck structure or nor')
  
  '''
def define_resnet_flags(resnet_size_choices=None):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    hparams_config.define_common_flags(flags)

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Beispiel #28
0
def define_keras_benchmark_flags():
    """Add flags for keras built-in application models."""
    flags_core.define_base(hooks=False)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(data_format="channels_last",
                            use_synthetic_data=True,
                            batch_size=32,
                            train_epochs=2)

    flags.DEFINE_enum(name="model",
                      default=None,
                      enum_values=MODELS.keys(),
                      case_sensitive=False,
                      help=flags_core.help_wrap("Model to be benchmarked."))

    flags.DEFINE_integer(
        name="num_images",
        default=1000,
        help=flags_core.help_wrap(
            "The number of synthetic images for training and evaluation. The "
            "default value is 1000."))

    flags.DEFINE_boolean(
        name="eager",
        default=False,
        help=flags_core.help_wrap(
            "To enable eager execution. Note that if eager execution is enabled, "
            "only one GPU is utilized even if multiple GPUs are provided and "
            "multi_gpu_model is used."))

    flags.DEFINE_list(
        name="callbacks",
        default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
        help=flags_core.help_wrap(
            "A list of (case insensitive) strings to specify the names of "
            "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
            "LoggingMetricCallback`"))
Beispiel #29
0
def define_keras_benchmark_flags():
  """Add flags for keras built-in application models."""
  flags_core.define_base(hooks=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      data_format="channels_last",
      use_synthetic_data=True,
      batch_size=32,
      train_epochs=2)

  flags.DEFINE_enum(
      name="model", default=None,
      enum_values=MODELS.keys(), case_sensitive=False,
      help=flags_core.help_wrap(
          "Model to be benchmarked."))

  flags.DEFINE_integer(
      name="num_images", default=1000,
      help=flags_core.help_wrap(
          "The number of synthetic images for training and evaluation. The "
          "default value is 1000."))

  flags.DEFINE_boolean(
      name="eager", default=False, help=flags_core.help_wrap(
          "To enable eager execution. Note that if eager execution is enabled, "
          "only one GPU is utilized even if multiple GPUs are provided and "
          "multi_gpu_model is used."))

  flags.DEFINE_list(
      name="callbacks",
      default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
      help=flags_core.help_wrap(
          "A list of (case insensitive) strings to specify the names of "
          "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
          "LoggingMetricCallback`"))
Beispiel #30
0
def define_transformer_flags():
    """Add flags and flag validators for running transformer_main."""
    # Add common flags (data_dir, model_dir, train_epochs, etc.).
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=True,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=True,
                                  max_train_steps=False,
                                  dtype=True,
                                  loss_scale=True,
                                  all_reduce_alg=True,
                                  enable_xla=True)

    # Additional performance flags
    # TODO(b/76028325): Remove when generic layout optimizer is ready.
    flags.DEFINE_boolean(
        name='enable_grappler_layout_optimizer',
        default=True,
        help='Enable Grappler layout optimizer. Currently Grappler can '
        'de-optimize fp16 graphs by forcing NCHW layout for all '
        'convolutions and batch normalizations, and this flag allows to '
        'disable it.')

    flags_core.define_benchmark()
    flags_core.define_device(tpu=True)

    flags.DEFINE_integer(
        name='train_steps',
        short_name='ts',
        default=300000,
        help=flags_core.help_wrap('The number of steps used to train.'))
    flags.DEFINE_integer(
        name='steps_between_evals',
        short_name='sbe',
        default=1000,
        help=flags_core.help_wrap(
            'The Number of training steps to run between evaluations. This is '
            'used if --train_steps is defined.'))
    flags.DEFINE_boolean(name='enable_time_history',
                         default=True,
                         help='Whether to enable TimeHistory callback.')
    flags.DEFINE_boolean(name='enable_tensorboard',
                         default=False,
                         help='Whether to enable Tensorboard callback.')
    flags.DEFINE_boolean(name='enable_metrics_in_training',
                         default=False,
                         help='Whether to enable metrics during training.')
    flags.DEFINE_string(
        name='profile_steps',
        default=None,
        help='Save profiling data to model dir at given range of steps. The '
        'value must be a comma separated pair of positive integers, specifying '
        'the first and last step to profile. For example, "--profile_steps=2,4" '
        'triggers the profiler to process 3 steps, starting from the 2nd step. '
        'Note that profiler has a non-trivial performance overhead, and the '
        'output file can be gigantic if profiling many steps.')
    # Set flags from the flags_core module as 'key flags' so they're listed when
    # the '-h' flag is used. Without this line, the flags defined above are
    # only shown in the full `--helpful` help text.
    flags.adopt_module_key_flags(flags_core)

    # Add transformer-specific flags
    flags.DEFINE_enum(
        name='param_set',
        short_name='mp',
        default='big',
        enum_values=PARAMS_MAP.keys(),
        help=flags_core.help_wrap(
            'Parameter set to use when creating and training the model. The '
            'parameters define the input shape (batch size and max length), '
            'model configuration (size of embedding, # of hidden layers, etc.), '
            'and various other settings. The big parameter set increases the '
            'default batch size, embedding/hidden size, and filter size. For a '
            'complete list of parameters, please see model/model_params.py.'))

    flags.DEFINE_bool(
        name='static_batch',
        short_name='sb',
        default=False,
        help=flags_core.help_wrap(
            'Whether the batches in the dataset should have static shapes. In '
            'general, this setting should be False. Dynamic shapes allow the '
            'inputs to be grouped so that the number of padding tokens is '
            'minimized, and helps model training. In cases where the input shape '
            'must be static (e.g. running on TPU), this setting will be ignored '
            'and static batching will always be used.'))
    flags.DEFINE_integer(
        name='max_length',
        short_name='ml',
        default=256,
        help=flags_core.help_wrap(
            'Max sentence length for Transformer. Default is 256. Note: Usually '
            'it is more effective to use a smaller max length if static_batch is '
            'enabled, e.g. 64.'))

    # Flags for training with steps (may be used for debugging)
    flags.DEFINE_integer(
        name='validation_steps',
        short_name='vs',
        default=64,
        help=flags_core.help_wrap('The number of steps used in validation.'))

    # BLEU score computation
    flags.DEFINE_string(
        name='bleu_source',
        short_name='bls',
        default=None,
        help=flags_core.help_wrap(
            'Path to source file containing text translate when calculating the '
            'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
            'Use the flag --stop_threshold to stop the script based on the '
            'uncased BLEU score.'))
    flags.DEFINE_string(
        name='bleu_ref',
        short_name='blr',
        default=None,
        help=flags_core.help_wrap(
            'Path to source file containing text translate when calculating the '
            'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
            'Use the flag --stop_threshold to stop the script based on the '
            'uncased BLEU score.'))
    flags.DEFINE_string(
        name='vocab_file',
        short_name='vf',
        default=None,
        help=flags_core.help_wrap(
            'Path to subtoken vocabulary file. If data_download.py was used to '
            'download and encode the training data, look in the data_dir to find '
            'the vocab file.'))
    flags.DEFINE_string(
        name='mode',
        default='train',
        help=flags_core.help_wrap('mode: train, eval, or predict'))

    flags_core.set_defaults(data_dir='/tmp/translate_ende',
                            model_dir='/tmp/transformer_model',
                            batch_size=None,
                            train_epochs=10)

    # pylint: disable=unused-variable
    @flags.multi_flags_validator(
        ['mode', 'train_epochs'],
        message='--train_epochs must be defined in train mode')
    def _check_train_limits(flag_dict):
        if flag_dict['mode'] == 'train':
            return flag_dict['train_epochs'] is not None
        return True

    @flags.multi_flags_validator(
        ['bleu_source', 'bleu_ref'],
        message='Both or neither --bleu_source and --bleu_ref must be defined.'
    )
    def _check_bleu_files(flags_dict):
        return (flags_dict['bleu_source'] is None) == (flags_dict['bleu_ref']
                                                       is None)

    @flags.multi_flags_validator(
        ['bleu_source', 'bleu_ref', 'vocab_file'],
        message='--vocab_file must be defined if --bleu_source and --bleu_ref '
        'are defined.')
    def _check_bleu_vocab_file(flags_dict):
        if flags_dict['bleu_source'] and flags_dict['bleu_ref']:
            return flags_dict['vocab_file'] is not None
        return True

    @flags.multi_flags_validator(
        ['export_dir', 'vocab_file'],
        message='--vocab_file must be defined if --export_dir is set.')
    def _check_export_vocab_file(flags_dict):
        if flags_dict['export_dir']:
            return flags_dict['vocab_file'] is not None
        return True

    # pylint: enable=unused-variable

    flags_core.require_cloud_storage(['data_dir', 'model_dir', 'export_dir'])
Beispiel #31
0
def main(_):
    if not flags.FLAGS.benchmark_log_dir:
        print("Usage: benchmark_uploader.py --benchmark_log_dir=/some/dir")
        sys.exit(1)

    uploader = benchmark_uploader.BigQueryUploader(
        gcp_project=flags.FLAGS.gcp_project)
    run_id = str(uuid.uuid4())
    run_json_file = os.path.join(
        flags.FLAGS.benchmark_log_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME)
    metric_json_file = os.path.join(
        flags.FLAGS.benchmark_log_dir, logger.METRIC_LOG_FILE_NAME)

    uploader.upload_benchmark_run_file(
        flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_run_table, run_id,
        run_json_file)
    uploader.upload_metric_file(
        flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_metric_table, run_id,
        metric_json_file)
    # Assume the run finished successfully before user invoke the upload script.
    uploader.insert_run_status(
        flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_run_status_table,
        run_id, logger.RUN_STATUS_SUCCESS)


if __name__ == "__main__":
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)
    absl_app.run(main=main)
Beispiel #32
0
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_integer(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="beta1", default=0.9,
      help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="beta2", default=0.999,
      help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="epsilon", default=1e-8,
      help=flags_core.help_wrap("epsilon hyperparameter for the Adam "
                                "optimizer."))

  flags.DEFINE_float(
      name="hr_threshold", default=1.0,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_enum(
      name="constructor_type", default="bisection",
      enum_values=["bisection", "materialized"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Strategy to use for generating false negatives. materialized has a"
          "precompute that scales badly, but a faster per-epoch construction"
          "time and can be faster on very large systems."))

  flags.DEFINE_bool(
      name="ml_perf", default=False,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))

  flags.DEFINE_bool(
      name="output_ml_perf_compliance_logging", default=False,
      help=flags_core.help_wrap(
          "If set, output the MLPerf compliance logging. This is only useful "
          "if one is running the model for MLPerf. See "
          "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
          "#submission-compliance-logs for details. This uses sudo and so may "
          "ask for your password, as root access is needed to clear the system "
          "caches, which is required for MLPerf compliance."
      )
  )

  flags.DEFINE_integer(
      name="seed", default=None, help=flags_core.help_wrap(
          "This value will be used to seed both NumPy and TensorFlow."))

  flags.DEFINE_boolean(
      name="turn_off_distribution_strategy",
      default=False,
      help=flags_core.help_wrap(
          "If set, do not use any distribution strategy."))

  @flags.validator("eval_batch_size", "eval_batch_size must be at least {}"
                   .format(rconst.NUM_EVAL_NEGATIVES + 1))
  def eval_size_check(eval_batch_size):
    return (eval_batch_size is None or
            int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)

  flags.DEFINE_bool(
      name="use_xla_for_gpu", default=False, help=flags_core.help_wrap(
          "If True, use XLA for the model function. Only works when using a "
          "GPU. On TPUs, XLA is always used"))

  xla_message = "--use_xla_for_gpu is incompatible with --tpu"
  @flags.multi_flags_validator(["use_xla_for_gpu", "tpu"], message=xla_message)
  def xla_validator(flag_dict):
    return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]

  flags.DEFINE_bool(
      name="clone_model_in_keras_dist_strat",
      default=True,
      help=flags_core.help_wrap(
          'If False, then the experimental code path is used that doesn\'t '
          "clone models for distribution."))

  flags.DEFINE_bool(
      name="early_stopping",
      default=False,
      help=flags_core.help_wrap(
          'If True, we stop the training when it reaches hr_threshold'))
Beispiel #33
0
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_string(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="beta1", default=0.9,
      help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="beta2", default=0.999,
      help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="epsilon", default=1e-8,
      help=flags_core.help_wrap("epsilon hyperparameter for the Adam "
                                "optimizer."))

  flags.DEFINE_float(
      name="hr_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_bool(
      name="ml_perf", default=False,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))

  flags.DEFINE_bool(
      name="output_ml_perf_compliance_logging", default=False,
      help=flags_core.help_wrap(
          "If set, output the MLPerf compliance logging. This is only useful "
          "if one is running the model for MLPerf. See "
          "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
          "#submission-compliance-logs for details. This uses sudo and so may "
          "ask for your password, as root access is needed to clear the system "
          "caches, which is required for MLPerf compliance."
      )
  )

  flags.DEFINE_integer(
      name="seed", default=None, help=flags_core.help_wrap(
          "This value will be used to seed both NumPy and TensorFlow."))

  flags.DEFINE_bool(
      name="hash_pipeline", default=False, help=flags_core.help_wrap(
          "This flag will perform a separate run of the pipeline and hash "
          "batches as they are produced. \nNOTE: this will significantly slow "
          "training. However it is useful to confirm that a random seed is "
          "does indeed make the data pipeline deterministic."))

  @flags.validator("eval_batch_size", "eval_batch_size must be at least {}"
                   .format(rconst.NUM_EVAL_NEGATIVES + 1))
  def eval_size_check(eval_batch_size):
    return (eval_batch_size is None or
            int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)

  flags.DEFINE_bool(
      name="use_subprocess", default=True, help=flags_core.help_wrap(
          "By default, ncf_main.py starts async data generation process as a "
          "subprocess. If set to False, ncf_main.py will assume the async data "
          "generation process has already been started by the user."))

  flags.DEFINE_integer(name="cache_id", default=None, help=flags_core.help_wrap(
      "Use a specified cache_id rather than using a timestamp. This is only "
      "needed to synchronize across multiple workers. Generally this flag will "
      "not need to be set."
  ))

  flags.DEFINE_bool(
      name="use_xla_for_gpu", default=False, help=flags_core.help_wrap(
          "If True, use XLA for the model function. Only works when using a "
          "GPU. On TPUs, XLA is always used"))

  xla_message = "--use_xla_for_gpu is incompatible with --tpu"
  @flags.multi_flags_validator(["use_xla_for_gpu", "tpu"], message=xla_message)
  def xla_validator(flag_dict):
    return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]

  flags.DEFINE_bool(
      name="use_estimator", default=True, help=flags_core.help_wrap(
          "If True, use Estimator to train. Setting to False is slightly "
          "faster, but when False, the following are currently unsupported:\n"
          "  * Using TPUs\n"
          "  * Using more than 1 GPU\n"
          "  * Reloading from checkpoints\n"
          "  * Any hooks specified with --hooks\n"))

  flags.DEFINE_bool(
      name="use_while_loop", default=None, help=flags_core.help_wrap(
          "If set, run an entire epoch in a session.run() call using a "
          "TensorFlow while loop. This can improve performance, but will not "
          "print out losses throughout the epoch. Requires "
          "--use_estimator=false"
      ))

  xla_message = "--use_while_loop requires --use_estimator=false"
  @flags.multi_flags_validator(["use_while_loop", "use_estimator"],
                               message=xla_message)
  def while_loop_validator(flag_dict):
    return (not flag_dict["use_while_loop"] or
            not flag_dict["use_estimator"])
Beispiel #34
0
def define_deep_speech_flags():
    """Add flags for run_deep_speech."""
    # Add common flags
    flags_core.define_base(
        data_dir=False,  # we use train_data_dir and eval_data_dir instead
        export_dir=True,
        train_epochs=True,
        hooks=True,
        num_gpu=True,
        epochs_between_evals=True)
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=False,
                                  max_train_steps=True,
                                  dtype=False)
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(model_dir="/tmp/deep_speech_model/",
                            export_dir="/tmp/deep_speech_saved_model/",
                            train_epochs=10,
                            batch_size=128,
                            hooks="")

    # Deep speech flags
    flags.DEFINE_integer(name="seed",
                         default=1,
                         help=flags_core.help_wrap("The random seed."))

    flags.DEFINE_string(
        name="train_data_dir",
        default="/tmp/librispeech_data/test-clean/LibriSpeech/test-clean.csv",
        help=flags_core.help_wrap("The csv file path of train dataset."))

    flags.DEFINE_string(
        name="eval_data_dir",
        default="/tmp/librispeech_data/test-clean/LibriSpeech/test-clean.csv",
        help=flags_core.help_wrap("The csv file path of evaluation dataset."))

    flags.DEFINE_bool(
        name="sortagrad",
        default=True,
        help=flags_core.help_wrap(
            "If true, sort examples by audio length and perform no "
            "batch_wise shuffling for the first epoch."))

    flags.DEFINE_integer(
        name="sample_rate",
        default=16000,
        help=flags_core.help_wrap("The sample rate for audio."))

    flags.DEFINE_integer(
        name="window_ms",
        default=20,
        help=flags_core.help_wrap("The frame length for spectrogram."))

    flags.DEFINE_integer(name="stride_ms",
                         default=10,
                         help=flags_core.help_wrap("The frame step."))

    flags.DEFINE_string(
        name="vocabulary_file",
        default=_VOCABULARY_FILE,
        help=flags_core.help_wrap("The file path of vocabulary file."))

    # RNN related flags
    flags.DEFINE_integer(name="rnn_hidden_size",
                         default=800,
                         help=flags_core.help_wrap("The hidden size of RNNs."))

    flags.DEFINE_integer(
        name="rnn_hidden_layers",
        default=5,
        help=flags_core.help_wrap("The number of RNN layers."))

    flags.DEFINE_bool(name="use_bias",
                      default=True,
                      help=flags_core.help_wrap(
                          "Use bias in the last fully-connected layer"))

    flags.DEFINE_bool(
        name="is_bidirectional",
        default=True,
        help=flags_core.help_wrap("If rnn unit is bidirectional"))

    flags.DEFINE_enum(name="rnn_type",
                      default="gru",
                      enum_values=deep_speech_model.SUPPORTED_RNNS.keys(),
                      case_sensitive=False,
                      help=flags_core.help_wrap("Type of RNN cell."))

    # Training related flags
    flags.DEFINE_float(name="learning_rate",
                       default=5e-4,
                       help=flags_core.help_wrap("The initial learning rate."))

    flags.DEFINE_bool(name="skip_eval",
                      default=False,
                      help=flags_core.help_wrap("Whether to skip eval"))

    # Evaluation metrics threshold
    flags.DEFINE_float(
        name="wer_threshold",
        default=None,
        help=flags_core.help_wrap(
            "If passed, training will stop when the evaluation metric WER is "
            "greater than or equal to wer_threshold. For libri speech dataset "
            "the desired wer_threshold is 0.23 which is the result achieved by "
            "MLPerf implementation."))
def define_flags():
    flags_core.define_base(multi_gpu=True, num_gpu=False)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
Beispiel #36
0
def define_deep_speech_flags():
  """Add flags for run_deep_speech."""
  # Add common flags
  flags_core.define_base(
      data_dir=False  # we use train_data_dir and eval_data_dir instead
  )
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=False,
      max_train_steps=False,
      dtype=False
  )
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/deep_speech_model/",
      export_dir="/tmp/deep_speech_saved_model/",
      train_epochs=10,
      batch_size=128,
      hooks="")

  # Deep speech flags
  flags.DEFINE_integer(
      name="seed", default=1,
      help=flags_core.help_wrap("The random seed."))

  flags.DEFINE_string(
      name="train_data_dir",
      default="/tmp/librispeech_data/test-clean/LibriSpeech/test-clean.csv",
      help=flags_core.help_wrap("The csv file path of train dataset."))

  flags.DEFINE_string(
      name="eval_data_dir",
      default="/tmp/librispeech_data/test-clean/LibriSpeech/test-clean.csv",
      help=flags_core.help_wrap("The csv file path of evaluation dataset."))

  flags.DEFINE_bool(
      name="sortagrad", default=True,
      help=flags_core.help_wrap(
          "If true, sort examples by audio length and perform no "
          "batch_wise shuffling for the first epoch."))

  flags.DEFINE_integer(
      name="sample_rate", default=16000,
      help=flags_core.help_wrap("The sample rate for audio."))

  flags.DEFINE_integer(
      name="window_ms", default=20,
      help=flags_core.help_wrap("The frame length for spectrogram."))

  flags.DEFINE_integer(
      name="stride_ms", default=10,
      help=flags_core.help_wrap("The frame step."))

  flags.DEFINE_string(
      name="vocabulary_file", default=_VOCABULARY_FILE,
      help=flags_core.help_wrap("The file path of vocabulary file."))

  # RNN related flags
  flags.DEFINE_integer(
      name="rnn_hidden_size", default=800,
      help=flags_core.help_wrap("The hidden size of RNNs."))

  flags.DEFINE_integer(
      name="rnn_hidden_layers", default=5,
      help=flags_core.help_wrap("The number of RNN layers."))

  flags.DEFINE_bool(
      name="use_bias", default=True,
      help=flags_core.help_wrap("Use bias in the last fully-connected layer"))

  flags.DEFINE_bool(
      name="is_bidirectional", default=True,
      help=flags_core.help_wrap("If rnn unit is bidirectional"))

  flags.DEFINE_enum(
      name="rnn_type", default="gru",
      enum_values=deep_speech_model.SUPPORTED_RNNS.keys(),
      case_sensitive=False,
      help=flags_core.help_wrap("Type of RNN cell."))

  # Training related flags
  flags.DEFINE_float(
      name="learning_rate", default=5e-4,
      help=flags_core.help_wrap("The initial learning rate."))

  # Evaluation metrics threshold
  flags.DEFINE_float(
      name="wer_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric WER is "
          "greater than or equal to wer_threshold. For libri speech dataset "
          "the desired wer_threshold is 0.23 which is the result achieved by "
          "MLPerf implementation."))
Beispiel #37
0
def define_resnet_flags(resnet_size_choices=None,
                        dynamic_loss_scale=False,
                        fp16_implementation=False):
    """Add flags and validators for ResNet."""
    flags_core.define_base(clean=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           stop_threshold=True,
                           num_gpu=True,
                           hooks=True,
                           export_dir=True,
                           distribution_strategy=True)
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=True,
                                  intra_op=True,
                                  synthetic_data=True,
                                  dtype=True,
                                  all_reduce_alg=True,
                                  num_packs=True,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  dynamic_loss_scale=dynamic_loss_scale,
                                  fp16_implementation=fp16_implementation,
                                  loss_scale=True,
                                  tf_data_experimental_slack=True,
                                  max_train_steps=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags_core.define_distribution()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='1',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=flags_core.help_wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))
    flags.DEFINE_boolean(
        name='use_train_and_evaluate',
        default=False,
        help=flags_core.help_wrap(
            'If True, uses `tf.estimator.train_and_evaluate` for the training '
            'and evaluation loop, instead of separate calls to `classifier.train '
            'and `classifier.evaluate`, which is the default behavior.'))
    flags.DEFINE_bool(name='enable_lars',
                      default=False,
                      help=flags_core.help_wrap(
                          'Enable LARS optimizer for large batch training.'))
    flags.DEFINE_float(
        name='label_smoothing',
        default=0.0,
        help=flags_core.help_wrap(
            'Label smoothing parameter used in the softmax_cross_entropy'))
    flags.DEFINE_float(name='weight_decay',
                       default=1e-4,
                       help=flags_core.help_wrap(
                           'Weight decay coefficiant for l2 regularization.'))
    flags.DEFINE_float(name='percent',
                       default=0,
                       help=flags_core.help_wrap('percent of data to poison'))
    flags.DEFINE_bool(
        name='adv_train',
        default=False,
        help=flags_core.help_wrap('whether adversarial training'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Beispiel #38
0
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=False,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook")

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="hr_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))
Beispiel #39
0
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=False,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_string(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="hr_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_bool(
      name="ml_perf", default=None,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))
Beispiel #40
0
def define_flags():
  flags_core.define_base(num_gpu=False)
  flags_core.define_performance(dynamic_loss_scale=True)
  flags_core.define_image()
  flags_core.define_benchmark()
Beispiel #41
0
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_integer(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="beta1", default=0.9,
      help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="beta2", default=0.999,
      help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="epsilon", default=1e-8,
      help=flags_core.help_wrap("epsilon hyperparameter for the Adam "
                                "optimizer."))

  flags.DEFINE_float(
      name="hr_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_enum(
      name="constructor_type", default="bisection",
      enum_values=["bisection", "materialized"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Strategy to use for generating false negatives. materialized has a"
          "precompute that scales badly, but a faster per-epoch construction"
          "time and can be faster on very large systems."))

  flags.DEFINE_bool(
      name="ml_perf", default=False,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))

  flags.DEFINE_bool(
      name="output_ml_perf_compliance_logging", default=False,
      help=flags_core.help_wrap(
          "If set, output the MLPerf compliance logging. This is only useful "
          "if one is running the model for MLPerf. See "
          "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
          "#submission-compliance-logs for details. This uses sudo and so may "
          "ask for your password, as root access is needed to clear the system "
          "caches, which is required for MLPerf compliance."
      )
  )

  flags.DEFINE_integer(
      name="seed", default=None, help=flags_core.help_wrap(
          "This value will be used to seed both NumPy and TensorFlow."))

  flags.DEFINE_boolean(
      name="turn_off_distribution_strategy",
      default=False,
      help=flags_core.help_wrap(
          "If set, do not use any distribution strategy."))

  @flags.validator("eval_batch_size", "eval_batch_size must be at least {}"
                   .format(rconst.NUM_EVAL_NEGATIVES + 1))
  def eval_size_check(eval_batch_size):
    return (eval_batch_size is None or
            int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)

  flags.DEFINE_bool(
      name="use_xla_for_gpu", default=False, help=flags_core.help_wrap(
          "If True, use XLA for the model function. Only works when using a "
          "GPU. On TPUs, XLA is always used"))

  xla_message = "--use_xla_for_gpu is incompatible with --tpu"
  @flags.multi_flags_validator(["use_xla_for_gpu", "tpu"], message=xla_message)
  def xla_validator(flag_dict):
    return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]

  flags.DEFINE_bool(
      name="clone_model_in_keras_dist_strat",
      default=True,
      help=flags_core.help_wrap(
          'If False, then the experimental code path is used that doesn\'t '
          "clone models for distribution."))
Beispiel #42
0
def define_transformer_flags():
  """Add flags and flag validators for running transformer_main."""
  # Add common flags (data_dir, model_dir, train_epochs, etc.).
  flags_core.define_base()
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=True
  )
  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

  # Set flags from the flags_core module as "key flags" so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpful` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name="param_set", short_name="mp", default="big",
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
          "Parameter set to use when creating and training the model. The "
          "parameters define the input shape (batch size and max length), "
          "model configuration (size of embedding, # of hidden layers, etc.), "
          "and various other settings. The big parameter set increases the "
          "default batch size, embedding/hidden size, and filter size. For a "
          "complete list of parameters, please see model/model_params.py."))

  flags.DEFINE_bool(
      name="static_batch", default=False,
      help=flags_core.help_wrap(
          "Whether the batches in the dataset should have static shapes. In "
          "general, this setting should be False. Dynamic shapes allow the "
          "inputs to be grouped so that the number of padding tokens is "
          "minimized, and helps model training. In cases where the input shape "
          "must be static (e.g. running on TPU), this setting will be ignored "
          "and static batching will always be used."))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name="train_steps", short_name="ts", default=None,
      help=flags_core.help_wrap("The number of steps used to train."))
  flags.DEFINE_integer(
      name="steps_between_evals", short_name="sbe", default=1000,
      help=flags_core.help_wrap(
          "The Number of training steps to run between evaluations. This is "
          "used if --train_steps is defined."))

  # BLEU score computation
  flags.DEFINE_string(
      name="bleu_source", short_name="bls", default=None,
      help=flags_core.help_wrap(
          "Path to source file containing text translate when calculating the "
          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
          "Use the flag --stop_threshold to stop the script based on the "
          "uncased BLEU score."))
  flags.DEFINE_string(
      name="bleu_ref", short_name="blr", default=None,
      help=flags_core.help_wrap(
          "Path to source file containing text translate when calculating the "
          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
          "Use the flag --stop_threshold to stop the script based on the "
          "uncased BLEU score."))
  flags.DEFINE_string(
      name="vocab_file", short_name="vf", default=None,
      help=flags_core.help_wrap(
          "Path to subtoken vocabulary file. If data_download.py was used to "
          "download and encode the training data, look in the data_dir to find "
          "the vocab file."))

  flags_core.set_defaults(data_dir="/tmp/translate_ende",
                          model_dir="/tmp/transformer_model",
                          batch_size=None,
                          train_epochs=None)

  @flags.multi_flags_validator(
      ["train_epochs", "train_steps"],
      message="Both --train_steps and --train_epochs were set. Only one may be "
              "defined.")
  def _check_train_limits(flag_dict):
    return flag_dict["train_epochs"] is None or flag_dict["train_steps"] is None

  @flags.multi_flags_validator(
      ["bleu_source", "bleu_ref"],
      message="Both or neither --bleu_source and --bleu_ref must be defined.")
  def _check_bleu_files(flags_dict):
    return (flags_dict["bleu_source"] is None) == (
        flags_dict["bleu_ref"] is None)

  @flags.multi_flags_validator(
      ["bleu_source", "bleu_ref", "vocab_file"],
      message="--vocab_file must be defined if --bleu_source and --bleu_ref "
              "are defined.")
  def _check_bleu_vocab_file(flags_dict):
    if flags_dict["bleu_source"] and flags_dict["bleu_ref"]:
      return flags_dict["vocab_file"] is not None
    return True

  @flags.multi_flags_validator(
      ["export_dir", "vocab_file"],
      message="--vocab_file must be defined if --export_dir is set.")
  def _check_export_vocab_file(flags_dict):
    if flags_dict["export_dir"]:
      return flags_dict["vocab_file"] is not None
    return True

  flags_core.require_cloud_storage(["data_dir", "model_dir", "export_dir"])
def main(_):
  if not flags.FLAGS.benchmark_log_dir:
    print("Usage: benchmark_uploader.py --benchmark_log_dir=/some/dir")
    sys.exit(1)

  uploader = benchmark_uploader.BigQueryUploader(
      gcp_project=flags.FLAGS.gcp_project)
  run_id = str(uuid.uuid4())
  run_json_file = os.path.join(
      flags.FLAGS.benchmark_log_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME)
  metric_json_file = os.path.join(
      flags.FLAGS.benchmark_log_dir, logger.METRIC_LOG_FILE_NAME)

  uploader.upload_benchmark_run_file(
      flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_run_table, run_id,
      run_json_file)
  uploader.upload_metric_file(
      flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_metric_table, run_id,
      metric_json_file)
  # Assume the run finished successfully before user invoke the upload script.
  uploader.insert_run_status(
      flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_run_status_table,
      run_id, logger.RUN_STATUS_SUCCESS)


if __name__ == "__main__":
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)
  absl_app.run(main=main)
Beispiel #44
0
 def setUpClass(cls):  # pylint: disable=invalid-name
   super(BenchmarkLoggerTest, cls).setUpClass()
   flags_core.define_benchmark()
Beispiel #45
0
def define_keras_flags(
    dynamic_loss_scale=True,
    model=False,
    optimizer=False,
    pretrained_filepath=False,
    dataset=False):
  """Define flags for Keras models."""
  flags_core.define_base(clean=True, num_gpu=True, run_eagerly=True,
                         train_epochs=True, epochs_between_evals=True,
                         distribution_strategy=True)
  flags_core.define_performance(num_parallel_calls=False,
                                synthetic_data=True,
                                dtype=True,
                                all_reduce_alg=True,
                                num_packs=True,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                dynamic_loss_scale=dynamic_loss_scale,
                                loss_scale=True,
                                fp16_implementation=True,
                                tf_data_experimental_slack=True,
                                enable_xla=True,
                                training_dataset_cache=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags_core.define_distribution()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_boolean(name='enable_eager', default=False, help='Enable eager?')
  flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
  # TODO(b/135607288): Remove this flag once we understand the root cause of
  # slowdown when setting the learning phase in Keras backend.
  flags.DEFINE_boolean(
      name='set_learning_phase_to_train', default=True,
      help='If skip eval, also set Keras learning phase to 1 (training).')
  flags.DEFINE_boolean(
      name='explicit_gpu_placement', default=False,
      help='If not using distribution strategy, explicitly set device scope '
      'for the Keras training loop.')
  flags.DEFINE_boolean(name='use_trivial_model', default=False,
                       help='Whether to use a trivial Keras model.')
  flags.DEFINE_boolean(name='report_accuracy_metrics', default=True,
                       help='Report metrics during training and evaluation.')
  flags.DEFINE_boolean(name='use_tensor_lr', default=True,
                       help='Use learning rate tensor instead of a callback.')
  flags.DEFINE_boolean(
      name='enable_tensorboard', default=False,
      help='Whether to enable Tensorboard callback.')
  flags.DEFINE_integer(
      name='train_steps', default=None,
      help='The number of steps to run for training. If it is larger than '
      '# batches per epoch, then use # batches per epoch. This flag will be '
      'ignored if train_epochs is set to be larger than 1. ')
  flags.DEFINE_string(
      name='profile_steps', default=None,
      help='Save profiling data to model dir at given range of global steps. The '
      'value must be a comma separated pair of positive integers, specifying '
      'the first and last step to profile. For example, "--profile_steps=2,4" '
      'triggers the profiler to process 3 steps, starting from the 2nd step. '
      'Note that profiler has a non-trivial performance overhead, and the '
      'output file can be gigantic if profiling many steps.')
  flags.DEFINE_boolean(
      name='batchnorm_spatial_persistent', default=True,
      help='Enable the spacial persistent mode for CuDNN batch norm kernel.')
  flags.DEFINE_boolean(
      name='enable_get_next_as_optional', default=False,
      help='Enable get_next_as_optional behavior in DistributedIterator.')
  flags.DEFINE_boolean(
      name='enable_checkpoint_and_export', default=False,
      help='Whether to enable a checkpoint callback and export the savedmodel.')
  flags.DEFINE_string(
      name='tpu', default='', help='TPU address to connect to.')
  flags.DEFINE_integer(
      name='steps_per_loop',
      default=500,
      help='Number of steps per training loop. Only training step happens '
      'inside the loop. Callbacks will not be called inside. Will be capped at '
      'steps per epoch.')
  flags.DEFINE_boolean(
      name='use_tf_while_loop',
      default=True,
      help='Whether to build a tf.while_loop inside the training loop on the '
      'host. Setting it to True is critical to have peak performance on '
      'TPU.')
  flags.DEFINE_boolean(
      name='use_tf_keras_layers', default=False,
      help='Whether to use tf.keras.layers instead of tf.python.keras.layers.'
      'It only changes imagenet resnet model layers for now. This flag is '
      'a temporal flag during transition to tf.keras.layers. Do not use this '
      'flag for external usage. this will be removed shortly.')
  flags.DEFINE_boolean(
      name='keras_application_models', default=False,
      help='Whether the model is a keras_applicaiton_models. Purly Experimental.'
  )

  if model:
    flags.DEFINE_string('model', 'resnet50_v1.5',
                        'Name of model preset. (mobilenet, resnet50_v1.5)')
  if optimizer:
    flags.DEFINE_string('optimizer', 'resnet50_default',
                        'Name of optimizer preset. '
                        '(mobilenet_default, resnet50_default)')
    # TODO(kimjaehong): Replace as general hyper-params not only for mobilenet.
    flags.DEFINE_float('initial_learning_rate_per_sample', 0.00007,
                       'Initial value of learning rate per sample for '
                       'mobilenet_default.')
    flags.DEFINE_float('lr_decay_factor', 0.94,
                       'Learning rate decay factor for mobilenet_default.')
    flags.DEFINE_float('num_epochs_per_decay', 2.5,
                       'Number of epochs per decay for mobilenet_default.')
  if pretrained_filepath:
    flags.DEFINE_string('pretrained_filepath', '',
                        'Pretrained file path.')
  if dataset:
    flags.DEFINE_string('dataset', 'imagenet',
                        'Dataset to use.')
Beispiel #46
0
def define_resnet_flags(resnet_size_choices=None, dynamic_loss_scale=False,
                        fp16_implementation=False):
  """Add flags and validators for ResNet."""
  flags_core.define_base()
  flags_core.define_performance(num_parallel_calls=False,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                datasets_num_parallel_batches=True,
                                dynamic_loss_scale=dynamic_loss_scale,
                                fp16_implementation=fp16_implementation,
                                loss_scale=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version', short_name='rv', default='1',
      enum_values=['1', '2'],
      help=flags_core.help_wrap(
          'Version of ResNet. (1 or 2) See README.md for details.'))
  flags.DEFINE_bool(
      name='fine_tune', short_name='ft', default=False,
      help=flags_core.help_wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
      help=flags_core.help_wrap(
          'If not None initialize all the network except the final layer with '
          'these values'))
  flags.DEFINE_boolean(
      name='eval_only', default=False,
      help=flags_core.help_wrap('Skip training and only perform evaluation on '
                                'the latest checkpoint.'))
  flags.DEFINE_boolean(
      name='image_bytes_as_serving_input', default=False,
      help=flags_core.help_wrap(
          'If True exports savedmodel with serving signature that accepts '
          'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
          'represents the image. The former is easier to use for serving at '
          'the expense of image resize/cropping being done as part of model '
          'inference. Note, this flag only applies to ImageNet and cannot '
          'be used for CIFAR.'))
  flags.DEFINE_boolean(
      name='use_train_and_evaluate', default=False,
      help=flags_core.help_wrap(
          'If True, uses `tf.estimator.train_and_evaluate` for the training '
          'and evaluation loop, instead of separate calls to `classifier.train '
          'and `classifier.evaluate`, which is the default behavior.'))
  flags.DEFINE_string(
      name='worker_hosts', default=None,
      help=flags_core.help_wrap(
          'Comma-separated list of worker ip:port pairs for running '
          'multi-worker models with DistributionStrategy.  The user would '
          'start the program on each host with identical value for this flag.'))
  flags.DEFINE_integer(
      name='task_index', default=-1,
      help=flags_core.help_wrap('If multi-worker training, the task_index of '
                                'this worker.'))
  flags.DEFINE_bool(
      name='enable_lars', default=False,
      help=flags_core.help_wrap(
          'Enable LARS optimizer for large batch training.'))
  flags.DEFINE_float(
      name='label_smoothing', default=0.0,
      help=flags_core.help_wrap(
          'Label smoothing parameter used in the softmax_cross_entropy'))
  flags.DEFINE_float(
      name='weight_decay', default=1e-4,
      help=flags_core.help_wrap(
          'Weight decay coefficiant for l2 regularization.'))

  choice_kwargs = dict(
      name='resnet_size', short_name='rs', default='50',
      help=flags_core.help_wrap('The size of the ResNet model to use.'))

  if resnet_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Beispiel #47
0
def define_resnet_flags(resnet_size_choices=None,
                        dynamic_loss_scale=False,
                        fp16_implementation=False):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  datasets_num_parallel_batches=True,
                                  dynamic_loss_scale=dynamic_loss_scale,
                                  fp16_implementation=fp16_implementation)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='1',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=flags_core.help_wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))
    flags.DEFINE_boolean(
        name='use_train_and_evaluate',
        default=False,
        help=flags_core.help_wrap(
            'If True, uses `tf.estimator.train_and_evaluate` for the training '
            'and evaluation loop, instead of separate calls to `classifier.train '
            'and `classifier.evaluate`, which is the default behavior.'))
    flags.DEFINE_string(
        name='worker_hosts',
        default=None,
        help=flags_core.help_wrap(
            'Comma-separated list of worker ip:port pairs for running '
            'multi-worker models with DistributionStrategy.  The user would '
            'start the program on each host with identical value for this flag.'
        ))
    flags.DEFINE_integer(name='task_index',
                         default=-1,
                         help=flags_core.help_wrap(
                             'If multi-worker training, the task_index of '
                             'this worker.'))
    flags.DEFINE_bool(name='enable_lars',
                      default=False,
                      help=flags_core.help_wrap(
                          'Enable LARS optimizer for large batch training.'))
    flags.DEFINE_float(
        name='label_smoothing',
        default=0.0,
        help=flags_core.help_wrap(
            'Label smoothing parameter used in the softmax_cross_entropy'))
    flags.DEFINE_float(name='weight_decay',
                       default=1e-4,
                       help=flags_core.help_wrap(
                           'Weight decay coefficiant for l2 regularization.'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Beispiel #48
0
def define_flags():
  flags_core.define_base(num_gpu=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()