Code example #1
File: run_classifier.py Project: wuhuaroubj/models
def run_bert(strategy,
             input_meta_data,
             train_input_fn=None,
             eval_input_fn=None):
  """Run BERT training."""
  if FLAGS.model_type == 'bert':
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
  else:
    assert FLAGS.model_type == 'albert'
    bert_config = modeling.AlbertConfig.from_json_file(FLAGS.bert_config_file)
  if FLAGS.mode == 'export_only':
    # As Keras ModelCheckpoint callback used with Keras compile/fit() API
    # internally uses model.save_weights() to save checkpoints, we must
    # use model.load_weights() when Keras compile/fit() is used.
    export_classifier(FLAGS.model_export_path, input_meta_data,
                      FLAGS.use_keras_compile_fit,
                      bert_config, FLAGS.model_dir)
    return

  if FLAGS.mode != 'train_and_eval':
    raise ValueError('Unsupported mode is specified: %s' % FLAGS.mode)
  # Enables XLA in Session Config. Should not be set for TPU.
  keras_utils.set_config_v2(FLAGS.enable_xla)

  epochs = FLAGS.num_train_epochs
  train_data_size = input_meta_data['train_data_size']
  steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
  warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
  eval_steps = int(
      math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

  if not strategy:
    raise ValueError('Distribution strategy has not been specified.')

  trained_model = run_bert_classifier(
      strategy,
      bert_config,
      input_meta_data,
      FLAGS.model_dir,
      epochs,
      steps_per_epoch,
      FLAGS.steps_per_loop,
      eval_steps,
      warmup_steps,
      FLAGS.learning_rate,
      FLAGS.init_checkpoint,
      train_input_fn,
      eval_input_fn,
      run_eagerly=FLAGS.run_eagerly,
      use_keras_compile_fit=FLAGS.use_keras_compile_fit)

  if FLAGS.model_export_path:
    # As Keras ModelCheckpoint callback used with Keras compile/fit() API
    # internally uses model.save_weights() to save checkpoints, we must
    # use model.load_weights() when Keras compile/fit() is used.
    model_saving_utils.export_bert_model(
        FLAGS.model_export_path,
        model=trained_model,
        restore_model_using_load_weights=FLAGS.use_keras_compile_fit)
  return trained_model
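
For context, a minimal driver sketch showing how a run_bert function like the one above is typically invoked. Everything in it is an assumption rather than part of the snippet: the flags are assumed to be already parsed by absl, FLAGS.input_meta_data_path is assumed to point at the metadata JSON produced during preprocessing, and train_input_fn / eval_input_fn are assumed to be dataset-builder callables created by the project's input pipeline helpers.

import json

import tensorflow as tf


def main_sketch(train_input_fn, eval_input_fn):
  # Hypothetical driver; FLAGS and run_bert come from the snippet's module.
  with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
    input_meta_data = json.loads(reader.read().decode('utf-8'))

  # Mirror training across all local GPUs; any tf.distribute strategy would do.
  strategy = tf.distribute.MirroredStrategy()
  return run_bert(strategy, input_meta_data,
                  train_input_fn=train_input_fn,
                  eval_input_fn=eval_input_fn)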
Code example #2
    def _run_and_report_benchmark(self,
                                  use_ds=True,
                                  enable_xla=False,
                                  run_eagerly=False):
        """Runs the benchmark and reports various metrics."""
        keras_utils.set_config_v2(enable_xla)
        start_time_sec = time.time()
        self._train_squad(use_ds=use_ds, run_eagerly=run_eagerly)
        wall_time_sec = time.time() - start_time_sec

        summary = self._read_training_summary_from_file()

        super(BertSquadBenchmarkReal,
              self)._report_benchmark(stats=summary,
                                      wall_time_sec=wall_time_sec,
                                      min_accuracy=0,
                                      max_accuracy=1)
Code example #3
File: run_classifier.py Project: zhu1990/models
def run_bert(strategy, input_meta_data):
    """Run BERT training."""
    if FLAGS.mode == 'export_only':
        export_classifier(FLAGS.model_export_path, input_meta_data)
        return

    if FLAGS.mode != 'train_and_eval':
        raise ValueError('Unsupported mode is specified: %s' % FLAGS.mode)
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_config_v2(FLAGS.enable_xla)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    epochs = FLAGS.num_train_epochs
    train_data_size = input_meta_data['train_data_size']
    steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
    warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    if not strategy:
        raise ValueError('Distribution strategy has not been specified.')
    # Runs customized training loop.
    logging.info(
        'Training using customized training loop TF 2.0 with distributed'
        ' strategy.')
    use_remote_tpu = (FLAGS.strategy_type == 'tpu' and FLAGS.tpu)
    trained_model = run_customized_training(strategy,
                                            bert_config,
                                            input_meta_data,
                                            FLAGS.model_dir,
                                            epochs,
                                            steps_per_epoch,
                                            FLAGS.steps_per_loop,
                                            eval_steps,
                                            warmup_steps,
                                            FLAGS.learning_rate,
                                            FLAGS.init_checkpoint,
                                            use_remote_tpu=use_remote_tpu,
                                            run_eagerly=FLAGS.run_eagerly)

    if FLAGS.model_export_path:
        with tf.device(
                model_training_utils.get_primary_cpu_task(use_remote_tpu)):
            model_saving_utils.export_bert_model(FLAGS.model_export_path,
                                                 model=trained_model)
    return trained_model
Code example #4
    def _run_bert_classifier(self,
                             callbacks=None,
                             use_ds=True,
                             enable_xla=False):
        """Starts BERT classification task."""
        with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
            input_meta_data = json.loads(reader.read().decode('utf-8'))

        bert_config = modeling.BertConfig.from_json_file(
            FLAGS.bert_config_file)
        epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs
        if self.num_steps_per_epoch:
            steps_per_epoch = self.num_steps_per_epoch
        else:
            train_data_size = input_meta_data['train_data_size']
            steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
        warmup_steps = int(epochs * steps_per_epoch * 0.1)
        eval_steps = int(
            math.ceil(input_meta_data['eval_data_size'] /
                      FLAGS.eval_batch_size))
        strategy = distribution_utils.get_distribution_strategy(
            distribution_strategy='mirrored' if use_ds else 'off',
            num_gpus=self.num_gpus)
        # TODO(hongkuny): Enable XLA once we are confident with its performance.
        keras_utils.set_config_v2(enable_xla)

        steps_per_loop = 1

        run_classifier.run_customized_training(strategy,
                                               bert_config,
                                               input_meta_data,
                                               FLAGS.model_dir,
                                               epochs,
                                               steps_per_epoch,
                                               steps_per_loop,
                                               eval_steps,
                                               warmup_steps,
                                               FLAGS.learning_rate,
                                               FLAGS.init_checkpoint,
                                               custom_callbacks=callbacks)
Code example #5
File: run_classifier.py Project: wangpeng-tju/models
def run_bert(strategy, input_meta_data):
    """Run BERT training."""
    if FLAGS.mode == 'export_only':
        export_classifier(FLAGS.model_export_path, input_meta_data)
        return

    if FLAGS.mode != 'train_and_eval':
        raise ValueError('Unsupported mode is specified: %s' % FLAGS.mode)
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_config_v2(FLAGS.enable_xla)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    epochs = FLAGS.num_train_epochs
    train_data_size = input_meta_data['train_data_size']
    steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
    warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    if not strategy:
        raise ValueError('Distribution strategy has not been specified.')

    trained_model = run_bert_classifier(strategy,
                                        bert_config,
                                        input_meta_data,
                                        FLAGS.model_dir,
                                        epochs,
                                        steps_per_epoch,
                                        FLAGS.steps_per_loop,
                                        eval_steps,
                                        warmup_steps,
                                        FLAGS.learning_rate,
                                        FLAGS.init_checkpoint,
                                        run_eagerly=FLAGS.run_eagerly)

    if FLAGS.model_export_path:
        model_saving_utils.export_bert_model(FLAGS.model_export_path,
                                             model=trained_model)
    return trained_model
Code example #6
def run_bert_pretrain(strategy):
    """Runs BERT pre-training."""

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    # Padding for divisibility by 8
    # if bert_config.vocab_size % 8 != 0:
    #   bert_config.vocab_size += 8 - bert_config.vocab_size % 8
    if strategy:
        logging.info(
            'Training using customized training loop TF 2.0 with distributed'
            ' strategy.')

    keras_utils.set_config_v2(FLAGS.enable_xla)
    # Runs customized training loop.
    return run_customized_training(
        strategy, bert_config, FLAGS.max_seq_length,
        FLAGS.max_predictions_per_seq, FLAGS.model_dir,
        FLAGS.num_steps_per_epoch, FLAGS.steps_per_loop,
        FLAGS.num_train_epochs,
        FLAGS.learning_rate *
        hvd.size() if FLAGS.use_horovod else FLAGS.learning_rate,
        FLAGS.warmup_steps, FLAGS.input_files, FLAGS.train_batch_size)
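
The learning rate argument above is scaled linearly by the Horovod world size when --use_horovod is set. A minimal sketch of that scaling rule, with an assumed base rate rather than a value taken from the snippet:

import horovod.tensorflow as hvd

hvd.init()

base_learning_rate = 2e-5  # assumed example value for FLAGS.learning_rate
# Linear scaling rule: multiply the single-worker rate by the number of workers.
scaled_learning_rate = base_learning_rate * hvd.size()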
Code example #7
File: run_squad.py Project: yashk01/tensorflow1
def train_squad(strategy,
                input_meta_data,
                custom_callbacks=None,
                run_eagerly=False):
  """Run bert squad training."""
  if strategy:
    logging.info('Training using customized training loop with distribution'
                 ' strategy.')
  # Enables XLA in Session Config. Should not be set for TPU.
  keras_utils.set_config_v2(FLAGS.enable_xla)

  use_float16 = common_flags.use_float16()
  if use_float16:
    policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
    tf.keras.mixed_precision.experimental.set_policy(policy)

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
  epochs = FLAGS.num_train_epochs
  num_train_examples = input_meta_data['train_data_size']
  max_seq_length = input_meta_data['max_seq_length']
  steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
  warmup_steps = int(epochs * num_train_examples * 0.1 / FLAGS.train_batch_size)
  train_input_fn = functools.partial(
      input_pipeline.create_squad_dataset,
      FLAGS.train_data_path,
      max_seq_length,
      FLAGS.train_batch_size,
      is_training=True)

  def _get_squad_model():
    """Get Squad model and optimizer."""
    squad_model, core_model = bert_models.squad_model(
        bert_config,
        max_seq_length,
        float_type=tf.float16 if use_float16 else tf.float32)
    squad_model.optimizer = optimization.create_optimizer(
        FLAGS.learning_rate, steps_per_epoch * epochs, warmup_steps)
    if use_float16:
      # Wraps optimizer with a LossScaleOptimizer. This is done automatically
      # in compile() with the "mixed_float16" policy, but since we do not call
      # compile(), we must wrap the optimizer manually.
      squad_model.optimizer = (
          tf.keras.mixed_precision.experimental.LossScaleOptimizer(
              squad_model.optimizer, loss_scale=common_flags.get_loss_scale()))
    return squad_model, core_model

  # The original BERT model does not scale the loss by
  # 1/num_replicas_in_sync. It could be an accident. So, in order to use
  # the same hyper parameter, we do the same thing here by keeping each
  # replica loss as it is.
  loss_fn = get_loss_fn(loss_factor=1.0)
  use_remote_tpu = (FLAGS.strategy_type == 'tpu' and FLAGS.tpu)

  model_training_utils.run_customized_training_loop(
      strategy=strategy,
      model_fn=_get_squad_model,
      loss_fn=loss_fn,
      model_dir=FLAGS.model_dir,
      steps_per_epoch=steps_per_epoch,
      steps_per_loop=FLAGS.steps_per_loop,
      epochs=epochs,
      train_input_fn=train_input_fn,
      init_checkpoint=FLAGS.init_checkpoint,
      use_remote_tpu=use_remote_tpu,
      run_eagerly=run_eagerly,
      custom_callbacks=custom_callbacks)
Code example #8
File: run_squad.py Project: wuhuaroubj/models
def train_squad(strategy,
                input_meta_data,
                custom_callbacks=None,
                run_eagerly=False):
  """Run bert squad training."""
  if strategy:
    logging.info('Training using customized training loop with distribution'
                 ' strategy.')
  # Enables XLA in Session Config. Should not be set for TPU.
  keras_utils.set_config_v2(FLAGS.enable_xla)

  use_float16 = common_flags.use_float16()
  if use_float16:
    tf.keras.mixed_precision.experimental.set_policy('mixed_float16')

  bert_config = MODEL_CLASSES[FLAGS.model_type][0].from_json_file(
      FLAGS.bert_config_file)
  epochs = FLAGS.num_train_epochs
  num_train_examples = input_meta_data['train_data_size']
  max_seq_length = input_meta_data['max_seq_length']
  steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
  warmup_steps = int(epochs * num_train_examples * 0.1 / FLAGS.train_batch_size)
  train_input_fn = get_dataset_fn(
      FLAGS.train_data_path,
      max_seq_length,
      FLAGS.train_batch_size,
      is_training=True)

  def _get_squad_model():
    """Get Squad model and optimizer."""
    squad_model, core_model = bert_models.squad_model(
        bert_config,
        max_seq_length,
        float_type=tf.float16 if use_float16 else tf.float32,
        hub_module_url=FLAGS.hub_module_url)
    squad_model.optimizer = optimization.create_optimizer(
        FLAGS.learning_rate, steps_per_epoch * epochs, warmup_steps)
    if use_float16:
      # Wraps optimizer with a LossScaleOptimizer. This is done automatically
      # in compile() with the "mixed_float16" policy, but since we do not call
      # compile(), we must wrap the optimizer manually.
      squad_model.optimizer = (
          tf.keras.mixed_precision.experimental.LossScaleOptimizer(
              squad_model.optimizer, loss_scale=common_flags.get_loss_scale()))
    if FLAGS.fp16_implementation == 'graph_rewrite':
      # Note: when flags_obj.fp16_implementation == "graph_rewrite", dtype as
      # determined by flags_core.get_tf_dtype(flags_obj) would be 'float32'
      # which will ensure tf.compat.v2.keras.mixed_precision and
      # tf.train.experimental.enable_mixed_precision_graph_rewrite do not double
      # up.
      squad_model.optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
          squad_model.optimizer)
    return squad_model, core_model

  # The original BERT model does not scale the loss by
  # 1/num_replicas_in_sync. It could be an accident. So, in order to use
  # the same hyper parameter, we do the same thing here by keeping each
  # replica loss as it is.
  loss_fn = get_loss_fn(
      loss_factor=1.0 /
      strategy.num_replicas_in_sync if FLAGS.scale_loss else 1.0)

  model_training_utils.run_customized_training_loop(
      strategy=strategy,
      model_fn=_get_squad_model,
      loss_fn=loss_fn,
      model_dir=FLAGS.model_dir,
      steps_per_epoch=steps_per_epoch,
      steps_per_loop=FLAGS.steps_per_loop,
      epochs=epochs,
      train_input_fn=train_input_fn,
      init_checkpoint=FLAGS.init_checkpoint,
      run_eagerly=run_eagerly,
      custom_callbacks=custom_callbacks)
Code example #9
def train_squad(strategy,
                input_meta_data,
                bert_config,
                custom_callbacks=None,
                run_eagerly=False):
    """Run bert squad training."""
    if strategy:
        logging.info(
            'Training using customized training loop with distribution'
            ' strategy.')
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_config_v2(FLAGS.enable_xla)

    use_float16 = common_flags.use_float16()
    if use_float16:
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')

    epochs = FLAGS.num_train_epochs
    num_train_examples = input_meta_data['train_data_size']
    max_seq_length = input_meta_data['max_seq_length']
    steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
    warmup_steps = int(epochs * num_train_examples * 0.1 /
                       FLAGS.train_batch_size)
    train_input_fn = get_dataset_fn(FLAGS.train_data_path,
                                    max_seq_length,
                                    FLAGS.train_batch_size,
                                    is_training=True)

    def _get_squad_model():
        """Get Squad model and optimizer."""
        squad_model, core_model = bert_models.squad_model(
            bert_config,
            max_seq_length,
            hub_module_url=FLAGS.hub_module_url,
            hub_module_trainable=FLAGS.hub_module_trainable)
        squad_model.optimizer = optimization.create_optimizer(
            FLAGS.learning_rate, steps_per_epoch * epochs, warmup_steps)
        if use_float16:
            # Wraps optimizer with a LossScaleOptimizer. This is done automatically
            # in compile() with the "mixed_float16" policy, but since we do not call
            # compile(), we must wrap the optimizer manually.
            squad_model.optimizer = (
                tf.keras.mixed_precision.experimental.LossScaleOptimizer(
                    squad_model.optimizer,
                    loss_scale=common_flags.get_loss_scale()))
        if FLAGS.fp16_implementation == 'graph_rewrite':
            # Note: when flags_obj.fp16_implementation == "graph_rewrite", dtype as
            # determined by flags_core.get_tf_dtype(flags_obj) would be 'float32'
            # which will ensure tf.compat.v2.keras.mixed_precision and
            # tf.train.experimental.enable_mixed_precision_graph_rewrite do not double
            # up.
            squad_model.optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
                squad_model.optimizer)
        return squad_model, core_model

    # The original BERT model does not scale the loss by
    # 1/num_replicas_in_sync. It could be an accident. So, in order to use
    # the same hyper parameter, we do the same thing here by keeping each
    # replica loss as it is.
    loss_fn = get_loss_fn(
        loss_factor=1.0 /
        strategy.num_replicas_in_sync if FLAGS.scale_loss else 1.0)

    # when all_reduce_sum_gradients = False, apply_gradients() no longer
    # implicitly allreduce gradients, users manually allreduce gradient and
    # passed the allreduced grads_and_vars. For now, the clip_by_global_norm
    # will be moved to before users' manual allreduce to keep the math
    # unchanged.
    def clip_by_global_norm_callback(grads_and_vars):
        grads, variables = zip(*grads_and_vars)
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
        return zip(clipped_grads, variables)

    model_training_utils.run_customized_training_loop(
        strategy=strategy,
        model_fn=_get_squad_model,
        loss_fn=loss_fn,
        model_dir=FLAGS.model_dir,
        steps_per_epoch=steps_per_epoch,
        steps_per_loop=FLAGS.steps_per_loop,
        epochs=epochs,
        train_input_fn=train_input_fn,
        init_checkpoint=FLAGS.init_checkpoint,
        run_eagerly=run_eagerly,
        custom_callbacks=custom_callbacks,
        explicit_allreduce=True,
        pre_allreduce_callbacks=[clip_by_global_norm_callback])
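
To make the effect of clip_by_global_norm_callback concrete, here is a small standalone sketch with made-up gradient values; the variable and the numbers are illustrative only, not taken from the training code.

import tensorflow as tf

# One illustrative (gradient, variable) pair; [30, 40] has global norm 50.
var = tf.Variable([0.0, 0.0])
grads_and_vars = [(tf.constant([30.0, 40.0]), var)]

grads, variables = zip(*grads_and_vars)
clipped_grads, global_norm = tf.clip_by_global_norm(list(grads), clip_norm=1.0)
print(global_norm.numpy())       # 50.0
print(clipped_grads[0].numpy())  # [0.6 0.8], rescaled so the global norm is 1.0
grads_and_vars = list(zip(clipped_grads, variables))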
Code example #10
def train_squad(strategy,
                input_meta_data,
                bert_config,
                custom_callbacks=None,
                run_eagerly=False):
    """Run bert squad training."""
    if strategy:
        logging.info(
            'Training using customized training loop with distribution'
            ' strategy.')
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_config_v2(FLAGS.enable_xla)
    performance.set_mixed_precision_policy(common_flags.dtype())

    epochs = FLAGS.num_train_epochs
    num_train_examples = input_meta_data['train_data_size']
    max_seq_length = input_meta_data['max_seq_length']
    steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
    warmup_steps = int(epochs * num_train_examples * 0.1 /
                       FLAGS.train_batch_size)
    train_input_fn = get_dataset_fn(FLAGS.train_data_path,
                                    max_seq_length,
                                    FLAGS.train_batch_size,
                                    is_training=True)

    def _get_squad_model():
        """Get Squad model and optimizer."""
        squad_model, core_model = bert_models.squad_model(
            bert_config,
            max_seq_length,
            hub_module_url=FLAGS.hub_module_url,
            hub_module_trainable=FLAGS.hub_module_trainable)
        optimizer = optimization.create_optimizer(FLAGS.learning_rate,
                                                  steps_per_epoch * epochs,
                                                  warmup_steps)

        squad_model.optimizer = performance.configure_optimizer(
            optimizer,
            use_float16=common_flags.use_float16(),
            use_graph_rewrite=common_flags.use_graph_rewrite())
        return squad_model, core_model

    # If explicit_allreduce = True, apply_gradients() no longer implicitly
    # allreduce gradients, users manually allreduce gradient and pass the
    # allreduced grads_and_vars to apply_gradients(). clip_by_global_norm will be
    # applied to allreduced gradients.
    def clip_by_global_norm_callback(grads_and_vars):
        grads, variables = zip(*grads_and_vars)
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
        return zip(clipped_grads, variables)

    model_training_utils.run_customized_training_loop(
        strategy=strategy,
        model_fn=_get_squad_model,
        loss_fn=get_loss_fn(),
        model_dir=FLAGS.model_dir,
        steps_per_epoch=steps_per_epoch,
        steps_per_loop=FLAGS.steps_per_loop,
        epochs=epochs,
        train_input_fn=train_input_fn,
        init_checkpoint=FLAGS.init_checkpoint,
        run_eagerly=run_eagerly,
        custom_callbacks=custom_callbacks,
        explicit_allreduce=False,
        post_allreduce_callbacks=[clip_by_global_norm_callback])
Code example #11
def run_bert(strategy,
             input_meta_data,
             model_config,
             train_input_fn=None,
             eval_input_fn=None,
             init_checkpoint=None,
             custom_callbacks=None):
  """Run BERT training."""
  if FLAGS.mode == 'export_only':
    # As Keras ModelCheckpoint callback used with Keras compile/fit() API
    # internally uses model.save_weights() to save checkpoints, we must
    # use model.load_weights() when Keras compile/fit() is used.
    export_classifier(FLAGS.model_export_path, input_meta_data,
                      FLAGS.use_keras_compile_fit,
                      model_config, FLAGS.model_dir)
    return

  if FLAGS.mode != 'train_and_eval':
    raise ValueError('Unsupported mode is specified: %s' % FLAGS.mode)
  # Enables XLA in Session Config. Should not be set for TPU.
  keras_utils.set_config_v2(FLAGS.enable_xla)
  performance.set_mixed_precision_policy(common_flags.dtype())

  epochs = FLAGS.num_train_epochs
  train_data_size = input_meta_data['train_data_size']
  steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
  warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
  eval_steps = int(
      math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

  if not strategy:
    raise ValueError('Distribution strategy has not been specified.')

  if not custom_callbacks:
    custom_callbacks = []

  if FLAGS.log_steps:
    custom_callbacks.append(keras_utils.TimeHistory(
        batch_size=FLAGS.train_batch_size,
        log_steps=FLAGS.log_steps,
        logdir=FLAGS.model_dir))

  trained_model = run_bert_classifier(
      strategy,
      model_config,
      input_meta_data,
      FLAGS.model_dir,
      epochs,
      steps_per_epoch,
      FLAGS.steps_per_loop,
      eval_steps,
      warmup_steps,
      FLAGS.learning_rate,
      init_checkpoint or FLAGS.init_checkpoint,
      train_input_fn,
      eval_input_fn,
      run_eagerly=FLAGS.run_eagerly,
      use_keras_compile_fit=FLAGS.use_keras_compile_fit,
      custom_callbacks=custom_callbacks)

  if FLAGS.model_export_path:
    # As Keras ModelCheckpoint callback used with Keras compile/fit() API
    # internally uses model.save_weights() to save checkpoints, we must
    # use model.load_weights() when Keras compile/fit() is used.
    model_saving_utils.export_bert_model(
        FLAGS.model_export_path,
        model=trained_model,
        restore_model_using_load_weights=FLAGS.use_keras_compile_fit)
  return trained_model
Code example #12
def predict_squad(strategy, input_meta_data):
    """Makes predictions for a squad dataset."""
    keras_utils.set_config_v2(FLAGS.enable_xla)
    config_cls, squad_lib, tokenizer_cls = MODEL_CLASSES[FLAGS.model_type]
    bert_config = config_cls.from_json_file(FLAGS.bert_config_file)
    if tokenizer_cls == tokenization.FullTokenizer:
        tokenizer = tokenizer_cls(vocab_file=FLAGS.vocab_file,
                                  do_lower_case=FLAGS.do_lower_case)
    else:
        assert tokenizer_cls == tokenization.FullSentencePieceTokenizer
        tokenizer = tokenizer_cls(sp_model_file=FLAGS.sp_model_file)
    doc_stride = input_meta_data['doc_stride']
    max_query_length = input_meta_data['max_query_length']
    # Whether data should be in Ver 2.0 format.
    version_2_with_negative = input_meta_data.get('version_2_with_negative',
                                                  False)
    eval_examples = squad_lib.read_squad_examples(
        input_file=FLAGS.predict_file,
        is_training=False,
        version_2_with_negative=version_2_with_negative)

    eval_writer = squad_lib.FeatureWriter(filename=os.path.join(
        FLAGS.model_dir, 'eval.tf_record'),
                                          is_training=False)
    eval_features = []

    def _append_feature(feature, is_padding):
        if not is_padding:
            eval_features.append(feature)
        eval_writer.process_feature(feature)

    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on.
    kwargs = dict(examples=eval_examples,
                  tokenizer=tokenizer,
                  max_seq_length=input_meta_data['max_seq_length'],
                  doc_stride=doc_stride,
                  max_query_length=max_query_length,
                  is_training=False,
                  output_fn=_append_feature,
                  batch_size=FLAGS.predict_batch_size)

    # squad_lib_sp requires one more argument 'do_lower_case'.
    if squad_lib == squad_lib_sp:
        kwargs['do_lower_case'] = FLAGS.do_lower_case
    dataset_size = squad_lib.convert_examples_to_features(**kwargs)
    eval_writer.close()

    logging.info('***** Running predictions *****')
    logging.info('  Num orig examples = %d', len(eval_examples))
    logging.info('  Num split examples = %d', len(eval_features))
    logging.info('  Batch size = %d', FLAGS.predict_batch_size)

    num_steps = int(dataset_size / FLAGS.predict_batch_size)
    if FLAGS.benchmark and num_steps > 1000:
        num_steps = 1000
    all_results = predict_squad_customized(strategy, input_meta_data,
                                           bert_config, eval_writer.filename,
                                           num_steps)

    if FLAGS.benchmark:
        return

    output_prediction_file = os.path.join(FLAGS.model_dir, 'predictions.json')
    output_nbest_file = os.path.join(FLAGS.model_dir, 'nbest_predictions.json')
    output_null_log_odds_file = os.path.join(FLAGS.model_dir, 'null_odds.json')

    squad_lib.write_predictions(eval_examples,
                                eval_features,
                                all_results,
                                FLAGS.n_best_size,
                                FLAGS.max_answer_length,
                                FLAGS.do_lower_case,
                                output_prediction_file,
                                output_nbest_file,
                                output_null_log_odds_file,
                                verbose=FLAGS.verbose_logging)

    if FLAGS.eval_script:
        eval_out = subprocess.check_output([
            sys.executable, FLAGS.eval_script, FLAGS.predict_file,
            output_prediction_file
        ])
        scores = str(eval_out).strip()
        exact_match = float(scores.split(":")[1].split(",")[0])
        if version_2_with_negative:
            f1 = float(scores.split(":")[2].split(",")[0])
        else:
            f1 = float(scores.split(":")[2].split("}")[0])
        dllogging = input_meta_data['dllogging']
        dllogging.logger.log(step=(),
                             data={"f1": f1},
                             verbosity=Verbosity.DEFAULT)
        dllogging.logger.log(step=(),
                             data={"exact_match": exact_match},
                             verbosity=Verbosity.DEFAULT)
        print(str(eval_out))
Code example #13
def train_squad(strategy,
                input_meta_data,
                custom_callbacks=None,
                run_eagerly=False):
    """Run bert squad training."""
    if strategy:
        logging.info(
            'Training using customized training loop with distribution'
            ' strategy.')
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_config_v2(FLAGS.enable_xla)

    use_float16 = common_flags.use_float16()
    if use_float16:
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')

    bert_config = MODEL_CLASSES[FLAGS.model_type][0].from_json_file(
        FLAGS.bert_config_file)
    epochs = FLAGS.num_train_epochs
    num_train_examples = input_meta_data['train_data_size']
    max_seq_length = input_meta_data['max_seq_length']
    global_batch_size = FLAGS.train_batch_size * FLAGS.num_accumulation_steps
    if FLAGS.use_horovod:
        global_batch_size *= hvd.size()
    steps_per_epoch = int(num_train_examples / global_batch_size)
    warmup_steps = int(epochs * num_train_examples * 0.1 / global_batch_size)
    train_input_fn = get_dataset_fn(FLAGS.train_data_path,
                                    max_seq_length,
                                    FLAGS.train_batch_size,
                                    is_training=True,
                                    use_horovod=FLAGS.use_horovod)

    if FLAGS.benchmark:
        steps_per_epoch = 800
        epochs = 1

    def _get_squad_model():
        """Get Squad model and optimizer."""
        squad_model, core_model = bert_models.squad_model(
            bert_config,
            max_seq_length,
            float_type=tf.float16 if FLAGS.use_fp16 else tf.float32,
            hub_module_url=FLAGS.hub_module_url)
        learning_rate = (FLAGS.learning_rate * hvd.size()
                         if FLAGS.use_horovod else FLAGS.learning_rate)
        squad_model.optimizer = optimization.create_optimizer(
            learning_rate, steps_per_epoch * epochs, warmup_steps,
            FLAGS.optimizer_type)
        if FLAGS.use_fp16:
            squad_model.optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
                squad_model.optimizer, dynamic=True)
        return squad_model, core_model

    # The original BERT model does not scale the loss by
    # 1/num_replicas_in_sync. It could be an accident. So, in order to use
    # the same hyper parameter, we do the same thing here by keeping each
    # replica loss as it is.
    loss_fn = get_loss_fn(loss_factor=1.0 / strategy.num_replicas_in_sync
                          if FLAGS.scale_loss and strategy else 1.0)

    params = {'dllogging': input_meta_data['dllogging'], 'FLAGS': FLAGS}

    model_training_utils.run_customized_training_loop(
        strategy=strategy,
        model_fn=_get_squad_model,
        loss_fn=loss_fn,
        model_dir=FLAGS.model_dir,
        steps_per_epoch=steps_per_epoch,
        num_accumulative_step=FLAGS.num_accumulation_steps,
        steps_per_loop=FLAGS.steps_per_loop,
        epochs=epochs,
        train_input_fn=train_input_fn,
        init_checkpoint=FLAGS.init_checkpoint,
        hvd=hvd if FLAGS.use_horovod else None,
        run_eagerly=run_eagerly,
        custom_callbacks=custom_callbacks,
        params=params)
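
For reference, a small worked example of the effective global batch size computed above; the per-replica batch size, accumulation steps, and worker count are assumed values, not defaults from the snippet.

train_batch_size = 4         # assumed per-replica micro-batch size
num_accumulation_steps = 8   # gradients accumulated before each optimizer step
num_workers = 8              # hvd.size() when --use_horovod is set

global_batch_size = train_batch_size * num_accumulation_steps * num_workers
print(global_batch_size)     # 256 examples contribute to each weight update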