Beispiel #1
0
def squad_model(bert_config,
                max_seq_length,
                initializer=None,
                hub_module_url=None,
                hub_module_trainable=True):
    """Returns BERT Squad model along with core BERT model to import weights.

    When `hub_module_url` is falsy the encoder is created with
    `get_transformer_encoder`; otherwise the encoder is a Keras model that
    wraps the `hub.KerasLayer` loaded from `hub_module_url`.

    Args:
      bert_config: BertConfig, the config defines the core Bert model.
      max_seq_length: integer, the maximum input sequence length.
      initializer: Initializer for the final dense layer in the span labeler.
        Defaulted to TruncatedNormal initializer.
      hub_module_url: TF-Hub path/url to Bert module.
      hub_module_trainable: True to finetune layers in the hub module.

    Returns:
      A tuple of (1) keras model that outputs start logits and end logits and
      (2) the core BERT transformer encoder.
    """
    if initializer is None:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    if hub_module_url:

        def _int32_input(name):
            # All three hub inputs share the same shape and dtype.
            return tf.keras.layers.Input(shape=(max_seq_length, ),
                                         dtype=tf.int32,
                                         name=name)

        word_ids = _int32_input('input_word_ids')
        mask = _int32_input('input_mask')
        type_ids = _int32_input('input_type_ids')

        hub_layer = hub.KerasLayer(hub_module_url,
                                   trainable=hub_module_trainable)
        # The hub layer is unpacked as (pooled, sequence); the encoder model
        # below exposes them in the opposite order.
        pooled_output, sequence_output = hub_layer([word_ids, mask, type_ids])
        encoder_inputs = {
            'input_word_ids': word_ids,
            'input_mask': mask,
            'input_type_ids': type_ids,
        }
        bert_encoder = tf.keras.Model(
            inputs=encoder_inputs,
            outputs=[sequence_output, pooled_output],
            name='core_model')
    else:
        bert_encoder = get_transformer_encoder(bert_config, max_seq_length)

    span_labeler = models.BertSpanLabeler(network=bert_encoder,
                                          initializer=initializer)
    return span_labeler, bert_encoder
    def build_model(self):
        """Builds the span-labeling model for this task.

        The encoder is instantiated from `self.task_config.network` unless
        `self._hub_module` is set, in which case it is a Keras model wrapping
        that hub module.

        Returns:
          A `models.BertSpanLabeler` built on top of the encoder network.
        """
        if not self._hub_module:
            encoder_network = encoders.instantiate_encoder_from_cfg(
                self.task_config.network)
        else:
            # TODO(lehou): maybe add the hub_module building logic to a util function.
            word_ids, mask, type_ids = (
                tf.keras.layers.Input(shape=(None, ),
                                      dtype=tf.int32,
                                      name=input_name)
                for input_name in ('input_word_ids', 'input_mask',
                                   'input_type_ids'))
            hub_layer = hub.KerasLayer(self._hub_module, trainable=True)
            # The hub layer is unpacked as (pooled, sequence); the encoder
            # model exposes them in the opposite order.
            pooled_output, sequence_output = hub_layer(
                [word_ids, mask, type_ids])
            encoder_network = tf.keras.Model(
                inputs=[word_ids, mask, type_ids],
                outputs=[sequence_output, pooled_output])

        init_stddev = self.task_config.network.initializer_range
        head_initializer = tf.keras.initializers.TruncatedNormal(
            stddev=init_stddev)
        return models.BertSpanLabeler(network=encoder_network,
                                      initializer=head_initializer)
Beispiel #3
0
def create_qa_model(bert_config,
                    max_seq_length,
                    initializer=None,
                    hub_module_url=None,
                    hub_module_trainable=True,
                    is_tf2=True):
  """Returns BERT qa model along with core BERT model to import weights.

  The core encoder is always built from a TF-Hub module, so `hub_module_url`
  is required even though it defaults to None.

  Args:
    bert_config: BertConfig, the config defines the core Bert model.
    max_seq_length: integer, the maximum input sequence length.
    initializer: Initializer for the final dense layer in the span labeler.
      Defaulted to TruncatedNormal initializer.
    hub_module_url: TF-Hub path/url to Bert module.
    hub_module_trainable: True to finetune layers in the hub module.
    is_tf2: boolean, whether the hub module is in TensorFlow 2.x format.

  Returns:
    A tuple of (1) keras model that outputs start logits and end logits and
    (2) the core BERT transformer encoder.

  Raises:
    ValueError: if `hub_module_url` is empty or None.
  """
  # Fail fast with a clear message instead of an opaque error from inside the
  # hub loader when no module is given.
  if not hub_module_url:
    raise ValueError(
        '`hub_module_url` must be specified to build the QA model from a '
        'TF-Hub module.')

  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')

  if is_tf2:
    core_model = hub.KerasLayer(hub_module_url, trainable=hub_module_trainable)
    pooled_output, sequence_output = core_model(
        [input_word_ids, input_mask, input_type_ids])
  else:
    # Non-TF2 modules are called through the 'tokens' signature, which takes
    # differently named inputs and returns its outputs as a dict.
    bert_model = hub_loader.HubKerasLayerV1V2(
        hub_module_url,
        signature='tokens',
        signature_outputs_as_dict=True,
        trainable=hub_module_trainable)
    outputs = bert_model({
        'input_ids': input_word_ids,
        'input_mask': input_mask,
        'segment_ids': input_type_ids
    })

    pooled_output = outputs['pooled_output']
    sequence_output = outputs['sequence_output']

  bert_encoder = tf.keras.Model(
      inputs=[input_word_ids, input_mask, input_type_ids],
      outputs=[sequence_output, pooled_output],
      name='core_model')
  return models.BertSpanLabeler(
      network=bert_encoder, initializer=initializer), bert_encoder
 def setUp(self):
     """Creates `self.span_labeler` on top of the student model's encoder."""
     super(ExportTfliteSquadTest, self).setUp()
     student_cfg = params.EdgeTPUBERTCustomParams().student_model
     pretrainer = model_builder.build_bert_pretrainer(student_cfg,
                                                      name='pretrainer')
     # Only the encoder of the pretrainer is reused; the span-labeling head
     # is freshly initialized.
     self.span_labeler = models.BertSpanLabeler(
         network=pretrainer.encoder_network,
         initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))
Beispiel #5
0
  def build_model(self):
    """Builds the span-labeling model for this task.

    The encoder comes from `self._hub_module` when it is set, otherwise it is
    instantiated from `self.task_config.network`.

    Returns:
      A `models.BertSpanLabeler` built on top of the encoder network.
    """
    hub_module = self._hub_module
    encoder_network = (
        utils.get_encoder_from_hub(hub_module) if hub_module else
        encoders.instantiate_encoder_from_cfg(self.task_config.network))

    init_stddev = self.task_config.network.initializer_range
    head_initializer = tf.keras.initializers.TruncatedNormal(
        stddev=init_stddev)
    return models.BertSpanLabeler(
        network=encoder_network, initializer=head_initializer)
Beispiel #6
0
 def build_model(self):
     """Builds the span-labeling model for this task.

     The encoder comes from `self._hub_module` when it is set, otherwise it
     is instantiated from `self.task_config.model.encoder`.

     Returns:
       A `models.BertSpanLabeler` built on top of the encoder network.
     """
     encoder_cfg = self.task_config.model.encoder
     if not self._hub_module:
         encoder_network = encoders.instantiate_encoder_from_cfg(encoder_cfg)
     else:
         encoder_network = utils.get_encoder_from_hub(self._hub_module)

     # Currently, only BERT-style question answering finetuning is supported.
     head_initializer = tf.keras.initializers.TruncatedNormal(
         stddev=self.task_config.model.encoder.initializer_range)
     return models.BertSpanLabeler(network=encoder_network,
                                   initializer=head_initializer)
Beispiel #7
0
 def build_model(self):
   """Builds the span-labeling model for this task.

   The encoder is loaded from `task_config.hub_module_url` when that is set,
   otherwise it is built from `task_config.model.encoder`.

   Returns:
     A `models.BertSpanLabeler` built on top of the encoder network.

   Raises:
     ValueError: if both `hub_module_url` and `init_checkpoint` are set.
   """
   task_cfg = self.task_config
   hub_url = task_cfg.hub_module_url
   if hub_url and task_cfg.init_checkpoint:
     raise ValueError('At most one of `hub_module_url` and '
                      '`init_checkpoint` can be specified.')

   if not hub_url:
     encoder_network = encoders.build_encoder(task_cfg.model.encoder)
   else:
     encoder_network = utils.get_encoder_from_hub(hub_url)

   # The initializer range is read from the object returned by the encoder
   # config's `get()`.
   init_stddev = task_cfg.model.encoder.get().initializer_range
   head_initializer = tf.keras.initializers.TruncatedNormal(
       stddev=init_stddev)
   return models.BertSpanLabeler(
       network=encoder_network, initializer=head_initializer)
Beispiel #8
0
 def build_model(self):
     """Builds the span-labeling model for this task.

     When `task_config.hub_module_url` is set, the module is loaded with
     `hub.load` and the encoder is taken from it; otherwise the encoder is
     built from `task_config.model.encoder`.

     Returns:
       A `models.BertSpanLabeler` built on top of the encoder network.

     Raises:
       ValueError: if both `hub_module_url` and `init_checkpoint` are set.
     """
     task_cfg = self.task_config
     hub_url = task_cfg.hub_module_url
     if hub_url and task_cfg.init_checkpoint:
         raise ValueError('At most one of `hub_module_url` and '
                          '`init_checkpoint` can be specified.')

     hub_module = hub.load(hub_url) if hub_url else None
     if not hub_module:
         encoder_network = encoders.build_encoder(task_cfg.model.encoder)
     else:
         encoder_network = utils.get_encoder_from_hub(hub_module)

     # Currently, only BERT-style question answering finetuning is supported.
     init_stddev = task_cfg.model.encoder.get().initializer_range
     head_initializer = tf.keras.initializers.TruncatedNormal(
         stddev=init_stddev)
     return models.BertSpanLabeler(network=encoder_network,
                                   initializer=head_initializer)
Beispiel #9
0
def main(argv: Sequence[str]) -> None:
    """Builds the span labeler, converts it to TFLite and writes it out.

    The model is built from `params.EdgeTPUBERTCustomParams` (overridden by
    FLAGS), optionally restored from `FLAGS.model_checkpoint`, saved to a
    temporary directory, converted with the TFLite converter according to
    `FLAGS.quantization_method`, and written to
    `<FLAGS.export_path>/model.tflite`.

    Args:
      argv: Command-line arguments; more than one entry is rejected.

    Raises:
      app.UsageError: if `argv` has more than one entry.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Set up experiment params and load the configs from file/files.
    experiment_params = params.EdgeTPUBERTCustomParams()
    experiment_params = utils.config_override(experiment_params, FLAGS)

    # change the input mask type to tf.float32 to avoid additional casting op.
    experiment_params.student_model.encoder.mobilebert.input_mask_dtype = 'float32'

    # Experiments indicate using -120 as the mask value for Softmax is good enough
    # for both int8 and bfloat. So we set quantization_friendly to True for both
    # quant and float model.
    pretrainer_model = model_builder.build_bert_pretrainer(
        experiment_params.student_model,
        name='pretrainer',
        quantization_friendly=True)

    encoder_network = pretrainer_model.encoder_network
    model = models.BertSpanLabeler(
        network=encoder_network,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01))

    # Load model weights.
    if FLAGS.model_checkpoint is not None:
        checkpoint_dict = {'model': model}
        checkpoint = tf.train.Checkpoint(**checkpoint_dict)
        checkpoint.restore(
            FLAGS.model_checkpoint).assert_existing_objects_matched()

    model_for_serving = build_model_for_serving(model)
    model_for_serving.summary()

    def _representative_dataset():
        """Yields up to 100 single-example input lists for calibration."""
        dataset_params = question_answering_dataloader.QADataConfig()
        dataset_params.input_path = SQUAD_TRAIN_SPLIT
        dataset_params.drop_remainder = False
        dataset_params.global_batch_size = 1
        dataset_params.is_training = True

        dataset = orbit.utils.make_distributed_dataset(
            tf.distribute.get_strategy(), build_inputs, dataset_params)
        for example in dataset.take(100):
            inputs = example[0]
            input_word_ids = inputs['input_word_ids']
            input_mask = inputs['input_mask']
            input_type_ids = inputs['input_type_ids']
            yield [input_word_ids, input_mask, input_type_ids]

    # TODO(b/194449109): Need to save the model to file and then convert tflite
    # with 'tf.lite.TFLiteConverter.from_saved_model()' to get the expected
    # accuracy
    # Keep the TemporaryDirectory alive as a context manager: taking `.name`
    # from a throwaway instance lets it be finalized (and the directory
    # removed) right away, after which the re-created path is never cleaned up.
    # The directory must outlive `convert()`, so the whole conversion happens
    # inside the `with` block.
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_for_serving.save(tmp_dir)

        converter = tf.lite.TFLiteConverter.from_saved_model(tmp_dir)
        if FLAGS.quantization_method in ['full-integer', 'hybrid']:
            converter.optimizations = [tf.lite.Optimize.DEFAULT]
        if FLAGS.quantization_method in ['full-integer']:
            converter.target_spec.supported_ops = [
                tf.lite.OpsSet.TFLITE_BUILTINS_INT8
            ]
            converter.inference_input_type = tf.int8
            converter.inference_output_type = tf.float32
            converter.representative_dataset = _representative_dataset

        tflite_quant_model = converter.convert()

    export_model_path = os.path.join(FLAGS.export_path, 'model.tflite')
    with tf.io.gfile.GFile(export_model_path, 'wb') as f:
        f.write(tflite_quant_model)
    logging.info('Successfully save the tflite to %s', FLAGS.export_path)