def create_lm_model(self,
                        vocab_size,
                        sequence_length,
                        hidden_size,
                        num_predictions,
                        output="predictions"):
        # First, create a transformer stack that we can use to get the LM's
        # vocabulary weights (embedding table).
        xformer_stack = networks.BertEncoder(
            vocab_size=vocab_size,
            num_layers=1,
            sequence_length=sequence_length,
            hidden_size=hidden_size,
            num_attention_heads=4,
        )
        word_ids = tf.keras.Input(shape=(sequence_length, ), dtype=tf.int32)
        mask = tf.keras.Input(shape=(sequence_length, ), dtype=tf.int32)
        type_ids = tf.keras.Input(shape=(sequence_length, ), dtype=tf.int32)
        _ = xformer_stack([word_ids, mask, type_ids])

        # Create a MaskedLM layer from the transformer stack.
        test_layer = layers.MaskedLM(
            embedding_table=xformer_stack.get_embedding_table(), output=output)

        # Create a model from the masked LM layer.
        lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
        masked_lm_positions = tf.keras.Input(shape=(num_predictions, ),
                                             dtype=tf.int32)
        output = test_layer(lm_input_tensor,
                            masked_positions=masked_lm_positions)
        return tf.keras.Model([lm_input_tensor, masked_lm_positions], output)
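
For reference, here is a minimal standalone sketch of driving layers.MaskedLM directly. It assumes layers comes from the TF Model Garden's official.nlp.modeling package (the snippets on this page do not show their imports, so that path is an assumption), and it uses a plain variable in place of an encoder's embedding table.

import tensorflow as tf
from official.nlp.modeling import layers  # assumed import path

# Stand-in for an encoder's [vocab_size, hidden_size] embedding table.
embedding_table = tf.Variable(tf.random.normal([100, 16]))
masked_lm = layers.MaskedLM(embedding_table=embedding_table)

batch, seq_len, num_preds = 2, 8, 3
sequence_output = tf.random.uniform((batch, seq_len, 16))
masked_positions = tf.random.uniform(
    (batch, num_preds), maxval=seq_len, dtype=tf.int32)
logits = masked_lm(sequence_output, masked_positions=masked_positions)
print(logits.shape)  # (2, 3, 100) with the default output='logits'
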
Example #2
    def __init__(
            self,
            encoder_network: tf.keras.Model,
            mlm_activation=None,
            mlm_initializer='glorot_uniform',
            classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
            customized_masked_lm: Optional[tf.keras.layers.Layer] = None,
            name: str = 'bert',
            **kwargs):
        super().__init__(name=name, **kwargs)
        self._config = {
            'encoder_network': encoder_network,
            'mlm_initializer': mlm_initializer,
            'classification_heads': classification_heads,
            'name': name,
        }
        self.encoder_network = encoder_network
        inputs = copy.copy(self.encoder_network.inputs)
        self.classification_heads = classification_heads or []
        if len(set([cls.name for cls in self.classification_heads])) != len(
                self.classification_heads):
            raise ValueError('Classification heads should have unique names.')

        self.masked_lm = customized_masked_lm or layers.MaskedLM(
            embedding_table=self.encoder_network.get_embedding_table(),
            activation=mlm_activation,
            initializer=mlm_initializer,
            name='cls/predictions')
        masked_lm_positions = tf.keras.layers.Input(shape=(None, ),
                                                    name='masked_lm_positions',
                                                    dtype=tf.int32)
        inputs.append(masked_lm_positions)
        self.inputs = inputs
Example #3
    def __init__(
            self,
            encoder_network: tf.keras.Model,
            mlm_activation=None,
            mlm_initializer='glorot_uniform',
            classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
            name: str = 'bert',
            **kwargs):
        self._self_setattr_tracking = False
        self._config = {
            'encoder_network': encoder_network,
            'mlm_initializer': mlm_initializer,
            'classification_heads': classification_heads,
            'name': name,
        }
        self.encoder_network = encoder_network
        inputs = copy.copy(self.encoder_network.inputs)
        outputs = dict()
        encoder_network_outputs = self.encoder_network(inputs)
        if isinstance(encoder_network_outputs, list):
            outputs['pooled_output'] = encoder_network_outputs[1]
            # When `encoder_network` was instantiated with return_all_encoder_outputs
            # set to True, `encoder_network_outputs[0]` is a list containing
            # all transformer layers' output.
            if isinstance(encoder_network_outputs[0], list):
                outputs['encoder_outputs'] = encoder_network_outputs[0]
                outputs['sequence_output'] = encoder_network_outputs[0][-1]
            else:
                outputs['sequence_output'] = encoder_network_outputs[0]
        elif isinstance(encoder_network_outputs, dict):
            outputs = encoder_network_outputs
        else:
            raise ValueError(
                'encoder_network\'s output should be either a list '
                'or a dict, but got %s' % encoder_network_outputs)

        sequence_output = outputs['sequence_output']
        self.classification_heads = classification_heads or []
        if len(set([cls.name for cls in self.classification_heads])) != len(
                self.classification_heads):
            raise ValueError('Classification heads should have unique names.')

        self.masked_lm = layers.MaskedLM(
            embedding_table=self.encoder_network.get_embedding_table(),
            activation=mlm_activation,
            initializer=mlm_initializer,
            name='cls/predictions')
        masked_lm_positions = tf.keras.layers.Input(shape=(None, ),
                                                    name='masked_lm_positions',
                                                    dtype=tf.int32)
        inputs.append(masked_lm_positions)
        outputs['mlm_logits'] = self.masked_lm(
            sequence_output, masked_positions=masked_lm_positions)
        for cls_head in self.classification_heads:
            outputs[cls_head.name] = cls_head(sequence_output)

        super(BertPretrainerV2, self).__init__(inputs=inputs,
                                               outputs=outputs,
                                               name=name,
                                               **kwargs)
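
A hedged sketch of instantiating the class above together with a classification head; the import locations and the small encoder configuration below are assumptions, not something shown in this snippet.

import tensorflow as tf
from official.nlp.modeling import layers, networks
from official.nlp.modeling.models import BertPretrainerV2  # assumed location

# Small illustrative encoder; real configurations are much larger.
encoder = networks.BertEncoder(
    vocab_size=100, num_layers=2, hidden_size=32, num_attention_heads=4)
nsp_head = layers.ClassificationHead(
    inner_dim=32, num_classes=2, name='next_sentence')
pretrainer = BertPretrainerV2(
    encoder_network=encoder, classification_heads=[nsp_head])
# The resulting model's outputs include 'mlm_logits', 'sequence_output',
# 'pooled_output', and 'next_sentence'.
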
Example #4
  def test_bert_pretrainerv2(self, dict_outputs, return_all_encoder_outputs,
                             use_customized_masked_lm):
    """Validate that the Keras object can be created."""
    # Build a transformer network to use within the BERT trainer.
    vocab_size = 100
    sequence_length = 512
    hidden_size = 48
    num_layers = 2
    test_network = networks.BertEncoder(
        vocab_size=vocab_size,
        num_layers=num_layers,
        hidden_size=hidden_size,
        max_sequence_length=sequence_length,
        return_all_encoder_outputs=return_all_encoder_outputs,
        dict_outputs=dict_outputs)

    # Create a BERT trainer with the created network.
    if use_customized_masked_lm:
      customized_masked_lm = layers.MaskedLM(
          embedding_table=test_network.get_embedding_table())
    else:
      customized_masked_lm = None

    bert_trainer_model = bert_pretrainer.BertPretrainerV2(
        encoder_network=test_network, customized_masked_lm=customized_masked_lm)
    num_token_predictions = 20
    # Create a set of 2-dimensional inputs (the first dimension is implicit).
    inputs = dict(
        input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
        input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
        input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32),
        masked_lm_positions=tf.keras.Input(
            shape=(num_token_predictions,), dtype=tf.int32))

    # Invoke the trainer model on the inputs. This causes the layer to be built.
    outputs = bert_trainer_model(inputs)

    has_encoder_outputs = dict_outputs or return_all_encoder_outputs
    if has_encoder_outputs:
      self.assertSameElements(
          outputs.keys(),
          ['sequence_output', 'pooled_output', 'mlm_logits', 'encoder_outputs'])
      self.assertLen(outputs['encoder_outputs'], num_layers)
    else:
      self.assertSameElements(
          outputs.keys(), ['sequence_output', 'pooled_output', 'mlm_logits'])

    # Validate that the outputs are of the expected shape.
    expected_lm_shape = [None, num_token_predictions, vocab_size]
    self.assertAllEqual(expected_lm_shape,
                        outputs['mlm_logits'].shape.as_list())

    expected_sequence_output_shape = [None, sequence_length, hidden_size]
    self.assertAllEqual(expected_sequence_output_shape,
                        outputs['sequence_output'].shape.as_list())

    expected_pooled_output_shape = [None, hidden_size]
    self.assertAllEqual(expected_pooled_output_shape,
                        outputs['pooled_output'].shape.as_list())
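
As a continuation of the test above, concrete batches can also be fed to the built trainer. This sketch reuses the vocab_size, sequence_length, num_token_predictions, and bert_trainer_model names defined in the test and assumes eager execution; the random data is purely illustrative.

import numpy as np

batch_size = 2
data = dict(
    input_word_ids=np.random.randint(
        0, vocab_size, (batch_size, sequence_length), dtype=np.int32),
    input_mask=np.ones((batch_size, sequence_length), dtype=np.int32),
    input_type_ids=np.zeros((batch_size, sequence_length), dtype=np.int32),
    masked_lm_positions=np.random.randint(
        0, sequence_length, (batch_size, num_token_predictions),
        dtype=np.int32))
results = bert_trainer_model(data)
# results['mlm_logits'] has shape (batch_size, num_token_predictions, vocab_size).
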
Example #5
    def __init__(self,
                 generator_network,
                 discriminator_network,
                 vocab_size,
                 num_classes,
                 sequence_length,
                 num_token_predictions,
                 mlm_activation=None,
                 mlm_initializer='glorot_uniform',
                 output_type='logits',
                 disallow_correct=False,
                 **kwargs):
        super(ElectraPretrainer, self).__init__()
        self._config = {
            'generator_network': generator_network,
            'discriminator_network': discriminator_network,
            'vocab_size': vocab_size,
            'num_classes': num_classes,
            'sequence_length': sequence_length,
            'num_token_predictions': num_token_predictions,
            'mlm_activation': mlm_activation,
            'mlm_initializer': mlm_initializer,
            'output_type': output_type,
            'disallow_correct': disallow_correct,
        }
        for k, v in kwargs.items():
            self._config[k] = v

        self.generator_network = generator_network
        self.discriminator_network = discriminator_network
        self.vocab_size = vocab_size
        self.num_classes = num_classes
        self.sequence_length = sequence_length
        self.num_token_predictions = num_token_predictions
        self.mlm_activation = mlm_activation
        self.mlm_initializer = mlm_initializer
        self.output_type = output_type
        self.disallow_correct = disallow_correct
        self.masked_lm = layers.MaskedLM(
            embedding_table=generator_network.get_embedding_table(),
            activation=mlm_activation,
            initializer=mlm_initializer,
            output=output_type,
            name='generator_masked_lm')
        self.classification = layers.ClassificationHead(
            inner_dim=generator_network._config_dict['hidden_size'],
            num_classes=num_classes,
            initializer=mlm_initializer,
            name='generator_classification_head')
        self.discriminator_projection = tf.keras.layers.Dense(
            units=discriminator_network._config_dict['hidden_size'],
            activation=mlm_activation,
            kernel_initializer=mlm_initializer,
            name='discriminator_projection_head')
        self.discriminator_head = tf.keras.layers.Dense(
            units=1, kernel_initializer=mlm_initializer)
Example #6
    def __init__(self,
                 generator_network,
                 discriminator_mws_network,
                 num_discriminator_task_agnostic_layers,
                 vocab_size,
                 candidate_size=5,
                 mlm_activation=None,
                 mlm_initializer='glorot_uniform',
                 output_type='logits',
                 **kwargs):
        super().__init__()
        self._config = {
            'generator_network': generator_network,
            'discriminator_mws_network': discriminator_mws_network,
            'num_discriminator_task_agnostic_layers':
            num_discriminator_task_agnostic_layers,
            'vocab_size': vocab_size,
            'candidate_size': candidate_size,
            'mlm_activation': mlm_activation,
            'mlm_initializer': mlm_initializer,
            'output_type': output_type,
        }
        for k, v in kwargs.items():
            self._config[k] = v

        self.generator_network = generator_network
        self.discriminator_mws_network = discriminator_mws_network
        self.vocab_size = vocab_size
        self.candidate_size = candidate_size
        self.mlm_activation = mlm_activation
        self.mlm_initializer = mlm_initializer
        self.output_type = output_type
        embedding_table = (
            generator_network.embedding_network.get_embedding_table())
        self.masked_lm = layers.MaskedLM(embedding_table=embedding_table,
                                         activation=mlm_activation,
                                         initializer=mlm_initializer,
                                         output=output_type,
                                         name='generator_masked_lm')
        discriminator_cfg = self.discriminator_mws_network.get_config()
        self.discriminator_rtd_head = ReplacedTokenDetectionHead(
            encoder_cfg=discriminator_cfg,
            num_task_agnostic_layers=num_discriminator_task_agnostic_layers,
            output=output_type,
            name='discriminator_rtd')
        hidden_cfg = discriminator_cfg['hidden_cfg']
        self.discriminator_mws_head = MultiWordSelectionHead(
            embedding_table=embedding_table,
            activation=hidden_cfg['intermediate_activation'],
            initializer=hidden_cfg['kernel_initializer'],
            output=output_type,
            name='discriminator_mws')
        self.num_task_agnostic_layers = num_discriminator_task_agnostic_layers
Example #7
    def __init__(
            self,
            num_masked_tokens: int,
            encoder_network: tf.keras.Model,
            mlm_activation=None,
            mlm_initializer='glorot_uniform',
            classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
            name: str = 'bert',
            **kwargs):
        self._self_setattr_tracking = False
        self._config = {
            'encoder_network': encoder_network,
            'num_masked_tokens': num_masked_tokens,
            'mlm_initializer': mlm_initializer,
            'classification_heads': classification_heads,
            'name': name,
        }

        self.encoder_network = encoder_network
        inputs = copy.copy(self.encoder_network.inputs)
        sequence_output, _ = self.encoder_network(inputs)

        self.classification_heads = classification_heads or []
        if len(set([cls.name for cls in self.classification_heads])) != len(
                self.classification_heads):
            raise ValueError('Classification heads should have unique names.')

        outputs = dict()
        if num_masked_tokens > 0:
            self.masked_lm = layers.MaskedLM(
                embedding_table=self.encoder_network.get_embedding_table(),
                activation=mlm_activation,
                initializer=mlm_initializer,
                name='cls/predictions')
            masked_lm_positions = tf.keras.layers.Input(
                shape=(num_masked_tokens, ),
                name='masked_lm_positions',
                dtype=tf.int32)
            inputs.append(masked_lm_positions)
            outputs['lm_output'] = self.masked_lm(
                sequence_output, masked_positions=masked_lm_positions)
        for cls_head in self.classification_heads:
            outputs[cls_head.name] = cls_head(sequence_output)

        super(BertPretrainerV2, self).__init__(inputs=inputs,
                                               outputs=outputs,
                                               name=name,
                                               **kwargs)
Example #8
    def __init__(self,
                 network,
                 num_classes,
                 num_token_predictions,
                 embedding_table=None,
                 activation=None,
                 initializer='glorot_uniform',
                 output='logits',
                 **kwargs):

        # We want to use the inputs of the passed network as the inputs to this
        # Model. To do this, we need to keep a copy of the network inputs for use
        # when we construct the Model object at the end of init. (We keep a copy
        # because we'll be adding another tensor to the copy later.)
        network_inputs = network.inputs
        inputs = copy.copy(network_inputs)

        # Because we have a copy of inputs to create this Model object, we can
        # invoke the Network object with its own input tensors to start the Model.
        # Note that, because of how deferred construction happens, we can't use
        # the copy of the list here - by the time the network is invoked, the list
        # object contains the additional input added below.
        sequence_output, cls_output = network(network_inputs)

        # The encoder network may get outputs from all layers.
        if isinstance(sequence_output, list):
            sequence_output = sequence_output[-1]
        if isinstance(cls_output, list):
            cls_output = cls_output[-1]
        sequence_output_length = sequence_output.shape.as_list()[1]
        if sequence_output_length is not None and (sequence_output_length <
                                                   num_token_predictions):
            raise ValueError(
                "The passed network's output length is %s, which is less than the "
                'requested num_token_predictions %s.' %
                (sequence_output_length, num_token_predictions))

        masked_lm_positions = tf.keras.layers.Input(
            shape=(num_token_predictions, ),
            name='masked_lm_positions',
            dtype=tf.int32)
        inputs.append(masked_lm_positions)

        if embedding_table is None:
            embedding_table = network.get_embedding_table()
        masked_lm = layers.MaskedLM(embedding_table=embedding_table,
                                    activation=activation,
                                    initializer=initializer,
                                    output=output,
                                    name='cls/predictions')
        lm_outputs = masked_lm(sequence_output,
                               masked_positions=masked_lm_positions)

        classification = networks.Classification(
            input_width=cls_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output=output,
            name='classification')
        sentence_outputs = classification(cls_output)

        super(BertPretrainer,
              self).__init__(inputs=inputs,
                             outputs=dict(masked_lm=lm_outputs,
                                          classification=sentence_outputs),
                             **kwargs)

        # b/164516224
        # Once we've created the network using the Functional API, we call
        # super().__init__ as though we were invoking the Functional API Model
        # constructor, resulting in this object having all the properties of a model
        # created using the Functional API. Once super().__init__ is called, we
        # can assign attributes to `self` - note that all `self` assignments are
        # below this line.
        config_dict = {
            'network': network,
            'num_classes': num_classes,
            'num_token_predictions': num_token_predictions,
            'activation': activation,
            'initializer': initializer,
            'output': output,
        }

        # We are storing the config dict as a namedtuple here to ensure checkpoint
        # compatibility with an earlier version of this model which did not track
        # the config dict attribute. TF does not track immutable attrs which
        # do not contain Trackables, so by creating a config namedtuple instead of
        # a dict we avoid tracking it.
        config_cls = collections.namedtuple('Config', config_dict.keys())
        self._config = config_cls(**config_dict)

        self.encoder = network
        self.classification = classification
        self.masked_lm = masked_lm
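
The masked-position selection that MaskedLM performs before projecting back onto the embedding table can be pictured with a plain tf.gather; the following is only a sketch of the idea, not the library's exact implementation (which flattens the indices internally).

import tensorflow as tf

batch, seq_len, hidden = 2, 8, 4
sequence_output = tf.random.uniform((batch, seq_len, hidden))
masked_positions = tf.constant([[1, 3], [0, 5]])  # [batch, num_predictions]

# One hidden vector is selected per masked position, per batch element.
gathered = tf.gather(sequence_output, masked_positions, batch_dims=1)
print(gathered.shape)  # (2, 2, 4)
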
Example #9
    def __init__(self,
                 network,
                 num_classes,
                 num_token_predictions,
                 embedding_table=None,
                 activation=None,
                 initializer='glorot_uniform',
                 output='logits',
                 **kwargs):
        self._self_setattr_tracking = False
        self._config = {
            'network': network,
            'num_classes': num_classes,
            'num_token_predictions': num_token_predictions,
            'activation': activation,
            'initializer': initializer,
            'output': output,
        }
        self.encoder = network
        # We want to use the inputs of the passed network as the inputs to this
        # Model. To do this, we need to keep a copy of the network inputs for use
        # when we construct the Model object at the end of init. (We keep a copy
        # because we'll be adding another tensor to the copy later.)
        network_inputs = self.encoder.inputs
        inputs = copy.copy(network_inputs)

        # Because we have a copy of inputs to create this Model object, we can
        # invoke the Network object with its own input tensors to start the Model.
        # Note that, because of how deferred construction happens, we can't use
        # the copy of the list here - by the time the network is invoked, the list
        # object contains the additional input added below.
        sequence_output, cls_output = self.encoder(network_inputs)

        # The encoder network may get outputs from all layers.
        if isinstance(sequence_output, list):
            sequence_output = sequence_output[-1]
        if isinstance(cls_output, list):
            cls_output = cls_output[-1]
        sequence_output_length = sequence_output.shape.as_list()[1]
        if sequence_output_length is not None and (sequence_output_length <
                                                   num_token_predictions):
            raise ValueError(
                "The passed network's output length is %s, which is less than the "
                'requested num_token_predictions %s.' %
                (sequence_output_length, num_token_predictions))

        masked_lm_positions = tf.keras.layers.Input(
            shape=(num_token_predictions, ),
            name='masked_lm_positions',
            dtype=tf.int32)
        inputs.append(masked_lm_positions)

        if embedding_table is None:
            embedding_table = self.encoder.get_embedding_table()
        self.masked_lm = layers.MaskedLM(embedding_table=embedding_table,
                                         activation=activation,
                                         initializer=initializer,
                                         output=output,
                                         name='cls/predictions')
        lm_outputs = self.masked_lm(sequence_output,
                                    masked_positions=masked_lm_positions)

        self.classification = networks.Classification(
            input_width=cls_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output=output,
            name='classification')
        sentence_outputs = self.classification(cls_output)

        super(BertPretrainer,
              self).__init__(inputs=inputs,
                             outputs=dict(masked_lm=lm_outputs,
                                          classification=sentence_outputs),
                             **kwargs)
Example #10
  def __init__(
      self,
      encoder_network: tf.keras.Model,
      mlm_activation=None,
      mlm_initializer='glorot_uniform',
      classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
      customized_masked_lm: Optional[tf.keras.layers.Layer] = None,
      name: str = 'bert',
      **kwargs):

    inputs = copy.copy(encoder_network.inputs)
    outputs = {}
    encoder_network_outputs = encoder_network(inputs)
    if isinstance(encoder_network_outputs, list):
      outputs['pooled_output'] = encoder_network_outputs[1]
      if isinstance(encoder_network_outputs[0], list):
        outputs['encoder_outputs'] = encoder_network_outputs[0]
        outputs['sequence_output'] = encoder_network_outputs[0][-1]
      else:
        outputs['sequence_output'] = encoder_network_outputs[0]
    elif isinstance(encoder_network_outputs, dict):
      outputs = encoder_network_outputs
    else:
      raise ValueError('encoder_network\'s output should be either a list '
                       'or a dict, but got %s' % encoder_network_outputs)

    masked_lm_positions = tf.keras.layers.Input(
        shape=(None,), name='masked_lm_positions', dtype=tf.int32)
    inputs.append(masked_lm_positions)
    masked_lm_layer = customized_masked_lm or layers.MaskedLM(
        embedding_table=encoder_network.get_embedding_table(),
        activation=mlm_activation,
        initializer=mlm_initializer,
        name='cls/predictions')
    sequence_output = outputs['sequence_output']
    outputs['mlm_logits'] = masked_lm_layer(
        sequence_output, masked_positions=masked_lm_positions)

    classification_head_layers = classification_heads or []
    for cls_head in classification_head_layers:
      cls_outputs = cls_head(sequence_output)
      if isinstance(cls_outputs, dict):
        outputs.update(cls_outputs)
      else:
        outputs[cls_head.name] = cls_outputs

    super(MobileBERTEdgeTPUPretrainer, self).__init__(
        inputs=inputs,
        outputs=outputs,
        name=name,
        **kwargs)

    self._config = {
        'encoder_network': encoder_network,
        'mlm_activation': mlm_activation,
        'mlm_initializer': mlm_initializer,
        'classification_heads': classification_heads,
        'customized_masked_lm': customized_masked_lm,
        'name': name,
    }

    self.encoder_network = encoder_network
    self.masked_lm = masked_lm_layer
    self.classification_heads = classification_head_layers
Example #11
    self.generator_network = generator_network
    self.discriminator_network = discriminator_network
    self.vocab_size = vocab_size
    self.num_classes = num_classes
    self.sequence_length = sequence_length
    self.num_token_predictions = num_token_predictions
    self.mlm_activation = mlm_activation
    self.mlm_initializer = mlm_initializer
    self.output_type = output_type
    self.disallow_correct = disallow_correct
    self.masked_lm = layers.MaskedLM(
        embedding_table=generator_network.get_embedding_table(),
        activation=mlm_activation,
        initializer=mlm_initializer,
        output=output_type,
        name='generator_masked_lm')
    self.classification = layers.ClassificationHead(
        inner_dim=generator_network._config_dict['hidden_size'],
        num_classes=num_classes,
        initializer=mlm_initializer,
        name='generator_classification_head')
    self.discriminator_projection = tf.keras.layers.Dense(
        units=discriminator_network._config_dict['hidden_size'],
        activation=mlm_activation,
        kernel_initializer=mlm_initializer,
        name='discriminator_projection_head')
    self.discriminator_head = tf.keras.layers.Dense(
        units=1, kernel_initializer=mlm_initializer)
Example #12
      raise ValueError(
          "The passed network's output length is %s, which is less than the "
          'requested num_token_predictions %s.' %
          (sequence_output_length, num_token_predictions))

    masked_lm_positions = tf.keras.layers.Input(
        shape=(num_token_predictions,),
        name='masked_lm_positions',
        dtype=tf.int32)
    inputs.append(masked_lm_positions)

    if embedding_table is None:
      embedding_table = self.encoder.get_embedding_table()
    self.masked_lm = layers.MaskedLM(
        embedding_table=embedding_table,
        activation=activation,
        initializer=initializer,
        output=output,
        name='cls/predictions')
    lm_outputs = self.masked_lm(
        sequence_output, masked_positions=masked_lm_positions)

    self.classification = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    sentence_outputs = self.classification(cls_output)

    super(BertPretrainer, self).__init__(
        inputs=inputs,