def create_classification_model(self, input_width, num_classes):
    """Wraps a `networks.Classification` head in a functional Keras model.

    Args:
      input_width: Size of the feature dimension of the model input; also
        forwarded to the head as `input_width`.
      num_classes: Forwarded to the head as `num_classes`.

    Returns:
      A `tf.keras.Model` mapping a float32 input declared with shape
      `(input_width,)` to the output of the classification head.
    """
    head = networks.Classification(input_width=input_width,
                                   num_classes=num_classes)
    # Only the feature dimension is declared here; the leading (batch)
    # dimension is implicit in `tf.keras.Input`.
    model_input = tf.keras.Input(shape=(input_width, ), dtype=tf.float32)
    return tf.keras.Model(model_input, head(model_input))
Exemple #2
0
  def __init__(self,
               network,
               num_classes,
               initializer='glorot_uniform',
               output='logits',
               dropout_rate=0.1,
               **kwargs):
    """Builds a classification model on top of `network`.

    Args:
      network: A Keras network exposing `inputs`. Calling it on those inputs
        must return two values; the second one is fed to the classifier.
      num_classes: Number of classes, forwarded to `networks.Classification`.
      initializer: Initializer forwarded to `networks.Classification`.
      output: Output type forwarded to `networks.Classification`.
      dropout_rate: Rate of the dropout layer applied before the classifier.
      **kwargs: Forwarded to the parent constructor.
    """
    self._self_setattr_tracking = False
    # `dropout_rate` is recorded alongside the other constructor arguments so
    # that a model rebuilt from this config keeps a non-default rate instead
    # of silently falling back to 0.1.
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'initializer': initializer,
        'output': output,
        'dropout_rate': dropout_rate,
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a handle to the network inputs for use
    # when we construct the Model object at the end of init.
    inputs = network.inputs

    # Because we have a copy of inputs to create this Model object, we can
    # invoke the Network object with its own input tensors to start the Model.
    # Only the second output is used by this head.
    _, cls_output = network(inputs)
    cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)

    self.classifier = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    predictions = self.classifier(cls_output)

    super(BertClassifier, self).__init__(
        inputs=inputs, outputs=predictions, **kwargs)
Exemple #3
0
    def __init__(self,
                 network,
                 num_classes,
                 initializer='glorot_uniform',
                 dropout_rate=0.1,
                 use_encoder_pooler=True,
                 **kwargs):
        """Builds a sentence-prediction model on top of `network`.

        Args:
          network: A Keras network exposing `inputs`. Calling it on those
            inputs must return either a list (index 0 and index 1 are read)
            or a mapping with 'sequence_output' / 'pooled_output' keys.
          num_classes: Number of classes predicted by the head.
          initializer: Initializer forwarded to the head.
          dropout_rate: Dropout rate; applied through a `Dropout` layer when
            `use_encoder_pooler` is true, otherwise forwarded to
            `layers.ClassificationHead`.
          use_encoder_pooler: If true, classify from the network's pooled
            output with `networks.Classification`; otherwise classify from
            its sequence output with `layers.ClassificationHead`.
          **kwargs: Forwarded to the parent constructor.
        """
        self._self_setattr_tracking = False
        self._network = network
        # `dropout_rate` is part of the config so that rebuilding the model
        # from it does not silently reset the rate to the default.
        self._config = {
            'network': network,
            'num_classes': num_classes,
            'initializer': initializer,
            'dropout_rate': dropout_rate,
            'use_encoder_pooler': use_encoder_pooler,
        }

        # We want to use the inputs of the passed network as the inputs to this
        # Model. To do this, we need to keep a handle to the network inputs for use
        # when we construct the Model object at the end of init.
        inputs = network.inputs

        # Both heads start from the same encoder call, so it is made once
        # here rather than repeated in each branch.
        outputs = network(inputs)

        if use_encoder_pooler:
            if isinstance(outputs, list):
                cls_output = outputs[1]
            else:
                cls_output = outputs['pooled_output']
            cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)

            self.classifier = networks.Classification(
                input_width=cls_output.shape[-1],
                num_classes=num_classes,
                initializer=initializer,
                output='logits',
                name='sentence_prediction')
            predictions = self.classifier(cls_output)
        else:
            if isinstance(outputs, list):
                sequence_output = outputs[0]
            else:
                sequence_output = outputs['sequence_output']
            self.classifier = layers.ClassificationHead(
                inner_dim=sequence_output.shape[-1],
                num_classes=num_classes,
                initializer=initializer,
                dropout_rate=dropout_rate,
                name='sentence_prediction')
            predictions = self.classifier(sequence_output)

        super(BertClassifier, self).__init__(inputs=inputs,
                                             outputs=predictions,
                                             **kwargs)
Exemple #4
0
    def __init__(self,
                 network,
                 num_classes,
                 initializer='glorot_uniform',
                 output='logits',
                 dropout_rate=0.1,
                 **kwargs):
        """Builds a joint classification and span-labeling model.

        The model has three outputs, in this order: start logits, end logits
        and the classification predictions.

        Args:
          network: A Keras network exposing `inputs`. Calling it on those
            inputs must return a (sequence_output, cls_output) pair.
          num_classes: Number of classes, forwarded to
            `networks.Classification`.
          initializer: Initializer forwarded to both heads.
          output: Output type forwarded to both heads.
          dropout_rate: Rate of the dropout layer applied to `cls_output`
            before the classification head.
          **kwargs: Forwarded to the parent constructor.
        """
        self._self_setattr_tracking = False
        # `dropout_rate` is stored with the other constructor arguments so a
        # model rebuilt from this config keeps a non-default rate.
        self._config = {
            'network': network,
            'num_classes': num_classes,
            'initializer': initializer,
            'output': output,
            'dropout_rate': dropout_rate,
        }
        # We want to use the inputs of the passed network as the inputs to this
        # Model. To do this, we need to keep a handle to the network inputs for use
        # when we construct the Model object at the end of init.
        inputs = network.inputs

        # Because we have a copy of inputs to create this Model object, we can
        # invoke the Network object with its own input tensors to start the Model.
        sequence_output, cls_output = network(inputs)

        cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)
        self.classifier = networks.Classification(
            input_width=cls_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output=output,
            name='classification')

        predictions = self.classifier(cls_output)

        # This is an instance variable for ease of access to the underlying task
        # network.
        self.span_labeling = networks.SpanLabeling(
            input_width=sequence_output.shape[-1],
            initializer=initializer,
            output=output,
            name='span_labeling')
        start_logits, end_logits = self.span_labeling(sequence_output)

        # Use identity layers wrapped in lambdas to explicitly name the output
        # tensors. This allows us to use string-keyed dicts in Keras fit/predict/
        # evaluate calls.
        start_logits = tf.keras.layers.Lambda(
            tf.identity, name='start_positions')(start_logits)
        end_logits = tf.keras.layers.Lambda(tf.identity,
                                            name='end_positions')(end_logits)

        logits = [start_logits, end_logits, predictions]

        super(BertUnifiedLabeler, self).__init__(inputs=inputs,
                                                 outputs=logits,
                                                 **kwargs)
Exemple #5
0
    def __init__(self,
                 network: tf.keras.Model,
                 num_classes: int,
                 initializer: Union[
                     str,
                     tf.keras.initializers.Initializer] = 'glorot_uniform',
                 dropout_rate: float = 0.1,
                 use_mc_dropout: bool = False,
                 **kwargs: Dict[str, Any]):
        """Initializer.

        Args:
          network: A transformer network. Calling it on its own `inputs` must
            return a pair; the second element is used as the CLS embedding.
          num_classes: Number of classes to predict from the classification
            network.
          initializer: The initializer to use in the classification network.
            Defaults to a Glorot uniform initializer.
          dropout_rate: The dropout probability of the cls head.
          use_mc_dropout: Whether to use MC Dropout before the dense output
            layer. When true, the dropout layer is called with
            `training=True`; otherwise `training=None` is passed.
          **kwargs: Additional keyword arguments, forwarded to the parent
            constructor.
        """
        self._self_setattr_tracking = False
        self._network = network
        # `dropout_rate` is stored with the other constructor arguments so a
        # model rebuilt from this config keeps a non-default rate.
        self._config = {
            'network': network,
            'num_classes': num_classes,
            'initializer': initializer,
            'dropout_rate': dropout_rate,
            'use_mc_dropout': use_mc_dropout
        }

        # We want to use the inputs of the passed network as the inputs to this
        # Model. To do this, we need to keep a handle to the network inputs for use
        # when we construct the Model object at the end of init.
        inputs = network.inputs

        # Construct classifier using CLS token of the BERT encoder output.
        _, cls_output = network(inputs)

        # Perform MC Dropout on the CLS embedding.
        training = True if use_mc_dropout else None
        cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(
            cls_output, training=training)

        # Produce final logits.
        self.classifier = bert_encoder.Classification(
            input_width=cls_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output='logits',
            name='sentence_prediction')
        predictions = self.classifier(cls_output)

        super().__init__(inputs=inputs, outputs=predictions, **kwargs)
    def __init__(
            self,
            network: tf.keras.Model,
            num_classes: int,
            gp_layer_kwargs: Dict[str, Any],
            initializer: Optional[tf.keras.initializers.Initializer] = None,
            dropout_rate: float = 0.1,
            use_gp_layer: bool = True,
            **kwargs: Mapping[str, Any]):
        """Initializer.

        Args:
          network: A transformer network. Calling it on its own `inputs` must
            return a pair; the second element is used as the CLS embedding.
          num_classes: Number of classes to predict.
          gp_layer_kwargs: Keyword arguments unpacked into the Gaussian
            process layer. Only used when `use_gp_layer` is true.
          initializer: The initializer (if any) handed to the output layer.
            Defaults to None.
          dropout_rate: The dropout probability applied to the CLS embedding.
          use_gp_layer: Whether the output layer is a random-feature Gaussian
            process layer (true) or a plain classification head (false).
          **kwargs: Additional keyword arguments, forwarded to the parent
            constructor.
        """
        self._self_setattr_tracking = False
        self._network = network
        self._config = {
            'network': network,
            'num_classes': num_classes,
            'initializer': initializer,
            'dropout_rate': dropout_rate,
            'use_gp_layer': use_gp_layer,
            'gp_layer_kwargs': gp_layer_kwargs
        }

        # The wrapped network's own input tensors become the inputs of this
        # model, so hold on to them until the parent constructor is called.
        inputs = network.inputs

        # Classify from the CLS embedding (second encoder output), after
        # dropout.
        _, pooled = network(inputs)
        pooled = tf.keras.layers.Dropout(rate=dropout_rate)(pooled)

        # Pick the layer that produces the final logits.
        if not use_gp_layer:
            head = bert_encoder.Classification(
                input_width=pooled.shape[-1],
                num_classes=num_classes,
                initializer=initializer,
                output='logits',
                name='sentence_prediction')
        else:
            # The random features are drawn with stddev=0.05 (the tf keras
            # default) rather than the stddev=1. often suggested by the
            # theoretical literature, because a deep BERT model is sensitive
            # to the scaling of its initializers.
            random_features_init = tf.keras.initializers.RandomNormal(
                mean=0.0, stddev=0.05)
            head = ed.layers.RandomFeatureGaussianProcess(
                units=num_classes,
                scale_random_features=False,
                use_custom_random_features=True,
                kernel_initializer=initializer,
                custom_random_features_initializer=random_features_init,
                **gp_layer_kwargs)
        self.classifier = head
        logits = head(pooled)

        super().__init__(inputs=inputs, outputs=logits, **kwargs)
    def __init__(self,
                 network,
                 num_classes,
                 num_token_predictions,
                 embedding_table=None,
                 activation=None,
                 initializer='glorot_uniform',
                 output='logits',
                 **kwargs):
        """Builds a model with a masked-LM head and a classification head.

        The model takes the inputs of `network` plus an int32
        'masked_lm_positions' input, and outputs a dict with the keys
        'masked_lm' and 'classification'.

        Args:
          network: A Keras network exposing `inputs`. Calling it on those
            inputs must return a (sequence_output, cls_output) pair; either
            element may be a list, in which case its last entry is used.
          num_classes: Number of classes, forwarded to
            `networks.Classification`.
          num_token_predictions: Width of the 'masked_lm_positions' input.
          embedding_table: Embedding table for the masked-LM head. When None,
            `network.get_embedding_table()` is used.
          activation: Activation forwarded to `layers.MaskedLM`.
          initializer: Initializer forwarded to both heads.
          output: Output type forwarded to both heads.
          **kwargs: Forwarded to the parent constructor.

        Raises:
          ValueError: If the sequence output has a known length that is
            smaller than `num_token_predictions`.
        """
        # The model inputs are the encoder inputs plus one extra tensor, so
        # work on a shallow copy and leave the encoder's own list untouched.
        encoder_inputs = network.inputs
        inputs = copy.copy(encoder_inputs)

        # The encoder must be invoked with its original list: because of how
        # deferred construction happens, the copy would already contain the
        # additional input by the time the network is invoked.
        encoder_outputs = network(encoder_inputs)
        seq_out, pooled_out = encoder_outputs

        # The encoder network may return outputs from all layers; keep only
        # the last one of each.
        if isinstance(seq_out, list):
            seq_out = seq_out[-1]
        if isinstance(pooled_out, list):
            pooled_out = pooled_out[-1]

        seq_len = seq_out.shape.as_list()[1]
        if seq_len is not None:
            if seq_len < num_token_predictions:
                raise ValueError(
                    "The passed network's output length is %s, which is less than the "
                    'requested num_token_predictions %s.' %
                    (seq_len, num_token_predictions))

        positions = tf.keras.layers.Input(
            shape=(num_token_predictions, ),
            name='masked_lm_positions',
            dtype=tf.int32)
        inputs.append(positions)

        if embedding_table is None:
            embedding_table = network.get_embedding_table()
        masked_lm = layers.MaskedLM(embedding_table=embedding_table,
                                    activation=activation,
                                    initializer=initializer,
                                    output=output,
                                    name='cls/predictions')
        lm_outputs = masked_lm(seq_out, masked_positions=positions)

        classification = networks.Classification(
            input_width=pooled_out.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output=output,
            name='classification')
        sentence_outputs = classification(pooled_out)

        model_outputs = dict(masked_lm=lm_outputs,
                             classification=sentence_outputs)
        super(BertPretrainer, self).__init__(inputs=inputs,
                                             outputs=model_outputs,
                                             **kwargs)

        # b/164516224
        # The parent constructor is called as though this were the Functional
        # API Model constructor, which gives this object all the properties of
        # a functional model. Attributes may only be assigned to `self` after
        # that call, so every `self` assignment lives below this line.
        config_dict = {
            'network': network,
            'num_classes': num_classes,
            'num_token_predictions': num_token_predictions,
            'activation': activation,
            'initializer': initializer,
            'output': output,
        }

        # The config is kept as a namedtuple rather than a dict for checkpoint
        # compatibility with an earlier version of this model which did not
        # track the config attribute: TF does not track immutable attrs which
        # do not contain Trackables.
        config_cls = collections.namedtuple('Config', config_dict.keys())
        self._config = config_cls(**config_dict)

        self.encoder = network
        self.classification = classification
        self.masked_lm = masked_lm
Exemple #8
0
  def __init__(self,
               network,
               num_classes,
               num_token_predictions,
               float_type,
               activation=None,
               output_activation=None,
               initializer='glorot_uniform',
               output='logits',
               **kwargs):
    """Builds a model with a masked-LM head and a classification head.

    The model takes the inputs of `network` plus an int32
    'masked_lm_positions' input, and outputs the list
    `[lm_outputs, sentence_outputs]`.

    Args:
      network: A Keras network exposing `inputs`. Calling it on those inputs
        must return a (sequence_output, cls_output) pair.
      num_classes: Number of classes, forwarded to `networks.Classification`.
      num_token_predictions: Width of the 'masked_lm_positions' input; also
        forwarded to `networks.MaskedLM` as `num_predictions`.
      float_type: Forwarded to `networks.MaskedLM` as `float_type`.
      activation: Activation forwarded to `networks.MaskedLM`.
      output_activation: Recorded in the config only; this constructor does
        not otherwise use it.
      initializer: Initializer forwarded to both heads.
      output: Output type forwarded to both heads.
      **kwargs: Forwarded to the parent constructor.

    Raises:
      ValueError: If the sequence output has a known length that is smaller
        than `num_token_predictions`.
    """
    self._self_setattr_tracking = False
    # `float_type` is a required constructor argument, so it must be part of
    # the config for the model to be rebuildable from it.
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'num_token_predictions': num_token_predictions,
        'float_type': float_type,
        'activation': activation,
        'output_activation': output_activation,
        'initializer': initializer,
        'output': output,
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a copy of the network inputs for use
    # when we construct the Model object at the end of init. (We keep a copy
    # because we'll be adding another tensor to the copy later.)
    network_inputs = network.inputs
    inputs = copy.copy(network_inputs)

    # Because we have a copy of inputs to create this Model object, we can
    # invoke the Network object with its own input tensors to start the Model.
    # Note that, because of how deferred construction happens, we can't use
    # the copy of the list here - by the time the network is invoked, the list
    # object contains the additional input added below.
    sequence_output, cls_output = network(network_inputs)

    sequence_output_length = sequence_output.shape.as_list()[1]
    # An unknown (dynamic) sequence dimension is reported as None, which
    # cannot be ordered against an int; only validate statically known
    # lengths.
    if (sequence_output_length is not None and
        sequence_output_length < num_token_predictions):
      raise ValueError(
          "The passed network's output length is %s, which is less than the "
          'requested num_token_predictions %s.' %
          (sequence_output_length, num_token_predictions))

    masked_lm_positions = tf.keras.layers.Input(
        shape=(num_token_predictions,),
        name='masked_lm_positions',
        dtype=tf.int32)
    inputs.append(masked_lm_positions)

    self.masked_lm = networks.MaskedLM(
        num_predictions=num_token_predictions,
        input_width=sequence_output.shape[-1],
        source_network=network,
        float_type=float_type,
        activation=activation,
        initializer=initializer,
        output=output,
        name='masked_lm')
    lm_outputs = self.masked_lm([sequence_output, masked_lm_positions])

    self.classification = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    sentence_outputs = self.classification(cls_output)

    super(BertPretrainer, self).__init__(
        inputs=inputs, outputs=[lm_outputs, sentence_outputs], **kwargs)
    def __init__(self,
                 network,
                 num_classes,
                 initializer='glorot_uniform',
                 dropout_rate=0.1,
                 use_encoder_pooler=True,
                 **kwargs):
        """Builds a sentence-prediction model on top of `network`.

        Args:
          network: A Keras network exposing `inputs`. Calling it on those
            inputs must return either a list (index 0 and index 1 are read)
            or a mapping with 'sequence_output' / 'pooled_output' keys.
          num_classes: Number of classes predicted by the head.
          initializer: Initializer forwarded to the head.
          dropout_rate: Dropout rate; applied through a `Dropout` layer when
            `use_encoder_pooler` is true, otherwise forwarded to
            `layers.ClassificationHead`.
          use_encoder_pooler: If true, classify from the network's pooled
            output with `networks.Classification`; otherwise classify from
            its sequence output with `layers.ClassificationHead`.
          **kwargs: Forwarded to the parent constructor.
        """

        # We want to use the inputs of the passed network as the inputs to this
        # Model. To do this, we need to keep a handle to the network inputs for use
        # when we construct the Model object at the end of init.
        inputs = network.inputs

        # Both heads start from the same encoder call, so it is made once
        # here rather than repeated in each branch.
        outputs = network(inputs)

        if use_encoder_pooler:
            if isinstance(outputs, list):
                cls_output = outputs[1]
            else:
                cls_output = outputs['pooled_output']
            cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)

            classifier = networks.Classification(
                input_width=cls_output.shape[-1],
                num_classes=num_classes,
                initializer=initializer,
                output='logits',
                name='sentence_prediction')
            predictions = classifier(cls_output)
        else:
            if isinstance(outputs, list):
                sequence_output = outputs[0]
            else:
                sequence_output = outputs['sequence_output']
            classifier = layers.ClassificationHead(
                inner_dim=sequence_output.shape[-1],
                num_classes=num_classes,
                initializer=initializer,
                dropout_rate=dropout_rate,
                name='sentence_prediction')
            predictions = classifier(sequence_output)

        # b/164516224
        # Once we've created the network using the Functional API, we call
        # super().__init__ as though we were invoking the Functional API Model
        # constructor, resulting in this object having all the properties of a model
        # created using the Functional API. Once super().__init__ is called, we
        # can assign attributes to `self` - note that all `self` assignments are
        # below this line.
        super(BertClassifier, self).__init__(inputs=inputs,
                                             outputs=predictions,
                                             **kwargs)
        self._network = network
        # `dropout_rate` is part of the config so that rebuilding the model
        # from it does not silently reset the rate to the default.
        config_dict = {
            'network': network,
            'num_classes': num_classes,
            'initializer': initializer,
            'dropout_rate': dropout_rate,
            'use_encoder_pooler': use_encoder_pooler,
        }

        # We are storing the config dict as a namedtuple here to ensure checkpoint
        # compatibility with an earlier version of this model which did not track
        # the config dict attribute. TF does not track immutable attrs which
        # do not contain Trackables, so by creating a config namedtuple instead of
        # a dict we avoid tracking it.
        config_cls = collections.namedtuple('Config', config_dict.keys())
        self._config = config_cls(**config_dict)
        self.classifier = classifier
Exemple #10
0
    inputs.append(masked_lm_positions)

    if embedding_table is None:
      embedding_table = self.encoder.get_embedding_table()
    self.masked_lm = layers.MaskedLM(
        embedding_table=embedding_table,
        activation=activation,
        initializer=initializer,
        output=output,
        name='cls/predictions')
    lm_outputs = self.masked_lm(
        sequence_output, masked_positions=masked_lm_positions)

    self.classification = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    sentence_outputs = self.classification(cls_output)

    super(BertPretrainer, self).__init__(
        inputs=inputs,
        outputs=dict(masked_lm=lm_outputs, classification=sentence_outputs),
        **kwargs)

  def get_config(self):
    """Returns the configuration object stored on this instance as `_config`.

    The stored object itself is returned, not a copy.
    """
    config = self._config
    return config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Creates an instance of `cls` from a configuration mapping.

    Args:
      config: Mapping whose entries are passed to the constructor as keyword
        arguments.
      custom_objects: Accepted as part of the signature; not used here.

    Returns:
      The newly constructed instance of `cls`.
    """
    instance = cls(**config)
    return instance