def create_classification_model(self, input_width, num_classes):
    """Wrap a `networks.Classification` head in a standalone Keras model.

    Args:
        input_width: Size of the (non-batch) input dimension; also forwarded
            to the head as `input_width`.
        num_classes: Forwarded to the head as `num_classes`.

    Returns:
        A `tf.keras.Model` mapping the input tensor to the head's output.
    """
    head = networks.Classification(
        input_width=input_width,
        num_classes=num_classes,
    )
    # Keras adds the leading dimension implicitly, so this input is
    # 2-dimensional overall.
    head_input = tf.keras.Input(shape=(input_width,), dtype=tf.float32)
    return tf.keras.Model(head_input, head(head_input))
def __init__(self, network, num_classes, initializer='glorot_uniform',
             output='logits', dropout_rate=0.1, **kwargs):
    """Builds a classifier model on top of `network`.

    Args:
        network: Model whose `inputs` become this model's inputs. Calling it
            on those inputs must return two values; the second one is fed to
            the classification head.
        num_classes: Forwarded to `networks.Classification`.
        initializer: Forwarded to `networks.Classification`.
        output: Forwarded to `networks.Classification` as `output`.
        dropout_rate: Rate of the dropout layer applied before the head.
        **kwargs: Forwarded to the parent model constructor.
    """
    self._self_setattr_tracking = False
    # Every constructor argument is recorded, so rebuilding the model from
    # this config does not silently fall back to the default dropout rate.
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'initializer': initializer,
        'output': output,
        'dropout_rate': dropout_rate,
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a handle to the network inputs for
    # use when we construct the Model object at the end of init.
    inputs = network.inputs

    # Invoke the network with its own input tensors to start the Model.
    _, cls_output = network(inputs)
    cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)

    self.classifier = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    predictions = self.classifier(cls_output)

    super(BertClassifier, self).__init__(
        inputs=inputs, outputs=predictions, **kwargs)
def __init__(self, network, num_classes, initializer='glorot_uniform',
             dropout_rate=0.1, use_encoder_pooler=True, **kwargs):
    """Builds a sentence-prediction model on top of `network`.

    Args:
        network: Model whose `inputs` become this model's inputs. Calling it
            on those inputs must return either a list (index 0 is used as the
            sequence output, index 1 as the pooled output) or a mapping with
            the keys 'sequence_output' / 'pooled_output'.
        num_classes: Forwarded to the classification head.
        initializer: Forwarded to the classification head.
        dropout_rate: Dropout rate; applied as a separate layer when
            `use_encoder_pooler` is true, otherwise forwarded to
            `layers.ClassificationHead`.
        use_encoder_pooler: If true, classify from the network's pooled
            output with `networks.Classification`; otherwise classify from
            the sequence output with `layers.ClassificationHead`.
        **kwargs: Forwarded to the parent model constructor.
    """
    self._self_setattr_tracking = False
    self._network = network
    # Every constructor argument is recorded, so rebuilding the model from
    # this config does not silently fall back to the default dropout rate.
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'initializer': initializer,
        'dropout_rate': dropout_rate,
        'use_encoder_pooler': use_encoder_pooler,
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a handle to the network inputs for
    # use when we construct the Model object at the end of init.
    inputs = network.inputs

    # Both branches start by invoking the network with its own input tensors.
    outputs = network(inputs)

    if use_encoder_pooler:
        if isinstance(outputs, list):
            cls_output = outputs[1]
        else:
            cls_output = outputs['pooled_output']
        cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)

        self.classifier = networks.Classification(
            input_width=cls_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output='logits',
            name='sentence_prediction')
        predictions = self.classifier(cls_output)
    else:
        if isinstance(outputs, list):
            sequence_output = outputs[0]
        else:
            sequence_output = outputs['sequence_output']

        self.classifier = layers.ClassificationHead(
            inner_dim=sequence_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            dropout_rate=dropout_rate,
            name='sentence_prediction')
        predictions = self.classifier(sequence_output)

    super(BertClassifier, self).__init__(
        inputs=inputs, outputs=predictions, **kwargs)
def __init__(self, network, num_classes, initializer='glorot_uniform',
             output='logits', dropout_rate=0.1, **kwargs):
    """Builds a model with a classification head and a span-labeling head.

    The model outputs the list `[start_logits, end_logits, predictions]`.

    Args:
        network: Model whose `inputs` become this model's inputs. Calling it
            on those inputs must return two values: the first feeds the
            span-labeling head, the second feeds the classification head.
        num_classes: Forwarded to `networks.Classification`.
        initializer: Forwarded to both heads.
        output: Forwarded to both heads as `output`.
        dropout_rate: Rate of the dropout layer applied before the
            classification head.
        **kwargs: Forwarded to the parent model constructor.
    """
    self._self_setattr_tracking = False
    # Every constructor argument is recorded, so rebuilding the model from
    # this config does not silently fall back to the default dropout rate.
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'initializer': initializer,
        'output': output,
        'dropout_rate': dropout_rate,
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a handle to the network inputs for
    # use when we construct the Model object at the end of init.
    inputs = network.inputs

    # Invoke the network with its own input tensors to start the Model.
    sequence_output, cls_output = network(inputs)

    cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)
    self.classifier = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    predictions = self.classifier(cls_output)

    # This is an instance variable for ease of access to the underlying task
    # network.
    self.span_labeling = networks.SpanLabeling(
        input_width=sequence_output.shape[-1],
        initializer=initializer,
        output=output,
        name='span_labeling')
    start_logits, end_logits = self.span_labeling(sequence_output)

    # Use identity layers wrapped in lambdas to explicitly name the output
    # tensors. This allows us to use string-keyed dicts in Keras
    # fit/predict/evaluate calls.
    start_logits = tf.keras.layers.Lambda(
        tf.identity, name='start_positions')(start_logits)
    end_logits = tf.keras.layers.Lambda(
        tf.identity, name='end_positions')(end_logits)

    logits = [start_logits, end_logits, predictions]

    super(BertUnifiedLabeler, self).__init__(
        inputs=inputs, outputs=logits, **kwargs)
def __init__(self,
             network: tf.keras.Model,
             num_classes: int,
             initializer: Union[
                 str, tf.keras.initializers.Initializer] = 'glorot_uniform',
             dropout_rate: float = 0.1,
             use_mc_dropout: bool = False,
             **kwargs: Any):
    """Initializer.

    Args:
      network: A transformer network. Its `inputs` become this model's
        inputs, and calling it on them must return two values; the second
        one (the classification output) feeds the classifier head.
      num_classes: Number of classes to predict from the classification
        network.
      initializer: The initializer (if any) to use in the classification
        networks. Defaults to a Glorot uniform initializer.
      dropout_rate: The dropout probability of the cls head.
      use_mc_dropout: Whether to use MC Dropout before the dense output
        layer. When true the dropout layer is called with `training=True`;
        otherwise `training=None` is passed.
      **kwargs: Additional keyword arguments forwarded to the parent model
        constructor.
    """
    self._self_setattr_tracking = False
    self._network = network
    # Every constructor argument is recorded, so rebuilding the model from
    # this config does not silently fall back to the default dropout rate.
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'initializer': initializer,
        'dropout_rate': dropout_rate,
        'use_mc_dropout': use_mc_dropout,
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a handle to the network inputs for
    # use when we construct the Model object at the end of init.
    inputs = network.inputs

    # Construct classifier using CLS token of the BERT encoder output.
    _, cls_output = network(inputs)

    # Perform MC Dropout on the CLS embedding.
    training = True if use_mc_dropout else None
    cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(
        cls_output, training=training)

    # Produce final logits.
    self.classifier = bert_encoder.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output='logits',
        name='sentence_prediction')
    predictions = self.classifier(cls_output)

    super().__init__(inputs=inputs, outputs=predictions, **kwargs)
def __init__(
        self,
        network: tf.keras.Model,
        num_classes: int,
        gp_layer_kwargs: Dict[str, Any],
        initializer: Optional[tf.keras.initializers.Initializer] = None,
        dropout_rate: float = 0.1,
        use_gp_layer: bool = True,
        **kwargs: Any):
    """Initializer.

    Args:
      network: A transformer network. Its `inputs` become this model's
        inputs, and calling it on them must return two values; the second
        one (the classification output) feeds the output layer.
      num_classes: Number of classes to predict from the classification
        network.
      gp_layer_kwargs: Keyword arguments to Gaussian process layer. Only
        unpacked when `use_gp_layer` is true, but always stored in the
        config.
      initializer: The initializer (if any) to use in the classification
        networks. The default `None` is passed through unchanged to the
        output layer.
      dropout_rate: The dropout probability of the cls head.
      use_gp_layer: Whether to use Gaussian process output layer. If false,
        a `bert_encoder.Classification` head is used instead.
      **kwargs: Additional keyword arguments forwarded to the parent model
        constructor.
    """
    self._self_setattr_tracking = False
    self._network = network
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'initializer': initializer,
        'dropout_rate': dropout_rate,
        'use_gp_layer': use_gp_layer,
        'gp_layer_kwargs': gp_layer_kwargs
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a handle to the network inputs for
    # use when we construct the Model object at the end of init.
    inputs = network.inputs

    # Construct classifier using CLS token of the BERT encoder output.
    _, cls_output = network(inputs)
    cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)

    # Produce final logits.
    if use_gp_layer:
        # We use the stddev=0.05 (i.e., the tf keras default)
        # for the distribution of the random features instead of stddev=1.
        # (which is often suggested by the theoretical literature).
        # The reason is deep BERT model is sensitive to the scaling of the
        # initializers.
        self.classifier = ed.layers.RandomFeatureGaussianProcess(
            units=num_classes,
            scale_random_features=False,
            use_custom_random_features=True,
            kernel_initializer=initializer,
            custom_random_features_initializer=(
                tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.05)),
            **gp_layer_kwargs)
    else:
        self.classifier = bert_encoder.Classification(
            input_width=cls_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output='logits',
            name='sentence_prediction')
    predictions = self.classifier(cls_output)

    super().__init__(inputs=inputs, outputs=predictions, **kwargs)
def __init__(self, network, num_classes, num_token_predictions,
             embedding_table=None, activation=None,
             initializer='glorot_uniform', output='logits', **kwargs):
    """Builds a pretraining model with masked-LM and classification heads.

    The model's inputs are the network's inputs plus an int32
    'masked_lm_positions' input; its outputs are a dict with the keys
    'masked_lm' and 'classification'.

    Args:
        network: Encoder whose `inputs` seed this model's inputs. Calling it
            must return a (sequence_output, cls_output) pair; if either is a
            list, its last element is used.
        num_classes: Forwarded to `networks.Classification`.
        num_token_predictions: Width of the 'masked_lm_positions' input.
        embedding_table: Table handed to `layers.MaskedLM`; when None it is
            taken from `network.get_embedding_table()`.
        activation: Forwarded to `layers.MaskedLM`.
        initializer: Forwarded to both heads.
        output: Forwarded to both heads as `output`.
        **kwargs: Forwarded to the parent model constructor.

    Raises:
        ValueError: If the sequence output's dimension 1 is known and is
            smaller than `num_token_predictions`.
    """
    # Keep the network's own input list for invoking it, and a shallow copy
    # that will grow by one tensor to become this model's input list. The
    # network must be called with the original list: because of how deferred
    # construction happens, the copy already holds the extra input by the
    # time the network is actually invoked.
    encoder_inputs = network.inputs
    model_inputs = copy.copy(encoder_inputs)

    sequence_output, cls_output = network(encoder_inputs)

    # The encoder network may get outputs from all layers.
    if isinstance(sequence_output, list):
        sequence_output = sequence_output[-1]
    if isinstance(cls_output, list):
        cls_output = cls_output[-1]

    seq_len = sequence_output.shape.as_list()[1]
    if seq_len is not None and seq_len < num_token_predictions:
        raise ValueError(
            "The passed network's output length is %s, which is less than the "
            'requested num_token_predictions %s.'
            % (seq_len, num_token_predictions))

    masked_lm_positions = tf.keras.layers.Input(
        shape=(num_token_predictions,),
        name='masked_lm_positions',
        dtype=tf.int32)
    model_inputs.append(masked_lm_positions)

    if embedding_table is None:
        embedding_table = network.get_embedding_table()
    masked_lm = layers.MaskedLM(
        embedding_table=embedding_table,
        activation=activation,
        initializer=initializer,
        output=output,
        name='cls/predictions')
    lm_outputs = masked_lm(
        sequence_output, masked_positions=masked_lm_positions)

    classification = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    sentence_outputs = classification(cls_output)

    model_outputs = {
        'masked_lm': lm_outputs,
        'classification': sentence_outputs,
    }
    super(BertPretrainer, self).__init__(
        inputs=model_inputs, outputs=model_outputs, **kwargs)

    # b/164516224
    # The parent constructor is called as though it were the Functional API
    # Model constructor, which gives this object all the properties of a
    # functional model. Attributes may only be assigned to `self` after that
    # call, so every `self` assignment lives below this point.
    config_dict = {
        'network': network,
        'num_classes': num_classes,
        'num_token_predictions': num_token_predictions,
        'activation': activation,
        'initializer': initializer,
        'output': output,
    }
    # The config is stored as a namedtuple rather than a dict to stay
    # checkpoint-compatible with an earlier version of this model that did
    # not track a config attribute: TF does not track immutable attrs which
    # do not contain Trackables.
    config_cls = collections.namedtuple('Config', config_dict.keys())
    self._config = config_cls(**config_dict)

    self.encoder = network
    self.classification = classification
    self.masked_lm = masked_lm
def __init__(self, network, num_classes, num_token_predictions, float_type,
             activation=None, output_activation=None,
             initializer='glorot_uniform', output='logits', **kwargs):
    """Builds a pretraining model with masked-LM and classification heads.

    The model's inputs are the network's inputs plus an int32
    'masked_lm_positions' input; its outputs are the list
    `[lm_outputs, sentence_outputs]`.

    Args:
        network: Encoder whose `inputs` seed this model's inputs. Calling it
            must return a (sequence_output, cls_output) pair.
        num_classes: Forwarded to `networks.Classification`.
        num_token_predictions: Width of the 'masked_lm_positions' input;
            also forwarded to `networks.MaskedLM` as `num_predictions`.
        float_type: Forwarded to `networks.MaskedLM`.
        activation: Forwarded to `networks.MaskedLM`.
        output_activation: Only recorded in the config by this constructor.
        initializer: Forwarded to both heads.
        output: Forwarded to both heads as `output`.
        **kwargs: Forwarded to the parent model constructor.

    Raises:
        ValueError: If the sequence output's dimension 1 is known and is
            smaller than `num_token_predictions`.
    """
    self._self_setattr_tracking = False
    # `float_type` is a required constructor argument, so it has to be part
    # of the config for the model to be rebuildable from it.
    self._config = {
        'network': network,
        'num_classes': num_classes,
        'num_token_predictions': num_token_predictions,
        'float_type': float_type,
        'activation': activation,
        'output_activation': output_activation,
        'initializer': initializer,
        'output': output,
    }

    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a copy of the network inputs for use
    # when we construct the Model object at the end of init. (We keep a copy
    # because we'll be adding another tensor to the copy later.)
    network_inputs = network.inputs
    inputs = copy.copy(network_inputs)

    # Because we have a copy of inputs to create this Model object, we can
    # invoke the Network object with its own input tensors to start the
    # Model. Note that, because of how deferred construction happens, we
    # can't use the copy of the list here - by the time the network is
    # invoked, the list object contains the additional input added below.
    sequence_output, cls_output = network(network_inputs)

    # The sequence dimension may be unknown (None); comparing None with an
    # int raises TypeError, so the length check only runs when it is known.
    sequence_output_length = sequence_output.shape.as_list()[1]
    if (sequence_output_length is not None
            and sequence_output_length < num_token_predictions):
        raise ValueError(
            "The passed network's output length is %s, which is less than the "
            'requested num_token_predictions %s.'
            % (sequence_output_length, num_token_predictions))

    masked_lm_positions = tf.keras.layers.Input(
        shape=(num_token_predictions,),
        name='masked_lm_positions',
        dtype=tf.int32)
    inputs.append(masked_lm_positions)

    self.masked_lm = networks.MaskedLM(
        num_predictions=num_token_predictions,
        input_width=sequence_output.shape[-1],
        source_network=network,
        float_type=float_type,
        activation=activation,
        initializer=initializer,
        output=output,
        name='masked_lm')
    lm_outputs = self.masked_lm([sequence_output, masked_lm_positions])

    self.classification = networks.Classification(
        input_width=cls_output.shape[-1],
        num_classes=num_classes,
        initializer=initializer,
        output=output,
        name='classification')
    sentence_outputs = self.classification(cls_output)

    super(BertPretrainer, self).__init__(
        inputs=inputs, outputs=[lm_outputs, sentence_outputs], **kwargs)
def __init__(self, network, num_classes, initializer='glorot_uniform',
             dropout_rate=0.1, use_encoder_pooler=True, **kwargs):
    """Builds a sentence-prediction model on top of `network`.

    Args:
        network: Model whose `inputs` become this model's inputs. Calling it
            on those inputs must return either a list (index 0 is used as the
            sequence output, index 1 as the pooled output) or a mapping with
            the keys 'sequence_output' / 'pooled_output'.
        num_classes: Forwarded to the classification head.
        initializer: Forwarded to the classification head.
        dropout_rate: Dropout rate; applied as a separate layer when
            `use_encoder_pooler` is true, otherwise forwarded to
            `layers.ClassificationHead`.
        use_encoder_pooler: If true, classify from the network's pooled
            output with `networks.Classification`; otherwise classify from
            the sequence output with `layers.ClassificationHead`.
        **kwargs: Forwarded to the parent model constructor.
    """
    # We want to use the inputs of the passed network as the inputs to this
    # Model. To do this, we need to keep a handle to the network inputs for
    # use when we construct the Model object at the end of init.
    inputs = network.inputs

    # Both branches start by invoking the network with its own input tensors.
    outputs = network(inputs)

    if use_encoder_pooler:
        if isinstance(outputs, list):
            cls_output = outputs[1]
        else:
            cls_output = outputs['pooled_output']
        cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output)

        classifier = networks.Classification(
            input_width=cls_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            output='logits',
            name='sentence_prediction')
        predictions = classifier(cls_output)
    else:
        if isinstance(outputs, list):
            sequence_output = outputs[0]
        else:
            sequence_output = outputs['sequence_output']

        classifier = layers.ClassificationHead(
            inner_dim=sequence_output.shape[-1],
            num_classes=num_classes,
            initializer=initializer,
            dropout_rate=dropout_rate,
            name='sentence_prediction')
        predictions = classifier(sequence_output)

    # b/164516224
    # Once we've created the network using the Functional API, we call
    # super().__init__ as though we were invoking the Functional API Model
    # constructor, resulting in this object having all the properties of a
    # model created using the Functional API. Once super().__init__ is
    # called, we can assign attributes to `self` - note that all `self`
    # assignments are below this line.
    super(BertClassifier, self).__init__(
        inputs=inputs, outputs=predictions, **kwargs)

    self._network = network
    # `dropout_rate` is recorded with the other constructor arguments so a
    # model rebuilt from this config does not silently fall back to the
    # default rate. It is a plain float, so it adds no Trackable to the
    # namedtuple below.
    config_dict = {
        'network': network,
        'num_classes': num_classes,
        'initializer': initializer,
        'dropout_rate': dropout_rate,
        'use_encoder_pooler': use_encoder_pooler,
    }
    # We are storing the config dict as a namedtuple here to ensure
    # checkpoint compatibility with an earlier version of this model which
    # did not track the config dict attribute. TF does not track immutable
    # attrs which do not contain Trackables, so by creating a config
    # namedtuple instead of a dict we avoid tracking it.
    config_cls = collections.namedtuple('Config', config_dict.keys())
    self._config = config_cls(**config_dict)
    self.classifier = classifier
# NOTE(review): the statements down to the `super(...).__init__` call are the
# tail of a constructor whose beginning is outside this view; `inputs`,
# `masked_lm_positions`, `sequence_output`, `cls_output` and the keyword
# values used below are presumably bound earlier in it - confirm there.

# Extend the model's input list with the masked-position input.
inputs.append(masked_lm_positions)

# Fall back to the encoder's own embedding table when none was supplied.
if embedding_table is None:
    embedding_table = self.encoder.get_embedding_table()
self.masked_lm = layers.MaskedLM(
    embedding_table=embedding_table,
    activation=activation,
    initializer=initializer,
    output=output,
    name='cls/predictions')
lm_outputs = self.masked_lm(
    sequence_output, masked_positions=masked_lm_positions)

self.classification = networks.Classification(
    input_width=cls_output.shape[-1],
    num_classes=num_classes,
    initializer=initializer,
    output=output,
    name='classification')
sentence_outputs = self.classification(cls_output)

# The model's outputs are a dict keyed by 'masked_lm' and 'classification'.
super(BertPretrainer, self).__init__(
    inputs=inputs,
    outputs=dict(masked_lm=lm_outputs, classification=sentence_outputs),
    **kwargs)

def get_config(self):
    """Returns the stored `self._config` object itself (no copy is made)."""
    return self._config

@classmethod
def from_config(cls, config, custom_objects=None):
    """Builds an instance by unpacking `config` as constructor keywords.

    Args:
        config: Mapping whose items are passed to `cls(...)` as keyword
            arguments.
        custom_objects: Accepted but not used by this method.

    Returns:
        The result of `cls(**config)`.
    """
    return cls(**config)