def test_trainable_variables(self):
    r"""Checks that trainable variables are collected automatically.

    Three encoders are built (default, ``bert-large-uncased`` and a custom
    6-block configuration without pretrained weights); each is run once on
    a dummy batch and the length of ``encoder.trainable_variables`` is
    compared against the expected count.
    """
    inputs = torch.zeros(32, 16, dtype=torch.int64)

    # Each entry: (constructor keyword arguments, number of encoder blocks
    # used in the expected variable count).
    scenarios = [
        # case 1: bert base
        ({}, 12),
        # case 2: bert large
        ({"hparams": {"pretrained_model_name": "bert-large-uncased"}}, 24),
        # case 3: self-designed bert
        ({
            "hparams": {
                "encoder": {
                    "num_blocks": 6,
                },
                "pretrained_model_name": None,
            },
        }, 6),
    ]

    for ctor_kwargs, num_blocks in scenarios:
        encoder = BertEncoder(**ctor_kwargs)
        # The encoder is expected to return exactly two values.
        _, _ = encoder(inputs)
        # Expected count: a fixed 3 + 2 + 2 variables plus 16 per block.
        expected_count = 3 + 2 + num_blocks * 16 + 2
        self.assertEqual(len(encoder.trainable_variables), expected_count)
def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    r"""Builds the classifier: a BERT encoder, a dropout layer and, when
    ``num_classes > 0``, a linear logit layer.

    Args:
        pretrained_model_name: Forwarded unchanged to the
            :class:`BertEncoder` constructor.
        cache_dir: Forwarded unchanged to the :class:`BertEncoder`
            constructor.
        hparams: Hyperparameters of the classifier. Passed to the parent
            constructor; the encoder's hyperparameters are derived from
            it via ``utils.dict_fetch``.

    Raises:
        ValueError: If ``hparams['logit_layer_kwargs']`` is set but is not
            an ``HParams`` instance, or if a logit layer is requested with
            ``clas_strategy == 'all_time'`` while ``max_seq_length`` is
            ``None``.
    """
    super().__init__(hparams)

    # Create the underlying encoder.
    # NOTE(review): dict_fetch presumably keeps only the entries of
    # `hparams` whose keys exist in the encoder defaults -- confirm.
    encoder_hparams = utils.dict_fetch(hparams,
                                       BertEncoder.default_hparams())
    if encoder_hparams is not None:
        # Presumably prevents the encoder from inheriting the classifier's
        # name -- TODO confirm.
        encoder_hparams['name'] = None
    self._encoder = BertEncoder(
        pretrained_model_name=pretrained_model_name,
        cache_dir=cache_dir,
        hparams=encoder_hparams)

    # Create a dropout layer
    self._dropout_layer = nn.Dropout(self._hparams.dropout)

    # Create an additional classification layer if needed
    self.num_classes = self._hparams.num_classes
    if self.num_classes <= 0:
        self._logits_layer = None
    else:
        logit_kwargs = self._hparams.logit_layer_kwargs
        if logit_kwargs is None:
            logit_kwargs = {}
        elif not isinstance(logit_kwargs, HParams):
            raise ValueError("hparams['logit_layer_kwargs'] "
                             "must be a dict.")
        else:
            logit_kwargs = logit_kwargs.todict()

        if self._hparams.clas_strategy == 'all_time':
            max_seq_length = self._hparams.max_seq_length
            if max_seq_length is None:
                # Without this check the multiplication below fails with
                # an opaque TypeError (int * None).
                raise ValueError(
                    "hparams['max_seq_length'] must be set when "
                    "hparams['clas_strategy'] is 'all_time'.")
            # The logit layer consumes hidden_size features per time step
            # for max_seq_length steps.
            self._logits_layer = nn.Linear(
                self._hparams.hidden_size * max_seq_length,
                self.num_classes,
                **logit_kwargs)
        else:
            self._logits_layer = nn.Linear(
                self._hparams.hidden_size,
                self.num_classes,
                **logit_kwargs)

    # Binary when there is a single logit: either an explicit one-class
    # logit layer, or no logit layer and an encoder dimension of 1.
    self.is_binary = (self.num_classes == 1) or \
                     (self.num_classes <= 0 and
                      self._hparams.encoder.dim == 1)
def test_encode(self):
    """Checks the shapes of the encoder outputs.

    Runs a default encoder and a custom one (no pretrained weights,
    ``hidden_size`` of 100) on random token ids and verifies that the
    sequence output and the pooled output have the expected shapes.
    """
    batch_size, max_time = 16, 8

    def _check_output_shapes(encoder):
        # Builds the graph for one encoder, evaluates it and checks shapes.
        inputs = tf.random_uniform([batch_size, max_time],
                                   maxval=30521, dtype=tf.int32)
        outputs, pooled_output = encoder(inputs)

        expected_outputs_shape = (
            batch_size, max_time, encoder.hparams.encoder.dim)
        expected_pooled_shape = (batch_size, encoder.hparams.hidden_size)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            outputs_, pooled_output_ = sess.run([outputs, pooled_output])
            self.assertEqual(outputs_.shape, expected_outputs_shape)
            self.assertEqual(pooled_output_.shape, expected_pooled_shape)

    # case 1: bert base
    _check_output_shapes(BertEncoder())

    # case 2: self-designed bert
    custom_hparams = {"hidden_size": 100, "pretrained_model_name": None}
    _check_output_shapes(BertEncoder(hparams=custom_hparams))
def test_hparams(self):
    """Checks which source decides the encoder architecture.

    For each scenario an encoder is built, applied to a placeholder and
    ``encoder.hparams.encoder.num_blocks`` is compared with the expected
    number of blocks.
    """
    inputs = tf.placeholder(dtype=tf.int32, shape=[None, None])

    # Each entry: (constructor keyword arguments, expected num_blocks).
    scenarios = [
        # case 1: set "pretrained_model_name" by constructor argument
        ({
            "pretrained_model_name": "bert-base-uncased",
            "hparams": {
                "pretrained_model_name": "bert-large-uncased",
            },
        }, 12),
        # case 2: set "pretrained_model_name" by hparams
        ({
            "hparams": {
                "pretrained_model_name": "bert-large-uncased",
                "encoder": {
                    "num_blocks": 6
                },
            },
        }, 24),
        # case 3: set to None in both hparams and constructor argument
        ({
            "hparams": {
                "pretrained_model_name": None,
                "encoder": {
                    "num_blocks": 6
                },
            },
        }, 6),
        # case 4: using default hparams
        ({}, 12),
    ]

    for ctor_kwargs, expected_num_blocks in scenarios:
        encoder = BertEncoder(**ctor_kwargs)
        # The encoder is expected to return exactly two values.
        _, _ = encoder(inputs)
        self.assertEqual(encoder.hparams.encoder.num_blocks,
                         expected_num_blocks)
def test_encode(self):
    r"""Checks the shapes of the encoder outputs.

    Runs a default encoder and a custom 96-dimensional encoder (no
    pretrained weights) on random token ids and verifies the shapes of
    the sequence output and the pooled output.
    """
    batch_size, max_time = 16, 8

    def _check_output_shapes(encoder):
        # Feeds random token ids through `encoder` and checks both shapes.
        inputs = torch.randint(30521, (batch_size, max_time),
                               dtype=torch.int64)
        outputs, pooled_output = encoder(inputs)

        outputs_dim = encoder.hparams.encoder.dim
        pooled_output_dim = encoder.hparams.hidden_size

        self.assertEqual(
            outputs.shape,
            torch.Size([batch_size, max_time, outputs_dim]))
        self.assertEqual(
            pooled_output.shape,
            torch.Size([batch_size, pooled_output_dim]))

    # case 1: bert base
    _check_output_shapes(BertEncoder())

    # case 2: self-designed bert
    width = 96
    feedforward_layers = [
        {
            'kwargs': {
                'in_features': width,
                'out_features': width * 4,
                'bias': True
            },
            'type': 'Linear'
        },
        {
            "type": "BertGELU"
        },
        {
            'kwargs': {
                'in_features': width * 4,
                'out_features': width,
                'bias': True
            },
            'type': 'Linear'
        },
    ]
    custom_hparams = {
        'pretrained_model_name': None,
        'embed': {
            'dim': width
        },
        'segment_embed': {
            'dim': width
        },
        'position_embed': {
            'dim': width
        },
        'encoder': {
            'dim': width,
            'multihead_attention': {
                'num_units': width,
                'output_dim': width,
            },
            'poswise_feedforward': {
                'layers': feedforward_layers
            },
        },
        'hidden_size': width
    }
    _check_output_shapes(BertEncoder(hparams=custom_hparams))
def default_hparams():
    r"""Returns the default hyperparameters as a dictionary.

    .. code-block:: python

        {
            # (1) Same hyperparameters as in BertEncoder
            ...
            # (2) Additional hyperparameters
            "num_classes": 2,
            "logit_layer_kwargs": None,
            "clas_strategy": "cls_time",
            "max_seq_length": None,
            "dropout": 0.1,
            "name": "bert_classifier"
        }

    Here:

    1. Same hyperparameters as in
       :class:`~texar.modules.BertEncoder`; see
       :meth:`~texar.modules.BertEncoder.default_hparams`. A BertEncoder
       instance is created for feature extraction.

    2. Additional hyperparameters:

        `num_classes`: int
            Number of classes:

            - If **> 0**, an additional `Linear` layer is appended to
              the encoder to compute the logits over classes.
            - If **<= 0**, no such layer is appended. The number of
              classes is assumed to be the final layer size of the
              encoder.

        `logit_layer_kwargs`: dict
            Keyword arguments for the constructor of the additional
            logit layer, except for its output size, which is set to
            `num_classes`. Ignored if no extra logit layer is appended.

        `clas_strategy`: str
            The classification strategy, one of:

            - **cls_time**: Sequence-level classification based on the
              output of the first time step (which is the `CLS` token).
              Each sequence has a class.
            - **all_time**: Sequence-level classification based on the
              output of all time steps. Each sequence has a class.
            - **time_wise**: Step-wise classification, i.e., make
              classification for each time step based on its output.

        `max_seq_length`: int, optional
            Maximum possible length of input sequences. Required if
            `clas_strategy` is `all_time`.

        `dropout`: float
            The dropout rate of the BERT encoder output.

        `name`: str
            Name of the classifier.
    """
    # Classifier-specific entries; these are added to the encoder defaults
    # and replace any entry with the same key.
    classifier_defaults = {
        "num_classes": 2,
        "logit_layer_kwargs": None,
        "clas_strategy": "cls_time",
        "max_seq_length": None,
        "dropout": 0.1,
        "name": "bert_classifier",
    }
    hparams = BertEncoder.default_hparams()
    hparams.update(classifier_defaults)
    return hparams