Example #1
    def test_trainable_variables(self):
        """Tests the functionality of automatically collecting trainable
        variables.
        """

        inputs = tf.placeholder(dtype=tf.int32, shape=[None, None])

        # case 1: bert base
        encoder = BERTEncoder()
        _, _ = encoder(inputs)
        self.assertEqual(len(encoder.trainable_variables), 3 + 2 + 12 * 16 + 2)

        # case 2: bert large
        hparams = {
            "pretrained_model_name": "bert-large-uncased"
        }
        encoder = BERTEncoder(hparams=hparams)
        _, _ = encoder(inputs)
        self.assertEqual(len(encoder.trainable_variables), 3 + 2 + 24 * 16 + 2)

        # case 3: self-designed bert
        hparams = {
            "encoder": {
                "num_blocks": 6,
            },
            "pretrained_model_name": None
        }
        encoder = BERTEncoder(hparams=hparams)
        _, _ = encoder(inputs)
        self.assertEqual(len(encoder.trainable_variables), 3 + 2 + 6 * 16 + 2)
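A plausible reading of the expected counts above, as an assumption from the standard BERT architecture rather than anything stated in the test: 3 embedding tables, 2 variables for the embedding LayerNorm, 16 variables per transformer block, and 2 for the pooler's dense layer.

    # Hypothetical breakdown of the magic numbers (assumed, not from the test):
    #   3  -> word, position, and segment embedding tables
    #   2  -> gamma/beta of the embedding LayerNorm
    #   16 -> per block: Q/K/V/output projections (4 kernels + 4 biases),
    #         two LayerNorms (2 x 2), and two feed-forward layers (2 + 2)
    #   2  -> kernel + bias of the pooler's dense layer
    num_blocks = 12                     # 12 for base, 24 for large, 6 custom
    print(3 + 2 + num_blocks * 16 + 2)  # 199 for bert-base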
Example #2
    def test_encode(self):
        """Tests encoding.
        """
        # case 1: bert base
        hparams = {
            "pretrained_model_name": None
        }
        encoder = BERTEncoder(hparams=hparams)

        max_time = 8
        batch_size = 16
        inputs = tf.random_uniform([batch_size, max_time],
                                   maxval=30521, dtype=tf.int32)
        outputs, pooled_output = encoder(inputs)

        outputs_dim = encoder.hparams.encoder.dim
        pooled_output_dim = encoder.hparams.hidden_size
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            outputs_, pooled_output_ = sess.run([outputs, pooled_output])
            self.assertEqual(outputs_.shape, (batch_size,
                                              max_time, outputs_dim))
            self.assertEqual(pooled_output_.shape, (batch_size,
                                                    pooled_output_dim))

        # case 2: self-designed bert
        hparams = {
            "hidden_size": 100,
            "pretrained_model_name": None
        }
        encoder = BERTEncoder(hparams=hparams)

        max_time = 8
        batch_size = 16
        inputs = tf.random_uniform([batch_size, max_time],
                                   maxval=30521, dtype=tf.int32)
        outputs, pooled_output = encoder(inputs)

        outputs_dim = encoder.hparams.encoder.dim
        pooled_output_dim = encoder.hparams.hidden_size
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            outputs_, pooled_output_ = sess.run([outputs, pooled_output])
            self.assertEqual(outputs_.shape, (batch_size,
                                              max_time, outputs_dim))
            self.assertEqual(pooled_output_.shape,
                             (batch_size, pooled_output_dim))
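For reference, a minimal standalone sketch of the same forward pass outside the test harness, assuming TF1 graph mode and that BERTEncoder is importable from texar.tf.modules (import path assumed):

    import tensorflow as tf
    from texar.tf.modules import BERTEncoder  # import path assumed

    encoder = BERTEncoder(hparams={"pretrained_model_name": None})
    inputs = tf.random_uniform([16, 8], maxval=30521, dtype=tf.int32)
    outputs, pooled_output = encoder(inputs)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out, pooled = sess.run([outputs, pooled_output])
        print(out.shape, pooled.shape)  # (16, 8, 768) (16, 768)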
Example #3
    def test_model_loading(self):
        r"""Tests model loading functionality."""

        inputs = tf.placeholder(dtype=tf.int32, shape=[None, None])

        for pretrained_model_name in BERTEncoder.available_checkpoints():
            encoder = BERTEncoder(pretrained_model_name=pretrained_model_name)
            _, _ = encoder(inputs)
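available_checkpoints() (used in the loop above) returns the names accepted as pretrained_model_name, so the loop exercises every released checkpoint. A quick way to inspect them, with the same assumed import:

    from texar.tf.modules import BERTEncoder  # import path assumed
    print(BERTEncoder.available_checkpoints())
    # includes e.g. 'bert-base-uncased' and 'bert-large-uncased'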
Example #4
    def test_hparams(self):
        """Tests the priority of the encoder arch parameter.
        """

        inputs = tf.placeholder(dtype=tf.int32, shape=[None, None])

        # case 1: set "pretrained_model_name" by constructor argument
        hparams = {
            "pretrained_model_name": "bert-large-uncased",
        }
        encoder = BERTEncoder(pretrained_model_name="bert-base-uncased",
                              hparams=hparams)
        _, _ = encoder(inputs)
        self.assertEqual(encoder.hparams.encoder.num_blocks, 12)

        # case 2: set "pretrained_model_name" by hparams
        hparams = {
            "pretrained_model_name": "bert-large-uncased",
            "encoder": {
                "num_blocks": 6
            }
        }
        encoder = BERTEncoder(hparams=hparams)
        _, _ = encoder(inputs)
        self.assertEqual(encoder.hparams.encoder.num_blocks, 24)

        # case 3: set to None in both hparams and constructor argument
        hparams = {
            "pretrained_model_name": None,
            "encoder": {
                "num_blocks": 6
            },
        }
        encoder = BERTEncoder(hparams=hparams)
        _, _ = encoder(inputs)
        self.assertEqual(encoder.hparams.encoder.num_blocks, 6)

        # case 4: using default hparams
        encoder = BERTEncoder()
        _, _ = encoder(inputs)
        self.assertEqual(encoder.hparams.encoder.num_blocks, 12)
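Taken together, the four cases establish the precedence: the pretrained_model_name constructor argument overrides the one given in hparams, which in turn overrides explicit architecture settings such as encoder.num_blocks; only when both are None do the user-specified (or default) architecture hparams take effect.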
Example #5
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):

        super(BERTClassifier, self).__init__(hparams=hparams)

        with tf.variable_scope(self.variable_scope):
            # Creates the underlying encoder
            encoder_hparams = dict_fetch(hparams,
                                         BERTEncoder.default_hparams())
            if encoder_hparams is not None:
                encoder_hparams['name'] = None
            self._encoder = BERTEncoder(
                pretrained_model_name=pretrained_model_name,
                cache_dir=cache_dir,
                hparams=encoder_hparams)

            # Creates a dropout layer
            drop_kwargs = {"rate": self._hparams.dropout}
            layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
            self._dropout_layer = get_layer(hparams=layer_hparams)

            # Creates an additional classification layer if needed
            self._num_classes = self._hparams.num_classes
            if self._num_classes <= 0:
                self._logit_layer = None
            else:
                logit_kwargs = self._hparams.logit_layer_kwargs
                if logit_kwargs is None:
                    logit_kwargs = {}
                elif not isinstance(logit_kwargs, HParams):
                    raise ValueError(
                        "hparams['logit_layer_kwargs'] must be a dict.")
                else:
                    logit_kwargs = logit_kwargs.todict()
                logit_kwargs.update({"units": self._num_classes})
                if 'name' not in logit_kwargs:
                    logit_kwargs['name'] = "logit_layer"

                layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
                self._logit_layer = get_layer(hparams=layer_hparams)
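A minimal usage sketch of the classifier whose constructor is shown above, assuming the same TF1 setup; the import path and the (logits, preds) return signature are assumptions based on Texar's classifier interface:

    import tensorflow as tf
    from texar.tf.modules import BERTClassifier  # import path assumed

    classifier = BERTClassifier(hparams={"num_classes": 2,
                                         "pretrained_model_name": None})
    inputs = tf.random_uniform([16, 8], maxval=30521, dtype=tf.int32)
    logits, preds = classifier(inputs)  # return signature assumed

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        logits_, preds_ = sess.run([logits, preds])
        print(logits_.shape, preds_.shape)  # (16, 2) (16,)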