Exemple #1
0
    def test_network_creation(self):
        """Build a scaffold from a hidden-layer class and check its outputs.

        ``ValidatedTransformerLayer`` is passed as a class together with
        ``hidden_cfg``. The config carries a shared list that is inspected at
        the end to confirm the class was instantiated from that config.
        """
        width = 32
        seq_len = 21
        layer_count = 3

        embedding_cfg = dict(
            vocab_size=100,
            type_vocab_size=16,
            hidden_size=width,
            seq_length=seq_len,
            max_seq_length=seq_len,
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
            dropout_rate=0.1,
        )

        # Shared with the hidden layers through their config; checked below.
        invocations = []
        hidden_cfg = dict(
            num_attention_heads=2,
            intermediate_size=3072,
            intermediate_activation=activations.gelu,
            dropout_rate=0.1,
            attention_dropout_rate=0.1,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=0.02),
            call_list=invocations,
        )

        # A small scaffold is enough for a structural check.
        scaffold = encoder_scaffold.EncoderScaffold(
            num_hidden_instances=layer_count,
            num_output_classes=width,
            classification_layer_initializer=(
                tf.keras.initializers.TruncatedNormal(stddev=0.02)),
            hidden_cls=ValidatedTransformerLayer,
            hidden_cfg=hidden_cfg,
            embedding_cfg=embedding_cfg)

        # Symbolic inputs; the batch dimension is implicit.
        input_shape = (seq_len, )
        word_ids = tf.keras.Input(shape=input_shape, dtype=tf.int32)
        mask = tf.keras.Input(shape=input_shape, dtype=tf.int32)
        type_ids = tf.keras.Input(shape=input_shape, dtype=tf.int32)
        sequence_out, pooled_out = scaffold([word_ids, mask, type_ids])

        self.assertAllEqual([None, seq_len, width],
                            sequence_out.shape.as_list())
        self.assertAllEqual([None, width], pooled_out.shape.as_list())

        # Without an explicit dtype both outputs are expected to be float32.
        for tensor in (sequence_out, pooled_out):
            self.assertAllEqual(tf.float32, tensor.dtype)

        # A truthy first entry means the passed layer class was instantiated
        # from the given config.
        self.assertNotEmpty(invocations)
        self.assertTrue(invocations[0],
                        "The passed layer class wasn't instantiated.")
Exemple #2
0
 def _get_network(self, vocab_size):
   """Return a two-layer EncoderScaffold using a packed-sequence embedding.

   Args:
     vocab_size: Vocabulary size forwarded to the embedding configuration.

   Returns:
     An `EncoderScaffold` built with `dict_outputs=True`, whose embedding is
     a pre-built `PackedSequenceEmbedding` instance.
   """
   hidden_size = 50
   max_length = 512

   embedding_cfg = dict(
       vocab_size=vocab_size,
       type_vocab_size=1,
       hidden_size=hidden_size,
       embedding_width=hidden_size,
       max_seq_length=max_length,
       initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
       dropout_rate=0.1,
   )
   # The embedding is instantiated up front and handed over as an object.
   packed_embedding = packed_sequence_embedding.PackedSequenceEmbedding(
       **embedding_cfg)

   hidden_cfg = dict(
       num_attention_heads=2,
       intermediate_size=3072,
       intermediate_activation=activations.gelu,
       dropout_rate=0.1,
       attention_dropout_rate=0.1,
       kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
   )

   scaffold = encoder_scaffold.EncoderScaffold(
       num_hidden_instances=2,
       pooled_output_dim=hidden_size,
       embedding_cfg=embedding_cfg,
       embedding_cls=packed_embedding,
       hidden_cfg=hidden_cfg,
       dict_outputs=True)
   return scaffold
    def test_network_creation_with_float16_dtype(self):
        """Check that a float16-configured scaffold emits float16 outputs.

        The test switches the Keras mixed-precision policy to
        ``mixed_float16``. That policy is process-global, so a cleanup is
        registered to restore ``float32`` and keep the setting from leaking
        into tests that run afterwards.
        """
        # Register the restore first so it runs even if a later step fails.
        self.addCleanup(tf.keras.mixed_precision.experimental.set_policy,
                        "float32")
        tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
        hidden_size = 32
        sequence_length = 21
        embedding_cfg = {
            "vocab_size": 100,
            "type_vocab_size": 16,
            "hidden_size": hidden_size,
            "seq_length": sequence_length,
            "max_seq_length": sequence_length,
            "initializer": tf.keras.initializers.TruncatedNormal(stddev=0.02),
            "dropout_rate": 0.1,
            "dtype": "float16",
        }
        hidden_cfg = {
            "num_attention_heads": 2,
            "intermediate_size": 3072,
            "intermediate_activation": activations.gelu,
            "dropout_rate": 0.1,
            "attention_dropout_rate": 0.1,
            "kernel_initializer":
                tf.keras.initializers.TruncatedNormal(stddev=0.02),
            "dtype": "float16",
        }
        # Create a small EncoderScaffold for testing.
        test_network = encoder_scaffold.EncoderScaffold(
            num_hidden_instances=3,
            num_output_classes=hidden_size,
            classification_layer_initializer=(
                tf.keras.initializers.TruncatedNormal(stddev=0.02)),
            classification_layer_dtype=tf.float16,
            hidden_cfg=hidden_cfg,
            embedding_cfg=embedding_cfg)
        # Create the inputs (note that the first dimension is implicit).
        word_ids = tf.keras.Input(shape=(sequence_length, ), dtype=tf.int32)
        mask = tf.keras.Input(shape=(sequence_length, ), dtype=tf.int32)
        type_ids = tf.keras.Input(shape=(sequence_length, ), dtype=tf.int32)
        data, pooled = test_network([word_ids, mask, type_ids])

        expected_data_shape = [None, sequence_length, hidden_size]
        expected_pooled_shape = [None, hidden_size]
        self.assertAllEqual(expected_data_shape, data.shape.as_list())
        self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list())

        # With float16 configured everywhere, both outputs should be float16.
        self.assertAllEqual(tf.float16, data.dtype)
        self.assertAllEqual(tf.float16, pooled.dtype)
Exemple #4
0
  def test_network_invocation(self):
    """Run a scaffold whose embedding is a custom two-input network.

    Output values are not validated; the predict call is there to surface
    structural runtime errors.
    """
    width = 32
    seq_len = 21
    vocab_size = 57

    # Replacement for the default embedding network: it takes only word ids
    # and a mask (2 inputs instead of 3) and has no positional embeddings.
    embedding_network = Embeddings(vocab_size, width)

    hidden_cfg = dict(
        num_attention_heads=2,
        intermediate_size=3072,
        intermediate_activation=activations.gelu,
        dropout_rate=0.1,
        attention_dropout_rate=0.1,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
    )

    # A small scaffold wrapped around the custom embedding network.
    scaffold = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=3,
        pooled_output_dim=width,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        hidden_cfg=hidden_cfg,
        embedding_cls=embedding_network)

    # Symbolic inputs; the batch dimension is implicit.
    word_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    sequence_out, pooled_out = scaffold([word_ids, mask])

    # Wrap the scaffold in a model so it can be invoked on concrete data.
    model = tf.keras.Model([word_ids, mask], [sequence_out, pooled_out])

    batch_size = 3
    batch_shape = (batch_size, seq_len)
    word_id_data = np.random.randint(vocab_size, size=batch_shape)
    mask_data = np.random.randint(2, size=batch_shape)
    model.predict([word_id_data, mask_data])
    def test_serialize_deserialize(self):
        """Round-trip a scaffold through its config and compare the configs."""
        width = 32
        seq_len = 21

        # Build a network object that sets all of its config options.
        embedding_cfg = dict(
            vocab_size=100,
            type_vocab_size=16,
            hidden_size=width,
            seq_length=seq_len,
            max_seq_length=seq_len,
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
            dropout_rate=0.1,
        )
        hidden_cfg = dict(
            num_attention_heads=2,
            intermediate_size=3072,
            intermediate_activation=activations.gelu,
            dropout_rate=0.1,
            attention_dropout_rate=0.1,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=0.02),
            dtype="float32",
        )
        original = encoder_scaffold.EncoderScaffold(
            num_hidden_instances=3,
            num_output_classes=width,
            classification_layer_initializer=(
                tf.keras.initializers.TruncatedNormal(stddev=0.02)),
            hidden_cfg=hidden_cfg,
            embedding_cfg=embedding_cfg)

        # Rebuild a second network purely from the first one's config.
        restored = encoder_scaffold.EncoderScaffold.from_config(
            original.get_config())

        # The restored network must also be expressible as JSON.
        restored.to_json()

        # A successful round trip leaves the config unchanged.
        self.assertAllEqual(original.get_config(), restored.get_config())
Exemple #6
0
  def test_serialize_deserialize(self, use_hidden_cls_instance):
    """Round-trip a scaffold with custom hidden/mask layers through its config.

    Args:
      use_hidden_cls_instance: When true, already-instantiated hidden and mask
        layers are passed to the scaffold; otherwise the layer classes and
        their configs are passed.
    """
    width = 32
    seq_len = 21
    vocab_size = 57
    num_types = 7

    embedding_cfg = dict(
        vocab_size=vocab_size,
        type_vocab_size=num_types,
        hidden_size=width,
        seq_length=seq_len,
        max_seq_length=seq_len,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        dropout_rate=0.1,
    )

    hidden_calls = []
    hidden_cfg = dict(
        num_attention_heads=2,
        intermediate_size=3072,
        intermediate_activation=activations.gelu,
        dropout_rate=0.1,
        attention_dropout_rate=0.1,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        call_list=hidden_calls,
        call_class=TestLayer,
    )
    mask_calls = []
    mask_cfg = dict(call_list=mask_calls, call_class=TestLayer)

    # Arguments common to both construction variants.
    common_kwargs = {
        "num_hidden_instances": 3,
        "pooled_output_dim": width,
        "pooler_layer_initializer": tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        "embedding_cfg": embedding_cfg,
    }

    if use_hidden_cls_instance:
      # Hand over already-instantiated layer objects.
      layer_kwargs = {
          "hidden_cls": ValidatedTransformerLayer(**hidden_cfg),
          "mask_cls": ValidatedMaskLayer(**mask_cfg),
      }
    else:
      # Hand over the classes plus the configs to build them from.
      layer_kwargs = {
          "hidden_cls": ValidatedTransformerLayer,
          "hidden_cfg": hidden_cfg,
          "mask_cls": ValidatedMaskLayer,
          "mask_cfg": mask_cfg,
      }
    original = encoder_scaffold.EncoderScaffold(**layer_kwargs, **common_kwargs)

    # Rebuild a second network purely from the first one's config.
    restored = encoder_scaffold.EncoderScaffold.from_config(
        original.get_config())

    # The restored network must also be expressible as JSON.
    restored.to_json()

    # A successful round trip leaves the config unchanged.
    self.assertAllEqual(original.get_config(), restored.get_config())

    # Wrap both networks in models that share the same symbolic inputs.
    word_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    type_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    inputs = [word_ids, mask, type_ids]

    restored_seq, restored_pooled = restored([word_ids, mask, type_ids])
    restored_model = tf.keras.Model(inputs, [restored_seq, restored_pooled])

    original_seq, original_pooled = original([word_ids, mask, type_ids])
    original_model = tf.keras.Model(inputs, [original_seq, original_pooled])

    # Give the restored model exactly the weights of the original one.
    restored_model.set_weights(original_model.get_weights())

    # Feed identical random data to both models.
    batch_size = 3
    batch_shape = (batch_size, seq_len)
    word_id_data = np.random.randint(vocab_size, size=batch_shape)
    mask_data = np.random.randint(2, size=batch_shape)
    type_id_data = np.random.randint(num_types, size=batch_shape)
    feed = [word_id_data, mask_data, type_id_data]
    data, cls = original_model.predict(feed)
    new_data, new_cls = restored_model.predict(feed)

    # With identical weights the outputs should be equal.
    self.assertAllEqual(data, new_data)
    self.assertAllEqual(cls, new_cls)
Exemple #7
0
  def test_hidden_cls_list(self):
    """Check that scaffolds can be built from another scaffold's layers.

    Network a is built first. Network b receives a's embedding network and
    a's full `hidden_layers`; network c receives a's embedding network and
    all but the last of those hidden layers. a and b are expected to produce
    identical sequence outputs, while c is expected to differ.
    """
    hidden_size = 32
    sequence_length = 10
    vocab_size = 57

    embedding_network = Embeddings(vocab_size, hidden_size)

    call_list = []
    hidden_cfg = {
        "num_attention_heads": 2,
        "intermediate_size": 3072,
        "intermediate_activation": activations.gelu,
        "dropout_rate": 0.1,
        "attention_dropout_rate": 0.1,
        "kernel_initializer":
            tf.keras.initializers.TruncatedNormal(stddev=0.02),
        "call_list": call_list,
    }
    mask_call_list = []
    mask_cfg = {"call_list": mask_call_list}
    # Create a small EncoderScaffold for testing. This time, we pass
    # already-instantiated layer objects.
    xformer = ValidatedTransformerLayer(**hidden_cfg)
    xmask = ValidatedMaskLayer(**mask_cfg)

    test_network_a = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=3,
        pooled_output_dim=hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        hidden_cls=xformer,
        mask_cls=xmask,
        embedding_cls=embedding_network)
    # Create a network b with same embedding and hidden layers as network a.
    test_network_b = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=3,
        pooled_output_dim=hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        mask_cls=xmask,
        embedding_cls=test_network_a.embedding_network,
        hidden_cls=test_network_a.hidden_layers)
    # Create a network c with same embedding but fewer hidden layers compared to
    # network a and b. Work on a copy: popping from the object returned by
    # `hidden_layers` could mutate state shared with networks a and b.
    hidden_layers = list(test_network_a.hidden_layers)[:-1]
    test_network_c = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=2,
        pooled_output_dim=hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        mask_cls=xmask,
        embedding_cls=test_network_a.embedding_network,
        hidden_cls=hidden_layers)

    # Create the inputs (note that the first dimension is implicit).
    word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)

    # Create model based off of network a:
    data_a, pooled_a = test_network_a([word_ids, mask])
    model_a = tf.keras.Model([word_ids, mask], [data_a, pooled_a])
    # Create model based off of network b:
    data_b, pooled_b = test_network_b([word_ids, mask])
    model_b = tf.keras.Model([word_ids, mask], [data_b, pooled_b])
    # Create model based off of network c:
    data_c, pooled_c = test_network_c([word_ids, mask])
    model_c = tf.keras.Model([word_ids, mask], [data_c, pooled_c])

    batch_size = 3
    word_id_data = np.random.randint(
        vocab_size, size=(batch_size, sequence_length))
    mask_data = np.random.randint(2, size=(batch_size, sequence_length))
    output_a, _ = model_a.predict([word_id_data, mask_data])
    output_b, _ = model_b.predict([word_id_data, mask_data])
    output_c, _ = model_c.predict([word_id_data, mask_data])

    # Outputs from model a and b should be the same since they share the same
    # embedding and hidden layers.
    self.assertAllEqual(output_a, output_b)
    # Outputs from model a and c shouldn't be the same since they share the same
    # embedding layer but different number of hidden layers.
    self.assertNotAllEqual(output_a, output_c)
Exemple #8
0
  def test_network_invocation(self):
    """Run a scaffold built from already-instantiated hidden and mask layers.

    Output values are not validated; the predict call surfaces structural
    runtime errors, and the shared call list is checked afterwards.
    """
    width = 32
    seq_len = 21
    vocab_size = 57
    num_types = 7

    embedding_cfg = dict(
        vocab_size=vocab_size,
        type_vocab_size=num_types,
        hidden_size=width,
        seq_length=seq_len,
        max_seq_length=seq_len,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        dropout_rate=0.1,
    )

    # Shared with the hidden layer through its config; checked at the end.
    hidden_calls = []
    hidden_cfg = dict(
        num_attention_heads=2,
        intermediate_size=3072,
        intermediate_activation=activations.gelu,
        dropout_rate=0.1,
        attention_dropout_rate=0.1,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        call_list=hidden_calls,
    )
    mask_calls = []
    mask_cfg = dict(call_list=mask_calls)

    # The scaffold receives layer objects rather than classes this time.
    hidden_layer = ValidatedTransformerLayer(**hidden_cfg)
    mask_layer = ValidatedMaskLayer(**mask_cfg)

    scaffold = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=3,
        pooled_output_dim=width,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        hidden_cls=hidden_layer,
        mask_cls=mask_layer,
        embedding_cfg=embedding_cfg)

    # Symbolic inputs; the batch dimension is implicit.
    word_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    type_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    sequence_out, pooled_out = scaffold([word_ids, mask, type_ids])

    # Wrap the scaffold in a model so it can be invoked on concrete data.
    model = tf.keras.Model([word_ids, mask, type_ids],
                           [sequence_out, pooled_out])

    batch_size = 3
    batch_shape = (batch_size, seq_len)
    word_id_data = np.random.randint(vocab_size, size=batch_shape)
    mask_data = np.random.randint(2, size=batch_shape)
    type_id_data = np.random.randint(num_types, size=batch_shape)
    model.predict([word_id_data, mask_data, type_id_data])

    # A truthy first entry means the passed layer was called as part of the
    # graph creation.
    self.assertNotEmpty(hidden_calls)
    self.assertTrue(hidden_calls[0],
                    "The passed layer class wasn't instantiated.")
Exemple #9
0
  def test_serialize_deserialize(self):
    """Round-trip a scaffold that wraps a custom embedding model.

    Besides comparing configs and outputs, the test checks that the restored
    network raises when asked for its embedding table.
    """
    width = 32
    seq_len = 21
    vocab_size = 57

    # Custom embedding model to swap in for the default one: it takes word ids
    # and a mask (2 inputs instead of 3) and has no positional embeddings.
    emb_word_ids = tf.keras.layers.Input(
        shape=(seq_len,), dtype=tf.int32, name="input_word_ids")
    emb_mask = tf.keras.layers.Input(
        shape=(seq_len,), dtype=tf.int32, name="input_mask")
    embedding_layer = layers.OnDeviceEmbedding(
        vocab_size=vocab_size,
        embedding_width=width,
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        name="word_embeddings")
    word_embeddings = embedding_layer(emb_word_ids)
    attention_mask = layers.SelfAttentionMask()([word_embeddings, emb_mask])
    embedding_model = tf.keras.Model([emb_word_ids, emb_mask],
                                     [word_embeddings, attention_mask])

    hidden_cfg = dict(
        num_attention_heads=2,
        intermediate_size=3072,
        intermediate_activation=activations.gelu,
        dropout_rate=0.1,
        attention_dropout_rate=0.1,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
    )

    original = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=3,
        pooled_output_dim=width,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        hidden_cfg=hidden_cfg,
        embedding_cls=embedding_model,
        embedding_data=embedding_layer.embeddings)

    # Rebuild a second network purely from the first one's config.
    restored = encoder_scaffold.EncoderScaffold.from_config(
        original.get_config())

    # The restored network must also be expressible as JSON.
    restored.to_json()

    # A successful round trip leaves the config unchanged.
    self.assertAllEqual(original.get_config(), restored.get_config())

    # Wrap both networks in models that share the same symbolic inputs.
    word_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)

    restored_seq, restored_pooled = restored([word_ids, mask])
    restored_model = tf.keras.Model([word_ids, mask],
                                    [restored_seq, restored_pooled])

    original_seq, original_pooled = original([word_ids, mask])
    original_model = tf.keras.Model([word_ids, mask],
                                    [original_seq, original_pooled])

    # Give the restored model exactly the weights of the original one.
    restored_model.set_weights(original_model.get_weights())

    # Feed identical random data to both models.
    batch_size = 3
    batch_shape = (batch_size, seq_len)
    word_id_data = np.random.randint(vocab_size, size=batch_shape)
    mask_data = np.random.randint(2, size=batch_shape)
    data, cls = original_model.predict([word_id_data, mask_data])
    new_data, new_cls = restored_model.predict([word_id_data, mask_data])

    # With identical weights the outputs should be equal.
    self.assertAllEqual(data, new_data)
    self.assertAllEqual(cls, new_cls)

    # The restored network should not expose a reference to the embedding
    # data.
    with self.assertRaisesRegex(RuntimeError, ".*does not have a reference.*"):
      restored.get_embedding_table()
Exemple #10
0
  def test_network_invocation(self):
    """Run default-layer scaffolds, once with dict outputs and once without.

    The second scaffold uses a `max_seq_length` twice as large as the actual
    sequence length. Output values are not validated beyond the pooled output
    shape; the predict calls surface structural runtime errors.
    """
    hidden_size = 32
    sequence_length = 21
    vocab_size = 57
    num_types = 7

    def build_embedding_cfg(max_seq_length):
      # Fresh config (and fresh initializer) per scaffold.
      return {
          "vocab_size": vocab_size,
          "type_vocab_size": num_types,
          "hidden_size": hidden_size,
          "seq_length": sequence_length,
          "max_seq_length": max_seq_length,
          "initializer": tf.keras.initializers.TruncatedNormal(stddev=0.02),
          "dropout_rate": 0.1,
      }

    def build_hidden_cfg():
      # Fresh config (and fresh initializer) per scaffold.
      return {
          "num_attention_heads": 2,
          "intermediate_size": 3072,
          "intermediate_activation": activations.gelu,
          "dropout_rate": 0.1,
          "attention_dropout_rate": 0.1,
          "kernel_initializer":
              tf.keras.initializers.TruncatedNormal(stddev=0.02),
      }

    # First scaffold: max_seq_length == sequence_length, dict outputs.
    embedding_cfg = build_embedding_cfg(sequence_length)
    hidden_cfg = build_hidden_cfg()
    dict_scaffold = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=3,
        pooled_output_dim=hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        hidden_cfg=hidden_cfg,
        embedding_cfg=embedding_cfg,
        dict_outputs=True)

    # Symbolic inputs; the batch dimension is implicit.
    word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    model_inputs = [word_ids, mask, type_ids]
    dict_model = tf.keras.Model(model_inputs,
                                dict_scaffold([word_ids, mask, type_ids]))

    batch_size = 3
    batch_shape = (batch_size, sequence_length)
    word_id_data = np.random.randint(vocab_size, size=batch_shape)
    mask_data = np.random.randint(2, size=batch_shape)
    type_id_data = np.random.randint(num_types, size=batch_shape)
    feed = [word_id_data, mask_data, type_id_data]
    preds = dict_model.predict(feed)
    self.assertEqual(preds["pooled_output"].shape, (batch_size, hidden_size))

    # Second scaffold: max_seq_length != sequence_length, default outputs.
    embedding_cfg = build_embedding_cfg(sequence_length * 2)
    hidden_cfg = build_hidden_cfg()
    long_scaffold = encoder_scaffold.EncoderScaffold(
        num_hidden_instances=3,
        pooled_output_dim=hidden_size,
        pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=0.02),
        hidden_cfg=hidden_cfg,
        embedding_cfg=embedding_cfg)
    long_model = tf.keras.Model(model_inputs,
                                long_scaffold([word_ids, mask, type_ids]))
    long_model.predict(feed)
Exemple #11
0
    def test_network_invocation(self):
        """Run a scaffold around a custom embedding model and fetch its table.

        Output values are not validated; the predict call surfaces structural
        runtime errors. Afterwards the scaffold must hand back the very
        embedding variable it was constructed with.
        """
        width = 32
        seq_len = 21
        vocab_size = 57

        # Custom embedding model to swap in for the default one: it takes word
        # ids and a mask (2 inputs instead of 3) and has no positional
        # embeddings.
        emb_word_ids = tf.keras.layers.Input(
            shape=(seq_len, ), dtype=tf.int32, name="input_word_ids")
        emb_mask = tf.keras.layers.Input(
            shape=(seq_len, ), dtype=tf.int32, name="input_mask")
        embedding_layer = layers.OnDeviceEmbedding(
            vocab_size=vocab_size,
            embedding_width=width,
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
            name="word_embeddings")
        word_embeddings = embedding_layer(emb_word_ids)
        embedding_model = tf.keras.Model([emb_word_ids, emb_mask],
                                         [word_embeddings, emb_mask])

        hidden_cfg = dict(
            num_attention_heads=2,
            intermediate_size=3072,
            intermediate_activation=activations.gelu,
            dropout_rate=0.1,
            attention_dropout_rate=0.1,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=0.02),
        )

        scaffold = encoder_scaffold.EncoderScaffold(
            num_hidden_instances=3,
            num_output_classes=width,
            classification_layer_initializer=(
                tf.keras.initializers.TruncatedNormal(stddev=0.02)),
            hidden_cfg=hidden_cfg,
            embedding_cls=embedding_model,
            embedding_data=embedding_layer.embeddings)

        # Symbolic inputs; the batch dimension is implicit.
        word_ids = tf.keras.Input(shape=(seq_len, ), dtype=tf.int32)
        mask = tf.keras.Input(shape=(seq_len, ), dtype=tf.int32)
        sequence_out, pooled_out = scaffold([word_ids, mask])

        # Wrap the scaffold in a model so it can be invoked on concrete data.
        model = tf.keras.Model([word_ids, mask], [sequence_out, pooled_out])

        batch_size = 3
        batch_shape = (batch_size, seq_len)
        word_id_data = np.random.randint(vocab_size, size=batch_shape)
        mask_data = np.random.randint(2, size=batch_shape)
        model.predict([word_id_data, mask_data])

        # The embedding data passed to the scaffold must be retrievable; this
        # is necessary to support standard language model training.
        self.assertIs(embedding_layer.embeddings,
                      scaffold.get_embedding_table())