예제 #1
0
  def test_all_encoder_outputs_network_creation(self, pool_stride,
                                                unpool_length):
    """Checks the static shape and dtype of every per-layer encoder output.

    Builds a small `FunnelTransformerEncoder`, calls it on symbolic Keras
    inputs and verifies that `"encoder_outputs"` holds one tensor per layer
    whose sequence dimension shrinks according to the formula used below.

    Args:
      pool_stride: Value forwarded to the encoder as `pool_stride`. The
        expected-length arithmetic in this test requires an integer.
      unpool_length: Value forwarded to the encoder as `unpool_length`; the
        expected-length formula excludes this many positions from the
        division by `pool_stride`.
    """
    hidden_size = 32
    sequence_length = 21
    num_layers = 3
    # Create a small FunnelTransformerEncoder for testing.
    test_network = funnel_transformer.FunnelTransformerEncoder(
        vocab_size=100,
        hidden_size=hidden_size,
        num_attention_heads=2,
        num_layers=num_layers,
        pool_stride=pool_stride,
        unpool_length=unpool_length)
    # Create the inputs (note that the first dimension is implicit).
    word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    dict_outputs = test_network([word_ids, mask, type_ids])
    all_encoder_outputs = dict_outputs["encoder_outputs"]
    pooled = dict_outputs["pooled_output"]

    self.assertLen(all_encoder_outputs, num_layers)

    # Each layer's expected length is derived from the previous layer's, so
    # the running value is carried across iterations.
    expected_length = sequence_length
    for layer_index, data in enumerate(all_encoder_outputs):
      # unpool_length + ceil((previous_length - unpool_length) / pool_stride)
      expected_length = unpool_length + (
          expected_length + pool_stride - 1 - unpool_length) // pool_stride
      expected_data_shape = [None, expected_length, hidden_size]
      actual_data_shape = data.shape.as_list()
      # The diagnostic is attached to the assertion instead of being printed
      # unconditionally, so it only appears when the check fails.
      self.assertAllEqual(
          expected_data_shape,
          actual_data_shape,
          msg="encoder output %d: expected shape %s, got %s" %
          (layer_index, expected_data_shape, actual_data_shape))

    expected_pooled_shape = [None, hidden_size]
    self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list())

    # The default output dtype is float32.
    self.assertAllEqual(tf.float32, all_encoder_outputs[-1].dtype)
    self.assertAllEqual(tf.float32, pooled.dtype)
예제 #2
0
    def test_serialize_deserialize(self):
        """Round-trips the encoder through its config and through save/load.

        Three things are verified: `get_config()` matches the constructor
        arguments (with activation and initializer in serialized form), a
        network rebuilt via `from_config()` reports the same config, and a
        wrapped model can be saved and loaded after one forward pass.
        """
        # Constructor arguments; every one is expected back from get_config().
        encoder_args = {
            "vocab_size": 100,
            "hidden_size": 32,
            "num_layers": 3,
            "num_attention_heads": 2,
            "max_sequence_length": 21,
            "type_vocab_size": 12,
            "inner_dim": 1223,
            "inner_activation": "relu",
            "output_dropout": 0.05,
            "attention_dropout": 0.22,
            "initializer": "glorot_uniform",
            "output_range": -1,
            "embedding_width": 16,
            "embedding_layer": None,
            "norm_first": False,
            "pool_type": "max",
            "pool_stride": 2,
            "unpool_length": 0,
            "transformer_cls": "TransformerEncoderBlock",
        }
        network = funnel_transformer.FunnelTransformerEncoder(**encoder_args)

        # The activation and initializer are compared in their serialized
        # form rather than as the short string names passed in.
        expected_config = encoder_args.copy()
        activation_fn = tf.keras.activations.get(
            encoder_args["inner_activation"])
        expected_config["inner_activation"] = (
            tf.keras.activations.serialize(activation_fn))
        initializer_obj = tf.keras.initializers.get(
            encoder_args["initializer"])
        expected_config["initializer"] = (
            tf.keras.initializers.serialize(initializer_obj))
        self.assertEqual(network.get_config(), expected_config)

        # Rebuild a second network purely from the first one's config.
        encoder_cls = funnel_transformer.FunnelTransformerEncoder
        new_network = encoder_cls.from_config(network.get_config())

        # A successful round trip yields an identical config.
        self.assertAllEqual(network.get_config(), new_network.get_config())

        # Model saving/loading.
        model_path = self.get_temp_dir() + "/model"
        network_wrapper = SingleLayerModel(network)

        # Run one forward pass before saving so the input shape is known.
        batch_size = 3
        data_shape = (batch_size, encoder_args["max_sequence_length"])
        word_id_data = np.random.randint(encoder_args["vocab_size"],
                                         size=data_shape)
        mask_data = np.random.randint(2, size=data_shape)
        type_id_data = np.random.randint(encoder_args["type_vocab_size"],
                                         size=data_shape)

        network_wrapper.predict([word_id_data, mask_data, type_id_data])
        network_wrapper.save(model_path)
        tf.keras.models.load_model(model_path)
    def test_network_creation_dense(self):
        """Builds the encoder with extra dense inputs and checks output shapes.

        The network is created under the "mixed_float16" global policy with
        average pooling and is called with six symbolic inputs: the three
        token inputs followed by the three dense inputs.
        """
        # NOTE(review): the global policy is not restored in this method;
        # presumably the enclosing test class resets it in tearDown. Confirm.
        tf.keras.mixed_precision.set_global_policy("mixed_float16")

        hidden_size = 32
        num_layers = 3
        sequence_length = 21
        dense_sequence_length = 3

        # Small encoder sized to hold token and dense positions together.
        test_network = funnel_transformer.FunnelTransformerEncoder(
            vocab_size=100,
            hidden_size=hidden_size,
            num_attention_heads=2,
            num_layers=num_layers,
            pool_stride=2,
            pool_type="avg",
            max_sequence_length=sequence_length + dense_sequence_length,
            unpool_length=0,
            transformer_cls="TransformerEncoderBlock")

        # Symbolic inputs; the batch dimension is implicit.
        word_ids, mask, type_ids = (
            tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
            for _ in range(3))
        dense_inputs = tf.keras.Input(
            shape=(dense_sequence_length, hidden_size), dtype=tf.float32)
        dense_mask, dense_type_ids = (
            tf.keras.Input(shape=(dense_sequence_length,), dtype=tf.int32)
            for _ in range(2))

        dict_outputs = test_network([
            word_ids, mask, type_ids, dense_inputs, dense_mask, dense_type_ids
        ])
        data = dict_outputs["sequence_output"]
        pooled = dict_outputs["pooled_output"]

        layers = test_network.transformer_layers
        self.assertIsInstance(layers, list)
        self.assertLen(layers, num_layers)
        self.assertIsInstance(test_network.pooler_layer, tf.keras.layers.Dense)

        # With stride 2 the sequence length halves at each layer; for this
        # configuration that is 24 -> 12 -> 6 -> 3.
        self.assertAllEqual([None, 3, hidden_size], data.shape.as_list())
        self.assertAllEqual([None, hidden_size], pooled.shape.as_list())
 def test_invalid_stride_and_num_layers(self):
     """Expects a ValueError for a two-element stride list with three layers."""
     # Two strides for three layers: the lengths deliberately disagree.
     encoder_args = {
         "vocab_size": 100,
         "hidden_size": 32,
         "num_attention_heads": 2,
         "num_layers": 3,
         "pool_stride": [2, 2],
         "unpool_length": 1,
     }
     expected_message = "pool_stride and num_layers are not equal"
     with self.assertRaisesRegex(ValueError, expected_message):
         funnel_transformer.FunnelTransformerEncoder(**encoder_args)
예제 #5
0
    def test_network_creation(self, policy, pooled_dtype, pool_type,
                              transformer_cls):
        """Builds the encoder symbolically and checks its structure and outputs.

        Args:
            policy: Name passed to
                `tf.keras.mixed_precision.set_global_policy`.
            pooled_dtype: Dtype the pooled output is expected to have.
            pool_type: Value forwarded to the encoder as `pool_type`.
            transformer_cls: Value forwarded to the encoder as
                `transformer_cls`.
        """
        # NOTE(review): the global policy is not restored in this method;
        # presumably the enclosing test class resets it in tearDown. Confirm.
        tf.keras.mixed_precision.set_global_policy(policy)

        hidden_size = 32
        sequence_length = 21
        num_layers = 3

        # Small encoder whose maximum length equals the input length.
        test_network = funnel_transformer.FunnelTransformerEncoder(
            vocab_size=100,
            hidden_size=hidden_size,
            num_attention_heads=2,
            num_layers=num_layers,
            pool_stride=2,
            pool_type=pool_type,
            max_sequence_length=sequence_length,
            unpool_length=0,
            transformer_cls=transformer_cls)

        # Symbolic inputs; the batch dimension is implicit.
        word_ids, mask, type_ids = (
            tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
            for _ in range(3))
        dict_outputs = test_network([word_ids, mask, type_ids])
        data = dict_outputs["sequence_output"]
        pooled = dict_outputs["pooled_output"]

        layers = test_network.transformer_layers
        self.assertIsInstance(layers, list)
        self.assertLen(layers, num_layers)
        self.assertIsInstance(test_network.pooler_layer, tf.keras.layers.Dense)

        # With stride 2 the sequence length roughly halves at each layer:
        #   "max" / "avg":   21 -> 11 -> 6 -> 3
        #   "truncated_avg": 21 -> 10 -> 5 -> 2
        final_length = 3 if pool_type in ["max", "avg"] else 2
        self.assertAllEqual([None, final_length, hidden_size],
                            data.shape.as_list())
        self.assertAllEqual([None, hidden_size], pooled.shape.as_list())

        # The sequence output is expected to stay float32 even under a
        # float16 policy (it comes from a layer norm), while the pooled
        # output follows the policy.
        self.assertAllEqual(tf.float32, data.dtype)
        self.assertAllEqual(pooled_dtype, pooled.dtype)
    def test_network_invocation(self, output_range, out_seq_len,
                                unpool_length):
        """Feeds random data through three encoder configurations.

        The output values are not validated; the test only catches structural
        runtime errors and checks selected output dimensions.

        Args:
            output_range: Value forwarded to the first encoder as
                `output_range`.
            out_seq_len: Expected size of axis 1 of the first encoder's
                sequence output.
            unpool_length: Value forwarded to the first encoder as
                `unpool_length`.
        """
        hidden_size = 32
        sequence_length = 21
        vocab_size = 57
        num_types = 7
        max_sequence_length = 128

        # Constructor arguments common to all three encoders built below.
        shared_args = {
            "vocab_size": vocab_size,
            "hidden_size": hidden_size,
            "num_attention_heads": 2,
            "num_layers": 3,
            "type_vocab_size": num_types,
            "pool_stride": 2,
        }

        def make_encoder(**extra_args):
            """Returns an encoder built from the shared and extra arguments."""
            return funnel_transformer.FunnelTransformerEncoder(
                **shared_args, **extra_args)

        # First configuration: small encoder with the parameterized options.
        test_network = make_encoder(output_range=output_range,
                                    unpool_length=unpool_length)

        # Symbolic inputs; the batch dimension is implicit.
        word_ids, mask, type_ids = (
            tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
            for _ in range(3))

        def as_model(network):
            """Wraps `network` in a Model emitting sequence and pooled outputs."""
            outputs = network([word_ids, mask, type_ids])
            data = outputs["sequence_output"]
            pooled = outputs["pooled_output"]
            return tf.keras.Model([word_ids, mask, type_ids], [data, pooled])

        model = as_model(test_network)

        # Random input data, reused for all three models.
        batch_size = 3
        data_shape = (batch_size, sequence_length)
        word_id_data = np.random.randint(vocab_size, size=data_shape)
        mask_data = np.random.randint(2, size=data_shape)
        type_id_data = np.random.randint(num_types, size=data_shape)

        def run(keras_model):
            """Returns the predictions of `keras_model` on the random data."""
            return keras_model.predict(
                [word_id_data, mask_data, type_id_data])

        sequence_output = run(model)[0]
        self.assertEqual(sequence_output.shape[1], out_seq_len)  # output_range

        # Second configuration: max_sequence_length != sequence_length.
        test_network = make_encoder(max_sequence_length=max_sequence_length)
        sequence_output = run(as_model(test_network))[0]
        self.assertEqual(sequence_output.shape[1], 3)

        # Third configuration: embedding_width != hidden_size.
        test_network = make_encoder(max_sequence_length=max_sequence_length,
                                    embedding_width=16)
        sequence_output = run(as_model(test_network))[0]
        self.assertEqual(sequence_output.shape[-1], hidden_size)
        self.assertTrue(hasattr(test_network, "_embedding_projection"))