Example #1
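All four examples read as unittest methods exercising the fast-transformers encoder builder and its event system. A minimal sketch of the shared setup they assume (import paths per the fast_transformers package; the TestEvents class name and scaffolding are an assumption):

    import unittest
    import torch

    from fast_transformers.builders import TransformerEncoderBuilder
    from fast_transformers.events import EventDispatcher, AttentionEvent, QKVEvent
    from fast_transformers.events.filters import layer_name_contains

    class TestEvents(unittest.TestCase):  # hypothetical container for the methods below
        ...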
    def test_simple_build(self):
        # Building with all default parameters should work
        transformer = TransformerEncoderBuilder().get()

        # Building with custom parameters should work too
        builder = TransformerEncoderBuilder()
        builder.n_layers = 1
        builder.n_heads = 4
        builder.attention_type = "linear"
        transformer = builder.get()

        # An unknown attention type is rejected at assignment time
        with self.assertRaises(ValueError):
            builder = TransformerEncoderBuilder()
            builder.attention_type = "whatever"
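The builder's get() returns an ordinary PyTorch module. A hedged usage sketch of the encoder built above (the 64-dimensions-per-head default is inferred from the 64*4 inputs used in the later examples):

    builder = TransformerEncoderBuilder()
    builder.n_layers = 1
    builder.n_heads = 4
    builder.attention_type = "linear"
    model = builder.get()

    # Input is (batch, sequence length, model dimension) = (1, 100, 4*64)
    y = model(torch.rand(1, 100, 4 * 64))
    print(y.shape)  # torch.Size([1, 100, 256]); the shape is preserved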
Example #2
    def test_attention_matrix(self):
        A = []
        def store_attention(event):
            A.append(event.attention_matrix)

        # The default transformer has 4 layers of 4 heads each,
        # with a model dimension of 64*4 = 256
        transformer = TransformerEncoderBuilder().get()

        # No listener is registered yet, so nothing is recorded
        x = transformer(torch.rand(1, 100, 64*4))
        self.assertEqual(len(A), 0)

        # Once registered, the listener receives one AttentionEvent per layer
        EventDispatcher.get().listen(AttentionEvent, store_attention)
        x = transformer(torch.rand(1, 100, 64*4))
        self.assertEqual(len(A), 4)
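One detail worth noting: EventDispatcher.get() appears to hand back a shared dispatcher, so store_attention stays registered after this test ends. Example #4 below uses remove() and clear() for exactly this; the same cleanup would fit here:

    EventDispatcher.get().remove(store_attention)  # detach this one listener
    EventDispatcher.get().clear()                  # or drop every listener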
Example #3
    def test_attention_parameter(self):
        builder = TransformerEncoderBuilder()

        # Common transformer parameters
        builder.n_layers = 3
        builder.n_heads = 4
        builder.feed_forward_dimensions = 512
        builder.query_dimensions = 32
        builder.value_dimensions = 64
        builder.dropout = 0.1
        builder.activation = "relu"
        builder.final_normalization = True

        # Full attention parameters
        builder.softmax_temp = 1.0
        builder.attention_dropout = 0.1

        # Linear attention parameters
        builder.feature_map = lambda x: (x > 0).float() * x

        # Clustered attention parameters
        builder.clusters = 100
        builder.iterations = 10
        builder.bits = 32
        builder.hash_bias = True

        # Exact topk attention parameters
        builder.topk = 32

        # Conditional attention parameters
        builder.length_limit = 512

        # Reformer attention parameters
        builder.chunk_size = 32
        builder.rounds = 1

        # Old parameter names should raise; keep them listed here to avoid regressions
        invalid = [
            "dropout_rate"
        ]
        for name in invalid:
            with self.assertRaises(AttributeError):
                setattr(builder, name, None)
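The builder accepts the union of all these parameters up front; which ones take effect depends on the attention_type selected when get() is called. A hedged sketch pairing one parameter group with its type (the "clustered" type string is an assumption matching the parameter group above):

    builder = TransformerEncoderBuilder()
    builder.attention_type = "clustered"  # assumed type name for clustered attention
    builder.clusters = 100
    builder.iterations = 10
    builder.bits = 32
    model = builder.get()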
Example #4
    def test_qkv(self):
        d = {}
        def store_qkv(event):
            d["q"] = event.queries
            d["k"] = event.keys
            d["v"] = event.values

        # The default transformer has 4 layers of 4 heads each
        transformer = TransformerEncoderBuilder().get()

        # No listener is registered yet, so nothing is recorded
        x = transformer(torch.rand(1, 100, 64*4))
        self.assertEqual(len(d), 0)

        # A registered listener receives the queries, keys and values
        EventDispatcher.get().listen(QKVEvent, store_qkv)
        x = transformer(torch.rand(1, 100, 64*4))
        self.assertEqual(len(d), 3)
        d.clear()

        # A removed listener stops receiving events
        EventDispatcher.get().remove(store_qkv)
        x = transformer(torch.rand(1, 100, 64*4))
        self.assertEqual(len(d), 0)
        d.clear()

        # Filters restrict the listener to events from matching layers
        EventDispatcher.get().listen(
            QKVEvent & layer_name_contains(transformer, "layers.2.attention"),
            store_qkv
        )
        x = transformer(torch.rand(1, 100, 64*4))
        self.assertEqual(len(d), 3)
        d.clear()

        # A filter that matches no layer records nothing
        EventDispatcher.get().listen(
            QKVEvent & layer_name_contains(transformer, "layers.22.attention"),
            store_qkv
        )
        x = transformer(torch.rand(1, 100, 64*4))
        self.assertEqual(len(d), 0)
        d.clear()
        EventDispatcher.get().clear()
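The final block is the negative case: the default encoder has only 4 layers, so no module name contains "layers.22.attention" and the filtered listener never fires. Since filters compose with &, the same pattern narrows the listener to any single layer; a hedged sketch (layer index 0 chosen arbitrarily):

    EventDispatcher.get().listen(
        QKVEvent & layer_name_contains(transformer, "layers.0.attention"),
        store_qkv
    )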