def test_simple_build(self):
    # Building with all default parameters should work
    transformer = TransformerEncoderBuilder().get()

    # Setting parameters one by one should also work
    builder = TransformerEncoderBuilder()
    builder.n_layers = 1
    builder.n_heads = 4
    builder.attention_type = "linear"
    transformer = builder.get()

    # Unknown attention types should be rejected at assignment time
    with self.assertRaises(ValueError):
        builder = TransformerEncoderBuilder()
        builder.attention_type = "whatever"
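# A minimal forward-pass sketch, not one of the original tests: it assumes the
# built encoder behaves as a standard PyTorch module whose expected input
# dimension is n_heads * query_dimensions, and that torch is imported at the
# top of this file.
def test_forward_pass_sketch(self):
    builder = TransformerEncoderBuilder()
    builder.n_layers = 1
    builder.n_heads = 4
    builder.query_dimensions = 32
    builder.value_dimensions = 32
    builder.attention_type = "linear"
    transformer = builder.get()

    # Batch of 2 sequences of length 16 with model dimension 4 * 32 = 128
    x = torch.rand(2, 16, 4 * 32)
    y = transformer(x)

    # The encoder is assumed to preserve the input shape
    self.assertEqual(y.shape, x.shape)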
def test_attention_parameter(self):
    builder = TransformerEncoderBuilder()
    builder.n_layers = 3
    builder.n_heads = 4
    builder.feed_forward_dimensions = 512
    builder.query_dimensions = 32
    builder.value_dimensions = 64
    builder.dropout = 0.1
    builder.activation = "relu"
    builder.final_normalization = True

    # Full attention parameters
    builder.softmax_temp = 1.0
    builder.attention_dropout = 0.1

    # Linear attention parameters
    builder.feature_map = lambda x: (x > 0).float() * x

    # Clustered attention parameters
    builder.clusters = 100
    builder.iterations = 10
    builder.bits = 32
    builder.hash_bias = True

    # Exact topk attention parameters
    builder.topk = 32

    # Conditional attention parameters
    builder.length_limit = 512

    # Reformer attention parameters
    builder.chunk_size = 32
    builder.rounds = 1

    # Add old parameter names here to guard against regressions
    invalid = [
        "dropout_rate"
    ]
    for name in invalid:
        with self.assertRaises(AttributeError):
            setattr(builder, name, None)
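# For context, a minimal sketch of the kind of attribute guard the test above
# exercises. This is an assumption about one possible implementation, not the
# library's actual code: __setattr__ checks names against a whitelist of known
# parameters and raises AttributeError for anything else.
class _GuardedBuilderSketch:
    _allowed = {"n_layers", "n_heads", "attention_type"}

    def __setattr__(self, name, value):
        # Reject names that are not known builder parameters
        if name not in self._allowed:
            raise AttributeError(
                "{!r} is not a valid builder parameter".format(name)
            )
        super().__setattr__(name, value)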