def __init__(
    self,
    d_feat: int = 6,
    embed_dim: List[super_core.IntSpaceType] = DefaultSearchSpace["embed_dim"],
    num_heads: List[super_core.IntSpaceType] = DefaultSearchSpace["num_heads"],
    mlp_hidden_multipliers: List[super_core.IntSpaceType] = DefaultSearchSpace[
        "mlp_hidden_multipliers"
    ],
    qkv_bias: bool = DefaultSearchSpace["qkv_bias"],
    pos_drop: float = DefaultSearchSpace["pos_drop"],
    other_drop: float = DefaultSearchSpace["other_drop"],
    max_seq_len: int = 65,
):
    super(SuperTransformer, self).__init__()
    self._embed_dim = embed_dim
    self._num_heads = num_heads
    self._mlp_hidden_multipliers = mlp_hidden_multipliers

    # the stem part
    self.input_embed = super_core.SuperAlphaEBDv1(d_feat, embed_dim)
    # NOTE: `self.embed_dim` (no underscore) is presumably a property of this class
    # that resolves the search space to a concrete integer (e.g., its maximum);
    # only `self._embed_dim` is assigned above.
    self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim))
    self.pos_embed = super_core.SuperPositionalEncoder(
        d_model=embed_dim, max_seq_len=max_seq_len, dropout=pos_drop
    )

    # build the transformer encoder layers -->> check params
    _assert_types(num_heads, (tuple, list))
    _assert_types(mlp_hidden_multipliers, (tuple, list))
    assert len(num_heads) == len(mlp_hidden_multipliers), "{:} vs {:}".format(
        len(num_heads), len(mlp_hidden_multipliers)
    )

    # build the transformer encoder layers -->> backbone
    layers = []
    for num_head, mlp_hidden_multiplier in zip(num_heads, mlp_hidden_multipliers):
        layer = super_core.SuperTransformerEncoderLayer(
            embed_dim,
            num_head,
            qkv_bias,
            mlp_hidden_multiplier,
            other_drop,
        )
        layers.append(layer)
    self.backbone = super_core.SuperSequential(*layers)

    # the regression head
    self.head = super_core.SuperSequential(
        super_core.SuperLayerNorm1D(embed_dim),
        super_core.SuperLinear(embed_dim, 1),
    )
    trunc_normal_(self.cls_token, std=0.02)
    self.apply(self._init_weights)
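# --- Hedged usage sketch (illustrative addition, not part of the original class) ---
# Shows how the super-transformer above might be instantiated and run. It assumes
# (a) the enclosing module exposes `SuperTransformer`, and (b) the forward pass
# accepts a flattened (batch, d_feat * seq_len) feature tensor, which is what the
# SuperAlphaEBDv1 stem suggests; both are assumptions, not confirmed by this snippet.
def _example_build_default_transformer():
    model = SuperTransformer(d_feat=6, max_seq_len=65)
    inputs = torch.rand(2, 6 * 60)  # hypothetical batch: 2 samples of 60 steps x 6 features
    outputs = model(inputs)  # regression head -> one value per sample (assumed)
    return outputs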
def test_super_simple_norm(self):
    out_features = spaces.Categorical(12, 24, 36)
    bias = spaces.Categorical(True, False)
    model = super_core.SuperSequential(
        super_core.SuperSimpleNorm(5, 0.5),
        super_core.SuperLinear(10, out_features, bias=bias),
    )
    print("The simple super module is:\n{:}".format(model))

    model.apply_verbose(True)
    print(model.super_run_type)
    self.assertTrue(model[1].bias)

    inputs = torch.rand(20, 10)
    print("Input shape: {:}".format(inputs.shape))
    outputs = model(inputs)
    self.assertEqual(tuple(outputs.shape), (20, 36))

    abstract_space = model.abstract_search_space
    abstract_space.clean_last()
    abstract_child = abstract_space.random()
    print("The abstract search space:\n{:}".format(abstract_space))
    print("The abstract child program:\n{:}".format(abstract_child))

    model.set_super_run_type(super_core.SuperRunMode.Candidate)
    model.apply_candidate(abstract_child)
    output_shape = (20, abstract_child["1"]["_out_features"].value)
    outputs = model(inputs)
    self.assertEqual(tuple(outputs.shape), output_shape)
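# --- Hedged helper sketch (illustrative addition) --- distills the candidate-sampling
# steps exercised in test_super_simple_norm into one reusable function; it uses only
# the methods already called above (abstract_search_space, clean_last, random,
# set_super_run_type, apply_candidate), so no new API is assumed.
def _example_sample_candidate(model):
    space = model.abstract_search_space  # joint search space over all searchable layers
    space.clean_last()                   # drop any cached random draws
    child = space.random()               # sample one concrete architecture
    model.set_super_run_type(super_core.SuperRunMode.Candidate)
    model.apply_candidate(child)         # pin the model to the sampled dimensions
    return child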
def _create_stel(input_dim, output_dim, order):
    return super_core.SuperSequential(
        super_core.SuperLinear(input_dim, output_dim),
        super_core.SuperTransformerEncoderLayer(
            output_dim,
            num_heads=spaces.Categorical(2, 4, 6),
            mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
            order=order,
        ),
    )
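# --- Hedged usage sketch (illustrative addition) --- builds a single stage with
# `_create_stel` and runs a forward pass; `super_core.LayerOrder.PreNorm` is assumed
# to be a valid value for the encoder layer's `order` argument.
def _example_single_stage():
    stage = _create_stel(16, spaces.Categorical(12, 24, 36), super_core.LayerOrder.PreNorm)
    inputs = torch.rand(2, 10, 16)  # (batch, seq_len, feature)
    return stage(inputs)  # in the default run mode the width presumably resolves to one candidate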
def test_super_sequential_v1():
    model = super_core.SuperSequential(
        super_core.SuperSimpleNorm(1, 1),
        torch.nn.ReLU(),
        super_core.SuperLeakyReLU(),
        super_core.SuperLinear(10, 10),
        super_core.SuperReLU(),
    )
    inputs = torch.rand(10, 10)
    print(model)
    outputs = model(inputs)

    abstract_search_space = model.abstract_search_space
    print(abstract_search_space)
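# --- Hedged check sketch (illustrative addition) --- every layer in the v1 model
# above preserves the last dimension (SuperLinear(10, 10) has fixed, non-searchable
# sizes), so the forward pass should be shape-preserving; an explicit check:
def _example_v1_shape_check(model, inputs):
    outputs = model(inputs)
    assert tuple(outputs.shape) == tuple(inputs.shape)  # (10, 10) for the inputs above
    return outputs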
def test_super_sequential(batch, seq_dim, input_dim, order=super_core.LayerOrder.PreNorm):
    # NOTE: `_create_stel` requires an `order` argument; a PreNorm default is assumed
    # here so the calls below match its signature (the original test presumably
    # parameterizes `order` alongside batch/seq_dim/input_dim).
    out1_dim = spaces.Categorical(12, 24, 36)
    out2_dim = spaces.Categorical(24, 36, 48)
    out3_dim = spaces.Categorical(36, 72, 100)
    layer1 = _create_stel(input_dim, out1_dim, order)
    layer2 = _create_stel(out1_dim, out2_dim, order)
    layer3 = _create_stel(out2_dim, out3_dim, order)

    model = super_core.SuperSequential(layer1, layer2, layer3)
    print(model)
    model.apply_verbose(True)

    inputs = torch.rand(batch, seq_dim, input_dim)
    abstract_child, outputs = _internal_func(inputs, model)
    output_shape = (
        batch,
        seq_dim,
        out3_dim.abstract(reuse_last=True).random(reuse_last=True).value,
    )
    assert tuple(outputs.shape) == output_shape
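# --- Hedged helper sketch (illustrative addition) --- the shape assertion above relies
# on `reuse_last=True` replaying the same draw that was applied to the model; this
# helper names that pattern for any searchable dimension, using only calls already
# present in the test.
def _example_last_sampled_value(dim):
    return dim.abstract(reuse_last=True).random(reuse_last=True).value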
def test_transformer_encoder(self, input_dim):
    output_dim = spaces.Categorical(12, 24, 36)
    model = super_core.SuperSequential(
        super_core.SuperLinear(input_dim, output_dim),
        super_core.SuperTransformerEncoderLayer(
            output_dim,
            num_heads=spaces.Categorical(2, 4, 6),
            mlp_hidden_multiplier=spaces.Categorical(1, 2, 4),
        ),
    )
    print(model)
    model.apply_verbose(True)

    inputs = torch.rand(4, 20, input_dim)
    abstract_child, outputs = self._internal_func(inputs, model)
    output_shape = (
        4,
        20,
        output_dim.abstract(reuse_last=True).random(reuse_last=True).value,
    )
    self.assertEqual(tuple(outputs.shape), output_shape)
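# --- Hedged driver sketch (illustrative addition) --- test_transformer_encoder takes
# `input_dim` as an extra argument, so it is presumably parameterized by the test
# runner; a minimal manual driver over a few hypothetical dimensions could be:
def _example_run_encoder_tests(test_case):
    for input_dim in (6, 12, 24):  # hypothetical input dimensions
        test_case.test_transformer_encoder(input_dim)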