Example #1
 def _build_network(self):
     # Category Embedding layers
     self.cat_embedding_layers = nn.ModuleList(
         [
             nn.Embedding(cardinality, self.hparams.embedding_dim)
             for cardinality in self.hparams.categorical_cardinality
         ]
     )
     if self.hparams.batch_norm_continuous_input:
         self.normalizing_batch_norm = nn.BatchNorm1d(self.hparams.continuous_dim)
     # Continuous Embedding Layer
     self.cont_embedding_layer = nn.Embedding(
         self.hparams.continuous_dim, self.hparams.embedding_dim
     )
     if self.hparams.embedding_dropout != 0 and self.embedding_cat_dim != 0:
         self.embed_dropout = nn.Dropout(self.hparams.embedding_dropout)
     # Deep Layers
     _curr_units = self.hparams.embedding_dim
     if self.hparams.deep_layers:
         activation = getattr(nn, self.hparams.activation)
         # Linear Layers
         layers = []
         for units in self.hparams.layers.split("-"):
             layers.extend(
                 _linear_dropout_bn(
                     self.hparams,
                     _curr_units,
                     int(units),
                     activation,
                     self.hparams.dropout,
                 )
             )
             _curr_units = int(units)
         self.linear_layers = nn.Sequential(*layers)
     # Projection to Multi-Headed Attention Dims
     self.attn_proj = nn.Linear(_curr_units, self.hparams.attn_embed_dim)
     _initialize_layers(self.hparams, self.attn_proj)
     # Multi-Headed Attention Layers
     self.self_attns = nn.ModuleList(
         [
             nn.MultiheadAttention(
                 self.hparams.attn_embed_dim,
                 self.hparams.num_heads,
                 dropout=self.hparams.attn_dropouts,
             )
             for _ in range(self.hparams.num_attn_blocks)
         ]
     )
     if self.hparams.has_residuals:
         self.V_res_embedding = torch.nn.Linear(
             _curr_units,
             self.hparams.attn_embed_dim * self.hparams.num_attn_blocks
             if self.hparams.attention_pooling
             else self.hparams.attn_embed_dim,
         )
     self.output_dim = (
         self.hparams.continuous_dim + self.hparams.categorical_dim
     ) * self.hparams.attn_embed_dim
     if self.hparams.attention_pooling:
         self.output_dim = self.output_dim * self.hparams.num_attn_blocks
Example #2
 def _build_network(self):
     # Linear Layers
     layers = []
     _curr_units = self.embedding_cat_dim + self.hparams.continuous_dim
     if self.hparams.embedding_dropout != 0 and self.embedding_cat_dim != 0:
         layers.append(nn.Dropout(self.hparams.embedding_dropout))
     for units in self.hparams.layers.split("-"):
         layers.extend(
             _linear_dropout_bn(
                 self.hparams.activation,
                 self.hparams.initialization,
                 self.hparams.use_batch_norm,
                 _curr_units,
                 int(units),
                 self.hparams.dropout,
             ))
         _curr_units = int(units)
     self.linear_layers = nn.Sequential(*layers)
     self.output_dim = _curr_units
     # Embedding layers
     self.embedding_layers = nn.ModuleList(
         [nn.Embedding(x, y) for x, y in self.hparams.embedding_dims])
     # Continuous Layers
     if self.hparams.batch_norm_continuous_input:
         self.normalizing_batch_norm = nn.BatchNorm1d(
             self.hparams.continuous_dim)
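For orientation, a forward pass consistent with the layers built above would embed each categorical column, concatenate the embeddings with the (optionally batch-normalized) continuous features to a width of embedding_cat_dim + continuous_dim, and feed the result through linear_layers. The sketch below is an illustrative assumption, not the library's actual forward.

 def forward(self, x_cat, x_cont):
     # Hypothetical forward pass matching the construction above.
     # x_cat: (batch, n_categorical) long tensor; x_cont: (batch, continuous_dim) float tensor.
     x = torch.cat(
         [embed(x_cat[:, i]) for i, embed in enumerate(self.embedding_layers)],
         dim=1,
     )  # width == embedding_cat_dim
     if self.hparams.batch_norm_continuous_input:
         x_cont = self.normalizing_batch_norm(x_cont)
     x = torch.cat([x, x_cont], dim=1)  # embedding_cat_dim + continuous_dim
     return self.linear_layers(x)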
Example #3
 def _build_network(self):
     activation = getattr(nn, self.hparams.activation)
     # Linear Layers
     layers = []
     _curr_units = self.embedding_cat_dim + self.hparams.continuous_dim
     if self.hparams.embedding_dropout != 0 and self.embedding_cat_dim != 0:
         layers.append(nn.Dropout(self.hparams.embedding_dropout))
     for units in self.hparams.layers.split("-"):
         layers.extend(
             _linear_dropout_bn(
                 self.hparams,
                 _curr_units,
                 int(units),
                 activation,
                 self.hparams.dropout,
             )
         )
         _curr_units = int(units)
     self.linear_layers = nn.Sequential(*layers)
     self.output_dim = _curr_units
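This example relies on a precomputed self.embedding_cat_dim that is not set in the snippet. Assuming hparams.embedding_dims holds (cardinality, embedding_dim) pairs as in Example #2, it would plausibly be the total width of the concatenated categorical embeddings, e.g.:

 # Assumption: embedding_dims is a list of (cardinality, embedding_dim) pairs,
 # so the concatenated categorical embedding width is the sum of the widths.
 self.embedding_cat_dim = sum(embed_dim for _, embed_dim in self.hparams.embedding_dims)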
Example #4
    def _build_network(self):
        d_sqrt_inv = 1 / math.sqrt(self.hparams.input_embed_dim)
        if self.hparams.categorical_dim > 0:
            # Category Embedding layers
            if self.hparams.share_embedding:
                self.cat_embedding_layers = nn.ModuleList([
                    SharedEmbeddings(
                        cardinality,
                        self.hparams.input_embed_dim,
                        add_shared_embed=self.hparams.share_embedding_strategy == "add",
                        frac_shared_embed=self.hparams.shared_embedding_fraction,
                    ) for cardinality in self.hparams.categorical_cardinality
                ])
            else:
                self.cat_embedding_layers = nn.ModuleList([
                    nn.Embedding(cardinality, self.hparams.input_embed_dim)
                    for cardinality in self.hparams.categorical_cardinality
                ])
            if self.hparams.embedding_bias:
                self.cat_embedding_bias = nn.Parameter(
                    torch.Tensor(self.hparams.categorical_dim,
                                 self.hparams.input_embed_dim))
                _initialize_kaiming(
                    self.cat_embedding_bias,
                    self.hparams.embedding_initialization,
                    d_sqrt_inv,
                )
            # Continuous Embedding Layer
        if self.hparams.continuous_dim > 0:
            self.cont_embedding_layer = nn.Embedding(
                self.hparams.continuous_dim, self.hparams.input_embed_dim)
            _initialize_kaiming(
                self.cont_embedding_layer.weight,
                self.hparams.embedding_initialization,
                d_sqrt_inv,
            )
            if self.hparams.embedding_bias:
                self.cont_embedding_bias = nn.Parameter(
                    torch.Tensor(self.hparams.continuous_dim,
                                 self.hparams.input_embed_dim))
                _initialize_kaiming(
                    self.cont_embedding_bias,
                    self.hparams.embedding_initialization,
                    d_sqrt_inv,
                )
            if self.hparams.embedding_dropout != 0:
                self.embed_dropout = nn.Dropout(self.hparams.embedding_dropout)
        self.add_cls = AppendCLSToken(
            d_token=self.hparams.input_embed_dim,
            initialization=self.hparams.embedding_initialization,
        )
        self.transformer_blocks = OrderedDict()
        for i in range(self.hparams.num_attn_blocks):
            self.transformer_blocks[f"mha_block_{i}"] = TransformerEncoderBlock(
                input_embed_dim=self.hparams.input_embed_dim,
                num_heads=self.hparams.num_heads,
                ff_hidden_multiplier=self.hparams.ff_hidden_multiplier,
                ff_activation=self.hparams.transformer_activation,
                attn_dropout=self.hparams.attn_dropout,
                ff_dropout=self.hparams.ff_dropout,
                add_norm_dropout=self.hparams.add_norm_dropout,
                # Attention weights can be used to derive feature importance
                keep_attn=self.hparams.attn_feature_importance,
            )
        self.transformer_blocks = nn.Sequential(self.transformer_blocks)
        if self.hparams.attn_feature_importance:
            self.attention_weights_ = [None] * self.hparams.num_attn_blocks
        if self.hparams.batch_norm_continuous_input:
            self.normalizing_batch_norm = nn.BatchNorm1d(
                self.hparams.continuous_dim)
        # Final MLP Layers
        _curr_units = self.hparams.input_embed_dim
        # Linear Layers
        layers = []
        for units in self.hparams.out_ff_layers.split("-"):
            layers.extend(
                _linear_dropout_bn(
                    self.hparams.out_ff_activation,
                    self.hparams.out_ff_initialization,
                    self.hparams.use_batch_norm,
                    _curr_units,
                    int(units),
                    self.hparams.out_ff_dropout,
                ))
            _curr_units = int(units)
        self.linear_layers = nn.Sequential(*layers)
        self.output_dim = _curr_units
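The _initialize_kaiming helper is also external to this snippet. A sketch consistent with how it is called here (a tensor, an initialization name, and the scale d_sqrt_inv = 1 / sqrt(input_embed_dim)) might be the following; the accepted scheme names are assumptions.

 import torch.nn as nn

 def _initialize_kaiming(x, initialization, d_sqrt_inv):
     # Hypothetical scale-aware initializer matching the call sites above.
     if initialization == "kaiming_uniform":
         nn.init.uniform_(x, a=-d_sqrt_inv, b=d_sqrt_inv)
     elif initialization == "kaiming_normal":
         nn.init.normal_(x, std=d_sqrt_inv)
     elif initialization is not None:
         raise ValueError(f"Unknown initialization scheme: {initialization}")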
Example #5
 def _build_network(self):
     if self.hparams.categorical_dim > 0:
         # Category Embedding layers
         if self.hparams.share_embedding:
             self.cat_embedding_layers = nn.ModuleList(
                 [
                     SharedEmbeddings(
                         cardinality,
                         self.hparams.input_embed_dim,
                         add_shared_embed=self.hparams.share_embedding_strategy
                         == "add",
                         frac_shared_embed=self.hparams.shared_embedding_fraction,
                     )
                     for cardinality in self.hparams.categorical_cardinality
                 ]
             )
         else:
             self.cat_embedding_layers = nn.ModuleList(
                 [
                     nn.Embedding(cardinality, self.hparams.input_embed_dim)
                     for cardinality in self.hparams.categorical_cardinality
                 ]
             )
         if self.hparams.embedding_dropout != 0:
             self.embed_dropout = nn.Dropout(self.hparams.embedding_dropout)
     self.transformer_blocks = OrderedDict()
     for i in range(self.hparams.num_attn_blocks):
         self.transformer_blocks[f"mha_block_{i}"] = TransformerEncoderBlock(
             input_embed_dim=self.hparams.input_embed_dim,
             num_heads=self.hparams.num_heads,
             ff_hidden_multiplier=self.hparams.ff_hidden_multiplier,
             ff_activation=self.hparams.transformer_activation,
             attn_dropout=self.hparams.attn_dropout,
             ff_dropout=self.hparams.ff_dropout,
             add_norm_dropout=self.hparams.add_norm_dropout,
             keep_attn=False,  # No easy way to convert TabTransformer Attn Weights to Feature Importance
         )
     self.transformer_blocks = nn.Sequential(self.transformer_blocks)
     self.attention_weights = [None] * self.hparams.num_attn_blocks
     if self.hparams.batch_norm_continuous_input:
         self.normalizing_batch_norm = nn.BatchNorm1d(self.hparams.continuous_dim)
     # Final MLP Layers
     _curr_units = (
         self.hparams.input_embed_dim * self.hparams.categorical_dim
         + self.hparams.continuous_dim
     )
     # Linear Layers
     layers = []
     for units in self.hparams.out_ff_layers.split("-"):
         layers.extend(
             _linear_dropout_bn(
                 self.hparams.out_ff_activation,
                 self.hparams.out_ff_initialization,
                 self.hparams.use_batch_norm,
                 _curr_units,
                 int(units),
                 self.hparams.out_ff_dropout,
             )
         )
         _curr_units = int(units)
     self.linear_layers = nn.Sequential(*layers)
     self.output_dim = _curr_units
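To see how the input_embed_dim * categorical_dim + continuous_dim width arises, a forward pass consistent with this builder would stack the categorical embeddings into a token sequence, run it through the transformer blocks, flatten the result, and concatenate it with the (optionally batch-normalized) continuous features before the final MLP. This is an illustrative assumption, not the library's actual forward:

 def forward(self, x_cat, x_cont):
     # Hypothetical forward pass consistent with the dimensions computed above.
     if self.hparams.categorical_dim > 0:
         x = torch.stack(
             [embed(x_cat[:, i]) for i, embed in enumerate(self.cat_embedding_layers)],
             dim=1,
         )  # (batch, categorical_dim, input_embed_dim)
         if hasattr(self, "embed_dropout"):
             x = self.embed_dropout(x)
         x = self.transformer_blocks(x)
         x = x.reshape(x.size(0), -1)  # flatten to input_embed_dim * categorical_dim
     else:
         x = x_cont.new_empty((x_cont.size(0), 0))
     if self.hparams.batch_norm_continuous_input:
         x_cont = self.normalizing_batch_norm(x_cont)
     x = torch.cat([x, x_cont], dim=1)
     return self.linear_layers(x)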