Ejemplo n.º 1
0
def get_tabnet(
        input_dim, output_dim, n_d, n_a, n_steps, gamma,
        remove_vehicle, cat_emb_dim, n_independent, n_shared,
        virtual_batch_size, momentum, epsilon=1e-8, mask_type="sparsemax",
        ):
    """Build a ``TabNet`` whose categorical layout depends on the flags.

    The categorical cardinalities are ``[3, 2]`` when ``remove_vehicle`` is
    truthy and ``[2, 3, 2]`` otherwise, and the categorical columns are always
    the leading input columns (indices ``0..k-1``).  When ``cat_emb_dim == 0``
    both the index list and the cardinality list are passed as empty lists.

    Every other argument is forwarded to ``TabNet`` unchanged.

    Returns:
        The newly constructed ``TabNet`` instance.
    """
    # Fixed cardinalities for the two supported column layouts.
    cardinalities = [3, 2] if remove_vehicle else [2, 3, 2]
    if cat_emb_dim == 0:
        cardinalities = []

    # One index per categorical column, starting at column 0.
    positions = list(range(len(cardinalities)))

    return TabNet(
        input_dim,
        output_dim,
        n_d=n_d,
        n_a=n_a,
        n_steps=n_steps,
        gamma=gamma,
        cat_idxs=positions,
        cat_dims=cardinalities,
        cat_emb_dim=cat_emb_dim,
        n_independent=n_independent,
        n_shared=n_shared,
        epsilon=epsilon,
        virtual_batch_size=virtual_batch_size,
        momentum=momentum,
        mask_type=mask_type,
    )
Ejemplo n.º 2
0
    def __init__(
        self,
        num_features: int,
        num_classes: int,
        embedding_sizes: Optional[List[Tuple]] = None,
        loss_fn: Callable = F.cross_entropy,
        optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam,
        metrics: List[Metric] = None,
        learning_rate: float = 1e-3,
        multi_label: bool = False,
        serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None,
        **tabnet_kwargs,
    ):
        """Create a ``TabNet`` model and pass it to the parent constructor.

        Args:
            num_features: Used as the ``input_dim`` of the ``TabNet`` model.
            num_classes: Used as the ``output_dim`` of the ``TabNet`` model.
            embedding_sizes: ``(cat_dim, cat_emb_dim)`` pairs, one per
                categorical column.  The categorical columns are taken to be
                the first ``len(embedding_sizes)`` input columns.  ``None``
                (the default) is treated like an empty list.
            loss_fn: Forwarded to the parent constructor.
            optimizer: Forwarded to the parent constructor.
            metrics: Forwarded to the parent constructor.
            learning_rate: Forwarded to the parent constructor.
            multi_label: Forwarded to the parent constructor.
            serializer: Forwarded to the parent constructor.
            **tabnet_kwargs: Extra keyword arguments forwarded to ``TabNet``.
        """
        self.save_hyperparameters()

        # The default is None, which previously crashed on ``len(None)``.
        # A separate local is used so the ``embedding_sizes`` argument itself
        # is never rebound.
        sizes = [] if embedding_sizes is None else embedding_sizes

        # Split the (cat_dim, cat_emb_dim) pairs into two parallel sequences.
        cat_dims, cat_emb_dim = zip(*sizes) if len(sizes) else ([], [])
        model = TabNet(
            input_dim=num_features,
            output_dim=num_classes,
            cat_idxs=list(range(len(sizes))),
            cat_dims=list(cat_dims),
            cat_emb_dim=list(cat_emb_dim),
            **tabnet_kwargs
        )

        super().__init__(
            model=model,
            loss_fn=loss_fn,
            optimizer=optimizer,
            metrics=metrics,
            learning_rate=learning_rate,
            multi_label=multi_label,
            serializer=serializer,
        )
Ejemplo n.º 3
0
 def _build_network(self):
     """Instantiate the ``TabNet`` backbone from ``self.hparams``.

     The input width is ``continuous_dim + categorical_dim``.  The first
     ``categorical_dim`` columns are declared categorical, and
     ``embedding_dims`` is read as ``(cardinality, embedding_dim)`` pairs.
     ``epsilon`` and ``momentum`` are fixed constants; everything else comes
     from the hyperparameters.  The result is stored on ``self.tabnet``.
     """
     hp = self.hparams

     total_input_dim = hp.continuous_dim + hp.categorical_dim
     categorical_positions = list(range(hp.categorical_dim))
     # Split the (cardinality, embedding_dim) pairs into parallel lists.
     cardinalities = [n_levels for n_levels, _ in hp.embedding_dims]
     embedding_widths = [width for _, width in hp.embedding_dims]

     self.tabnet = TabNet(
         input_dim=total_input_dim,
         output_dim=hp.output_dim,
         n_d=hp.n_d,
         n_a=hp.n_a,
         n_steps=hp.n_steps,
         gamma=hp.gamma,
         cat_idxs=categorical_positions,
         cat_dims=cardinalities,
         cat_emb_dim=embedding_widths,
         n_independent=hp.n_independent,
         n_shared=hp.n_shared,
         epsilon=1e-15,
         virtual_batch_size=hp.virtual_batch_size,
         momentum=0.02,
         mask_type=hp.mask_type,
     )
Ejemplo n.º 4
0
    def __init__(
        self,
        num_features: int,
        num_classes: int,
        embedding_sizes: Optional[List[Tuple[int, int]]] = None,
        loss_fn: Callable = F.cross_entropy,
        optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam,
        optimizer_kwargs: Optional[Dict[str, Any]] = None,
        scheduler: Optional[Union[Type[_LRScheduler], str, _LRScheduler]] = None,
        scheduler_kwargs: Optional[Dict[str, Any]] = None,
        metrics: Union[Metric, Callable, Mapping, Sequence, None] = None,
        learning_rate: float = 1e-2,
        multi_label: bool = False,
        serializer: Optional[Union[Serializer, Mapping[str, Serializer]]] = None,
        **tabnet_kwargs,
    ):
        """Create a ``TabNet`` model and pass it to the parent constructor.

        Args:
            num_features: Used as the ``input_dim`` of the ``TabNet`` model.
            num_classes: Used as the ``output_dim`` of the ``TabNet`` model.
            embedding_sizes: ``(cat_dim, cat_emb_dim)`` pairs, one per
                categorical column.  The categorical columns are taken to be
                the first ``len(embedding_sizes)`` input columns.  ``None``
                (the default) or an empty list means no categorical columns.
            loss_fn: Forwarded to the parent constructor.
            optimizer: Forwarded to the parent constructor.
            optimizer_kwargs: Forwarded to the parent constructor.
            scheduler: Forwarded to the parent constructor.
            scheduler_kwargs: Forwarded to the parent constructor.
            metrics: Forwarded to the parent constructor.
            learning_rate: Forwarded to the parent constructor.
            multi_label: Forwarded to the parent constructor.
            serializer: Forwarded to the parent constructor;
                ``Probabilities()`` is used when this is falsy.
            **tabnet_kwargs: Extra keyword arguments forwarded to ``TabNet``.
        """
        self.save_hyperparameters()

        # The default is None: the zip below was already guarded, but
        # ``len(embedding_sizes)`` was not and raised TypeError.  A separate
        # local is used so the ``embedding_sizes`` argument is never rebound.
        sizes = embedding_sizes or []

        # Split the (cat_dim, cat_emb_dim) pairs into two parallel sequences.
        cat_dims, cat_emb_dim = zip(*sizes) if sizes else ([], [])
        model = TabNet(
            input_dim=num_features,
            output_dim=num_classes,
            cat_idxs=list(range(len(sizes))),
            cat_dims=list(cat_dims),
            cat_emb_dim=list(cat_emb_dim),
            **tabnet_kwargs,
        )

        super().__init__(
            model=model,
            loss_fn=loss_fn,
            optimizer=optimizer,
            optimizer_kwargs=optimizer_kwargs,
            scheduler=scheduler,
            scheduler_kwargs=scheduler_kwargs,
            metrics=metrics,
            learning_rate=learning_rate,
            multi_label=multi_label,
            serializer=serializer or Probabilities(),
        )

        self.save_hyperparameters()
Ejemplo n.º 5
0
 def __init__(
     self,
     input_dim,
     embedding_dim,
     out_dim,
 ):
     """Set up a ``TabNet`` encoder followed by a linear layer.

     Args:
         input_dim: Passed to ``TabNet`` as ``input_dim``.
         embedding_dim: ``TabNet`` output width and the input width of the
             linear layer.
         out_dim: Output width of the linear layer.
     """
     super(Classifier, self).__init__()

     # TabNet settings are fixed here; only the input and output widths
     # come from the caller.
     tabnet_settings = {
         "input_dim": input_dim,
         "output_dim": embedding_dim,
         "n_d": 32,
         "n_a": 32,
         "n_steps": 5,
         "gamma": 1.3,
         "n_independent": 2,
         "n_shared": 2,
         "epsilon": 1e-15,
         "virtual_batch_size": 128,
         "momentum": 0.02,
         "mask_type": "sparsemax",
     }
     self.tabnet = TabNet(**tabnet_settings)
     self.fc = nn.Linear(embedding_dim, out_dim)
Ejemplo n.º 6
0
    def __init__(
        self,
        num_features: int,
        num_classes: int,
        embedding_sizes: Optional[List[Tuple]] = None,
        loss_fn: Callable = F.cross_entropy,
        optimizer: Type[torch.optim.Optimizer] = torch.optim.Adam,
        metrics: List[Metric] = None,
        learning_rate: float = 1e-2,
        multi_label: bool = False,
        serializer: Optional[Union[Serializer, Mapping[str,
                                                       Serializer]]] = None,
        **tabnet_kwargs,
    ):
        """Create a ``TabNet`` model and pass it to the parent constructor.

        Args:
            num_features: Used as the ``input_dim`` of the ``TabNet`` model.
            num_classes: Used as the ``output_dim`` of the ``TabNet`` model.
            embedding_sizes: ``(cat_dim, cat_emb_dim)`` pairs, one per
                categorical column.  The categorical columns are taken to be
                the first ``len(embedding_sizes)`` input columns.  ``None``
                (the default) is treated like an empty list.
            loss_fn: Forwarded to the parent constructor.
            optimizer: Forwarded to the parent constructor.
            metrics: Forwarded to the parent constructor.
            learning_rate: Forwarded to the parent constructor.
            multi_label: Forwarded to the parent constructor.
            serializer: Forwarded to the parent constructor;
                ``Probabilities()`` is used when this is falsy.
            **tabnet_kwargs: Extra keyword arguments forwarded to ``TabNet``.

        Raises:
            ModuleNotFoundError: If ``_TABULAR_AVAILABLE`` is falsy.
        """
        if not _TABULAR_AVAILABLE:
            raise ModuleNotFoundError(
                "Please, pip install 'lightning-flash[tabular]'")

        self.save_hyperparameters()

        # The default is None, which previously crashed on ``len(None)``.
        # A separate local is used so the ``embedding_sizes`` argument itself
        # is never rebound.
        sizes = [] if embedding_sizes is None else embedding_sizes

        # Split the (cat_dim, cat_emb_dim) pairs into two parallel sequences.
        cat_dims, cat_emb_dim = zip(*sizes) if len(sizes) else ([], [])
        model = TabNet(input_dim=num_features,
                       output_dim=num_classes,
                       cat_idxs=list(range(len(sizes))),
                       cat_dims=list(cat_dims),
                       cat_emb_dim=list(cat_emb_dim),
                       **tabnet_kwargs)

        super().__init__(
            model=model,
            loss_fn=loss_fn,
            optimizer=optimizer,
            metrics=metrics,
            learning_rate=learning_rate,
            multi_label=multi_label,
            serializer=serializer or Probabilities(),
        )

        self.save_hyperparameters()
Ejemplo n.º 7
0
    def __init__(
        self,
        numerical_input_dim,
        cat_vocab_sizes,
        cat_embedding_dim,
        embedding_dim,
    ):
        """Set up the batch-norm, GRU and ``TabNet`` sub-modules.

        Args:
            numerical_input_dim: Feature count given to ``nn.BatchNorm1d``;
                also contributes to the ``TabNet`` input width.
            cat_vocab_sizes: Passed to ``TabNet`` as ``cat_dims``; its length
                is added to ``numerical_input_dim`` for the input width.
            cat_embedding_dim: Passed to ``TabNet`` as ``cat_emb_dim``.
            embedding_dim: ``TabNet`` output width and both the input and
                hidden size of the GRU.
        """
        # only 1 categorical feature for now
        super(Encoder, self).__init__()

        self.numerical_input_dim = numerical_input_dim
        self.embedding_dim = embedding_dim
        self.cat_vocab_sizes = cat_vocab_sizes
        # TODO: experiment with out dim
        self.cat_embedding_dim = cat_embedding_dim

        self.num_event_encoder = nn.BatchNorm1d(numerical_input_dim)
        self.sequence_encoder = nn.GRU(
            embedding_dim, embedding_dim, batch_first=False
        )

        # Numerical columns plus one column per categorical vocabulary.
        total_input_dim = numerical_input_dim + len(cat_vocab_sizes)
        tabnet_settings = {
            "input_dim": total_input_dim,
            "output_dim": embedding_dim,
            "n_d": 8,
            "n_a": 8,
            "n_steps": 3,
            "gamma": 1.3,
            # NOTE(review): the categorical column index is hard-coded to 4;
            # presumably this assumes four numerical columns come first.
            # Confirm against the callers.
            "cat_idxs": [4],
            "cat_dims": cat_vocab_sizes,
            "cat_emb_dim": cat_embedding_dim,
            "n_independent": 2,
            "n_shared": 2,
            "epsilon": 1e-15,
            "virtual_batch_size": 128,
            "momentum": 0.02,
            "mask_type": "sparsemax",
        }
        self.tabnet = TabNet(**tabnet_settings)