Code Example #1
File: combiners.py  Project: jimthompson5802/ludwig
    def __init__(
        self, input_features: Dict[str, "InputFeature"], config: Optional[TabNetCombinerConfig] = None, **kwargs
    ) -> None:
        super().__init__(input_features)
        if config is None:
            # config defaults to None but its attributes are read below;
            # fall back to the default config values
            config = TabNetCombinerConfig()
        self.name = "TabNetCombiner"
        logger.debug(f" {self.name}")

        self.tabnet = TabNet(
            self.concatenated_shape[-1],
            config.size,
            config.output_size,
            num_steps=config.num_steps,
            num_total_blocks=config.num_total_blocks,
            num_shared_blocks=config.num_shared_blocks,
            relaxation_factor=config.relaxation_factor,
            bn_epsilon=config.bn_epsilon,
            bn_momentum=config.bn_momentum,
            bn_virtual_bs=config.bn_virtual_bs,
            sparsity=config.sparsity,
        )

        if config.dropout > 0:
            self.dropout = torch.nn.Dropout(config.dropout)
        else:
            self.dropout = None
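
A minimal sketch of what the TabNetCombinerConfig referenced above could look like, assuming a plain dataclass; the field names mirror the attributes accessed in __init__, but the default values here are illustrative rather than Ludwig's actual defaults:

from dataclasses import dataclass
from typing import Optional

@dataclass
class TabNetCombinerConfig:
    size: int = 32                       # N_a in the paper
    output_size: int = 32                # N_d in the paper
    num_steps: int = 3                   # N_steps in the paper
    num_total_blocks: int = 4
    num_shared_blocks: int = 2
    relaxation_factor: float = 1.5       # gamma in the paper
    bn_epsilon: float = 1e-3
    bn_momentum: float = 0.05
    bn_virtual_bs: Optional[int] = None  # B_v from the paper
    sparsity: float = 1e-4               # lambda_sparse in the paper
    dropout: float = 0.05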
Code Example #2
File: combiners.py  Project: cxz/ludwig
    def __init__(
            self,
            size: int = 32,  # N_a in the paper
            output_size: int = 32,  # N_d in the paper
            num_steps: int = 1,  # N_steps in the paper
            num_total_blocks: int = 4,
            num_shared_blocks: int = 2,
            relaxation_factor: float = 1.5,  # gamma in the paper
            bn_epsilon: float = 1e-3,
            bn_momentum: float = 0.7,  # m_B in the paper
            bn_virtual_bs: Optional[int] = None,  # B_v from the paper
            sparsity: float = 1e-5,  # lambda_sparse in the paper
            dropout=0,
            **kwargs
    ):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        self.tabnet = TabNet(
            size=size,
            output_size=output_size,
            num_steps=num_steps,
            num_total_blocks=num_total_blocks,
            num_shared_blocks=num_shared_blocks,
            relaxation_factor=relaxation_factor,
            bn_epsilon=bn_epsilon,
            bn_momentum=bn_momentum,
            bn_virtual_bs=bn_virtual_bs,
            sparsity=sparsity
        )

        if dropout > 0:
            self.dropout = tf.keras.layers.Dropout(dropout)
        else:
            self.dropout = None
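
This snippet only builds the layers; for context, a hedged sketch of how the optional dropout would typically be applied after the TabNet pass in the combiner's call method (the method name, the tuple unpacking, and the return structure are assumptions, not taken from this snippet):

def call(self, inputs, training=None, mask=None):
    # Assumed: self.tabnet returns (hidden, attention_masks); only the
    # hidden representation is passed through the optional dropout.
    hidden, masks = self.tabnet(inputs, training=training)
    if self.dropout is not None:
        hidden = self.dropout(hidden, training=training)
    return {"combiner_output": hidden}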
Code Example #3
File: combiners.py  Project: admariner/ludwig
    def __init__(
            self,
            input_features,
            size: int,  # N_a in the paper
            output_size: int,  # N_d in the paper
            num_steps: int = 1,  # N_steps in the paper
            num_total_blocks: int = 4,
            num_shared_blocks: int = 2,
            relaxation_factor: float = 1.5,  # gamma in the paper
            bn_epsilon: float = 1e-5,
            bn_momentum: float = 0.7,  # m_B in the paper
            bn_virtual_bs: Optional[int] = None,  # B_v from the paper
            sparsity: float = 1e-5,  # lambda_sparse in the paper
            dropout=0,
            **kwargs):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        # todo this assumes each input feature outputs size 1
        #  or 1hot for categorical
        feature_sizes = []
        for feature in input_features.values():
            if feature.type == NUMERICAL or feature.type == BINARY:
                feature_sizes.append(1)
            elif feature.type == CATEGORY:
                feature_sizes.append(feature.encoder_obj.embedding_size)
            else:
                raise ValueError(
                    "TabNet does not currently support {} features, "
                    "it only supports binary, numerical and category".format(
                        feature.type))

        self.tabnet = TabNet(num_features=sum(feature_sizes),
                             size=size,
                             output_size=output_size,
                             num_steps=num_steps,
                             num_total_blocks=num_total_blocks,
                             num_shared_blocks=num_shared_blocks,
                             relaxation_factor=relaxation_factor,
                             bn_epsilon=bn_epsilon,
                             bn_momentum=bn_momentum,
                             bn_virtual_bs=bn_virtual_bs,
                             sparsity=sparsity)

        if dropout > 0:
            self.dropout = tf.keras.layers.Dropout(dropout)
        else:
            self.dropout = None
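
To make the feature-size accounting above concrete, here is a small self-contained example; the feature names and SimpleNamespace stand-ins are illustrative only, not Ludwig's actual feature objects:

from types import SimpleNamespace

NUMERICAL, BINARY, CATEGORY = "numerical", "binary", "category"

input_features = {
    "age": SimpleNamespace(type=NUMERICAL),
    "clicked": SimpleNamespace(type=BINARY),
    "color": SimpleNamespace(type=CATEGORY,
                             encoder_obj=SimpleNamespace(embedding_size=8)),
}

feature_sizes = []
for feature in input_features.values():
    if feature.type in (NUMERICAL, BINARY):
        feature_sizes.append(1)  # scalar output per numerical/binary feature
    elif feature.type == CATEGORY:
        feature_sizes.append(feature.encoder_obj.embedding_size)

assert sum(feature_sizes) == 10  # 1 + 1 + 8 becomes num_features for TabNet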
Code Example #4
def test_tabnet(input_size: int, output_size: int, size: int, virtual_batch_size: Optional[int]) -> None:
    # setup synthetic tensor
    torch.manual_seed(RANDOM_SEED)
    input_tensor = torch.randn([BATCH_SIZE, input_size], dtype=torch.float32)

    tabnet = TabNet(
        input_size,
        size,
        output_size,
        num_steps=3,
        num_total_blocks=4,
        num_shared_blocks=2,
        bn_virtual_bs=virtual_batch_size,  # otherwise the parametrized value goes unused
    )

    output = tabnet(input_tensor)

    # check for expected shape and properties
    assert isinstance(output, tuple)
    assert output[0].shape == (BATCH_SIZE, output_size)

    assert tabnet.input_shape[-1] == input_size
    assert tabnet.output_shape[-1] == output_size
    assert tabnet.input_dtype == torch.float32
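
The bare parameters on test_tabnet suggest the snippet was captured without its pytest decorators and module constants; a plausible reconstruction of the driver is sketched below, where the seed, batch size, and parameter values are illustrative, not Ludwig's actual test matrix:

from typing import Optional

import pytest
import torch

RANDOM_SEED = 1919  # illustrative constant
BATCH_SIZE = 16     # illustrative constant

@pytest.mark.parametrize("virtual_batch_size", [None, 7])
@pytest.mark.parametrize("size", [8])
@pytest.mark.parametrize("output_size", [4])
@pytest.mark.parametrize("input_size", [2])
def test_tabnet(input_size: int, output_size: int, size: int, virtual_batch_size: Optional[int]) -> None:
    ...  # body as in the example above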
Code Example #5
def test_tabnet(
    entmax_mode: Optional[str],
    input_size: int,
    output_size: int,
    size: int,
    virtual_batch_size: Optional[int],
    batch_size: int,
) -> None:
    # setup synthetic tensor
    torch.manual_seed(RANDOM_SEED)
    input_tensor = torch.randn([batch_size, input_size], dtype=torch.float32)

    tabnet = TabNet(input_size,
                    size,
                    output_size,
                    num_steps=3,
                    num_total_blocks=4,
                    num_shared_blocks=2,
                    bn_virtual_bs=virtual_batch_size,  # otherwise this parameter goes unused
                    entmax_mode=entmax_mode)

    output = tabnet(input_tensor)

    # check for expected shape and properties
    assert isinstance(output, tuple)
    assert output[0].shape == (batch_size, output_size)

    assert tabnet.input_shape[-1] == input_size
    assert tabnet.output_shape[-1] == output_size
    assert tabnet.input_dtype == torch.float32

    # check for parameter updates
    target = torch.randn([batch_size, 1])
    fpc, tpc, upc, not_updated = check_module_parameters_updated(
        tabnet, (input_tensor, ), target)

    if batch_size == 1:
        # for single-record batches the batch norm layers are bypassed, so only
        # a subset of the parameters is updated
        assert upc == 15, (
            f"Updated parameter count does not match the expected value. Parameters not updated: {not_updated}"
            f"\nModule structure:\n{tabnet}")
    else:
        # the update count should equal the number of trainable parameters
        assert tpc == upc, (
            f"Not all parameters were updated. Parameters not updated: {not_updated}"
            f"\nModule structure:\n{tabnet}")
Code Example #6
    def __init__(
            self,
            input_features,
            size: int,  # N_a in the paper
            output_size: int,  # N_d in the paper
            num_steps: int = 1,  # N_steps in the paper
            num_total_blocks: int = 4,
            num_shared_blocks: int = 2,
            relaxation_factor: float = 1.5,  # gamma in the paper
            bn_epsilon: float = 1e-5,
            bn_momentum: float = 0.7,  # m_B in the paper
            bn_virtual_divider: int = 1,
            # factor to divide batch_size B to get B_v from the paper
            sparsity: float = 1e-5,  # lambda_sparse in the paper
            dropout=0,
            **kwargs):
        super().__init__()
        logger.debug(' {}'.format(self.name))

        self.tabnet = TabNet(
            num_features=len(input_features),  # todo this assumes each input
            #  feature outputs size 1
            size=size,
            output_size=output_size,
            num_steps=num_steps,
            num_total_blocks=num_total_blocks,
            num_shared_blocks=num_shared_blocks,
            relaxation_factor=relaxation_factor,
            bn_epsilon=bn_epsilon,
            bn_momentum=bn_momentum,
            bn_virtual_divider=bn_virtual_divider,
            sparsity=sparsity)

        if dropout > 0:
            self.dropout = tf.keras.layers.Dropout(dropout)
        else:
            self.dropout = None
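
Unlike the later variants, this one derives the ghost batch size from a divider instead of taking B_v directly; a one-line sketch of the relationship (the helper name is made up for illustration):

def virtual_batch_size(batch_size: int, bn_virtual_divider: int) -> int:
    # B_v = B / divider, e.g. batch_size=256 and divider=4 give ghost batches of 64
    return max(1, batch_size // bn_virtual_divider)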