Example #1
    def test__apply_activate_(self):
        """Test `_apply_activate` for tables with both continuous and categoricals.

        Check every continuous column has all values between -1 and 1
        (since they are normalized), and check every categorical column adds up to 1.

        Setup:
            - Mock `self._transformer.output_info_list`

        Input:
            - data = tensor of shape (N, data_dims)

        Output:
            - tensor = tensor of shape (N, data_dims)
        """
        model = CTGANSynthesizer()
        model._transformer = Mock()
        model._transformer.output_info_list = [
            [SpanInfo(3, 'softmax')],                       # categorical column: 3 categories
            [SpanInfo(1, 'tanh'), SpanInfo(2, 'softmax')],  # continuous column: value + 2 mode indicators
        ]

        data = torch.randn(100, 6)
        result = model._apply_activate(data)

        assert result.shape == (100, 6)
        _assert_is_between(result[:, 0:3], 0.0, 1.0)  # softmax span of the categorical column
        _assert_is_between(result[:, 3], -1.0, 1.0)   # tanh span of the continuous column
        _assert_is_between(result[:, 4:6], 0.0, 1.0)  # softmax span of the continuous column
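
The test calls a module-level helper `_assert_is_between` that this listing does not show. A minimal sketch of what such a helper might look like, assuming `data` is a `torch.Tensor` (only the name and call signature come from the test above; the body is an assumption):

import torch

def _assert_is_between(data, lower, upper):
    """Assert that every element of `data` lies in the closed interval [lower, upper]."""
    # torch.all reduces the elementwise comparisons to a single boolean tensor
    assert torch.all(data >= lower)
    assert torch.all(data <= upper)

The three assertions then line up with the mocked spans: columns 0-2 are the softmax outputs of the 3-category column, column 3 is the tanh-activated continuous value, and columns 4-5 are the softmax mode indicators of the continuous column.
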
Example #2
    def test__cond_loss(self):
        """Test `_cond_loss`.

        Test that the loss is purely a function of the target categorical.

        Setup:
            - mock transformer.output_info_list
            - create two categoricals, one continuous
            - compute the conditional loss, conditioned on the 1st categorical
            - compare the loss to the cross-entropy of the 1st categorical, manually computed

        Input:
            data - the synthetic data generated by the model
            c - a tensor covering only the categorical (one-hot) spans of the data, with the
                one-hot vector corresponding to the target column filled in
            m - binary mask used to select the categorical column to condition on

        Output:
            loss scalar; this should only be affected by the target column

        Note:
            - even though the implementation of this is probably right, I'm not sure if the idea
              behind it is correct
        """
        model = CTGANSynthesizer()
        model._transformer = Mock()
        model._transformer.output_info_list = [
            [SpanInfo(1, 'tanh'), SpanInfo(2, 'softmax')],  # the continuous column
            [SpanInfo(3, 'softmax')],  # the categorical column we are conditioning on
            [SpanInfo(2, 'softmax')],  # a categorical column we are NOT conditioning on
        ]

        data = torch.tensor([
            # first 3 dims (continuous column) are ignored, the next 3 dims are the
            # prediction for the target categorical, and the last 2 dims are masked out by `m`
            [0.0, -1.0, 0.0, 0.05, 0.05, 0.9, 0.1, 0.4],
        ])

        c = torch.tensor([
            # first 3 dims are a one-hot for the categorical,
            # next 2 are for a different categorical that we are not conditioning on
            # (continuous values are not stored in this tensor)
            [0.0, 0.0, 1.0, 0.0, 0.0],
        ])

        # this indicates that we are conditioning on the first categorical
        m = torch.tensor([[1, 0]])

        result = model._cond_loss(data, c, m)
        expected = torch.nn.functional.cross_entropy(
            torch.tensor([
                [0.05, 0.05, 0.9],  # predicted values for the 3 categories
            ]),
            torch.tensor([2]))  # index of the conditioned category

        assert (result - expected).abs() < 1e-3
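
For context, here is a simplified, standalone sketch of the span-walking logic that `_cond_loss` is being tested against. It is an illustration inferred from the test's inputs, not the library's actual method, so treat the exact offsets and reduction as assumptions:

import torch
from torch.nn import functional

def cond_loss_sketch(data, c, m, output_info_list):
    """Cross-entropy between each generated categorical span and the condition vector.

    `data` contains every span (continuous and categorical) while `c` contains
    only the categorical spans, so the two offsets advance at different rates.
    Each span is assumed to expose `dim` and `activation_fn`, matching the
    SpanInfo objects used above.
    """
    losses = []
    st = 0    # offset into the generated data
    st_c = 0  # offset into the condition vector
    for column_info in output_info_list:
        for span_info in column_info:
            if len(column_info) != 1 or span_info.activation_fn != 'softmax':
                st += span_info.dim  # continuous span: present in data, absent from c
            else:
                ed, ed_c = st + span_info.dim, st_c + span_info.dim
                losses.append(functional.cross_entropy(
                    data[:, st:ed],                        # generated values for this column
                    torch.argmax(c[:, st_c:ed_c], dim=1),  # index of the conditioned category
                    reduction='none',
                ))
                st, st_c = ed, ed_c
    losses = torch.stack(losses, dim=1)        # shape (N, number of categorical columns)
    return (losses * m).sum() / data.size(0)   # the mask zeroes out non-target columns

With the inputs above, the stacked losses have shape (1, 2) and m = [[1, 0]] keeps only the first categorical, so the result collapses to exactly the cross-entropy the test computes by hand.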