def test__apply_activate_(self):
    """Test `_apply_activate` for tables with both continuous and categoricals.

    Check every continuous column has all values between -1 and 1 (since they
    are normalized), and check every categorical column adds up to 1.

    Setup:
        - Mock `self._transformer.output_info_list`

    Input:
        - data = tensor of shape (N, data_dims)

    Output:
        - tensor = tensor of shape (N, data_dims)
    """
    model = CTGANSynthesizer()
    model._transformer = Mock()
    # Output layout implied by the spans below:
    #   columns 0-2 -> 3-way softmax (categorical)
    #   column  3   -> tanh (continuous)
    #   columns 4-5 -> 2-way softmax (categorical)
    model._transformer.output_info_list = [
        [SpanInfo(3, 'softmax')],
        [SpanInfo(1, 'tanh'), SpanInfo(2, 'softmax')],
    ]
    data = torch.randn(100, 6)

    result = model._apply_activate(data)

    assert result.shape == (100, 6)
    _assert_is_between(result[:, 0:3], 0.0, 1.0)
    # FIX: the original sliced `result[:3]`, i.e. the first three ROWS across
    # all columns (which includes softmax outputs already in [0, 1]), making
    # the tanh check meaningless. The tanh output is column 3.
    _assert_is_between(result[:, 3:4], -1.0, 1.0)
    _assert_is_between(result[:, 4:6], 0.0, 1.0)
def test__cond_loss(self):
    """Test `_cond_loss`.

    Test that the loss is purely a function of the target categorical.

    Setup:
        - mock transformer.output_info_list
        - create two categoricals, one continuous
        - compute the conditional loss, conditioned on the 1st categorical
        - compare the loss to the cross-entropy of the 1st categorical,
          manually computed

    Input:
        data - the synthetic data generated by the model
        c - a tensor with the same shape as the data but with only a specific
            one-hot vector corresponding to the target column filled in
        m - binary mask used to select the categorical column to condition on

    Output:
        loss scalar; this should only be affected by the target column

    Note:
        - even though the implementation of this is probably right, I'm not
          sure if the idea behind it is correct
    """
    model = CTGANSynthesizer()
    model._transformer = Mock()
    model._transformer.output_info_list = [
        [SpanInfo(1, 'tanh'), SpanInfo(2, 'softmax')],
        # this is the categorical column we are conditioning on
        [SpanInfo(3, 'softmax')],
        # this is the categorical column we are NOT conditioning on
        [SpanInfo(2, 'softmax')],
    ]

    data = torch.tensor([
        # first 3 dims ignored, next 3 dims are the prediction,
        # last 2 dims are ignored
        [0.0, -1.0, 0.0, 0.05, 0.05, 0.9, 0.1, 0.4],
    ])

    c = torch.tensor([
        # first 3 dims are a one-hot for the categorical,
        # next 2 are for a different categorical that we are not
        # conditioning on (continuous values are not stored in this tensor)
        [0.0, 0.0, 1.0, 0.0, 0.0],
    ])

    # this indicates that we are conditioning on the first categorical
    m = torch.tensor([[1, 0]])

    result = model._cond_loss(data, c, m)

    expected = torch.nn.functional.cross_entropy(
        torch.tensor([
            [0.05, 0.05, 0.9],  # 3 categories, one hot
        ]), torch.tensor([2]))

    assert (result - expected).abs() < 1e-3