def test_model_to_disk(self):
    """Train a synthesizer, dump it to disk, reload it and compare both.

    The reloaded model must expose only attributes that the trained model
    also has. Plain ``int``/``float``/``tuple`` attributes, the conditional
    generator state, the transformer state and the generator weights are
    compared for equality.
    """
    np.random.seed(0)
    tf.random.set_seed(0)
    data, discrete = generate_data(self._vars['batch_size'])

    model = CTGANSynthesizer(batch_size=self._vars['batch_size'],
                             pac=self._vars['pac'])
    self.assertIsNotNone(model)
    model.train(data, discrete, epochs=1)

    model_path = os.path.join(self._current_dir, 'model_test.joblib')
    model.dump(model_path, overwrite=True)
    loaded_model = CTGANSynthesizer(file_path=model_path)
    self.assertIsNotNone(loaded_model)

    for attr, value in loaded_model.__dict__.items():
        # assertIn names the missing attribute in the failure message.
        self.assertIn(attr, model.__dict__)
        # Only values whose type is exactly int, float or tuple are compared
        # here; subclasses (e.g. bool) and all other types are skipped.
        if type(value) in (int, float, tuple):
            self.assertEqual(value, model.__dict__[attr])

    np.testing.assert_equal(loaded_model._cond_generator.__dict__,
                            model._cond_generator.__dict__)

    for attr, value in loaded_model._transformer.__dict__.items():
        expected = model._transformer.__dict__[attr]
        if isinstance(value, pd.Series):
            pd.testing.assert_series_equal(value, expected)
        elif (isinstance(value, list) and value
                and isinstance(value[0], tf.Tensor)):
            # The non-empty guard keeps an empty list from raising
            # IndexError; it falls through to the generic comparison.
            tf.assert_equal(value, expected)
        else:
            np.testing.assert_equal(value, expected)

    np.testing.assert_equal(loaded_model._generator.get_weights(),
                            model._generator.get_weights())
def test_inverse_transform(self):
    """Check that transform followed by inverse_transform returns the input."""
    np.random.seed(0)
    batch_size = self._vars['batch_size']
    original, discrete_columns = generate_data(batch_size)

    data_transformer = DataTransformer()
    data_transformer.fit(original, discrete_columns)

    # Round trip: encode the frame, then decode it again.
    encoded = data_transformer.transform(original)
    recovered = data_transformer.inverse_transform(encoded)

    pd.testing.assert_frame_equal(original, recovered)
def test_sample(self):
    """Check ConditionalGenerator.sample and sample_zero against fixed values.

    The NumPy seed is fixed, so both calls are compared with hard-coded
    expected arrays. ``sample`` is called before ``sample_zero``.
    """
    np.random.seed(0)
    data, discrete = generate_data(self._vars['batch_size'])

    transformer = DataTransformer()
    transformer.fit(data, discrete)
    train_data = transformer.transform(data)

    cond_gen = ConditionalGenerator(
        train_data, transformer.output_info, True)

    output = cond_gen.sample(self._vars['batch_size'])
    self.assertIsNotNone(output)
    c, m, col, opt = output

    expected_c = np.array([
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.],
        [0., 1., 0., 0.],
    ], dtype=np.float32)
    expected_m = np.ones((10, 1), dtype=np.float32)
    expected_col = np.zeros(10, dtype=int)
    expected_opt = np.array([0, 2, 0, 0, 0, 0, 0, 1, 3, 1])

    np.testing.assert_equal(c, expected_c)
    np.testing.assert_equal(m, expected_m)
    np.testing.assert_equal(col, expected_col)
    np.testing.assert_equal(opt, expected_opt)

    output = cond_gen.sample_zero(self._vars['batch_size'])
    self.assertIsNotNone(output)
    expected_output = [
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
    ]
    np.testing.assert_equal(output, expected_output)
def test_fit(self):
    """Fit a DataTransformer and check output_info and output_dimensions."""
    np.random.seed(0)
    frame, discrete_columns = generate_data(self._vars['batch_size'])

    fitted = DataTransformer()
    fitted.fit(frame, discrete_columns)

    np.testing.assert_equal(
        fitted.output_info,
        [(1, 'tanh', 1), (1, 'softmax', 1), (4, 'softmax', 0)])
    np.testing.assert_equal(fitted.output_dimensions, 6)
def test_sample_none(self):
    """Sample one row from a DataSampler with no column/option given.

    The transformer is fitted with an empty discrete-column list and the
    sampled row is compared with a fixed expected array.
    """
    np.random.seed(0)
    frame, _ = generate_data(self._vars['batch_size'])

    data_transformer = DataTransformer()
    data_transformer.fit(frame, [])
    encoded = data_transformer.transform(frame)

    data_sampler = DataSampler(encoded, data_transformer.output_info)
    sampled = data_sampler.sample(1, None, None)

    np.testing.assert_almost_equal(
        sampled,
        np.array([[0.46909913, 1., 0.08564136, 0., 1.]]),
        decimal=self._vars['decimal'])
def test_sample(self):
    """Train a synthesizer for one epoch and check its sampled values."""
    np.random.seed(0)
    tf.random.set_seed(0)

    batch_size = self._vars['batch_size']
    frame, discrete_columns = generate_data(batch_size)

    synthesizer = CTGANSynthesizer(batch_size=batch_size,
                                   pac=self._vars['pac'])
    self.assertIsNotNone(synthesizer)
    synthesizer.train(frame, discrete_columns, epochs=1)

    sampled = synthesizer.sample(self._n_samples)
    np.testing.assert_almost_equal(
        sampled.values,
        np.array([[0.4139329, 3.0]]),
        decimal=self._vars['decimal'])
def test_sample_none(self):
    """Check that sample and sample_zero return None without discrete columns.

    The transformer is fitted with an empty discrete-column list before the
    ConditionalGenerator is built.
    """
    np.random.seed(0)
    batch_size = self._vars['batch_size']
    frame, _ = generate_data(batch_size)

    data_transformer = DataTransformer()
    data_transformer.fit(frame, [])
    encoded = data_transformer.transform(frame)

    generator = ConditionalGenerator(
        encoded, data_transformer.output_info, True)

    # Same order as before: sample first, then sample_zero.
    self.assertIsNone(generator.sample(batch_size))
    self.assertIsNone(generator.sample_zero(batch_size))
def test_tensors(self):
    """Check the tensors built by DataTransformer.generate_tensors.

    After fitting, ``output_tensor`` and ``cond_tensor`` are compared with
    fixed int32 constants.
    """
    np.random.seed(0)
    frame, discrete_columns = generate_data(self._vars['batch_size'])

    data_transformer = DataTransformer()
    data_transformer.fit(frame, discrete_columns)
    data_transformer.generate_tensors()

    info_rows = ([0, 1, 0], [1, 2, 1], [2, 6, 1])
    expected_info = [tf.constant(row, dtype=tf.int32) for row in info_rows]
    expected_cond = [tf.constant([2, 6, 0, 4, 0], dtype=tf.int32)]

    tf.assert_equal(expected_info, data_transformer.output_tensor)
    tf.assert_equal(expected_cond, data_transformer.cond_tensor)
def test_sample(self):
    """Sample from a DataSampler with explicit column and option lists.

    The rows returned for ``sample(1, [0, 0], [0, 0])`` are compared with a
    fixed expected array.
    """
    np.random.seed(0)
    frame, discrete_columns = generate_data(self._vars['batch_size'])

    data_transformer = DataTransformer()
    data_transformer.fit(frame, discrete_columns)
    encoded = data_transformer.transform(frame)

    data_sampler = DataSampler(encoded, data_transformer.output_info)
    sampled = data_sampler.sample(1, [0, 0], [0, 0])

    expected_rows = np.array([
        [0.3721639, 1., 1., 0., 0., 0.],
        [-0.31326372, 1., 1., 0., 0., 0.],
    ])
    np.testing.assert_almost_equal(
        sampled, expected_rows, decimal=self._vars['decimal'])
def test_transform(self):
    """Fit a DataTransformer and compare its transform output to fixed rows."""
    np.random.seed(0)
    frame, discrete_columns = generate_data(self._vars['batch_size'])

    data_transformer = DataTransformer()
    data_transformer.fit(frame, discrete_columns)
    encoded = data_transformer.transform(frame)

    expected_rows = np.array([
        [-0.09027826, 1., 1., 0., 0., 0.],
        [0.1340608, 1., 0., 1., 0., 0.],
        [-0.01753295, 1., 0., 1., 0., 0.],
        [-0.09557786, 1., 1., 0., 0., 0.],
        [-0.25904065, 1., 0., 1., 0., 0.],
        [0.04062398, 1., 0., 0., 0., 1.],
        [-0.24025436, 1., 0., 0., 1., 0.],
        [0.3721639, 1., 1., 0., 0., 0.],
        [0.46909913, 1., 0., 0., 1., 0.],
        [-0.31326372, 1., 1., 0., 0., 0.],
    ])
    np.testing.assert_almost_equal(
        encoded, expected_rows, decimal=self._vars['decimal'])
def test_train(self):
    """Run the training check without and then with discrete columns."""
    np.random.seed(0)
    tf.random.set_seed(0)
    frame, discrete_columns = generate_data(self._vars['batch_size'])

    # First with no discrete columns, then with the generated ones.
    for columns in ([], discrete_columns):
        self._assert_train_equal(frame, columns)