Esempio n. 1
0
    def test_model_to_disk(self):
        """Check that a trained synthesizer survives a dump/load round trip.

        Trains a ``CTGANSynthesizer`` for one epoch, dumps it to
        ``model_test.joblib`` under ``self._current_dir``, builds a second
        synthesizer from that file and compares the two objects: plain
        ``int``/``float``/``tuple`` attributes, the ``_cond_generator``
        attributes, every ``_transformer`` attribute and the generator
        weights.
        """
        # Fix the NumPy and TensorFlow seeds before generating data/training.
        np.random.seed(0)
        tf.random.set_seed(0)
        data, discrete = generate_data(self._vars['batch_size'])

        model = CTGANSynthesizer(batch_size=self._vars['batch_size'],
                                 pac=self._vars['pac'])
        self.assertIsNotNone(model)
        model.train(data, discrete, epochs=1)
        model_path = os.path.join(self._current_dir, 'model_test.joblib')
        model.dump(model_path, overwrite=True)
        loaded_model = CTGANSynthesizer(file_path=model_path)
        self.assertIsNotNone(loaded_model)

        for attr, value in loaded_model.__dict__.items():
            # assertIn names the missing attribute in the failure message,
            # which assertTrue(attr in ...) does not.
            self.assertIn(attr, model.__dict__)
            # Only attributes whose exact type is int, float or tuple are
            # compared here; the nested objects are checked further down.
            if type(value) in [int, float, tuple]:
                self.assertEqual(value, model.__dict__[attr])

        np.testing.assert_equal(loaded_model._cond_generator.__dict__,
                                model._cond_generator.__dict__)

        for attr, value in loaded_model._transformer.__dict__.items():
            expected = model._transformer.__dict__[attr]
            if isinstance(value, pd.Series):
                pd.testing.assert_series_equal(value, expected)
            elif (isinstance(value, list) and value
                    and isinstance(value[0], tf.Tensor)):
                # The emptiness guard keeps value[0] from raising IndexError;
                # an empty list falls through to the generic comparison.
                tf.assert_equal(value, expected)
            else:
                np.testing.assert_equal(value, expected)

        np.testing.assert_equal(loaded_model._generator.get_weights(),
                                model._generator.get_weights())
Esempio n. 2
0
    def test_inverse_transform(self):
        """Transforming then inverse-transforming must give back the input.

        The round-tripped frame is compared with the generated data using
        ``pd.testing.assert_frame_equal``.
        """
        np.random.seed(0)
        frame, discrete_columns = generate_data(self._vars['batch_size'])

        fitted = DataTransformer()
        fitted.fit(frame, discrete_columns)

        encoded = fitted.transform(frame)
        round_trip = fitted.inverse_transform(encoded)
        pd.testing.assert_frame_equal(frame, round_trip)
Esempio n. 3
0
    def test_sample(self):
        """Compare ``sample`` and ``sample_zero`` with fixed expected arrays.

        Builds a ``ConditionalGenerator`` from the transformed data and the
        transformer's ``output_info``, then checks the four values returned
        by ``sample`` and the single value returned by ``sample_zero``
        against arrays recorded for NumPy seed 0.
        """
        np.random.seed(0)
        data, discrete = generate_data(self._vars['batch_size'])

        transformer = DataTransformer()
        transformer.fit(data, discrete)
        train_data = transformer.transform(data)

        cond_gen = ConditionalGenerator(
            train_data, transformer.output_info, True)

        output = cond_gen.sample(self._vars['batch_size'])
        self.assertIsNotNone(output)
        c, m, col, opt = output
        expected_c = np.array([
            [1., 0., 0., 0.],
            [0., 0., 1., 0.],
            [1., 0., 0., 0.],
            [1., 0., 0., 0.],
            [1., 0., 0., 0.],
            [1., 0., 0., 0.],
            [1., 0., 0., 0.],
            [0., 1., 0., 0.],
            [0., 0., 0., 1.],
            [0., 1., 0., 0.]], dtype=np.float32)
        # Ten rows of a single 1.0 and ten zeros, as in the recorded output.
        expected_m = np.ones((10, 1), dtype=np.float32)
        expected_col = np.zeros(10, dtype=int)
        expected_opt = np.array([0, 2, 0, 0, 0, 0, 0, 1, 3, 1])
        np.testing.assert_equal(c, expected_c)
        np.testing.assert_equal(m, expected_m)
        np.testing.assert_equal(col, expected_col)
        np.testing.assert_equal(opt, expected_opt)

        # The debug print of the sample_zero output was dropped: the
        # assertion below already reports mismatching values on failure.
        output = cond_gen.sample_zero(self._vars['batch_size'])
        self.assertIsNotNone(output)
        expected_output = [
            [0., 0., 1., 0.],
            [0., 1., 0., 0.],
            [0., 1., 0., 0.],
            [1., 0., 0., 0.],
            [1., 0., 0., 0.],
            [1., 0., 0., 0.],
            [1., 0., 0., 0.],
            [0., 0., 1., 0.],
            [1., 0., 0., 0.],
            [0., 1., 0., 0.]]
        np.testing.assert_equal(output, expected_output)
Esempio n. 4
0
    def test_fit(self):
        """Fit a ``DataTransformer`` and check its recorded output layout.

        After ``fit`` the test compares ``output_info`` and
        ``output_dimensions`` with fixed expected values.
        """
        np.random.seed(0)
        frame, discrete_columns = generate_data(self._vars['batch_size'])

        fitted = DataTransformer()
        fitted.fit(frame, discrete_columns)

        np.testing.assert_equal(
            fitted.output_info,
            [(1, 'tanh', 1), (1, 'softmax', 1), (4, 'softmax', 0)])
        np.testing.assert_equal(fitted.output_dimensions, 6)
Esempio n. 5
0
    def test_sample_none(self):
        """Check ``DataSampler.sample(1, None, None)`` against a fixed row.

        The transformer is fitted with an empty list of discrete columns
        before the sampler is built from the transformed data.
        """
        np.random.seed(0)
        frame, _ = generate_data(self._vars['batch_size'])

        fitted = DataTransformer()
        fitted.fit(frame, [])
        encoded = fitted.transform(frame)

        drawn = DataSampler(encoded, fitted.output_info).sample(1, None, None)
        wanted = np.array([[0.46909913, 1., 0.08564136, 0., 1.]])
        np.testing.assert_almost_equal(
            drawn, wanted, decimal=self._vars['decimal'])
Esempio n. 6
0
    def test_sample(self):
        """Train for one epoch and compare sampled values with a fixed row.

        ``sample`` is called with ``self._n_samples`` and the ``.values`` of
        its result are compared to the expected array up to
        ``self._vars['decimal']`` decimals.
        """
        np.random.seed(0)
        tf.random.set_seed(0)
        batch = self._vars['batch_size']
        frame, discrete_columns = generate_data(batch)

        synthesizer = CTGANSynthesizer(
            batch_size=batch, pac=self._vars['pac'])
        self.assertIsNotNone(synthesizer)

        synthesizer.train(frame, discrete_columns, epochs=1)
        sampled = synthesizer.sample(self._n_samples).values
        wanted = np.array([[0.4139329, 3.0]])
        np.testing.assert_almost_equal(
            sampled, wanted, decimal=self._vars['decimal'])
Esempio n. 7
0
    def test_sample_none(self):
        """Expect ``None`` from both sampling calls of the generator.

        The transformer is fitted with an empty list of discrete columns;
        ``sample`` and ``sample_zero`` are then each asserted to return
        ``None``.
        """
        np.random.seed(0)
        batch = self._vars['batch_size']
        frame, _ = generate_data(batch)

        fitted = DataTransformer()
        fitted.fit(frame, [])
        encoded = fitted.transform(frame)

        cond_gen = ConditionalGenerator(encoded, fitted.output_info, True)
        self.assertIsNone(cond_gen.sample(batch))
        self.assertIsNone(cond_gen.sample_zero(batch))
Esempio n. 8
0
    def test_tensors(self):
        """Check ``output_tensor`` and ``cond_tensor`` after tensor generation.

        Calls ``generate_tensors`` on a fitted transformer and compares both
        attributes with fixed ``int32`` constants via ``tf.assert_equal``.
        """
        np.random.seed(0)
        frame, discrete_columns = generate_data(self._vars['batch_size'])

        fitted = DataTransformer()
        fitted.fit(frame, discrete_columns)
        fitted.generate_tensors()

        info_rows = ([0, 1, 0], [1, 2, 1], [2, 6, 1])
        expected_info = [
            tf.constant(row, dtype=tf.int32) for row in info_rows]
        expected_cond = [tf.constant([2, 6, 0, 4, 0], dtype=tf.int32)]

        tf.assert_equal(expected_info, fitted.output_tensor)
        tf.assert_equal(expected_cond, fitted.cond_tensor)
Esempio n. 9
0
    def test_sample(self):
        """Check ``DataSampler.sample(1, [0, 0], [0, 0])`` against fixed rows.

        The sampler is built from the transformed data and the
        transformer's ``output_info``; the result is compared up to
        ``self._vars['decimal']`` decimals.
        """
        np.random.seed(0)
        frame, discrete_columns = generate_data(self._vars['batch_size'])

        fitted = DataTransformer()
        fitted.fit(frame, discrete_columns)
        encoded = fitted.transform(frame)

        picked = DataSampler(encoded, fitted.output_info).sample(
            1, [0, 0], [0, 0])
        wanted = np.array([
            [0.3721639, 1., 1., 0., 0., 0.],
            [-0.31326372, 1., 1., 0., 0., 0.],
        ])
        np.testing.assert_almost_equal(
            picked, wanted, decimal=self._vars['decimal'])
Esempio n. 10
0
    def test_transform(self):
        """Compare ``DataTransformer.transform`` output with a fixed array.

        The comparison uses ``self._vars['decimal']`` decimals of precision.
        """
        np.random.seed(0)
        frame, discrete_columns = generate_data(self._vars['batch_size'])

        fitted = DataTransformer()
        fitted.fit(frame, discrete_columns)
        encoded = fitted.transform(frame)

        wanted = np.array([
            [-0.09027826, 1., 1., 0., 0., 0.],
            [0.1340608, 1., 0., 1., 0., 0.],
            [-0.01753295, 1., 0., 1., 0., 0.],
            [-0.09557786, 1., 1., 0., 0., 0.],
            [-0.25904065, 1., 0., 1., 0., 0.],
            [0.04062398, 1., 0., 0., 0., 1.],
            [-0.24025436, 1., 0., 0., 1., 0.],
            [0.3721639, 1., 1., 0., 0., 0.],
            [0.46909913, 1., 0., 0., 1., 0.],
            [-0.31326372, 1., 1., 0., 0., 0.],
        ])
        np.testing.assert_almost_equal(
            encoded, wanted, decimal=self._vars['decimal'])
Esempio n. 11
0
 def test_train(self):
     """Run ``_assert_train_equal`` without and with discrete columns.

     The helper is called first with an empty discrete-column list and
     then with the list returned by ``generate_data``.
     """
     np.random.seed(0)
     tf.random.set_seed(0)
     frame, discrete_columns = generate_data(self._vars['batch_size'])
     for columns in ([], discrete_columns):
         self._assert_train_equal(frame, columns)