コード例 #1
0
ファイル: test_tvae.py プロジェクト: sdv-dev/CTGAN
def test_fixed_random_seed():
    """Check that re-seeding the TVAESynthesizer makes sampling repeatable.

    One batch is drawn right after fitting, before any seed is set. The random
    state is then set to 0 twice, and two batches are drawn after each reset.
    Both seeded runs must match batch for batch, and neither batch of the
    first seeded run may equal the unseeded batch.
    """
    # Setup
    frame = pd.DataFrame({
        'continuous': np.random.random(100),
        'discrete': np.random.choice(['a', 'b', 'c'], 100),
    })
    model = TVAESynthesizer(epochs=1)

    # Run
    model.fit(frame, ['discrete'])
    unseeded = model.sample(10)

    seeded_runs = []
    for _ in range(2):
        # Reset to the same seed before each pair of draws.
        model.set_random_state(0)
        first_batch = model.sample(10)
        second_batch = model.sample(10)
        seeded_runs.append((first_batch, second_batch))

    reference_run, repeated_run = seeded_runs

    # Assert
    for batch in reference_run:
        assert not np.array_equal(unseeded, batch)

    for expected, actual in zip(reference_run, repeated_run):
        np.testing.assert_array_equal(expected, actual)
コード例 #2
0
ファイル: test_tvae.py プロジェクト: Tecnarca/dpgan-ecb
def test_synthesizer_sample():
    """Fit on a single discrete column and check that sampling yields a DataFrame."""
    frame = pd.DataFrame({'discrete': np.random.choice(['a', 'b'], 100)})

    model = TVAESynthesizer(epochs=1)
    model.fit(frame, ['discrete'])

    assert isinstance(model.sample(1000), pd.DataFrame)
コード例 #3
0
ファイル: test_tvae.py プロジェクト: Tecnarca/dpgan-ecb
def test_tvae_numpy():
    """Fit on a raw NumPy array and check the sampled output.

    The discrete column is identified by its positional index (1). The sample
    must be a (100, 2) ndarray whose second column contains exactly the
    categories 'a' and 'b'.
    """
    frame = pd.DataFrame({
        'continuous': np.random.random(1000),
        'discrete': np.random.choice(['a', 'b'], 1000),
    })

    model = TVAESynthesizer(epochs=10)
    model.fit(frame.values, [1])

    output = model.sample(100)

    assert output.shape == (100, 2)
    assert isinstance(output, np.ndarray)
    assert set(np.unique(output[:, 1])) == {'a', 'b'}
コード例 #4
0
ファイル: test_tvae.py プロジェクト: djyra/CTGAN
def test_drop_last_false():
    """Train on two identical discrete columns and check the samples agree.

    Both columns hold the same 450-value sequence, so after 300 epochs at
    least 95 of 100 sampled rows are expected to have matching values.
    """
    pattern = ['a', 'b', 'c'] * 150
    frame = pd.DataFrame({'1': list(pattern), '2': list(pattern)})

    model = TVAESynthesizer(epochs=300)
    model.fit(frame, ['1', '2'])

    output = model.sample(100)
    # Count the rows whose two columns carry the same category.
    matching = sum(1 for _, record in output.iterrows() if record['1'] == record['2'])

    assert matching >= 95
コード例 #5
0
ファイル: test_tvae.py プロジェクト: Tecnarca/dpgan-ecb
def test_tvae_dataframe():
    """Fit on a DataFrame and check the sampled output.

    The sample must be a (100, 2) DataFrame with the training column names,
    and its discrete column must contain exactly the categories 'a' and 'b'.
    """
    frame = pd.DataFrame({
        'continuous': np.random.random(1000),
        'discrete': np.random.choice(['a', 'b'], 1000),
    })

    model = TVAESynthesizer(epochs=10)
    model.fit(frame, ['discrete'])

    output = model.sample(100)

    assert output.shape == (100, 2)
    assert isinstance(output, pd.DataFrame)
    assert set(output.columns) == {'continuous', 'discrete'}
    assert set(output['discrete'].unique()) == {'a', 'b'}
コード例 #6
0
ファイル: test_tvae.py プロジェクト: Tecnarca/dpgan-ecb
def test_save_load():
    """Check that a fitted TVAESynthesizer survives a save/load round trip.

    The model is fitted, pickled into a temporary directory, loaded back, and
    the reloaded model is sampled. The sample must keep the training column
    names and only contain the categories seen during training.
    """
    import os

    data = pd.DataFrame({
        'continuous': np.random.random(100),
        'discrete': np.random.choice(['a', 'b'], 100)
    })
    discrete_columns = ['discrete']

    tvae = TVAESynthesizer(epochs=10)
    tvae.fit(data, discrete_columns)

    # NOTE(review): assumes `tf` is the `tempfile` module alias -- confirm
    # against the file's imports.
    with tf.TemporaryDirectory() as temporary_directory:
        # Join with a path separator so the pickle lands INSIDE the temporary
        # directory. Plain string concatenation produced a sibling file next
        # to the directory, which was never removed on exit.
        path = os.path.join(temporary_directory, "test_tvae.pkl")
        tvae.save(path)
        tvae = TVAESynthesizer.load(path)

    sampled = tvae.sample(1000)
    assert set(sampled.columns) == {'continuous', 'discrete'}
    assert set(sampled['discrete'].unique()) == {'a', 'b'}
コード例 #7
0
ファイル: test_tvae.py プロジェクト: djyra/CTGAN
def test_loss_function():
    """Check that samples roughly keep the ``'2' == 2 * '1'`` relation.

    The training data has column '2' equal to twice column '1'. After 300
    epochs, the mean absolute deviation from that relation over 1000 sampled
    rows must stay below 400.
    """
    frame = pd.DataFrame({
        '1': list(map(float, range(1000))),
        '2': [float(2 * value) for value in range(1000)],
    })

    model = TVAESynthesizer(epochs=300)
    model.fit(frame)

    num_samples = 1000
    output = model.sample(num_samples)

    # Accumulate row by row so the summation order is the plain iteration order.
    total_deviation = sum(
        abs(2 * record['1'] - record['2']) for _, record in output.iterrows()
    )

    assert total_deviation / num_samples < 400
コード例 #8
0
ファイル: test_tvae.py プロジェクト: djyra/CTGAN
def test_tvae(tmpdir):
    """Run fit, save, load and sample on the iris dataset.

    The class label is added as a discrete column named 'class'. After a
    save/load round trip through ``tmpdir``, the sample must be a (100, 5)
    DataFrame whose column names and set of dtypes match the training data.
    """
    iris = datasets.load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    # Translate the integer targets into their species names.
    frame['class'] = pd.Series(iris.target).map(iris.target_names.__getitem__)

    model = TVAESynthesizer(epochs=10)
    model.fit(frame, ['class'])

    pickle_path = str(tmpdir / 'test_tvae.pkl')
    model.save(pickle_path)
    restored = TVAESynthesizer.load(pickle_path)

    output = restored.sample(100)

    assert output.shape == (100, 5)
    assert isinstance(output, pd.DataFrame)
    assert set(output.columns) == set(frame.columns)
    assert set(output.dtypes) == set(frame.dtypes)
コード例 #9
0
ファイル: test_tvae.py プロジェクト: sdv-dev/CTGAN
def test__loss_function():
    """Test the TVAESynthesizer keeps the linear relation between two columns.

    The training data has column '2' equal to twice column '1'. After 300
    epochs, the mean of ``abs(2 * row['1'] - row['2'])`` over 1000 sampled
    rows must stay below 400.
    """
    frame = pd.DataFrame({
        '1': list(map(float, range(1000))),
        '2': [float(2 * value) for value in range(1000)],
    })

    model = TVAESynthesizer(epochs=300)
    model.fit(frame)

    num_samples = 1000
    output = model.sample(num_samples)

    # Accumulate row by row so the summation order is the plain iteration order.
    total_deviation = sum(
        abs(2 * record['1'] - record['2']) for _, record in output.iterrows()
    )

    assert total_deviation / num_samples < 400