Exemple #1
0
def test_copula_ndarray_feature_generation():
    n_samples = 200
    n_features = 100
    input_data = np.random.normal(size=(n_samples, n_features))

    generator = syn.CopulaDataGenerator(verbose=True)

    generator.fit(input_data, copula=syn.GaussianCopula(),
                  parameterize_by=syn.QuantileParameterizer(n_quantiles=100))

    pickled = pickle.dumps(generator)
    generator = pickle.loads(pickled)

    n_synthetic_samples = 50
    synthetic_data = generator.generate(n_samples=n_synthetic_samples)

    assert synthetic_data.shape == (n_synthetic_samples, n_features)
    assert isinstance(synthetic_data, np.ndarray)
Exemple #2
0
def test_vine_copula_feature_generation():
    n_samples = 200
    n_features = 100
    input_data = xr.DataArray(np.random.normal(size=(n_samples, n_features)))

    generator = syn.CopulaDataGenerator(verbose=True)

    ctrl = pv.FitControlsVinecop(family_set=[pv.gaussian], trunc_lvl=1, select_trunc_lvl=False, show_trace=True)
    generator.fit(input_data, copula=syn.VineCopula(controls=ctrl),
                  parameterize_by=syn.QuantileParameterizer(n_quantiles=100))

    pickled = pickle.dumps(generator)
    generator = pickle.loads(pickled)

    n_synthetic_samples = 50
    synthetic_data = generator.generate(n_samples=n_synthetic_samples)

    assert synthetic_data.shape == (n_synthetic_samples, n_features)
    assert isinstance(synthetic_data, xr.DataArray)

    assert generator.generate(10, seed=42).equals(generator.generate(10, seed=42))
Exemple #3
0
def test_independent_feature_generation_with_distribution():
    n_samples = 20
    n_features = 2
    input_data = np.random.normal(size=(n_samples, n_features))

    generator = syn.CopulaDataGenerator()

    dist_names = set(syn.DistributionParameterizer.get_dist_names())
    # Remove all very slow distributions
    dist_names -= set(['genexpon', 'levy_stable', 'recipinvgauss', 'vonmises', 'kstwo'])

    generator.fit(input_data, copula=syn.IndependenceCopula(), 
        parameterize_by=syn.DistributionParameterizer(dist_names, verbose=True))

    pickled = pickle.dumps(generator)
    generator = pickle.loads(pickled)

    n_synthetic_samples = 50
    synthetic_data = generator.generate(n_samples=n_synthetic_samples)

    assert synthetic_data.shape == (n_synthetic_samples, n_features)
    assert isinstance(synthetic_data, np.ndarray)
Exemple #4
0
def test_copula_dataset_generation():
    n_samples = 200
    n_features = [10, 20]
    input_data = xr.Dataset({
        'a': (('sample', 'foo'), np.random.normal(size=(n_samples, n_features[0]))),
        'b': (('sample', 'bar'), np.random.normal(size=(n_samples, n_features[1])))
    })

    generator = syn.CopulaDataGenerator()

    generator.fit(input_data, copula=syn.GaussianCopula(), 
                  parameterize_by=syn.QuantileParameterizer(n_quantiles=100))

    pickled = pickle.dumps(generator)
    generator = pickle.loads(pickled)

    n_synthetic_samples = 50
    synthetic_data = generator.generate(n_samples=n_synthetic_samples)

    assert synthetic_data['a'].shape == (n_synthetic_samples, n_features[0])
    assert synthetic_data['b'].shape == (n_synthetic_samples, n_features[1])

    assert generator.generate(10, seed=42).equals(generator.generate(10, seed=42))
Exemple #5
0
def test_vine_copula_with_categorical():
    a = np.array([[0, 1, 2], [3, 4, 5]])
    b = np.array([6, 7])
    input_data = xr.Dataset({
        'a': (('sample', 'foo'), a),
        'b': (('sample'), b)
        })

    generator = syn.CopulaDataGenerator(verbose=True)

    ctrl = pv.FitControlsVinecop(family_set=[pv.gaussian], trunc_lvl=1, select_trunc_lvl=False, show_trace=True)
    generator.fit(input_data, types={ 'b': 'cat' }, copula=syn.VineCopula(controls=ctrl))

    pickled = pickle.dumps(generator)
    generator = pickle.loads(pickled)

    n_synthetic_samples = 50
    synthetic_data = generator.generate(n_samples=n_synthetic_samples)

    assert synthetic_data['a'].shape == (n_synthetic_samples, 3)
    assert synthetic_data['b'].shape == (n_synthetic_samples,)

    assert generator.generate(10, seed=42).equals(generator.generate(10, seed=42))
Exemple #6
0
def main():
    import pyvinecopulib as pv

    n_samples = 100
    n_features = 50
    n_synthetic_samples = 500
    input_data = np.random.normal(size=(n_samples, n_features))

    generator = syn.CopulaDataGenerator()

    t0 = time.time()
    generator.fit(input_data, syn.GaussianCopula())
    generator.generate(n_synthetic_samples)
    print(f'synthia: {time.time() - t0}s')

    t0 = time.time()
    generator.fit(
        input_data,
        syn.VineCopula(
            pv.FitControlsVinecop(family_set=[pv.gaussian],
                                  trunc_lvl=1,
                                  select_trunc_lvl=False)))
    generator.generate(n_synthetic_samples)
    print(f'pyvinecopulib: {time.time() - t0}s')