def test_fit_sample_distribution_dict(self):
    """Fit with a ``distribution`` dict keyed by ``'x'`` and check the sample shape."""
    n_rows = 10
    real = sample_trivariate_xyz()

    # Only the 'x' entry is given an explicit univariate (a GaussianKDE instance).
    copula = GaussianMultivariate(distribution={'x': GaussianKDE()})
    copula.fit(real)
    synthetic = copula.sample(n_rows)

    assert synthetic.shape == (n_rows, 3)
def test_fit_sample_center(self):
    """Fit ``VineCopula('center')`` and check that a 10-row sample has shape (10, 3)."""
    n_rows = 10
    real = sample_trivariate_xyz()

    vine = VineCopula('center')
    vine.fit(real)
    synthetic = vine.sample(n_rows)

    assert synthetic.shape == (n_rows, 3)
def test_cdf(self):
    """Check that the fitted model's CDF stays in [0, 1] and does not decrease.

    For each sampled column, the rows are sorted by that column while every
    other column is overwritten with the values of one randomly picked row,
    so the CDF is evaluated along a single varying column.
    """
    real = sample_trivariate_xyz()
    copula = GaussianMultivariate()
    copula.fit(real)
    points = copula.sample(10)

    # Test CDF bounds.
    values = copula.cumulative_distribution(points)
    assert (0 <= values).all() and (values <= 1).all()

    # Test CDF increasing function.
    tolerance = 0.001  # Add tolerance to avoid floating precision issues.
    for varying in points.columns:
        ordered = points.sort_values(varying)

        held_fixed = real.columns.to_list()
        held_fixed.remove(varying)

        # Pin every other column to the values of one random row.
        anchor = ordered.sample(1).iloc[0]
        for name in held_fixed:
            ordered[name] = anchor[name]

        values = copula.cumulative_distribution(ordered)
        steps = np.diff(values) + tolerance
        assert (steps >= 0).all()
def test_fit_sample_distribution_name(self):
    """Fit with the distribution given as a dotted-path string and check the sample shape."""
    n_rows = 10
    distribution_name = 'copulas.univariate.gaussian_kde.GaussianKDE'
    real = sample_trivariate_xyz()

    copula = GaussianMultivariate(distribution_name)
    copula.fit(real)
    synthetic = copula.sample(n_rows)

    assert synthetic.shape == (n_rows, 3)
def test_pdf(self):
    """Check that the fitted model's density is strictly positive on its own samples."""
    real = sample_trivariate_xyz()
    copula = GaussianMultivariate()
    copula.fit(real)
    points = copula.sample(10)

    # Test PDF
    density = copula.probability_density(points)

    assert (0 < density).all()
def test_to_dict_from_dict(self):
    """Round-trip a fitted ``VineCopula('direct')`` through ``to_dict``/``from_dict``.

    The rebuilt model must still produce a sample of shape (10, 3).
    """
    real = sample_trivariate_xyz()
    original = VineCopula('direct')
    original.fit(real)
    # Sample once from the fitted model; the result itself is not checked.
    original.sample(10)

    serialized = original.to_dict()
    restored = VineCopula.from_dict(serialized)
    synthetic = restored.sample(10)

    assert synthetic.shape == (10, 3)
def test_fit_sample_distribution_dict_multiple(self):
    """Fit with a different univariate for each of 'x', 'y', 'z' and check the sample shape."""
    n_rows = 10
    per_key_distributions = {
        'x': Univariate(parametric=ParametricType.PARAMETRIC),
        'y': BetaUnivariate(),
        'z': GaussianKDE(),
    }
    real = sample_trivariate_xyz()

    copula = GaussianMultivariate(distribution=per_key_distributions)
    copula.fit(real)
    synthetic = copula.sample(n_rows)

    assert synthetic.shape == (n_rows, 3)
def test_save_load(self):
    """Save a fitted ``VineCopula('direct')`` under ``self.test_dir`` and load it back.

    The loaded model must still produce a sample of shape (10, 3).
    """
    real = sample_trivariate_xyz()
    original = VineCopula('direct')
    original.fit(real)
    # Sample once from the fitted model; the result itself is not checked.
    original.sample(10)

    model_path = os.path.join(self.test_dir.name, "model.pkl")
    original.save(model_path)
    restored = VineCopula.load(model_path)

    synthetic = restored.sample(10)
    assert synthetic.shape == (10, 3)
def test_fit_sample_direct(self):
    """Fit ``VineCopula('direct')`` and check sample length, shape and column names."""
    real = sample_trivariate_xyz()
    vine = VineCopula('direct')
    vine.fit(real)

    # The number of returned rows must match the requested size.
    for size in (10, 50, 100):
        assert len(vine.sample(size)) == size

    synthetic = vine.sample(10)
    assert synthetic.shape == (10, 3)

    # Every column of the input data must be present in the sample.
    assert all(name in synthetic for name in real.columns)
def test_fit_sample(self):
    """Fit a default ``GaussianMultivariate`` and check sample length, shape and columns."""
    real = sample_trivariate_xyz()
    copula = GaussianMultivariate()
    copula.fit(real)

    # The number of returned rows must match the requested size.
    requested_sizes = (10, 50, 100)
    for size in requested_sizes:
        drawn = copula.sample(size)
        assert len(drawn) == size

    synthetic = copula.sample(10)
    assert synthetic.shape == (10, 3)

    # Every column of the input data must be present in the sample.
    for name in real.columns:
        assert name in synthetic
def test_to_dict_from_dict(self):
    """Round-trip a fitted ``GaussianMultivariate`` through ``to_dict``/``from_dict``.

    The rebuilt model must give the same PDF and CDF values as the original
    on the same sampled points, within an absolute tolerance of 0.01.
    """
    real = sample_trivariate_xyz()
    original = GaussianMultivariate()
    original.fit(real)
    points = original.sample(10)

    restored = GaussianMultivariate.from_dict(original.to_dict())

    expected_pdf = original.probability_density(points)
    actual_pdf = restored.probability_density(points)
    assert np.allclose(expected_pdf, actual_pdf, atol=0.01)

    expected_cdf = original.cumulative_distribution(points)
    actual_cdf = restored.cumulative_distribution(points)
    assert np.allclose(expected_cdf, actual_cdf, atol=0.01)
def test_save_load(self):
    """Save a fitted ``GaussianMultivariate`` under ``self.test_dir`` and load it back.

    The loaded model must give the same PDF and CDF values as the original
    on the same sampled points, within an absolute tolerance of 0.01.
    """
    real = sample_trivariate_xyz()
    original = GaussianMultivariate()
    original.fit(real)
    points = original.sample(10)

    model_path = os.path.join(self.test_dir.name, "model.pkl")
    original.save(model_path)
    restored = GaussianMultivariate.load(model_path)

    expected_pdf = original.probability_density(points)
    actual_pdf = restored.probability_density(points)
    assert np.allclose(expected_pdf, actual_pdf, atol=0.01)

    expected_cdf = original.cumulative_distribution(points)
    actual_cdf = restored.cumulative_distribution(points)
    assert np.allclose(expected_cdf, actual_cdf, atol=0.01)
def testMITCopulas():
    """Fit a Gaussian copula to the sample trivariate data and plot real vs. synthetic.

    Warnings are silenced only while this function runs: the previous warning
    filters are restored on exit, so calling it no longer disables warnings
    for the rest of the process.

    Returns:
        bool: Always ``True`` once every step has completed without raising.
    """
    import warnings

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')

        # Imported inside the suppressed scope so import-time warnings from
        # the library stay hidden as well.
        from copulas.datasets import sample_trivariate_xyz
        from copulas.multivariate import GaussianMultivariate
        from copulas.visualization import compare_3d

        # Load a dataset with 3 columns that are not independent
        real_data = sample_trivariate_xyz()

        # Fit a gaussian copula to the data
        copula = GaussianMultivariate()
        copula.fit(real_data)

        # Sample synthetic data
        synthetic_data = copula.sample(len(real_data))

        # Plot the real and the synthetic data to compare
        compare_3d(real_data, synthetic_data)

    return True
def test_cdf(self):
    """Check that the fitted model's CDF stays in [0, 1] and does not decrease.

    For each sampled column, the rows are sorted by that column while every
    other column is overwritten with the values of one randomly picked row,
    so the CDF is evaluated along a single varying column.
    """
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # Test CDF. The bounds are inclusive: a value that rounds to exactly 0.0
    # or 1.0 in floating point is still a valid probability.
    cdf = model.cumulative_distribution(sampled_data)
    assert (0 <= cdf).all() and (cdf <= 1).all()

    # Test CDF increasing function
    tolerance = 0.001  # Add tolerance to avoid floating precision issues.
    for column in sampled_data.columns:
        sorted_data = sampled_data.sort_values(column)
        other_columns = data.columns.to_list()
        other_columns.remove(column)

        # Pin every other column to one random row. A separate loop name keeps
        # the outer ``column`` from being overwritten.
        row = sorted_data.sample(1).iloc[0]
        for other_column in other_columns:
            sorted_data[other_column] = row[other_column]

        cdf = model.cumulative_distribution(sorted_data)
        diffs = np.diff(cdf) + tolerance
        assert (diffs >= 0).all()
def test_compare_3d():
    """Call ``compare_3d`` with the sample trivariate data as both arguments."""
    xyz = sample_trivariate_xyz()
    compare_3d(xyz, xyz)