def test_cdf(self):
    """Check that the fitted model's CDF is bounded and non-decreasing.

    A ``GaussianMultivariate`` is fitted on the trivariate sample data and
    ten rows are drawn from it. The CDF of those rows must lie in
    ``[0, 1]``. Then, one column at a time, the rows are sorted by that
    column while every other column is pinned to the values of a single
    randomly chosen row; along that ordering the CDF must not decrease
    (up to a small numerical tolerance).
    """
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # The CDF values must stay inside the closed unit interval.
    bounded = model.cumulative_distribution(sampled_data)
    assert (bounded >= 0).all()
    assert (bounded <= 1).all()

    # Moving along one variable with the others held fixed must never
    # lower the CDF.
    for varying in sampled_data.columns:
        ordered = sampled_data.sort_values(varying)

        pinned_columns = data.columns.to_list()
        pinned_columns.remove(varying)

        # Pin every other column to the values of one randomly picked row.
        anchor = ordered.sample(1).iloc[0]
        for pinned in pinned_columns:
            ordered[pinned] = anchor[pinned]

        steps = np.diff(model.cumulative_distribution(ordered))
        # Shift by a small tolerance to avoid floating precision issues.
        assert (steps + 0.001 >= 0).all()
def test_cdf(self):
    """Check that the fitted model's CDF is bounded and non-decreasing.

    A ``GaussianMultivariate`` is fitted on the trivariate sample data and
    ten rows are drawn from it. The CDF of those rows must lie in
    ``[0, 1]``. Then, one column at a time, the rows are sorted by that
    column while every other column is pinned to the values of a single
    randomly chosen row; along that ordering the CDF must not decrease.
    """
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # Inclusive bounds: a numerically evaluated CDF may round to exactly
    # 0 or 1 for points far out in the tails, which is still valid.
    cdf = model.cumulative_distribution(sampled_data)
    assert (0 <= cdf).all() and (cdf <= 1).all()

    # Test that the CDF is an increasing function of each variable.
    for column in sampled_data.columns:
        sorted_data = sampled_data.sort_values(column)

        other_columns = data.columns.to_list()
        other_columns.remove(column)

        # Pin every other column to the values of one randomly picked row.
        # A distinct loop name avoids clobbering the outer ``column``.
        row = sorted_data.sample(1).iloc[0]
        for other_column in other_columns:
            sorted_data[other_column] = row[other_column]

        cdf = model.cumulative_distribution(sorted_data)
        # Add a small tolerance so rounding noise between neighbouring
        # CDF values does not make the test flaky.
        diffs = np.diff(cdf) + 0.001
        assert (diffs >= 0).all()
def test_to_dict_from_dict(self):
    """Check that a model rebuilt from ``to_dict`` output matches the original.

    The fitted model is serialised with ``to_dict`` and rebuilt with
    ``GaussianMultivariate.from_dict``. Both models are then evaluated on
    the same ten sampled rows, and their PDF and CDF values must agree
    within an absolute tolerance of 0.01.
    """
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # Round-trip the model through its dictionary representation.
    restored = GaussianMultivariate.from_dict(model.to_dict())

    expected_pdf = model.probability_density(sampled_data)
    restored_pdf = restored.probability_density(sampled_data)
    assert np.allclose(expected_pdf, restored_pdf, atol=0.01)

    expected_cdf = model.cumulative_distribution(sampled_data)
    restored_cdf = restored.cumulative_distribution(sampled_data)
    assert np.allclose(expected_cdf, restored_cdf, atol=0.01)
def test_save_load(self):
    """Check that a model saved to disk and loaded back matches the original.

    The fitted model is written to ``model.pkl`` inside the test's
    temporary directory and read back with ``GaussianMultivariate.load``.
    Both models are then evaluated on the same ten sampled rows, and their
    PDF and CDF values must agree within an absolute tolerance of 0.01.
    """
    data = sample_trivariate_xyz()
    model = GaussianMultivariate()
    model.fit(data)

    sampled_data = model.sample(10)

    # Round-trip the model through a file in the temporary directory.
    path_to_model = os.path.join(self.test_dir.name, "model.pkl")
    model.save(path_to_model)
    reloaded = GaussianMultivariate.load(path_to_model)

    expected_pdf = model.probability_density(sampled_data)
    reloaded_pdf = reloaded.probability_density(sampled_data)
    assert np.allclose(expected_pdf, reloaded_pdf, atol=0.01)

    expected_cdf = model.cumulative_distribution(sampled_data)
    reloaded_cdf = reloaded.cumulative_distribution(sampled_data)
    assert np.allclose(expected_cdf, reloaded_cdf, atol=0.01)
def copula_based(X, Y):
    """Evaluate a fitted Gaussian copula's joint PDF and CDF at the sample means.

    Args:
        X: Observations of the first variable; fitted as column ``'P'``.
        Y: Observations of the second variable; fitted as column ``'T'``.
            Values are paired element-wise with ``X``.

    Returns:
        A two-element list ``[pdf, cdf]`` holding the model's density and
        cumulative distribution, as returned by the model, evaluated at the
        single point ``(mean(X), mean(Y))``.
    """
    import pandas as pd
    from copulas.multivariate import GaussianMultivariate

    # Fit a Gaussian copula to the paired observations.
    data = pd.DataFrame(list(zip(X, Y)), columns=['P', 'T'])
    dist = GaussianMultivariate()
    dist.fit(data)

    # Build the evaluation point directly instead of drawing a random sample
    # and overwriting it: no wasted sampling and no consumption of the global
    # random state just to obtain a correctly shaped frame.
    point = pd.DataFrame({'P': [np.mean(X)], 'T': [np.mean(Y)]})

    # Evaluate the joint PDF and CDF at the mean of each variable.
    pdf = dist.pdf(point)
    cdf = dist.cumulative_distribution(point)
    return [pdf, cdf]
def _gaussian(self, dataset):
    """Smoke-test the supported ``GaussianMultivariate`` features on ``dataset``.

    A model that uses a ``GaussianKDE`` for the first column is fitted, then
    sampled at several sizes and evaluated (PDF and CDF) on ten sampled
    rows. The model is then round-tripped twice, once through
    ``to_dict``/``from_dict`` and once through ``save``/``load``. Each
    restored model must sample the requested number of rows and reproduce
    the original PDF and CDF values within an absolute tolerance of 0.01.
    """
    sample_sizes = [10, 100, 50]

    # Use a KDE for the first column.
    first_column = dataset.columns[0]
    model = GaussianMultivariate({first_column: GaussianKDE()})
    model.fit(dataset)

    for size in sample_sizes:
        assert len(model.sample(size)) == size

    sampled_data = model.sample(10)
    pdf = model.probability_density(sampled_data)
    cdf = model.cumulative_distribution(sampled_data)

    def check_restored(restored):
        # A restored model must sample correctly and agree with the
        # original model on the reference rows.
        for size in sample_sizes:
            assert len(restored.sample(size)) == size

        restored_pdf = restored.probability_density(sampled_data)
        restored_cdf = restored.cumulative_distribution(sampled_data)
        assert np.allclose(pdf, restored_pdf, atol=0.01)
        assert np.allclose(cdf, restored_cdf, atol=0.01)

    # Round trip through the dictionary representation.
    check_restored(GaussianMultivariate.from_dict(model.to_dict()))

    # Round trip through a file in the temporary directory.
    path_to_model = os.path.join(self.test_dir.name, "model.pkl")
    model.save(path_to_model)
    check_restored(GaussianMultivariate.load(path_to_model))