def test_to_dict(self):
    """``to_dict`` exports the parameters needed to replicate the fitted copula."""
    # Setup
    gaussian_name = 'copulas.univariate.gaussian.GaussianUnivariate'
    copula = GaussianMultivariate()
    iris = pd.read_csv('data/iris.data.csv')
    copula.fit(iris)

    # Expected (mean, std) pair for every column of the iris dataset.
    moments = {
        'feature_01': (5.843333333333334, 0.8253012917851409),
        'feature_02': (3.0540000000000003, 0.4321465800705435),
        'feature_03': (3.758666666666666, 1.7585291834055212),
        'feature_04': (1.1986666666666668, 0.7606126185881716),
    }
    expected_distribs = {
        column: {
            'type': gaussian_name,
            'mean': mean,
            'std': std,
            'fitted': True,
        }
        for column, (mean, std) in moments.items()
    }

    expected_covariance = [
        [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
        [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
        [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
        [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
    ]

    expected_result = {
        'covariance': expected_covariance,
        'fitted': True,
        'type': 'copulas.multivariate.gaussian.GaussianMultivariate',
        'distribution': gaussian_name,
        'distribs': expected_distribs,
    }

    # Run
    exported = copula.to_dict()

    # Check
    compare_nested_dicts(exported, expected_result)
def test_sample_constant_column(self):
    """Sampling still works when one of the fitted columns is constant.

    Computing the covariance matrix for such data is expected to raise warnings.
    """
    # Setup: first column is constant, second one varies.
    instance = GaussianMultivariate()
    X = np.array([[1.0, value] for value in (2.0, 3.0, 4.0, 5.0)])
    instance.fit(X)

    # Run
    sampled = instance.sample(5)

    # Check
    assert sampled.shape == (5, 2)
    assert sampled[sampled.notnull()].all().all()

    constant_column = pd.Series([1.0] * 5, name=0)
    assert sampled.loc[:, 0].equals(constant_column)

    # The non constant column must not collapse into a single value.
    distinct_values = sampled.loc[:, 1].unique()
    assert len(distinct_values) > 1

    # The stored covariance must not contain any null entry.
    assert pd.notnull(instance.covariance).all().all()
def test_sample(self):
    """Samples drawn from the copula keep the mean and deviation of the fitted data."""
    # Setup: three normal columns with very different locations and scales.
    copula = GaussianMultivariate()
    stats = [
        {'mean': 10000, 'std': 15},
        {'mean': 150, 'std': 10},
        {'mean': -50, 'std': 0.1},
    ]
    columns = [np.random.normal(spec['mean'], spec['std'], 100) for spec in stats]
    data = pd.DataFrame(columns).T
    copula.fit(data)

    # Run
    num_samples = 1000000
    result = copula.sample(num_samples)

    # Check
    assert result.shape == (num_samples, 3)
    for column in range(len(stats)):
        expected_mean = np.mean(data[column])
        expected_std = np.std(data[column])
        mean_error = abs(expected_mean - np.mean(result[column]))
        std_error = abs(expected_std - np.std(result[column]))

        # Both statistics must stay within 1% of the ones observed in the data.
        assert mean_error < abs(expected_mean / 100)
        assert std_error < abs(expected_std / 100)
def test_sample_random_state(self):
    """With a fixed random seed, sampling returns the same values every time."""
    # Setup
    column_names = ['A', 'B', 'C']
    instance = GaussianMultivariate(GaussianUnivariate, random_seed=0)
    data = pd.DataFrame(
        [
            [25, 75, 100],
            [30, 60, 250],
            [10, 65, 350],
            [20, 80, 150],
            [25, 70, 500],
        ],
        columns=column_names
    )
    instance.fit(data)

    expected_values = np.array([
        [25.19031668, 61.96527251, 543.43595269],
        [31.50262306, 49.70971698, 429.06537124],
        [20.31636799, 64.3492326, 384.27561823],
        [25.00302427, 72.06019812, 415.85215123],
        [23.07525773, 66.70901743, 390.8226672],
    ])
    expected_result = pd.DataFrame(expected_values, columns=column_names)

    # Run
    sampled = instance.sample(5)

    # Check
    pd.testing.assert_frame_equal(sampled, expected_result, check_less_precise=True)
def test_sample_random_state(self):
    """With a fixed random seed, sampling returns the same values every time."""
    # Setup
    column_names = ['A', 'B', 'C']
    instance = GaussianMultivariate(
        random_seed=0,
        distribution='copulas.univariate.gaussian.GaussianUnivariate'
    )
    data = pd.DataFrame(
        [
            [25, 75, 100],
            [30, 60, 250],
            [10, 65, 350],
            [20, 80, 150],
            [25, 70, 500],
        ],
        columns=column_names
    )
    instance.fit(data)

    expected_result = pd.DataFrame(
        [
            [25.566882482769294, 61.01690157277244, 575.71068885087790],
            [32.624255560452110, 47.31477394460025, 447.84049148268970],
            [20.117642182744806, 63.68224998298797, 397.76402526341593],
            [25.357483201156676, 72.30337152729443, 433.06766240515134],
            [23.202174689737113, 66.32056962524452, 405.08384853948280],
        ],
        columns=column_names
    )

    # Run
    sampled = instance.sample(5)

    # Check
    assert sampled.equals(expected_result)
def test_get_lower_bounds(self):
    """``get_lower_bound`` returns the point where the tail of the infinite integral is cut."""
    # Setup
    expected_bound = -3.104256111232535
    copula = GaussianMultivariate()
    copula.fit(self.data)

    # Run
    lower_bound = copula.get_lower_bound()

    # Check
    assert lower_bound == expected_bound
def test_probability_density(self):
    """``probability_density`` evaluates the fitted density at the given point."""
    # Setup
    expected_density = 0.032245296420409846
    point = np.array([2000., 200., 0.])
    copula = GaussianMultivariate(GaussianUnivariate)
    copula.fit(self.data)

    # Run
    density = copula.probability_density(point)

    # Check
    self.assertAlmostEqual(density, expected_density)
def test_cumulative_distribution_fit_call_pd(self):
    """``cumulative_distribution`` matches the reference value for a copula fitted on raw values."""
    # Setup
    expected_cdf = 0.4550595153746892
    point = np.array([2000., 200., 1.])
    copula = GaussianMultivariate(GaussianUnivariate)
    copula.fit(self.data.values)

    # Run
    cdf = copula.cumulative_distribution(point)

    # Check: the result only has to match up to an absolute tolerance of 1e-5.
    matches = np.isclose(cdf, expected_cdf, atol=1e-5)
    assert matches.all().all()
def test_probability_density(self):
    """``probability_density`` evaluates the fitted density at the given row."""
    # Setup
    expected_density = 0.059566912334560594
    rows = np.array([[0., 0., 0.]])
    copula = GaussianMultivariate()
    copula.fit(self.data)

    # Run
    density = copula.probability_density(rows)

    # Check
    assert density == expected_density
def test_cumulative_distribution_fit_call_pd(self):
    """``cumulative_distribution`` accepts a pandas Series and matches the reference value."""
    # Setup
    expected_cdf = 0.5822020991592192
    point = pd.Series([1., 1., 1.])
    copula = GaussianMultivariate()
    copula.fit(self.data.values)

    # Run
    cdf = copula.cumulative_distribution(point)

    # Check
    matches = np.isclose(cdf, expected_cdf)
    assert matches.all().all()
def test_save(self, json_mock):
    """``save`` hands the copula parameters over to the mocked json writer."""
    # Setup
    instance = GaussianMultivariate()
    iris = pd.read_csv('data/iris.data.csv')
    instance.fit(iris)

    # Expected (mean, std) pair for every column of the iris dataset.
    moments = {
        'feature_01': (5.843333333333334, 0.8253012917851409),
        'feature_02': (3.0540000000000003, 0.4321465800705435),
        'feature_03': (3.758666666666666, 1.7585291834055212),
        'feature_04': (1.1986666666666668, 0.7606126185881716),
    }
    expected_content = {
        'covariance': [
            [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
            [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
            [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
            [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
        ],
        'distribs': {
            column: {'mean': mean, 'std': std}
            for column, (mean, std) in moments.items()
        },
    }

    # Run
    instance.save('test.json')

    # Check: the first positional argument of the mocked call is the dumped content.
    dumped_content = json_mock.call_args[0][0]
    compare_nested_dicts(dumped_content, expected_content)
def test_fit_default_distribution(self):
    """Fitting creates one Gaussian univariate per column plus the covariance matrix."""
    copula = GaussianMultivariate(GaussianUnivariate)
    copula.fit(self.data)

    for position, column in enumerate(self.data.columns):
        # Columns are stored in the same order as in the input data.
        assert copula.columns[position] == column

        univariate = copula.univariates[position]
        assert univariate.__class__ == GaussianUnivariate

        # Each univariate is parametrized with the moments of its column.
        values = self.data[column]
        assert univariate._params['loc'] == values.mean()
        assert univariate._params['scale'] == np.std(values)

    expected_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == expected_covariance
    assert covariance_matches.all().all()
def test_deprecation_warnings(self):
    """Fitting and sampling a Gaussian copula does not record any warning."""
    # Setup
    iris = pd.read_csv('data/iris.data.csv')
    copula = GaussianMultivariate()

    # Run: collect every warning recorded while fitting and sampling.
    with warnings.catch_warnings(record=True) as recorded:
        copula.fit(iris)
        sampled = copula.sample(10)

    # Check
    assert len(recorded) == 0
    assert len(sampled) == 10
def test_cumulative_distribution_fit_call_np_array(self):
    """``cumulative_distribution`` matches the reference value for a numpy input point."""
    # Setup
    expected_cdf = 0.4460456536217443
    point = np.array([2000., 200., 1.])
    gaussian_name = 'copulas.univariate.gaussian.GaussianUnivariate'
    copula = GaussianMultivariate(distribution=gaussian_name)
    copula.fit(self.data.values)

    # Run
    cdf = copula.cumulative_distribution(point)

    # Check: the result only has to match up to an absolute tolerance of 1e-5.
    matches = np.isclose(cdf, expected_cdf, atol=1e-5)
    assert matches.all().all()
def test_probability_density(self):
    """``probability_density`` evaluates the fitted density at the given point."""
    # Setup
    expected_density = 0.031163598715950383
    point = np.array([2000., 200., 0.])
    gaussian_name = 'copulas.univariate.gaussian.GaussianUnivariate'
    copula = GaussianMultivariate(distribution=gaussian_name)
    copula.fit(self.data)

    # Run
    density = copula.probability_density(point)

    # Check
    self.assertAlmostEqual(density, expected_density)
def test__get_covariance_numpy_array(self):
    """``_get_covariance`` returns the reference matrix when given a numpy array."""
    # Setup
    raw_values = self.data.values
    copula = GaussianMultivariate()
    copula.fit(raw_values)

    expected_covariance = np.array([
        [1.04347826, -0.01316681, -0.20683455],
        [-0.01316681, 1.04347826, -0.176307],
        [-0.20683455, -0.176307, 1.04347826],
    ])

    # Run
    covariance = copula._get_covariance(self.data.values)

    # Check
    matches = np.isclose(covariance, expected_covariance)
    assert matches.all().all()
def test__get_covariance(self):
    """``_get_covariance`` returns the reference matrix for the fixture data."""
    # Setup
    copula = GaussianMultivariate(GaussianUnivariate)
    copula.fit(self.data)

    expected_covariance = np.array([
        [1., -0.01261819, -0.19821644],
        [-0.01261819, 1., -0.16896087],
        [-0.19821644, -0.16896087, 1.],
    ])

    # Run
    covariance = copula._get_covariance(self.data)

    # Check
    matches = np.isclose(covariance, expected_covariance)
    assert matches.all().all()
def test_fit_numpy_array(self):
    """Fitting on a numpy array yields the same parameters as the DataFrame columns."""
    # Setup
    gaussian_name = 'copulas.univariate.gaussian.GaussianUnivariate'
    copula = GaussianMultivariate(distribution=gaussian_name)

    # Run
    copula.fit(self.data.values)

    # Check: univariates follow the column order of the original DataFrame.
    for column, univariate in zip(self.data.columns, copula.univariates):
        values = self.data[column]
        assert univariate._params['loc'] == np.mean(values)
        assert univariate._params['scale'] == np.std(values)

    unnamed_frame = pd.DataFrame(self.data.values)
    expected_covariance = copula._get_covariance(unnamed_frame)
    covariance_matches = copula.covariance == expected_covariance
    assert covariance_matches.all().all()
def test_from_dict(self):
    """``from_dict`` rebuilds an equivalent copula from its exported parameters."""
    # Setup
    original = GaussianMultivariate()
    original.fit(self.data)
    exported = original.to_dict()

    # Run
    rebuilt = GaussianMultivariate.from_dict(exported)

    # Asserts
    assert isinstance(rebuilt, GaussianMultivariate)
    assert rebuilt.columns == ['column1', 'column2', 'column3']
    assert len(rebuilt.univariates) == 3

    # Every rebuilt univariate must export the same parameters as its source.
    pairs = zip(original.univariates, rebuilt.univariates)
    for original_univariate, rebuilt_univariate in pairs:
        assert original_univariate.to_dict() == rebuilt_univariate.to_dict()
def test_fit_distribution_selector(self):
    """Per-column distributions are honoured; unspecified columns use the base class."""
    beta_name = 'copulas.univariate.beta.BetaUnivariate'
    kde_name = 'copulas.univariate.gaussian_kde.GaussianKDE'
    copula = GaussianMultivariate(distribution={
        'column1': beta_name,
        'column2': kde_name,
    })
    copula.fit(self.data)

    # Expected class name of the univariate at each position.
    expected_names = [
        beta_name,
        kde_name,
        'copulas.univariate.base.Univariate',
    ]
    for position, expected_name in enumerate(expected_names):
        fitted_class = copula.univariates[position].__class__
        assert get_qualified_name(fitted_class) == expected_name
def test_fit_numpy_array(self):
    """Fitting on a numpy array yields the same parameters as the DataFrame columns."""
    # Setup
    copula = GaussianMultivariate()

    # Run
    copula.fit(self.data.values)

    # Check: distributions are keyed by column position when fitting an array.
    for position, column in enumerate(self.data.columns):
        values = self.data[column]
        assert copula.distribs[position]
        assert copula.distribs[position].mean == np.mean(values)
        assert copula.distribs[position].std == np.std(values)

    unnamed_frame = pd.DataFrame(self.data.values)
    expected_covariance = copula._get_covariance(unnamed_frame)
    covariance_matches = copula.covariance == expected_covariance
    assert covariance_matches.all().all()
def test_fit(self):
    """Fitting creates one distribution per column plus the covariance matrix."""
    # Setup
    copula = GaussianMultivariate()

    # Run
    copula.fit(self.data)

    # Check: distributions are keyed by column name.
    for column in self.data.columns:
        values = self.data[column]
        assert copula.distribs[column]
        assert copula.distribs[column].mean == values.mean()
        assert copula.distribs[column].std == np.std(values)

    expected_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == expected_covariance
    assert covariance_matches.all().all()
def test_fit_distribution_arg(self):
    """Every column is fitted with the class named by ``copula.distribution``."""
    # Setup
    kde_name = 'copulas.univariate.gaussian_kde.GaussianKDE'
    copula = GaussianMultivariate(distribution=kde_name)

    # Run
    copula.fit(self.data)

    # Check
    assert copula.distribution == 'copulas.univariate.gaussian_kde.GaussianKDE'

    for position, column in enumerate(self.data.columns):
        assert copula.columns[position] == column
        fitted_class = copula.univariates[position].__class__
        assert get_qualified_name(fitted_class) == copula.distribution

    expected_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == expected_covariance
    assert covariance_matches.all().all()
def test_fit_distribution_arg(self):
    """Every column is fitted with the class named by ``copula.distribution``."""
    # Setup
    kde_name = 'copulas.univariate.kde.KDEUnivariate'
    copula = GaussianMultivariate(distribution=kde_name)

    # Run
    copula.fit(self.data)

    # Check
    assert copula.distribution == 'copulas.univariate.kde.KDEUnivariate'

    for column in self.data.columns:
        assert column in copula.distribs
        fitted_class = copula.distribs[column].__class__
        assert get_qualified_name(fitted_class) == copula.distribution

    expected_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == expected_covariance
    assert covariance_matches.all().all()
def test_to_dict(self):
    """``to_dict`` exports type, columns, covariance and univariate parameters."""
    # Setup
    copula = GaussianMultivariate()
    copula.fit(self.data)

    # Run
    exported = copula.to_dict()

    # Asserts
    assert exported['type'] == 'copulas.multivariate.gaussian.GaussianMultivariate'
    assert exported['columns'] == ['column1', 'column2', 'column3']
    assert len(exported['univariates']) == 3

    # The covariance is exported as a plain nested list.
    covariance_frame = copula._get_covariance(self.data)
    expected_cov = covariance_frame.to_numpy().tolist()
    np.testing.assert_equal(exported['covariance'], expected_cov)

    pairs = zip(copula.univariates, exported['univariates'])
    for univariate, exported_univariate in pairs:
        assert univariate.to_dict() == exported_univariate
def test_fit_default_distribution(self):
    """Without arguments, every column is fitted with the Gaussian univariate."""
    # Setup
    copula = GaussianMultivariate()

    # Run
    copula.fit(self.data)

    # Check
    assert copula.distribution == 'copulas.univariate.gaussian.GaussianUnivariate'

    for column in self.data.columns:
        assert column in copula.distribs

        fitted_class = copula.distribs[column].__class__
        assert get_qualified_name(fitted_class) == copula.distribution

        # Each distribution carries the moments of its column.
        values = self.data[column]
        assert copula.distribs[column].mean == values.mean()
        assert copula.distribs[column].std == np.std(values)

    expected_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == expected_covariance
    assert covariance_matches.all().all()
def test_sample(self, normal_mock):
    """Sample uses the inverse-transform method to generate new samples.

    Args:
        normal_mock: mock injected by a patch decorator that is outside this
            block; its return value replaces the random draws used by ``sample``.
    """
    # Setup
    instance = GaussianMultivariate(GaussianUnivariate)
    data = pd.DataFrame([
        {'A': 25, 'B': 75, 'C': 100},
        {'A': 30, 'B': 60, 'C': 250},
        {'A': 10, 'B': 65, 'C': 350},
        {'A': 20, 'B': 80, 'C': 150},
        {'A': 25, 'B': 70, 'C': 500}
    ])
    instance.fit(data)

    # Fixed draws make the output of ``sample`` deterministic.
    normal_mock.return_value = np.array([
        [0.1, 0.1, 0.1],
        [0.2, 0.2, 0.2],
        [0.4, 0.4, 0.4],
        [0.6, 0.6, 0.6],
        [0.8, 0.8, 0.8]
    ])

    expected_result = pd.DataFrame([
        {'A': 22.678232998312527, 'B': 70.70710678118655, 'C': 284.35270009440734},
        {'A': 23.356465996625055, 'B': 71.41421356237309, 'C': 298.7054001888146},
        {'A': 24.712931993250110, 'B': 72.82842712474618, 'C': 327.4108003776293},
        {'A': 26.069397989875164, 'B': 74.24264068711929, 'C': 356.116200566444},
        {'A': 27.425863986500215, 'B': 75.65685424949238, 'C': 384.8216007552586}
    ])

    # Run
    result = instance.sample(5)

    # Check
    assert result.equals(expected_result)

    # ``assert mock.called_once_with(...)`` is not an assertion: the attribute
    # is auto-created as a truthy child mock, so the check could never fail
    # (and it raises AttributeError on Python >= 3.12). Use a real Mock
    # assertion instead.
    # NOTE(review): the arguments are not compared here because they include
    # numpy arrays, whose elementwise ``==`` cannot be evaluated by
    # ``assert_called_once_with``; inspect ``normal_mock.call_args`` with
    # ``np.testing`` if the exact arguments need to be pinned.
    normal_mock.assert_called_once()
def test_get_parameters_non_parametric(self):
    """Test the ``get_parameters`` method when the model is not parametric.

    If at least one of the distributions in the model is not parametric,
    a ``NonParametricError`` should be raised.

    Setup:
        - ``self._model`` is set to a ``GaussianMultivariate`` that uses
          ``GaussianKDE`` as its ``distribution``.

    Side Effects:
        - A ``NonParametricError`` is raised.
    """
    # Setup: fit a multivariate model backed by a KDE distribution.
    model = GaussianMultivariate(distribution=GaussianKDE())
    model.fit(pd.DataFrame([1, 1, 1]))

    # A Mock stands in for the GaussianCopula instance holding the model.
    copula_mock = Mock()
    copula_mock._model = model

    # Run, Assert
    with pytest.raises(NonParametricError):
        GaussianCopula.get_parameters(copula_mock)
# Copula Opinion Pooling Input
P_mat = np.array([
    [0, -1, 0, 0, 1, 0],
    [0, -0.5, 1, -0.5, 0, 0],
])  # views
mu_v = np.array([0.0006, 0.0007])  # normal view mean
sigma_v = np.array([0.05, 0.075])  # normal view sigma
range_v = np.array([
    [0, 0.01],
    [0, 0.02],
])  # uniform view range
k_ = P_mat.shape[0]  # number of views (one row of P_mat per view)
Conf_full = ones((k_, 1)) - 1e-6  # full confidence levels
# NOTE(review): originally labelled "half confidence levels" although the value is 0.25 - confirm.
Conf = ones((k_, 1)) * 0.25

# Market Information fit and simulation
# Market Prior from Gaussian Copula Simulation
gc = GaussianMultivariate()
gc.fit(data)
print(gc)

# Draw the prior sample and show its statistics.
M = gc.sample(1000)
Utility.disp_stat([M], ['Prior'])
MPrior = M.values

# Column means of the input data, kept as a column vector.
name = 'True'
print(f'Mean_{name}')
mean = np.expand_dims(data.mean(axis=0), axis=0).T
print(data.mean(axis=0).round(2))
print('-------------------------------------------------\n')

# Covariance fitted by the copula and the two results derived from it.
df_cov = pd.DataFrame(data=gc.covariance)
gc_corr = Estimation.cov_2_corr(gc.covariance)[0]
gc_sigvec = Estimation.cov_2_corr(gc.covariance)[1]
df_corr = pd.DataFrame(data=gc_corr)
print(f'Correlation_{name}')
def test_save(self, json_mock, open_mock):
    """Save stores the internal dictionary as a json in a file.

    Args:
        json_mock: mock injected by a patch decorator outside this block; the
            first positional argument of its call is the dumped content.
        open_mock: mock injected by a patch decorator outside this block; it
            replaces the ``open`` call made by ``save``.
    """
    # Setup
    instance = GaussianMultivariate(
        distribution='copulas.univariate.gaussian.GaussianUnivariate')
    data = pd.read_csv('data/iris.data.csv')
    instance.fit(data)

    covariance = [
        [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
        [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
        [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
        [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735]
    ]

    expected_content = {
        'covariance': covariance,
        'fitted': True,
        'type': 'copulas.multivariate.gaussian.GaussianMultivariate',
        'distribution': 'copulas.univariate.gaussian.GaussianUnivariate',
        'columns': ['feature_01', 'feature_02', 'feature_03', 'feature_04'],
        'univariates': [
            {
                'type': 'copulas.univariate.gaussian.GaussianUnivariate',
                'mean': 5.843333333333334,
                'std': 0.8253012917851409,
                'fitted': True,
            },
            {
                'type': 'copulas.univariate.gaussian.GaussianUnivariate',
                'mean': 3.0540000000000003,
                'std': 0.4321465800705435,
                'fitted': True,
            },
            {
                'type': 'copulas.univariate.gaussian.GaussianUnivariate',
                'mean': 3.758666666666666,
                'std': 1.7585291834055212,
                'fitted': True,
            },
            {
                'type': 'copulas.univariate.gaussian.GaussianUnivariate',
                'mean': 1.1986666666666668,
                'std': 0.7606126185881716,
                'fitted': True,
            }
        ]
    }

    # Run
    instance.save('test.json')

    # Check
    # ``assert mock.called_once_with(...)`` is not an assertion: the attribute
    # is auto-created as a truthy child mock, so the check could never fail
    # (and it raises AttributeError on Python >= 3.12). Use the real Mock
    # assertion so the file name and mode are actually verified.
    open_mock.assert_called_once_with('test.json', 'w')
    compare_nested_dicts(json_mock.call_args[0][0], expected_content)