def test_fit_constant(self):
        """Fitting constant data stores the constant and leaves ``model`` unset.

        After ``fit`` on an array whose entries are all 1, the instance must
        report ``model is None``, ``constant_value == 1`` and ``fitted is True``.
        """
        # Setup
        constant_data = np.array([1, 1, 1, 1, 1])
        kde = GaussianKDE()

        # Run
        kde.fit(constant_data)

        # Check
        assert kde.model is None
        assert kde.constant_value == 1
        assert kde.fitted is True
Exemple #2
0
    def setUp(self):
        """Load the iris CSV and fit a direct tree on its KDE-transformed columns.

        Attributes set on ``self``:
            data: contents of ``data/iris.data.csv``.
            tau_mat: Kendall correlation matrix of ``data`` as an array.
            u_matrix: array shaped like ``data``; column ``i`` holds the
                ``GaussianKDE`` cumulative distribution of the i-th data column,
                evaluated on that same column.
            tree: ``TreeTypes.DIRECT`` tree fitted with ``tau_mat`` and ``u_matrix``.
        """
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)

        # Iterating a DataFrame yields its column labels; ``position`` is the
        # matching column index inside ``u_matrix``.
        for position, label in enumerate(self.data):
            marginal = GaussianKDE()
            marginal.fit(self.data[label])
            self.u_matrix[:, position] = marginal.cumulative_distribution(self.data[label])

        self.tree = get_tree(TreeTypes.DIRECT)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)
Exemple #3
0
    def test_cumulative_distribution(self):
        """cumulative_distribution of a fitted model returns the expected CDF values.

        The model is fitted on ``[0.9, 1.0, 1.1]`` and evaluated far below, at the
        centre of, and far above that data; results are compared with an
        absolute tolerance of 1e-3.
        """
        kde = GaussianKDE()
        kde.fit(np.array([0.9, 1.0, 1.1]))

        points = np.array([
            0.0,  # Below all of the data (cdf = 0.0).
            1.0,  # Half of the data lies below this point (cdf = 0.5).
            2.0,  # Above all of the data (cdf = 1.0).
            -1.0  # Below all of the data (cdf = 0).
        ])
        expected = np.array([0.0, 0.5, 1.0, 0.0])

        result = kde.cumulative_distribution(points)

        assert np.isclose(result, expected, atol=1e-3).all()
Exemple #4
0
    def test_probability_density(self, kde_mock):
        """probability_density forwards its input to the model's ``evaluate``.

        ``kde_mock`` is injected from outside this view (presumably a patch of
        the ``gaussian_kde`` class -- confirm against the decorator). The test
        checks that ``evaluate`` is called exactly once with the queried points
        and that its return value is handed back unchanged.
        """
        kde = GaussianKDE()
        kde.fit(np.array([1, 2, 3, 4]))

        # The mocked model is configured only after fitting, as in the
        # original test; the order is kept on purpose.
        mocked_model = kde_mock.return_value
        mocked_model.evaluate.return_value = np.array([0.1, 0.2, 0.3])

        density = kde.probability_density(np.array([1, 2, 3]))

        evaluate = kde._model.evaluate
        assert evaluate.call_count == 1
        # First positional argument of the single recorded call.
        received = evaluate.call_args[0][0]
        np.testing.assert_equal(received, np.array([1, 2, 3]))
        np.testing.assert_equal(density, np.array([0.1, 0.2, 0.3]))
    def test_cdf(self):
        """The CDF of sampled points lies in [0, 1] and never decreases on sorted input."""
        kde = GaussianKDE()
        kde.fit(self.data)

        samples = kde.sample(50)

        # Every CDF value must be a valid probability.
        probabilities = kde.cumulative_distribution(samples)
        assert (probabilities >= 0).all() and (probabilities <= 1).all()

        # Evaluated on ascending inputs, consecutive CDF values must not drop.
        ascending = sorted(samples)
        ascending_cdf = kde.cumulative_distribution(ascending)
        steps = np.diff(ascending_cdf)
        assert (steps >= 0).all()
    def test_sample_constant(self):
        """With ``constant_value`` set, every sampled value equals that constant."""
        # Setup: put the instance in the constant state by hand, without fit.
        kde = GaussianKDE()
        kde.fitted = True
        kde.constant_value = 3
        kde._replace_constant_methods()

        expected = np.array([3] * 5)

        # Run
        samples = kde.sample(5)

        # Check
        compare_nested_iterables(samples, expected)
    def test_to_dict_from_dict(self):
        """A model rebuilt via ``from_dict(to_dict())`` matches the original's pdf and cdf.

        Both functions are compared on 50 points sampled from the original
        model, with an absolute tolerance of 0.01.
        """
        original = GaussianKDE()
        original.fit(self.data)

        samples = original.sample(50)

        serialized = original.to_dict()
        restored = GaussianKDE.from_dict(serialized)

        original_pdf = original.probability_density(samples)
        restored_pdf = restored.probability_density(samples)
        assert np.isclose(original_pdf, restored_pdf, atol=0.01).all()

        original_cdf = original.cumulative_distribution(samples)
        restored_cdf = restored.cumulative_distribution(samples)
        assert np.isclose(original_cdf, restored_cdf, atol=0.01).all()
    def test_save_load(self):
        """A model saved to disk and loaded back matches the original's pdf and cdf.

        The model is written to ``model.pkl`` inside ``self.test_dir`` and both
        functions are compared on 50 sampled points with an absolute tolerance
        of 0.01.
        """
        original = GaussianKDE()
        original.fit(self.data)

        samples = original.sample(50)

        model_path = os.path.join(self.test_dir.name, "model.pkl")
        original.save(model_path)
        reloaded = GaussianKDE.load(model_path)

        original_pdf = original.probability_density(samples)
        reloaded_pdf = reloaded.probability_density(samples)
        assert np.isclose(original_pdf, reloaded_pdf, atol=0.01).all()

        original_cdf = original.cumulative_distribution(samples)
        reloaded_cdf = reloaded.cumulative_distribution(samples)
        assert np.isclose(original_cdf, reloaded_cdf, atol=0.01).all()
    def test_to_dict_from_dict_constant(self):
        """A constant model survives a ``to_dict``/``from_dict`` round trip.

        The model is fitted on ``self.constant`` (assumed to hold the constant
        value 5 -- it is defined outside this view). Both the original and the
        rebuilt model must sample only 5 and return 1 for pdf and cdf on those
        samples.
        """
        original = GaussianKDE()
        original.fit(self.constant)

        samples = original.sample(50)
        density = original.probability_density(samples)
        cumulative = original.cumulative_distribution(samples)

        restored = GaussianKDE.from_dict(original.to_dict())

        np.testing.assert_equal(np.full(50, 5), samples)
        np.testing.assert_equal(np.full(50, 5), restored.sample(50))
        np.testing.assert_equal(np.full(50, 1), density)
        np.testing.assert_equal(np.full(50, 1), restored.probability_density(samples))
        np.testing.assert_equal(np.full(50, 1), cumulative)
        np.testing.assert_equal(np.full(50, 1), restored.cumulative_distribution(samples))
    def test_fit(self, kde_mock):
        """fit builds the KDE model from the data and exposes its ``evaluate`` as the pdf.

        ``kde_mock`` is injected from outside this view (presumably a patch of
        the ``gaussian_kde`` class -- confirm against the decorator). Its return
        value is replaced by a mock whose ``evaluate`` attribute is the sentinel
        string ``'pdf'``, so the test can check what ``probability_density`` is
        bound to after fitting.
        """
        # Setup
        kde = GaussianKDE()
        fit_data = np.array([1, 2, 3, 4, 5])

        mocked_model = MagicMock(evaluate='pdf')
        kde_mock.return_value = mocked_model

        # Run
        kde.fit(fit_data)

        # Check
        assert kde.model == mocked_model
        assert kde.fitted is True
        assert kde.constant_value is None
        assert kde.probability_density == 'pdf'
        kde_mock.assert_called_once_with(fit_data)
    def test_probability_density_constant(self, pdf_mock):
        """If constant_value, probability_density uses the degenerate version.

        ``pdf_mock`` is injected from outside this view (presumably a patch of
        the degenerate pdf helper -- confirm against the decorator). The test
        checks delegation only: the mock must be called once with the instance
        and the input, and its return value must be passed through.

        The fixture is kept self-consistent: the mocked/expected density has one
        entry per element of ``X`` and is non-zero only where ``X`` equals the
        constant value 3.
        """
        # Setup
        instance = GaussianKDE()
        instance.fitted = True
        instance.constant_value = 3
        instance._replace_constant_methods()

        X = np.array([0, 1, 2, 3, 4, 5])
        # Same length as X, with the single 1 at the position of the constant.
        expected_result = np.array([0, 0, 0, 1, 0, 0])

        pdf_mock.return_value = np.array([0, 0, 0, 1, 0, 0])

        # Run
        result = instance.probability_density(X)

        # Check
        compare_nested_iterables(result, expected_result)
        pdf_mock.assert_called_once_with(instance, X)
    def test_percent_point_constant_raises(self, ppf_mock):
        """With ``constant_value`` set, percent_point delegates to the degenerate version.

        ``ppf_mock`` is injected from outside this view (presumably a patch of
        the degenerate percent-point helper -- confirm against the decorator).

        NOTE(review): despite the ``_raises`` suffix in the name, nothing in
        this test expects an exception; it only checks delegation.
        """
        # Setup: put the instance in the constant state by hand, without fit.
        kde = GaussianKDE()
        kde.fitted = True
        kde.constant_value = 3
        kde._replace_constant_methods()

        probabilities = np.array([0.1, 0.5, 0.75])
        expected = np.array([3, 3, 3])

        ppf_mock.return_value = np.array([3, 3, 3])

        # Run
        quantiles = kde.percent_point(probabilities)

        # Check
        compare_nested_iterables(quantiles, expected)
        ppf_mock.assert_called_once_with(kde, probabilities)
Exemple #13
0
    def test_probability_density(self, kde_mock):
        """probability_density returns what the fitted model's ``evaluate`` returns.

        ``kde_mock`` is injected from outside this view (presumably a patch of
        the ``gaussian_kde`` class -- confirm against the decorator). The test
        checks that the model is built once from the fit data and that
        ``evaluate`` is called once with the queried points.
        """
        # Setup
        mocked_model = kde_mock.return_value
        mocked_model.evaluate.return_value = np.array([0.0, 0.5, 1.0])

        training = np.array([1, 2, 3, 4, 5])
        kde = GaussianKDE()
        kde.fit(training)
        query = np.array([-10, 0, 10])

        expected = np.array([0.0, 0.5, 1.0])

        # Run
        density = kde.probability_density(query)

        # Check
        compare_nested_iterables(density, expected)

        kde_mock.assert_called_once_with(training)
        mocked_model.evaluate.assert_called_once_with(query)
    def test_sample(self, kde_mock):
        """A fitted instance samples through the model's ``resample`` method.

        ``kde_mock`` is injected from outside this view (presumably a patch of
        the ``gaussian_kde`` class -- confirm against the decorator). The mocked
        ``resample`` returns a 1x5 array and the test expects ``sample`` to hand
        back its five values as a flat sequence.
        """
        # Setup
        mocked_model = kde_mock.return_value
        mocked_model.resample.return_value = np.array([[0, 1, 0, 1, 0]])

        kde = GaussianKDE()
        training = np.array([1, 2, 3, 4, 5])
        kde.fit(training)

        expected = np.array([0, 1, 0, 1, 0])

        # Run
        samples = kde.sample(5)

        # Check
        compare_nested_iterables(samples, expected)

        assert kde.model == mocked_model
        kde_mock.assert_called_once_with(training)
        mocked_model.resample.assert_called_once_with(5)
Exemple #15
0
    def test_to_dict(self):
        """to_dict returns the parameters that define a fitted distribution.

        The distribution is fitted on a single row of ten values and the
        resulting dict is compared with the stored reference parameters.
        """
        # Setup
        values = [
            0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
            1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
            1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
            0.5425600435859647
        ]
        kde = GaussianKDE()
        kde.fit(np.array([values]))

        expected = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'fitted': True,
            'constant_value': None,
            'd': 1,
            'n': 10,
            # The serialized dataset is the same single row used for fitting.
            'dataset': [values],
            'covariance': [[0.20810696044195218]],
            'factor': 0.6309573444801932,
            'inv_cov': [[4.805221304834407]]
        }

        # Run
        serialized = kde.to_dict()

        # Check
        compare_nested_dicts(serialized, expected)
Exemple #16
0
    def test_serialization_fit_model(self):
        """A fitted tree rebuilt with ``Tree.from_dict`` serializes to the same dict."""
        # Setup
        tree = get_tree(TreeTypes.REGULAR)
        frame = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        tree_index = 0
        node_count = frame.shape[1]
        tau = frame.corr(method='kendall').values

        # Column ``position`` holds the KDE cumulative distribution of the
        # matching data column, evaluated on that same column.
        u_matrix = np.empty(frame.shape)
        for position, label in enumerate(frame):
            marginal = GaussianKDE()
            marginal.fit(frame[label])
            u_matrix[:, position] = marginal.cumulative_distribution(frame[label])

        tree.fit(tree_index, node_count, tau, u_matrix)

        # Run
        restored = Tree.from_dict(tree.to_dict())

        # Check
        assert restored.to_dict() == tree.to_dict()
    def test_to_dict(self, kde_mock):
        """to_dict returns the parameters that define a fitted distribution.

        ``kde_mock`` is injected from outside this view (presumably a patch of
        the ``gaussian_kde`` class -- confirm against the decorator). The mocked
        model exposes the fit data as ``dataset`` and returns it from
        ``resample``.
        """
        # Setup
        values = [
            0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
            1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
            1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
            0.5425600435859647
        ]
        data = np.array([values])

        mocked_model = kde_mock.return_value
        mocked_model.dataset = data
        mocked_model.resample.return_value = data
        kde = GaussianKDE()
        kde.fit(data)

        expected = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'fitted': True,
            'lower': -3.8990040374074275,
            'upper': 5.008742466979867,
            # The serialized dataset is the same single row used for fitting.
            'dataset': [values],
        }

        # Run
        serialized = kde.to_dict()

        # Check
        compare_nested_dicts(serialized, expected)
Exemple #18
0
    def test_to_dict_fit_model(self):
        """to_dict of a fitted regular tree matches the stored reference dict.

        A ``TreeTypes.REGULAR`` tree is fitted on a 3x3 identity-like frame,
        using the Kendall correlation matrix of the frame and a matrix whose
        columns are the ``GaussianKDE`` cumulative distribution of each data
        column. The serialized result is compared with ``expected_result``
        through ``compare_nested_dicts``.
        """
        # Setup
        instance = get_tree(TreeTypes.REGULAR)
        X = pd.DataFrame(data=[
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1]
        ])
        index = 0
        n_nodes = X.shape[1]
        tau_matrix = X.corr(method='kendall').values

        # Column i holds the KDE cumulative distribution of the i-th data
        # column, evaluated on that same column.
        univariates_matrix = np.empty(X.shape)
        for i, column in enumerate(X):
            distribution = GaussianKDE()
            distribution.fit(X[column])
            univariates_matrix[:, i] = distribution.cumulative_distribution(X[column])

        instance.fit(index, n_nodes, tau_matrix, univariates_matrix)
        # Reference serialization; the float literals are exact expected values
        # and must not be rounded or reformatted.
        expected_result = {
            'type': 'copulas.multivariate.tree.RegularTree',
            'fitted': True,
            'level': 1,
            'n_nodes': 3,
            'previous_tree': [
                [0.8230112726144534, 0.3384880496294825, 0.3384880496294825],
                [0.3384880496294825, 0.8230112726144534, 0.3384880496294825],
                [0.3384880496294825, 0.3384880496294825, 0.8230112726144534]
            ],
            'tau_matrix': [
                [1.0, -0.49999999999999994, -0.49999999999999994],
                [-0.49999999999999994, 1.0, -0.49999999999999994],
                [-0.49999999999999994, -0.49999999999999994, 1.0]
            ],
            'tree_type': TreeTypes.REGULAR,
            # Two edges are expected: one joining nodes 0-1, one joining 1-2.
            'edges': [
                {
                    'index': 0,
                    'D': set(),
                    'L': 0,
                    'R': 1,
                    'U': [
                        [0.7969636014074211, 0.6887638642325501, 0.12078520049364487],
                        [0.6887638642325501, 0.7969636014074211, 0.12078520049364487]
                    ],
                    'likelihood': None,
                    'name': CopulaTypes.FRANK,
                    'neighbors': [],
                    'parents': None,
                    'tau': -0.49999999999999994,
                    'theta': -5.736282443655552
                },
                {
                    'index': 1,
                    'D': set(),
                    'L': 1,
                    'R': 2,
                    'U': [
                        [0.12078520049364491, 0.7969636014074213, 0.6887638642325501],
                        [0.12078520049364491, 0.6887638642325503, 0.7969636014074211]
                    ],
                    'likelihood': None,
                    'name': CopulaTypes.FRANK,
                    'neighbors': [],
                    'parents': None,
                    'tau': -0.49999999999999994,
                    'theta': -5.736282443655552
                }
            ],
        }

        # Run
        result = instance.to_dict()

        # Check
        compare_nested_dicts(result, expected_result)
Exemple #19
0
    def test__is_constant_false(self):
        """_is_constant is falsy after fitting data with distinct values."""
        kde = GaussianKDE()
        varied = np.array([1, 2, 3, 4])

        kde.fit(varied)

        is_constant = kde._is_constant()
        assert not is_constant
Exemple #20
0
    def test__is_constant_true(self):
        """_is_constant is truthy after fitting data whose values are all equal."""
        kde = GaussianKDE()
        repeated = np.array([1, 1, 1, 1])

        kde.fit(repeated)

        is_constant = kde._is_constant()
        assert is_constant
Exemple #21
0
 def test_percent_point_boundary_values(self):
     """percent_point maps probability 0 to -inf and probability 1 to +inf."""
     kde = GaussianKDE()
     kde.fit(np.array([0.0, 0.5, 1.0]))

     extremes = np.array([0.0, 1.0])
     quantiles = kde.percent_point(extremes)

     assert quantiles[0] == float("-inf")
     assert quantiles[1] == float("inf")
Exemple #22
0
 def test_percent_point_invertibility(self):
     """cumulative_distribution undoes percent_point to within 1e-6.

     NOTE(review): the probabilities are drawn from NumPy's global RNG and no
     seed is set inside this test, so the inputs differ between runs unless a
     seed is fixed elsewhere.
     """
     kde = GaussianKDE()
     kde.fit(sample_univariate_bimodal())

     probabilities = np.random.random(size=1000)
     quantiles = kde.percent_point(probabilities)

     round_trip = kde.cumulative_distribution(quantiles)
     assert np.abs(round_trip - probabilities).max() < 1e-6