Beispiel #1
0
    def test_cumulative_distribution(self, kde_mock):
        """Check that cumulative_distribution delegates to the mocked kde model.

        The mocked ``integrate_box_1d`` yields one value per evaluated point; the
        test verifies both the returned values and the arguments of every call.
        """
        # Setup
        kde_model = kde_mock.return_value
        kde_model.integrate_box_1d.side_effect = [0.0, 0.5, 1.0]

        # Mocked dataset statistics, from which the lower bound is derived.
        kde_model.dataset = MagicMock()
        kde_model.dataset.mean.return_value = 1
        kde_model.dataset.std.return_value = 0.1

        training_data = np.array([1, 2, 3, 4, 5])
        distribution = GaussianKDE()
        distribution.fit(training_data)

        query_points = np.array([-10, 0, 10])
        expected_cdf = np.array([0.0, 0.5, 1.0])

        # The first positional argument is the lower bound (1 - 0.1*5).
        lower_bound = 0.5
        expected_calls = [
            ((lower_bound, upper_bound), {})
            for upper_bound in (-10, 0, 10)
        ]

        # Run
        cdf = distribution.cumulative_distribution(query_points)

        # Check
        compare_nested_iterables(cdf, expected_cdf)

        kde_mock.assert_called_once_with(training_data)
        actual_calls = kde_model.integrate_box_1d.call_args_list
        assert actual_calls == expected_calls
Beispiel #2
0
    def test_second_tree_likelihood(self):
        """Check the likelihood returned by a second center tree built on a first one."""
        # Setup
        # First tree: fitted on the univariate CDFs of the iris data.
        iris = pd.read_csv('data/iris.data.csv')
        kendall_tau = iris.corr(method='kendall').values

        cdf_matrix = np.empty(iris.shape)
        for position, column_name in enumerate(iris):
            univariate = GaussianKDE()
            univariate.fit(iris[column_name])
            cdf_matrix[:, position] = univariate.cumulative_distribution(iris[column_name])

        tree_one = get_tree(TreeTypes.CENTER)
        tree_one.fit(0, 4, kendall_tau, cdf_matrix)

        evaluation_point = np.array([[0.1, 0.2, 0.3, 0.4]])
        # Only the conditional univariates are needed to evaluate the next tree.
        _, conditional_univariates = tree_one.get_likelihood(evaluation_point)
        tree_one_tau = tree_one.get_tau_matrix()

        # Second tree: fitted on top of the first one.
        tree_two = get_tree(TreeTypes.CENTER)
        tree_two.fit(1, 3, tree_one_tau, tree_one)
        expected_likelihood = 0.4888802429313932

        # Run
        likelihood, _ = tree_two.get_likelihood(conditional_univariates)

        # Check
        assert compare_values_epsilon(likelihood, expected_likelihood)
Beispiel #3
0
    def test_serialization_fit_model(self):
        """A fitted regular tree survives a to_dict / from_dict round trip."""
        # Setup
        tree = get_tree(TreeTypes.REGULAR)
        frame = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        tree_index = 0
        node_count = frame.shape[1]
        kendall_tau = frame.corr(method='kendall').values

        # One column of CDF values per column of the input frame.
        cdf_matrix = np.empty(frame.shape)
        for position, label in enumerate(frame):
            univariate = GaussianKDE()
            univariate.fit(frame[label])
            cdf_matrix[:, position] = univariate.cumulative_distribution(frame[label])

        tree.fit(tree_index, node_count, kendall_tau, cdf_matrix)

        # Run
        restored = Tree.from_dict(tree.to_dict())

        # Check
        restored_params = restored.to_dict()
        original_params = tree.to_dict()
        assert restored_params == original_params
Beispiel #4
0
    def test_to_dict(self, kde_mock):
        """to_dict returns the type, fitted flag and dataset of a fitted model."""
        # Setup
        values = [
            0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
            1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
            1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
            0.5425600435859647
        ]
        column = np.array([values])

        # The mocked kde exposes the column both as its dataset and as its resample output.
        mocked_kde = kde_mock.return_value
        mocked_kde.dataset = column
        mocked_kde.resample.return_value = column

        model = GaussianKDE()
        model.fit(column)

        expected_params = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'fitted': True,
            'dataset': [list(values)],
        }

        # Run
        params = model.to_dict()

        # Check
        compare_nested_dicts(params, expected_params)
Beispiel #5
0
    def test_percent_point(self, kde_mock, brentq_mock, cdf_mock):
        """percent_point hands the mocked scalar cdf helper to brentq and returns its root."""
        # Setup
        kde_model = kde_mock.return_value
        brentq_mock.return_value = -250.0
        cdf_mock.return_value = 'a nice scalar bounded method'

        training_data = np.array([1, 2, 3, 4, 5])
        distribution = GaussianKDE()
        distribution.fit(training_data)

        expected = np.array([-250.0])

        # Run
        percent_points = distribution.percent_point([0.5])

        # Check
        assert percent_points == expected

        kde_mock.assert_called_once_with(training_data)

        # The kde model itself must stay untouched: no direct call, no method call.
        kde_model.assert_not_called()
        assert not kde_model.method_calls

        brentq_mock.assert_called_once_with(
            'a nice scalar bounded method', -1000, 1000)
Beispiel #6
0
    def test_percent_point_invalid_value(self):
        """percent_point raises ValueError when asked for the value 2.0."""
        # Setup
        distribution = GaussianKDE()
        distribution.fit(np.array([1, 2, 3, 4, 5]))
        invalid_input = np.array([2.])

        # Run / Check
        with self.assertRaises(ValueError):
            distribution.percent_point(invalid_input)
Beispiel #7
0
    def test_fit_sample(self):
        """Sampling 50 values from a fitted model returns a 1-D ndarray of length 50."""
        kde = GaussianKDE()
        kde.fit(self.data)

        samples = kde.sample(50)

        assert isinstance(samples, np.ndarray)
        expected_shape = (50, )
        assert samples.shape == expected_shape
Beispiel #8
0
    def test_to_dict_sample_size(self):
        """With sample_size=10 the serialized dataset holds exactly 10 entries."""
        kde = GaussianKDE(sample_size=10)
        kde.fit(self.constant)

        serialized = kde.to_dict()

        expected_type = 'copulas.univariate.gaussian_kde.GaussianKDE'
        assert serialized['type'] == expected_type
        assert len(serialized['dataset']) == 10
Beispiel #9
0
    def test_fit_empty_data(self):
        """Fitting the model on an empty array raises ValueError."""
        # Setup
        empty = np.array([])
        distribution = GaussianKDE()

        # Run / Check
        with self.assertRaises(ValueError):
            distribution.fit(empty)
Beispiel #10
0
    def test_pdf(self):
        """The density evaluated at points sampled from the model is strictly positive."""
        kde = GaussianKDE()
        kde.fit(self.data)

        samples = kde.sample(50)

        # Test PDF
        densities = kde.probability_density(samples)
        assert (0 < densities).all()
Beispiel #11
0
    def test_percent_point_bisect(self):
        """percent_point with method='bisect' returns plausible low, median and high values."""
        distribution = GaussianKDE()
        distribution.fit(np.array([0.5, 1.0, 1.5]))

        probabilities = np.array([0.001, 0.5, 0.999])
        percent_points = distribution.percent_point(probabilities, method='bisect')

        lowest, median, highest = percent_points[0], percent_points[1], percent_points[2]
        assert lowest < 0.0, "The 0.001th percentile should be small."
        assert abs(median - 1.0) < 0.1, "The 50% percentile should be the median."
        assert highest > 2.0, "The 0.999th percentile should be large."
Beispiel #12
0
    def test_to_dict_constant(self):
        """A model fitted on the constant fixture serializes to its type and a dataset of fives."""
        kde = GaussianKDE()
        kde.fit(self.constant)

        serialized = kde.to_dict()

        expected = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'dataset': [5] * 100
        }
        assert serialized == expected
Beispiel #13
0
    def test_fit_sample_constant(self):
        """A model fitted on the constant fixture only ever samples the value 5."""
        kde = GaussianKDE()
        kde.fit(self.constant)

        samples = kde.sample(50)

        assert isinstance(samples, np.ndarray)
        assert samples.shape == (50, )

        assert kde._constant_value == 5
        # A second draw must consist solely of the constant value.
        all_fives = np.full(50, 5)
        np.testing.assert_equal(all_fives, kde.sample(50))
Beispiel #14
0
    def test_valid_serialization_fit_model(self):
        """to_dict followed by from_dict reproduces an equivalent fitted model."""
        # Setup
        original = GaussianKDE()
        original.fit(np.array([1, 2, 3, 4]))

        # Run
        restored = GaussianKDE.from_dict(original.to_dict())

        # Check
        original_params = original.to_dict()
        restored_params = restored.to_dict()
        assert original_params == restored_params
Beispiel #15
0
    def test_sample(self, kde_mock):
        """sample forwards the requested size to resample and returns a flat array."""
        distribution = GaussianKDE()
        distribution.fit(np.array([1, 2, 3, 4]))

        # The mocked kde returns its samples as a single-row 2-D array.
        mocked_kde = kde_mock.return_value
        mocked_kde.resample.return_value = np.array([[1, 2, 3]])

        drawn = distribution.sample(3)

        distribution._model.resample.assert_called_once_with(3)
        expected = np.array([1, 2, 3])
        np.testing.assert_equal(drawn, expected)
 def setUp(self):
     """Build the shared fixture: iris data, its tau matrix, its CDF matrix and a fitted tree.

     Sets ``self.data``, ``self.tau_mat`` (Kendall correlation values),
     ``self.u_matrix`` (one column of univariate CDF values per data column)
     and ``self.tree`` (a ``TreeTypes.DIRECT`` tree fitted on them).
     """
     self.data = pd.read_csv('data/iris.data.csv')
     self.tau_mat = self.data.corr(method='kendall').values
     self.u_matrix = np.empty(self.data.shape)

     # enumerate supplies the column position directly, instead of a manual counter.
     for position, col in enumerate(self.data):
         uni = GaussianKDE()
         uni.fit(self.data[col])
         self.u_matrix[:, position] = uni.cumulative_distribution(self.data[col])

     self.tree = Tree(TreeTypes.DIRECT)
     self.tree.fit(0, 4, self.tau_mat, self.u_matrix)
Beispiel #17
0
    def test_cumulative_distribution(self):
        """Check CDF values of a model fitted on data clustered between 0.9 and 1.1."""
        distribution = GaussianKDE()
        distribution.fit(np.array([0.9, 1.0, 1.1]))

        # Query points paired by position with their expected CDF values:
        #    0.0 -> no data below it (cdf = 0.0)
        #    1.0 -> half the data below it (cdf = 0.5)
        #    2.0 -> all the data below it (cdf = 1.0)
        #   -1.0 -> no data below it (cdf = 0.0)
        query_points = np.array([0.0, 1.0, 2.0, -1.0])
        expected_cdf = np.array([0.0, 0.5, 1.0, 0.0])

        cdf = distribution.cumulative_distribution(query_points)

        assert np.allclose(cdf, expected_cdf, atol=1e-3)
Beispiel #18
0
    def test_fit_constant(self):
        """Fitting constant data stores the constant value and leaves the model unset."""
        # Setup
        constant_data = np.array([1, 1, 1, 1, 1])
        distribution = GaussianKDE()

        # Run
        distribution.fit(constant_data)

        # Check
        assert distribution.fitted is True
        assert distribution.constant_value == 1
        assert distribution.model is None
Beispiel #19
0
    def test_probability_density(self, kde_mock):
        """probability_density calls the model's evaluate once and returns its output."""
        distribution = GaussianKDE()
        distribution.fit(np.array([1, 2, 3, 4]))

        mocked_kde = kde_mock.return_value
        mocked_kde.evaluate.return_value = np.array([0.1, 0.2, 0.3])

        densities = distribution.probability_density(np.array([1, 2, 3]))

        evaluate = distribution._model.evaluate
        assert evaluate.call_count == 1

        # Inspect the first positional argument of the single evaluate call.
        positional_args = evaluate.call_args[0]
        np.testing.assert_equal(positional_args[0], np.array([1, 2, 3]))
        np.testing.assert_equal(densities, np.array([0.1, 0.2, 0.3]))
Beispiel #20
0
    def test_cdf(self):
        """CDF values lie within [0, 1] and never decrease over sorted inputs."""
        kde = GaussianKDE()
        kde.fit(self.data)

        samples = kde.sample(50)

        # Test the CDF
        cdf_values = kde.cumulative_distribution(samples)
        assert (0 <= cdf_values).all()
        assert (cdf_values <= 1).all()

        # Test CDF increasing function
        ordered_samples = sorted(samples)
        ordered_cdf = kde.cumulative_distribution(ordered_samples)
        increments = np.diff(ordered_cdf)
        assert (increments >= 0).all()
Beispiel #21
0
    def test_to_dict_from_dict(self):
        """A model rebuilt from to_dict output matches the original pdf and cdf within 0.01."""
        original = GaussianKDE()
        original.fit(self.data)

        samples = original.sample(50)

        restored = GaussianKDE.from_dict(original.to_dict())

        original_pdf = original.probability_density(samples)
        restored_pdf = restored.probability_density(samples)
        assert np.allclose(original_pdf, restored_pdf, atol=0.01)

        original_cdf = original.cumulative_distribution(samples)
        restored_cdf = restored.cumulative_distribution(samples)
        assert np.allclose(original_cdf, restored_cdf, atol=0.01)
Beispiel #22
0
    def test_save_load(self):
        """A model saved to disk and loaded back matches the original pdf and cdf within 0.01."""
        original = GaussianKDE()
        original.fit(self.data)

        samples = original.sample(50)

        # Round-trip the model through a file in the test directory.
        model_path = os.path.join(self.test_dir.name, "model.pkl")
        original.save(model_path)
        loaded = GaussianKDE.load(model_path)

        original_pdf = original.probability_density(samples)
        loaded_pdf = loaded.probability_density(samples)
        assert np.allclose(original_pdf, loaded_pdf, atol=0.01)

        original_cdf = original.cumulative_distribution(samples)
        loaded_cdf = loaded.cumulative_distribution(samples)
        assert np.allclose(original_cdf, loaded_cdf, atol=0.01)
Beispiel #23
0
    def test_to_dict_from_dict_constant(self):
        """A constant model and its from_dict copy agree on samples, pdf and cdf."""
        original = GaussianKDE()
        original.fit(self.constant)

        samples = original.sample(50)
        densities = original.probability_density(samples)
        cumulative = original.cumulative_distribution(samples)

        restored = GaussianKDE.from_dict(original.to_dict())

        all_fives = np.full(50, 5)
        all_ones = np.full(50, 1)

        # Samples are always the constant value.
        np.testing.assert_equal(all_fives, samples)
        np.testing.assert_equal(all_fives, restored.sample(50))

        # Both pdf and cdf are expected to be 1 at the constant value.
        np.testing.assert_equal(all_ones, densities)
        np.testing.assert_equal(all_ones, restored.probability_density(samples))
        np.testing.assert_equal(all_ones, cumulative)
        np.testing.assert_equal(all_ones, restored.cumulative_distribution(samples))
Beispiel #24
0
    def test_fit(self, kde_mock):
        """fit builds a gaussian_kde from the data and exposes its evaluate as the pdf."""
        # Setup
        training_data = np.array([1, 2, 3, 4, 5])
        distribution = GaussianKDE()

        # 'pdf' is a sentinel to check that evaluate is wired to probability_density.
        mocked_kde = MagicMock(evaluate='pdf')
        kde_mock.return_value = mocked_kde

        # Run
        distribution.fit(training_data)

        # Check
        assert distribution.model == mocked_kde
        assert distribution.fitted is True
        assert distribution.constant_value is None
        assert distribution.probability_density == 'pdf'
        kde_mock.assert_called_once_with(training_data)
Beispiel #25
0
    def test_sample(self, kde_mock):
        """sample returns the flattened output of the fitted model's resample."""
        # Setup
        mocked_kde = kde_mock.return_value
        mocked_kde.resample.return_value = np.array([[0, 1, 0, 1, 0]])

        training_data = np.array([1, 2, 3, 4, 5])
        distribution = GaussianKDE()
        distribution.fit(training_data)

        expected_samples = np.array([0, 1, 0, 1, 0])

        # Run
        samples = distribution.sample(5)

        # Check
        compare_nested_iterables(samples, expected_samples)

        assert distribution.model == mocked_kde
        kde_mock.assert_called_once_with(training_data)
        mocked_kde.resample.assert_called_once_with(5)
Beispiel #26
0
    def test_probability_density(self, kde_mock):
        """probability_density returns what the mocked model's evaluate returns."""
        # Setup
        mocked_kde = kde_mock.return_value
        mocked_kde.evaluate.return_value = np.array([0.0, 0.5, 1.0])

        training_data = np.array([1, 2, 3, 4, 5])
        distribution = GaussianKDE()
        distribution.fit(training_data)

        query_points = np.array([-10, 0, 10])
        expected_densities = np.array([0.0, 0.5, 1.0])

        # Run
        densities = distribution.probability_density(query_points)

        # Check
        compare_nested_iterables(densities, expected_densities)

        kde_mock.assert_called_once_with(training_data)
        mocked_kde.evaluate.assert_called_once_with(query_points)
Beispiel #27
0
    def test_to_dict(self):
        """to_dict exposes every defining parameter of a fitted distribution."""
        # Setup
        values = [
            0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
            1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
            1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
            0.5425600435859647
        ]
        model = GaussianKDE()
        model.fit(np.array([values]))

        expected_params = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'fitted': True,
            'constant_value': None,
            'd': 1,
            'n': 10,
            'dataset': [list(values)],
            'covariance': [[0.20810696044195218]],
            'factor': 0.6309573444801932,
            'inv_cov': [[4.805221304834407]]
        }

        # Run
        params = model.to_dict()

        # Check
        compare_nested_dicts(params, expected_params)
Beispiel #28
0
    def test_to_dict_fit_model(self):
        """to_dict of a fitted regular tree returns its parameters and both edges."""
        # Setup
        tree = get_tree(TreeTypes.REGULAR)
        frame = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        tree_index = 0
        node_count = frame.shape[1]
        kendall_tau = frame.corr(method='kendall').values

        # One column of CDF values per column of the input frame.
        cdf_matrix = np.empty(frame.shape)
        for position, label in enumerate(frame):
            univariate = GaussianKDE()
            univariate.fit(frame[label])
            cdf_matrix[:, position] = univariate.cumulative_distribution(frame[label])

        tree.fit(tree_index, node_count, kendall_tau, cdf_matrix)

        # Values that repeat throughout the expected serialization.
        on_diagonal = 0.8230112726144534
        off_diagonal = 0.3384880496294825
        tau = -0.49999999999999994
        theta = -5.736282443655552

        first_edge = {
            'index': 0,
            'D': set(),
            'L': 0,
            'R': 1,
            'U': [
                [0.7969636014074211, 0.6887638642325501, 0.12078520049364487],
                [0.6887638642325501, 0.7969636014074211, 0.12078520049364487]
            ],
            'likelihood': None,
            'name': CopulaTypes.FRANK,
            'neighbors': [],
            'parents': None,
            'tau': tau,
            'theta': theta
        }
        second_edge = {
            'index': 1,
            'D': set(),
            'L': 1,
            'R': 2,
            'U': [
                [0.12078520049364491, 0.7969636014074213, 0.6887638642325501],
                [0.12078520049364491, 0.6887638642325503, 0.7969636014074211]
            ],
            'likelihood': None,
            'name': CopulaTypes.FRANK,
            'neighbors': [],
            'parents': None,
            'tau': tau,
            'theta': theta
        }

        expected_params = {
            'type': 'copulas.multivariate.tree.RegularTree',
            'fitted': True,
            'level': 1,
            'n_nodes': 3,
            'previous_tree': [
                [on_diagonal, off_diagonal, off_diagonal],
                [off_diagonal, on_diagonal, off_diagonal],
                [off_diagonal, off_diagonal, on_diagonal]
            ],
            'tau_matrix': [
                [1.0, tau, tau],
                [tau, 1.0, tau],
                [tau, tau, 1.0]
            ],
            'tree_type': TreeTypes.REGULAR,
            'edges': [first_edge, second_edge],
        }

        # Run
        params = tree.to_dict()

        # Check
        compare_nested_dicts(params, expected_params)
Beispiel #29
0
    def test__is_constant_false(self):
        """_is_constant is falsy after fitting data with distinct values."""
        varying_data = np.array([1, 2, 3, 4])
        model = GaussianKDE()

        model.fit(varying_data)

        assert not model._is_constant()
Beispiel #30
0
    def test__is_constant_true(self):
        """_is_constant is truthy after fitting data made of a single repeated value."""
        constant_data = np.array([1, 1, 1, 1])
        model = GaussianKDE()

        model.fit(constant_data)

        assert model._is_constant()