예제 #1
0
    def test_fit_not_contant(self, select_mock):
        """Fitting non-constant data delegates to the selected instance.

        The candidates given to the constructor must reach the selection
        mock together with the data, and the instance that the mock returns
        must be fitted exactly once on that same data.
        """
        # Setup
        candidates = [MagicMock()]
        data = np.array([1, 2, 3, 4, 5])
        distribution = Univariate(candidates)

        # Run
        distribution.fit(data)

        # Assert
        assert distribution.fitted
        assert distribution.constant_value is None

        # the selection mock received the data and the candidates
        assert select_mock.call_count == 1
        # slicing a ``call`` with [1:] drops its name, leaving (args, kwargs)
        expected_select_call = call(data, candidates)[1:]
        compare_nested_iterables(expected_select_call, select_mock.call_args)

        # the instance returned by the selection mock was fitted on the data
        selected = select_mock.return_value
        assert selected.fit.call_count == 1
        expected_fit_call = call(data)[1:]
        compare_nested_iterables(expected_fit_call, selected.fit.call_args)
예제 #2
0
    def test_fit_not_constant(self):
        """Fitting non-constant data leaves a fitted, non-constant instance."""
        # Setup
        data = np.array([1, 2, 3, 4, 1])
        distribution = Univariate()

        # Run
        distribution.fit(data)

        # Assert
        assert distribution.fitted
        fitted_instance = distribution._instance
        assert not fitted_instance._is_constant()
예제 #3
0
    def test_check_constant_value_non_constant(self):
        """_check_constant_value is falsy for an array of distinct values."""
        # Setup
        values = np.array([1, 2, 3, 4])
        univariate = Univariate()

        # Run
        is_constant = univariate._check_constant_value(values)

        # Check
        assert not is_constant
예제 #4
0
    def test_check_constant_value(self):
        """_check_constant_value is truthy when every element is the same."""
        # Setup
        values = np.array([1, 1, 1, 1])
        univariate = Univariate()

        # Run
        is_constant = univariate._check_constant_value(values)

        # Check
        assert is_constant
예제 #5
0
    def test__constant_sample(self):
        """_constant_sample repeats the constant value num_samples times."""
        # Setup
        univariate = Univariate()
        univariate._constant_value = 15
        expected = np.array([15, 15, 15, 15, 15])

        # Run
        sampled = univariate._constant_sample(5)

        # Check
        compare_nested_iterables(sampled, expected)
예제 #6
0
파일: loss.py 프로젝트: sdv-dev/SDMetrics
    def fit(self, data, cols):
        """Fit one automatically selected univariate model per column.

        Each fitted model is appended to ``self.cdfs``, following the order
        of ``cols``.

        Args:
            data (DataFrame):
                Data, where each column in `cols` is a continuous column.
            cols (list[str]):
                Column names.
        """
        for column_name in cols:
            column_values = np.array(data[column_name])
            univariate = Univariate()
            univariate.fit(column_values)
            self.cdfs.append(univariate)
예제 #7
0
    def test__constant_cumulative_distribution(self):
        """The constant CDF is 0 below the constant value and 1 from it on."""
        # Setup
        univariate = Univariate()
        univariate._constant_value = 3

        points = np.array([1, 2, 3, 4, 5])
        expected = np.array([0, 0, 1, 1, 1])

        # Run
        cdf = univariate._constant_cumulative_distribution(points)

        # Check
        compare_nested_iterables(cdf, expected)
예제 #8
0
    def test__constant_probability_density(self):
        """The constant density is 1 at the constant value and 0 elsewhere."""
        # Setup
        univariate = Univariate()
        univariate._constant_value = 3

        points = np.array([1, 2, 3, 4, 5])
        expected = np.array([0, 0, 1, 0, 0])

        # Run
        density = univariate._constant_probability_density(points)

        # Check
        compare_nested_iterables(density, expected)
예제 #9
0
    def test_fit_contant(self):
        """Fitting constant data stores the value and replaces the methods."""
        # Setup
        constant_data = np.array([1, 1, 1, 1, 1])
        replace_methods_mock = MagicMock()
        distribution = Univariate()
        # swap in a mock so the call made by fit can be observed
        distribution._replace_constant_methods = replace_methods_mock

        # Run
        distribution.fit(constant_data)

        # Assert
        assert distribution.fitted
        assert distribution.constant_value == 1
        replace_methods_mock.assert_called_once_with()
예제 #10
0
    def test__constant_percent_point(self):
        """_constant_percent_point maps each given probability to the constant.

        The input probabilities range from 0.0 to 0.5 and every one of them
        is expected to produce the constant value.
        """
        # Setup
        univariate = Univariate()
        univariate._constant_value = 3

        probabilities = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5])
        expected = np.array([3, 3, 3, 3, 3, 3])

        # Run
        points = univariate._constant_percent_point(probabilities)

        # Check
        compare_nested_iterables(points, expected)
예제 #11
0
    def test__select_candidates_non_parametric(self):
        """Only GaussianKDE is selected for the non-parametric type."""
        # Setup
        parametric = ParametricType.NON_PARAMETRIC
        expected = [GaussianKDE]

        # Run
        selected = Univariate._select_candidates(parametric=parametric)

        # Assert
        assert selected == expected
예제 #12
0
    def test__select_candidates_semibounded(self):
        """GammaUnivariate and LogLaplace are selected for semi-bounded."""
        # Setup
        bounded = BoundedType.SEMI_BOUNDED
        expected = {GammaUnivariate, LogLaplace}

        # Run
        selected = Univariate._select_candidates(bounded=bounded)

        # Assert
        assert set(selected) == expected
예제 #13
0
    def test__select_candidates_bounded(self):
        """The bounded type selects the truncated, beta and uniform models."""
        # Setup
        bounded = BoundedType.BOUNDED
        expected = {TruncatedGaussian, BetaUnivariate, UniformUnivariate}

        # Run
        selected = Univariate._select_candidates(bounded=bounded)

        # Assert
        assert set(selected) == expected
예제 #14
0
    def test__select_candidates_unbounded(self):
        """The unbounded type selects the KDE, gaussian and student-t models."""
        # Setup
        bounded = BoundedType.UNBOUNDED
        expected = {GaussianKDE, GaussianUnivariate, StudentTUnivariate}

        # Run
        selected = Univariate._select_candidates(bounded=bounded)

        # Assert
        assert set(selected) == expected
예제 #15
0
    def test__select_candidates(self):
        """Without arguments, the seven listed distributions are selected."""
        # Setup
        expected = {
            GaussianKDE,
            GaussianUnivariate,
            TruncatedGaussian,
            BetaUnivariate,
            GammaUnivariate,
            StudentTUnivariate,
            UniformUnivariate,
        }

        # Run
        selected = Univariate._select_candidates()

        # Assert
        assert set(selected) == expected
예제 #16
0
    def test__select_candidates_parametric(self):
        """The parametric type selects the six listed parametric models."""
        # Setup
        parametric = ParametricType.PARAMETRIC
        expected = {
            GaussianUnivariate,
            TruncatedGaussian,
            BetaUnivariate,
            GammaUnivariate,
            StudentTUnivariate,
            UniformUnivariate,
        }

        # Run
        selected = Univariate._select_candidates(parametric=parametric)

        # Assert
        assert set(selected) == expected
예제 #17
0
    def test_fit_selection_sample_size_small(self, select_mock):
        """Selection sees a subsample while the final fit sees all the data.

        With ``selection_sample_size=3`` and five data points, the selection
        mock must receive three values and the instance it returns must be
        fitted on the five original values.
        """
        # Setup
        data = np.array([1, 1, 1, 1, 1])
        distribution = Univariate(selection_sample_size=3)

        # Run
        distribution.fit(data)

        # Assert
        assert distribution.fitted
        assert distribution._instance == select_mock.return_value

        # first positional argument of the first call to the selection mock
        first_select_call = select_mock.call_args_list[0]
        selection_sample = first_select_call[0][0]
        np.testing.assert_array_equal(selection_sample, np.array([1, 1, 1]))

        # first positional argument of the first fit call on the instance
        first_fit_call = select_mock.return_value.fit.call_args_list[0]
        fitted_data = first_fit_call[0][0]
        np.testing.assert_array_equal(fitted_data, np.array([1, 1, 1, 1, 1]))
예제 #18
0
    def test_get_constant_value(self):
        """_get_constant_value returns the single value of a constant array."""
        # Setup
        values = np.array([1, 1, 1, 1])

        # Run
        constant_value = Univariate._get_constant_value(values)

        # Check
        assert constant_value == 1
예제 #19
0
    def test_get_constant_value_non_constant(self):
        """_get_constant_value returns None when the values are not all equal."""
        # Setup
        values = np.arange(5)

        # Run
        constant_value = Univariate._get_constant_value(values)

        # Check
        assert constant_value is None
예제 #20
0
 def fit(self, data, cols):
     """Fit one ``Univariate`` model per column and collect them.

     For each name in ``cols`` the values of ``data[name]`` are converted
     to a numpy array, a new ``Univariate`` is fitted on them and the
     fitted model is appended to ``self.cdfs``.

     Args:
         data:
             Container indexable by column name (``data[col]``).
         cols:
             Iterable of column names to fit.
     """
     for column_name in cols:
         column_values = np.array(data[column_name])
         univariate = Univariate()
         univariate.fit(column_values)
         self.cdfs.append(univariate)