import numpy as np
import pytest

from sklego.common import flatten
from sklego.mixture import GMMClassifier, BayesianGMMClassifier
from tests.conftest import general_checks, nonmeta_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_non_transformer_estimators_n_iter",
        ]
    )
)
def test_estimator_checks(test_fn):
    clf = GMMClassifier()
    test_fn(GMMClassifier.__name__, clf)
    clf = BayesianGMMClassifier()
    test_fn(BayesianGMMClassifier.__name__, clf)


def test_obvious_usecase():
    X = np.concatenate(
        [np.random.normal(-10, 1, (100, 2)), np.random.normal(10, 1, (100, 2))]
    )
    y = np.concatenate([np.zeros(100), np.ones(100)])
    assert (GMMClassifier().fit(X, y).predict(X) == y).all()
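

def test_proba_shape_sketch():
    # Sketch, not part of the original excerpt: both classifiers follow the
    # sklearn classifier API, so predict_proba should return one column per
    # class, with rows summing to one.
    X = np.concatenate(
        [np.random.normal(-10, 1, (100, 2)), np.random.normal(10, 1, (100, 2))]
    )
    y = np.concatenate([np.zeros(100), np.ones(100)])
    proba = GMMClassifier().fit(X, y).predict_proba(X)
    assert proba.shape == (200, 2)
    assert np.allclose(proba.sum(axis=1), 1.0)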
Example #2
import pandas as pd
import numpy as np
import pytest
from sklearn.linear_model import LinearRegression
from sklearn.dummy import DummyRegressor

from sklego.common import flatten
from sklego.meta import GroupedPredictor
from sklego.datasets import load_chicken

from tests.conftest import general_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks]),
        exclude=[
            # Nonsense checks because we always need at least two columns (group and value)
            "check_fit1d",
            "check_fit2d_predict1d",
            "check_fit2d_1feature",
            "check_transformer_data_not_an_array",
        ],
    ),
)
def test_estimator_checks(test_fn):
    clf = GroupedPredictor(
        estimator=LinearRegression(), groups=0, use_global_model=True
    )
    test_fn(GroupedPredictor.__name__ + "_fallback", clf)
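

def test_chickweight_sketch():
    # Sketch, not part of the original excerpt: the unused load_chicken import
    # above hints at the intended use case. Assuming load_chicken(as_frame=True)
    # returns the ChickWeight data with "diet", "time" and "weight" columns,
    # GroupedPredictor fits one LinearRegression per diet group.
    df = load_chicken(as_frame=True)
    mod = GroupedPredictor(estimator=LinearRegression(), groups=["diet"])
    mod.fit(df[["diet", "time"]], df["weight"])
    assert mod.predict(df[["diet", "time"]]).shape == (df.shape[0],)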
Example #3
import numpy as np
import pytest
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor

from sklego.common import flatten
from sklego.meta import ZeroInflatedRegressor
from sklego.testing import check_shape_remains_same_regressor
from tests.conftest import general_checks, select_tests, regressor_checks


@pytest.mark.parametrize("test_fn", [check_shape_remains_same_regressor])
def test_zir(test_fn):
    regr = ZeroInflatedRegressor(
        classifier=ExtraTreesClassifier(random_state=0),
        regressor=ExtraTreesRegressor(random_state=0))
    test_fn(ZeroInflatedRegressor.__name__, regr)


@pytest.mark.parametrize("test_fn",
                         select_tests(
                             flatten([general_checks, regressor_checks]), ))
def test_estimator_checks(test_fn):
    test_fn(
        ZeroInflatedRegressor.__name__,
        ZeroInflatedRegressor(classifier=ExtraTreesClassifier(random_state=0),
                              regressor=ExtraTreesRegressor(random_state=0)))


def test_zero_inflated_example():
    from sklearn.model_selection import cross_val_score

    np.random.seed(0)
    X = np.random.randn(10000, 4)
    y = ((X[:, 0] > 0) & (X[:, 1] > 0)) * np.abs(
        X[:, 2] * X[:, 3]**2)  # many zeroes here, in about 75% of the cases.
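    # Hedged completion sketch -- the excerpt cuts off here. The natural finish,
    # given the cross_val_score import above, is to show that the zero-inflated
    # meta-model beats the bare regressor on this data-generating process.
    zir = ZeroInflatedRegressor(
        classifier=ExtraTreesClassifier(random_state=0),
        regressor=ExtraTreesRegressor(random_state=0),
    )
    zir_score = np.mean(cross_val_score(zir, X, y))
    etr_score = np.mean(cross_val_score(ExtraTreesRegressor(random_state=0), X, y))
    assert zir_score > etr_score  # expected under this DGP, not guaranteed in general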
Example #4
import pytest
from sklearn.ensemble import IsolationForest
from sklearn.pipeline import Pipeline
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.meta import OutlierRemover
from sklego.mixture import GMMOutlierDetector


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        estimator_checks.check_transformers_unfitted,
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit2d_1feature,
        estimator_checks.check_fit1d,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_set_params,
        estimator_checks.check_dont_overwrite_parameters,
    ]),
)
def test_estimator_checks(test_fn):
    gmm_remover = OutlierRemover(outlier_detector=GMMOutlierDetector(),
                                 refit=True)
    test_fn(OutlierRemover.__name__, gmm_remover)

    isolation_forest_remover = OutlierRemover(
        outlier_detector=IsolationForest(), refit=True)
    test_fn(OutlierRemover.__name__, isolation_forest_remover)
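

def test_pipeline_sketch():
    # Sketch, not part of the original excerpt: the Pipeline import above hints
    # at the intended usage -- OutlierRemover filters rows out of the training
    # data, while transform on new data passes everything through.
    import numpy as np
    np.random.seed(42)
    X = np.random.normal(0, 1, (200, 2))
    pipe = Pipeline([
        ("remove", OutlierRemover(outlier_detector=GMMOutlierDetector(), refit=True)),
    ])
    assert pipe.fit_transform(X).shape[1] == 2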
Example #5
import numpy as np
import pytest

from sklego.common import flatten
from sklego.linear_model import ProbWeightRegression
from tests.conftest import nonmeta_checks, regressor_checks, general_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, regressor_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_sample_weights_list",
            "check_sample_weights_pandas_series"
        ]
    )
)
@pytest.mark.cvxpy
def test_estimator_checks(test_fn):
    regr_min_zero = ProbWeightRegression(non_negative=True)
    test_fn(ProbWeightRegression.__name__ + "_min_zero_true", regr_min_zero)
    regr_not_min_zero = ProbWeightRegression(non_negative=False)
    test_fn(ProbWeightRegression.__name__ + "_min_zero_false", regr_not_min_zero)


@pytest.mark.cvxpy
def test_shape_trained_model(random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    mod_no_intercept = ProbWeightRegression()
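    # Hedged completion sketch -- the excerpt cuts off here. Assuming the fitted
    # weights are exposed through the sklearn-style coef_ attribute, the natural
    # shape check would be:
    mod_no_intercept.fit(X, y)
    assert mod_no_intercept.coef_.shape == (X.shape[1],)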
Example #6
import numpy as np
import pandas as pd
import pytest

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import general_checks, nonmeta_checks, select_tests, outlier_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, outlier_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_outliers_train"
        ]
    )
)
def test_estimator_checks(test_fn):
    clf_quantile = GMMOutlierDetector(threshold=0.999, method="quantile")
    test_fn(GMMOutlierDetector.__name__ + "_quantile", clf_quantile)

    clf_stddev = GMMOutlierDetector(threshold=2, method="stddev")
    test_fn(GMMOutlierDetector.__name__ + "_stddev", clf_stddev)

    bayes_clf_quantile = BayesianGMMOutlierDetector(threshold=0.999, method="quantile")
    test_fn(BayesianGMMOutlierDetector.__name__ + "_quantile", bayes_clf_quantile)

    bayes_clf_stddev = BayesianGMMOutlierDetector(threshold=2, method="stddev")
    test_fn(BayesianGMMOutlierDetector.__name__ + "_stddev", bayes_clf_stddev)
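

def test_outlier_labels_sketch():
    # Sketch, not part of the original excerpt: the detectors follow the usual
    # sklearn outlier convention (+1 inlier, -1 outlier, assumed here), so a
    # fitted model should only ever emit those two labels.
    np.random.seed(0)
    X = np.random.normal(0, 1, (5000, 2))
    preds = GMMOutlierDetector(threshold=0.999, method="quantile").fit(X).predict(X)
    assert set(np.unique(preds)) <= {-1, 1}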
Example #7
import pytest
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


from sklego.common import flatten
from sklego.meta import DecayEstimator
from tests.conftest import general_checks, classifier_checks, regressor_checks, nonmeta_checks


@pytest.mark.parametrize("test_fn", flatten([
    general_checks,
    nonmeta_checks,
    regressor_checks
]))
def test_estimator_checks_regression(test_fn):
    trf = DecayEstimator(LinearRegression())
    test_fn(DecayEstimator.__name__, trf)


@pytest.mark.parametrize("test_fn", flatten([
    general_checks,
    nonmeta_checks,
    classifier_checks
]))
def test_estimator_checks_classification(test_fn):
    trf = DecayEstimator(LogisticRegression(solver='lbfgs'))
    test_fn(DecayEstimator.__name__, trf)
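

def test_decay_weights_sketch():
    # Sketch, not part of the original excerpt: DecayEstimator re-weights samples
    # so that later rows count more, assuming the wrapped model accepts
    # sample_weight (LinearRegression does) and that the `decay` keyword shown
    # here exists under this name.
    X = np.arange(100, dtype=float).reshape(-1, 1)
    y = np.arange(100, dtype=float)
    trf = DecayEstimator(LinearRegression(), decay=0.9)
    assert trf.fit(X, y).predict(X).shape == (100,)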
Example #8
import numpy as np
import pandas as pd
import pytest

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import general_checks, nonmeta_checks, select_tests, outlier_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(flatten([general_checks, nonmeta_checks, outlier_checks]),
                 exclude=[
                     "check_sample_weights_invariance", "check_outliers_train",
                     "check_sample_weights_list",
                     "check_sample_weights_pandas_series"
                 ]))
def test_estimator_checks(test_fn):
    clf_quantile = GMMOutlierDetector(threshold=0.999, method="quantile")
    test_fn(GMMOutlierDetector.__name__ + "_quantile", clf_quantile)

    clf_stddev = GMMOutlierDetector(threshold=2, method="stddev")
    test_fn(GMMOutlierDetector.__name__ + "_stddev", clf_stddev)

    bayes_clf_quantile = BayesianGMMOutlierDetector(threshold=0.999,
                                                    method="quantile")
    test_fn(BayesianGMMOutlierDetector.__name__ + "_quantile",
            bayes_clf_quantile)

    bayes_clf_stddev = BayesianGMMOutlierDetector(threshold=2, method="stddev")
    test_fn(BayesianGMMOutlierDetector.__name__ + "_stddev", bayes_clf_stddev)
Example #9
import pytest
import numpy as np
from sklearn import clone
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.utils import check_X_y

from sklego.common import flatten
from sklego.meta import EstimatorTransformer
from tests.conftest import transformer_checks, nonmeta_checks, general_checks


@pytest.mark.parametrize("test_fn",
                         flatten([
                             transformer_checks,
                             nonmeta_checks,
                             general_checks,
                         ]))
def test_estimator_checks(test_fn):
    trf = EstimatorTransformer(LinearRegression())
    test_fn(EstimatorTransformer.__name__, trf)


def test_values_uniform(random_xy_dataset_clf):
    X, y = random_xy_dataset_clf
    X, y = check_X_y(X, y)
    clf = DummyClassifier(strategy='most_frequent')
    transformer = EstimatorTransformer(clone(clf))
    transformed = transformer.fit(X, y).transform(X)

    assert transformed.shape == (y.shape[0], 1)
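

def test_feature_union_sketch(random_xy_dataset_clf):
    # Sketch, not part of the original excerpt: the FeatureUnion import above
    # hints at the intended pattern of stacking model predictions side by side
    # as new feature columns.
    X, y = check_X_y(*random_xy_dataset_clf)
    union = FeatureUnion([
        ("clf", EstimatorTransformer(DummyClassifier(strategy="most_frequent"))),
        ("regr", EstimatorTransformer(Ridge())),
    ])
    assert union.fit(X, y).transform(X).shape == (y.shape[0], 2)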
Example #10
import pytest
from sklearn.utils import estimator_checks
from sklearn.utils.estimator_checks import check_transformers_unfitted

from sklego.common import flatten
from sklego.preprocessing import RandomAdder
from tests.conftest import nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        nonmeta_checks,
        # Transformer checks
        check_transformers_unfitted,
        # General checks
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit2d_1feature,
        estimator_checks.check_fit1d,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_set_params,
        estimator_checks.check_dict_unchanged,
        estimator_checks.check_dont_overwrite_parameters
    ]))
def test_estimator_checks(test_fn):
    # Tests that are skipped:
    # check_methods_subset_invariance: Since we add noise, the method is not invariant on a subset
    # check_transformer_data_not_an_array: tests with `NotAnArray` as X for which we don't have a hashing function
    # check_transformer_general: tests with lists as X for which we don't have a hashing function
    adder = RandomAdder()
    test_fn(RandomAdder.__name__, adder)

Example #11
import pytest
import numpy as np
from sklearn import clone
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.utils import check_X_y

from sklego.common import flatten
from sklego.meta import EstimatorTransformer
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize(
    "test_fn", flatten([transformer_checks, general_checks])
)
def test_estimator_checks(test_fn):
    trf = EstimatorTransformer(LinearRegression())
    test_fn(EstimatorTransformer.__name__, trf)


def test_values_uniform(random_xy_dataset_clf):
    X, y = random_xy_dataset_clf
    X, y = check_X_y(X, y)
    clf = DummyClassifier(strategy="most_frequent")
    transformer = EstimatorTransformer(clone(clf))
    transformed = transformer.fit(X, y).transform(X)

    assert transformed.shape == (y.shape[0], 1)
    assert np.all(transformed == clf.fit(X, y).predict(X))
Example #12
flatten([
    # non-meta checks
    estimator_checks.check_estimators_dtypes,
    estimator_checks.check_fit_score_takes_y,
    estimator_checks.check_dtype_object,
    estimator_checks.check_sample_weights_pandas_series,
    estimator_checks.check_sample_weights_list,
    estimator_checks.check_sample_weights_invariance,
    estimator_checks.check_estimators_fit_returns_self,
    estimator_checks.check_complex_data,
    estimator_checks.check_estimators_empty_data_messages,
    estimator_checks.check_pipeline_consistency,
    estimator_checks.check_estimators_nan_inf,
    estimator_checks.check_estimators_overwrite_params,
    estimator_checks.check_estimator_sparse_data,
    estimator_checks.check_estimators_pickle,
    # general checks
    estimator_checks.check_fit2d_predict1d,
    estimator_checks.check_methods_subset_invariance,
    estimator_checks.check_fit2d_1sample,
    estimator_checks.check_fit2d_1feature,
    estimator_checks.check_fit1d,
    estimator_checks.check_get_params_invariance,
    estimator_checks.check_set_params,
    estimator_checks.check_dict_unchanged,
    estimator_checks.check_dont_overwrite_parameters,
    # outlier checks (commented out in the original)
    # estimator_checks.check_outliers_fit_predict,
    # estimator_checks.check_outliers_train,
    # classifier checks
    estimator_checks.check_classifier_data_not_an_array,
    estimator_checks.check_estimators_unfitted,
])
Example #13
import pytest
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.preprocessing import ColumnCapper
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            transformer_checks,
            general_checks,
            # nonmeta_checks
            estimator_checks.check_estimators_dtypes,
            estimator_checks.check_fit_score_takes_y,
            estimator_checks.check_dtype_object,
            estimator_checks.check_sample_weights_pandas_series,
            estimator_checks.check_sample_weights_list,
            estimator_checks.check_sample_weights_invariance,
            estimator_checks.check_estimators_fit_returns_self,
            estimator_checks.check_complex_data,
            estimator_checks.check_estimators_empty_data_messages,
            estimator_checks.check_pipeline_consistency,
            # ColumnCapper works with nan/inf cells
            # estimator_checks.check_estimators_nan_inf,
            estimator_checks.check_estimators_overwrite_params,
            estimator_checks.check_estimator_sparse_data,
            estimator_checks.check_estimators_pickle,
        ]
    ),
)
def test_estimator_checks(test_fn):
    test_fn(ColumnCapper.__name__, ColumnCapper())

Example #14
import pytest
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.dummy import RandomRegressor
from sklego.testing import check_shape_remains_same_regressor
from tests.conftest import nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        nonmeta_checks,
        check_shape_remains_same_regressor,
        # General checks
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit2d_1feature,
        estimator_checks.check_fit1d,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_set_params,
        estimator_checks.check_dict_unchanged,
        estimator_checks.check_dont_overwrite_parameters,
        # Regressor checks
        estimator_checks.check_regressor_data_not_an_array,
        estimator_checks.check_estimators_partial_fit_n_features,
        estimator_checks.check_regressors_no_decision_function,
        estimator_checks.check_supervised_y_2d,
        estimator_checks.check_supervised_y_no_nan,
        estimator_checks.check_regressors_int,
        estimator_checks.check_estimators_unfitted,
    ]))
def test_estimator_checks(test_fn):
    # Tests that are skipped:
    # 'check_methods_subset_invariance': Since we add noise, the method is not invariant on a subset
    # 'check_regressors_train': score is not always greater than 0.5 due to randomness
    regr_normal = RandomRegressor(strategy="normal")
    test_fn(RandomRegressor.__name__ + "_normal", regr_normal)
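    # Hedged completion sketch -- the excerpt cuts off here. RandomRegressor also
    # documents a "uniform" strategy, which the same checks can exercise.
    regr_uniform = RandomRegressor(strategy="uniform")
    test_fn(RandomRegressor.__name__ + "_uniform", regr_uniform)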
Example #15
import numpy as np
import pytest
from sklearn.cluster import DBSCAN
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge, LogisticRegression

from sklego.common import flatten
from sklego.meta import SubjectiveClassifier
from tests.conftest import general_checks, classifier_checks


@pytest.mark.parametrize("test_fn",
                         flatten([general_checks, classifier_checks]))
def test_estimator_checks_classification(test_fn):
    if test_fn.__name__ == 'check_classifiers_classes':
        prior = {
            'one': 0.1,
            'two': 0.1,
            'three': 0.1,
            -1: 0.1,
            1: 0.6
        }  # nonsensical prior to make sklearn check pass
    else:
        prior = {0: 0.7, 1: 0.2, 2: 0.1}

    # Some of the sklearn checkers generate random y data with 3 classes, so prior needs to have these classes
    estimator = SubjectiveClassifier(LogisticRegression(), prior)
    test_fn(SubjectiveClassifier.__name__, estimator)
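

def test_prior_shape_sketch():
    # Sketch, not part of the original excerpt: SubjectiveClassifier reweights
    # the inner model's posterior by the supplied prior, so a two-class prior
    # should yield one probability column per class.
    np.random.seed(0)
    X = np.random.normal(0, 1, (200, 2))
    y = (X[:, 0] > 0).astype(int)
    clf = SubjectiveClassifier(LogisticRegression(), {0: 0.8, 1: 0.2}).fit(X, y)
    assert clf.predict_proba(X).shape == (200, 2)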


Example #16
import numpy as np
import pytest

from sklego.common import flatten
from sklego.linear_model import LowessRegression
from tests.conftest import nonmeta_checks, regressor_checks, general_checks


@pytest.mark.parametrize(
    "test_fn", flatten([nonmeta_checks, general_checks, regressor_checks]))
def test_estimator_checks(test_fn):
    lowess = LowessRegression()
    test_fn(LowessRegression.__name__, lowess)


def test_obvious_usecase():
    x = np.linspace(0, 10, 100)
    X = x.reshape(-1, 1)
    y = np.ones(x.shape)
    y_pred = LowessRegression().fit(X, y).predict(X)
    assert np.isclose(y, y_pred).all()
Example #17
import numpy as np
import pytest
from sklearn.model_selection import train_test_split

from sklego.common import flatten
from sklego.preprocessing import RandomAdder


from tests.conftest import select_tests, transformer_checks, nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, transformer_checks, nonmeta_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_methods_subset_invariance",
            "check_transformer_data_not_an_array",
            "check_transformer_general"
        ]
    )
)
def test_estimator_checks(test_fn):
    adder = RandomAdder()
    test_fn(RandomAdder.__name__, adder)


def test_dtype_regression(random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    assert RandomAdder().fit(X, y).transform(X).dtype == np.float64
Example #18
import pytest
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import LocalOutlierFactor

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector
from sklego.meta import OutlierClassifier

from tests.conftest import general_checks, select_tests


@pytest.mark.parametrize("test_fn",
                         select_tests(flatten([general_checks]),
                                      exclude=[
                                          "check_sample_weights_invariance",
                                      ]))
def test_estimator_checks(test_fn):
    mod_quantile = GMMOutlierDetector(threshold=0.999, method="quantile")
    clf_quantile = OutlierClassifier(mod_quantile)
    test_fn('OutlierClassifier', clf_quantile)


@pytest.fixture
def dataset():
    np.random.seed(42)
    return np.random.normal(0, 1, (2000, 2))

Example #19
import pytest
import numpy as np
from cvxpy import SolverError
from sklearn.linear_model import LogisticRegression

from sklego.common import flatten
from sklego.linear_model import FairClassifier
from sklego.metrics import p_percent_score
from tests.conftest import general_checks, nonmeta_checks, classifier_checks


@pytest.mark.parametrize(
    "test_fn", flatten([general_checks, nonmeta_checks, classifier_checks])
)
def test_standard_checks(test_fn):
    trf = FairClassifier(
        covariance_threshold=None,
        C=1,
        penalty="none",
        sensitive_cols=[0],
        train_sensitive_cols=True,
    )
    test_fn(FairClassifier.__name__, trf)


def _test_same(dataset):
    X, y = dataset
    if X.shape[1] == 1:
        # If we only have one column (which is also the sensitive one) we can't fit
        return True
Example #20
import numpy as np
import pytest

from sklego.common import flatten
from sklego.mixture import GMMClassifier
from sklego.testing import check_shape_remains_same_classifier
from tests.conftest import nonmeta_checks, general_checks, classifier_checks


@pytest.mark.parametrize("test_fn",
                         flatten([
                             nonmeta_checks, general_checks, classifier_checks,
                             check_shape_remains_same_classifier
                         ]))
def test_estimator_checks(test_fn):
    clf = GMMClassifier()
    test_fn(GMMClassifier.__name__, clf)


def test_obvious_usecase():
    X = np.concatenate([
        np.random.normal(-10, 1, (100, 2)),
        np.random.normal(10, 1, (100, 2))
    ])
    y = np.concatenate([np.zeros(100), np.ones(100)])
    assert (GMMClassifier().fit(X, y).predict(X) == y).all()


def test_value_error_threshold():
    X = np.concatenate([
        np.random.normal(-10, 1, (100, 2)),
Example #21
import pytest
import numpy as np
import pandas as pd

from sklearn.utils.validation import FLOAT_DTYPES
from sklego.common import flatten
from sklego.preprocessing import ColumnCapper
from tests.conftest import select_tests, transformer_checks, general_checks, nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(flatten([general_checks, nonmeta_checks, transformer_checks]),
                 exclude=[
                     "check_sample_weights_invariance",
                     "check_estimators_nan_inf", "check_sample_weights_list",
                     "check_sample_weights_pandas_series"
                 ]))
def test_estimator_checks(test_fn):
    test_fn(ColumnCapper.__name__, ColumnCapper())


def test_quantile_range():
    def expect_type_error(quantile_range):
        with pytest.raises(TypeError):
            ColumnCapper(quantile_range)

    def expect_value_error(quantile_range):
        with pytest.raises(ValueError):
            ColumnCapper(quantile_range)
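    # Hedged completion sketch -- the excerpt cuts off here. Plausible inputs for
    # the two helpers above (the exact cases in the original may differ):
    expect_type_error(quantile_range=1)
    expect_value_error(quantile_range=(-10, 90))
    expect_value_error(quantile_range=(90, 10))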
Example #22

import pytest
from sklearn.linear_model import LogisticRegression
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.meta import Thresholder


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        # GENERAL CHECKS #
        # estimator_checks.check_fit2d_predict1d -> we only test for two classes
        # estimator_checks.check_methods_subset_invariance -> we only test for two classes
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit2d_1feature,
        estimator_checks.check_fit1d,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_set_params,
        estimator_checks.check_dict_unchanged,
        # estimator_checks.check_dont_overwrite_parameters -> we only test for two classes
        # CLASSIFIER CHECKS #
        estimator_checks.check_classifier_data_not_an_array,
        estimator_checks.check_classifiers_one_label,
        # estimator_checks.check_classifiers_classes -> we only test for two classes
        estimator_checks.check_estimators_partial_fit_n_features,
        # estimator_checks.check_classifiers_train -> we only test for two classes
        # estimator_checks.check_supervised_y_2d -> we only test for two classes
        estimator_checks.check_supervised_y_no_nan,
        estimator_checks.check_estimators_unfitted,
        estimator_checks.check_non_transformer_estimators_n_iter,
        estimator_checks.check_decision_proba_consistency,
    ]),
)
def test_standard_checks(test_fn):
    trf = Thresholder(LogisticRegression(), threshold=0.5)
    test_fn(Thresholder.__name__, trf)
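

def test_threshold_equivalence_sketch():
    # Sketch, not part of the original excerpt: at threshold=0.5 the wrapper
    # should agree with the wrapped binary classifier's own argmax decision
    # (up to exact-0.5 ties).
    import numpy as np
    np.random.seed(0)
    X = np.random.normal(0, 1, (200, 2))
    y = (X[:, 0] > 0).astype(int)
    inner = LogisticRegression().fit(X, y)
    wrapped = Thresholder(LogisticRegression(), threshold=0.5).fit(X, y)
    assert (wrapped.predict(X) == inner.predict(X)).all()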
Example #23
import pytest

from sklego.common import flatten
from sklego.mixture import GMMClassifier, BayesianGMMClassifier
from sklego.testing import check_shape_remains_same_classifier
from sklearn.utils import estimator_checks
from tests.conftest import nonmeta_checks, general_checks


@pytest.mark.parametrize("test_fn", flatten([
    nonmeta_checks,
    general_checks,
    estimator_checks.check_classifier_data_not_an_array,
    estimator_checks.check_classifiers_one_label,
    estimator_checks.check_classifiers_classes,
    estimator_checks.check_estimators_partial_fit_n_features,
    estimator_checks.check_classifiers_train,
    estimator_checks.check_supervised_y_2d,
    estimator_checks.check_supervised_y_no_nan,
    estimator_checks.check_estimators_unfitted,
    # estimator_checks.check_non_transformer_estimators_n_iter, our method does not have n_iter
    estimator_checks.check_decision_proba_consistency,
    check_shape_remains_same_classifier
]))
def test_estimator_checks(test_fn):
    clf = GMMClassifier()
    test_fn(GMMClassifier.__name__, clf)
    clf = BayesianGMMClassifier()
    test_fn(BayesianGMMClassifier.__name__, clf)


Example #24
import pytest
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.preprocessing import InformationFilter
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        transformer_checks,
        general_checks,
        # nonmeta_checks
        estimator_checks.check_estimators_dtypes,
        estimator_checks.check_fit_score_takes_y,
        estimator_checks.check_dtype_object,
        estimator_checks.check_sample_weights_pandas_series,
        estimator_checks.check_sample_weights_list,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_estimators_fit_returns_self,
        estimator_checks.check_complex_data,
        # this won't work because we need to select a column
        # estimator_checks.check_estimators_empty_data_messages,
        estimator_checks.check_pipeline_consistency,
        estimator_checks.check_estimators_nan_inf,
        estimator_checks.check_estimators_overwrite_params,
        estimator_checks.check_estimator_sparse_data,
        estimator_checks.check_estimators_pickle,
    ]),
)
def test_estimator_checks(test_fn):
    test_fn(InformationFilter.__name__, InformationFilter(columns=[0]))


def test_v_columns_orthogonal():
Example #25
import pytest
from sklearn.linear_model import LinearRegression
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.meta import GroupedEstimator
from sklego.datasets import load_chicken


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        estimator_checks.check_fit_score_takes_y,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_estimators_empty_data_messages,
        estimator_checks.check_estimators_nan_inf,
        estimator_checks.check_estimators_overwrite_params,
        estimator_checks.check_estimators_pickle,
        estimator_checks.check_fit2d_1sample,
        # estimator_checks.check_fit1d not tested because in 1d we cannot have both groups and data
        estimator_checks.check_dont_overwrite_parameters,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_sample_weights_list,
        estimator_checks.check_sample_weights_pandas_series,
        estimator_checks.check_set_params,
    ]),
)
def test_estimator_checks(test_fn):
    clf = GroupedEstimator(estimator=LinearRegression(),
                           groups=[0],
                           use_global_model=True)
    test_fn(GroupedEstimator.__name__ + "_fallback", clf)

    clf = GroupedEstimator(estimator=LinearRegression(),
Example #26
import pytest
import numpy as np
from sklearn.linear_model import LogisticRegression

from sklego.common import flatten
from sklego.linear_model import EqualOpportunityClassifier
from sklego.metrics import equal_opportunity_score
from tests.conftest import general_checks, classifier_checks, select_tests, nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(flatten([general_checks, nonmeta_checks, classifier_checks]),
                 exclude=[
                     "check_sample_weights_invariance",
                     "check_sample_weights_list",
                     "check_sample_weights_pandas_series"
                 ]))
@pytest.mark.cvxpy
def test_standard_checks(test_fn):
    trf = EqualOpportunityClassifier(
        covariance_threshold=None,
        positive_target=True,
        C=1,
        penalty="none",
        sensitive_cols=[0],
        train_sensitive_cols=True,
    )
    test_fn(EqualOpportunityClassifier.__name__, trf)

Example #27
import pytest
from sklearn.linear_model import LinearRegression
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.meta import GroupedEstimator
from sklego.datasets import load_chicken


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        estimator_checks.check_fit_score_takes_y,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_estimators_empty_data_messages,
        estimator_checks.check_estimators_nan_inf,
        estimator_checks.check_estimators_overwrite_params,
        estimator_checks.check_estimators_pickle,
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit1d,
        estimator_checks.check_dont_overwrite_parameters,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_sample_weights_list,
        estimator_checks.check_sample_weights_pandas_series,
        estimator_checks.check_set_params,
    ]))
def test_estimator_checks(test_fn):
    clf = GroupedEstimator(estimator=LinearRegression(),
                           groups=[0],
                           use_fallback=True)
    test_fn(GroupedEstimator.__name__ + "_fallback", clf)

    clf = GroupedEstimator(estimator=LinearRegression(),
    """Test if fit_intercept and copy_X work."""
    X, y = _create_dataset(coefs, intercept, noise=2.0)
    imb = QuantileRegression(fit_intercept=False, copy_X=False)
    imb.fit(X, y)

    assert imb.intercept_ == 0.0


@pytest.mark.parametrize("test_fn", [check_shape_remains_same_regressor])
def test_quant(test_fn):
    regr = QuantileRegression()
    test_fn(QuantileRegression.__name__, regr)


@pytest.mark.parametrize(
    "regr", [(QuantileRegression.__name__, QuantileRegression()),
             (QuantileRegression.__name__ + "_positive",
              QuantileRegression(positive=True)),
             (QuantileRegression.__name__ + "_positive__no_intercept",
              QuantileRegression(positive=True, fit_intercept=False)),
             (QuantileRegression.__name__ + "_no_intercept",
              QuantileRegression(fit_intercept=False)),
             (QuantileRegression.__name__ + "_quantile",
              QuantileRegression(quantile=0.3))])
@pytest.mark.parametrize(
    "test_fn",
    select_tests(flatten([general_checks, nonmeta_checks,
                          regressor_checks]), ))
def test_estimator_checks(regr, test_fn):
    test_fn(*regr)
Example #29
import numpy as np
import pandas as pd
import pytest
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        nonmeta_checks,
        general_checks,
        # outlier checks
        estimator_checks.check_outliers_fit_predict,
        estimator_checks.check_classifier_data_not_an_array,
        estimator_checks.check_estimators_unfitted,
    ]))
def test_estimator_checks(test_fn):
    clf_quantile = GMMOutlierDetector(threshold=0.999, method="quantile")
    test_fn(GMMOutlierDetector.__name__ + '_quantile', clf_quantile)

    clf_stddev = GMMOutlierDetector(threshold=2, method="stddev")
    test_fn(GMMOutlierDetector.__name__ + '_stddev', clf_stddev)

    bayes_clf_quantile = BayesianGMMOutlierDetector(threshold=0.999,
                                                    method="quantile")
    test_fn(BayesianGMMOutlierDetector.__name__ + '_quantile',
            bayes_clf_quantile)
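    # Completion sketch -- the excerpt cuts off here; the stddev counterpart,
    # mirroring Example #8 above:
    bayes_clf_stddev = BayesianGMMOutlierDetector(threshold=2, method="stddev")
    test_fn(BayesianGMMOutlierDetector.__name__ + '_stddev', bayes_clf_stddev)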
Example #30
import numpy as np
import pandas as pd
import pytest

from sklego.common import flatten
from sklego.preprocessing import OrthogonalTransformer
from tests.conftest import nonmeta_checks, general_checks, transformer_checks


@pytest.fixture
def sample_matrix():
    np.random.seed(1313)
    return np.random.normal(size=(50, 10))


@pytest.fixture
def sample_df(sample_matrix):
    return pd.DataFrame(sample_matrix)


@pytest.mark.parametrize(
    "test_fn", flatten([nonmeta_checks, general_checks, transformer_checks])
)
def test_estimator_checks(test_fn):
    test_fn(OrthogonalTransformer.__name__, OrthogonalTransformer())


def check_is_orthogonal(X, tolerance=10 ** -5):
    """
    Check if X is a column-orthogonal matrix. If X is column orthogonal, then X.T * X equals the identity matrix
    :param X: Matrix to check
    :param tolerance: Tolerance for difference caused by rounding
    :raises: AssertionError if X is not orthogonal
    """
    diff_with_eye = np.dot(X.T, X) - np.eye(X.shape[1])

    if np.max(np.abs(diff_with_eye)) > tolerance:
        raise AssertionError("X is not column-orthogonal")