import numpy as np
import pytest

from sklego.common import flatten
from sklego.mixture import GMMClassifier, BayesianGMMClassifier
from tests.conftest import general_checks, nonmeta_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_non_transformer_estimators_n_iter",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected estimator check on each GMM-based classifier.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    # A fresh default instance is built per class, right before its check.
    for classifier_cls in (GMMClassifier, BayesianGMMClassifier):
        classifier = classifier_cls()
        test_fn(classifier_cls.__name__, classifier)


def test_obvious_usecase():
    """GMMClassifier must label two far-apart Gaussian blobs without any error."""
    blob_shape = (100, 2)
    # Draw order matters for the random stream: first the blob around -10,
    # then the blob around +10.
    low_blob = np.random.normal(-10, 1, blob_shape)
    high_blob = np.random.normal(10, 1, blob_shape)
    X = np.concatenate([low_blob, high_blob])
    y = np.concatenate([np.zeros(100), np.ones(100)])

    predicted = GMMClassifier().fit(X, y).predict(X)
    assert (predicted == y).all()
Ejemplo n.º 2
0
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.dummy import DummyRegressor

from sklego.common import flatten
from sklego.meta import GroupedPredictor
from sklego.datasets import load_chicken

from tests.conftest import general_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks]),
        exclude=[
            # These checks make no sense here: at least two columns are
            # always needed (one for the group, one for the value).
            "check_fit1d",
            "check_fit2d_predict1d",
            "check_fit2d_1feature",
            "check_transformer_data_not_an_array",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one general check on a GroupedPredictor built with ``use_global_model=True``.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    grouped_model = GroupedPredictor(
        estimator=LinearRegression(),
        groups=0,
        use_global_model=True,
    )
    check_name = GroupedPredictor.__name__ + "_fallback"
    test_fn(check_name, grouped_model)
from sklego.meta import ZeroInflatedRegressor
from sklego.testing import check_shape_remains_same_regressor
from tests.conftest import general_checks, select_tests, regressor_checks


@pytest.mark.parametrize("test_fn", [check_shape_remains_same_regressor])
def test_zir(test_fn):
    """Run ``check_shape_remains_same_regressor`` on an extra-trees ZeroInflatedRegressor.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    # Both sub-models are seeded with random_state=0.
    zero_classifier = ExtraTreesClassifier(random_state=0)
    value_regressor = ExtraTreesRegressor(random_state=0)
    zir = ZeroInflatedRegressor(
        classifier=zero_classifier,
        regressor=value_regressor,
    )
    test_fn(ZeroInflatedRegressor.__name__, zir)


@pytest.mark.parametrize(
    "test_fn",
    select_tests(flatten([general_checks, regressor_checks])),
)
def test_estimator_checks(test_fn):
    """Run one general/regressor check on an extra-trees ZeroInflatedRegressor.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    estimator_name = ZeroInflatedRegressor.__name__
    zir = ZeroInflatedRegressor(
        classifier=ExtraTreesClassifier(random_state=0),
        regressor=ExtraTreesRegressor(random_state=0),
    )
    test_fn(estimator_name, zir)


def test_zero_inflated_example():
    """Generate a seeded synthetic dataset whose target is zero for most rows.

    NOTE(review): only the data generation is visible here; ``cross_val_score``
    is imported but not used in the visible lines, so the rest of the test
    (model fitting and assertions) appears to be cut off -- confirm against the
    full test file.
    """
    from sklearn.model_selection import cross_val_score

    # Fixed seed so the generated features are reproducible.
    np.random.seed(0)
    X = np.random.randn(10000, 4)
    # The boolean mask zeroes the target unless the first two features are
    # both positive; the remaining two features set the magnitude.
    y = ((X[:, 0] > 0) & (X[:, 1] > 0)) * np.abs(
        X[:, 2] * X[:, 3]**2)  # many zeroes here, in about 75% of the cases.
Ejemplo n.º 4
0
from sklearn.pipeline import Pipeline
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.meta import OutlierRemover
from sklego.mixture import GMMOutlierDetector


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        # ``check_transformers_unfitted`` used to be listed twice, which made
        # pytest run the identical check two times; it is listed once now.
        estimator_checks.check_transformers_unfitted,
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit2d_1feature,
        estimator_checks.check_fit1d,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_set_params,
        estimator_checks.check_dont_overwrite_parameters,
    ]),
)
def test_estimator_checks(test_fn):
    """Run one scikit-learn check on OutlierRemover with two different detectors.

    The check is applied first to a remover wrapping ``GMMOutlierDetector`` and
    then to one wrapping ``IsolationForest``; both are built with ``refit=True``.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    gmm_remover = OutlierRemover(outlier_detector=GMMOutlierDetector(),
                                 refit=True)
    test_fn(OutlierRemover.__name__, gmm_remover)

    isolation_forest_remover = OutlierRemover(
        outlier_detector=IsolationForest(), refit=True)
    test_fn(OutlierRemover.__name__, isolation_forest_remover)
import numpy as np
import pytest

from sklego.common import flatten
from sklego.linear_model import ProbWeightRegression
from tests.conftest import nonmeta_checks, regressor_checks, general_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, regressor_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_sample_weights_list",
            "check_sample_weights_pandas_series",
        ],
    ),
)
@pytest.mark.cvxpy
def test_estimator_checks(test_fn):
    """Run one selected check on ProbWeightRegression with and without ``non_negative``.

    The name passed to the check carries a suffix identifying the variant, so
    a failure message points at the right configuration.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    regr_min_zero = ProbWeightRegression(non_negative=True)
    test_fn(ProbWeightRegression.__name__ + "_min_zero_true", regr_min_zero)

    regr_not_min_zero = ProbWeightRegression(non_negative=False)
    # The suffix used to read "_min_zero_true_false" (copy-paste slip), which
    # mislabelled the non_negative=False variant in check output.
    test_fn(ProbWeightRegression.__name__ + "_min_zero_false", regr_not_min_zero)


@pytest.mark.cvxpy
def test_shape_trained_model(random_xy_dataset_regr):
    """Unpack the regression fixture and build a default ProbWeightRegression.

    NOTE(review): no ``fit`` call or assertion is visible after the model is
    created; the test body appears to be cut off -- confirm against the full
    test file.
    """
    X, y = random_xy_dataset_regr
    mod_no_intercept = ProbWeightRegression()
Ejemplo n.º 6
0
import numpy as np
import pandas as pd
import pytest

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import general_checks, nonmeta_checks, select_tests, outlier_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, outlier_checks]),
        exclude=["check_sample_weights_invariance", "check_outliers_train"],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected check on both GMM outlier detectors, once per threshold method.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    configurations = (
        (GMMOutlierDetector, "_quantile", {"threshold": 0.999, "method": "quantile"}),
        (GMMOutlierDetector, "_stddev", {"threshold": 2, "method": "stddev"}),
        (BayesianGMMOutlierDetector, "_quantile", {"threshold": 0.999, "method": "quantile"}),
        (BayesianGMMOutlierDetector, "_stddev", {"threshold": 2, "method": "stddev"}),
    )
    for detector_cls, suffix, params in configurations:
        # Each detector is instantiated only when its turn comes.
        detector = detector_cls(**params)
        test_fn(detector_cls.__name__ + suffix, detector)
Ejemplo n.º 7
0
import pytest
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, Ridge, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


from sklego.common import flatten
from sklego.meta import DecayEstimator
from tests.conftest import general_checks, classifier_checks, regressor_checks, nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([general_checks, nonmeta_checks, regressor_checks]),
)
def test_estimator_checks_regression(test_fn):
    """Run one general/non-meta/regressor check on DecayEstimator wrapping LinearRegression.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    decayed_regressor = DecayEstimator(LinearRegression())
    test_fn(DecayEstimator.__name__, decayed_regressor)


@pytest.mark.parametrize(
    "test_fn",
    flatten([general_checks, nonmeta_checks, classifier_checks]),
)
def test_estimator_checks_classification(test_fn):
    """Run one general/non-meta/classifier check on DecayEstimator wrapping LogisticRegression.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    base_classifier = LogisticRegression(solver='lbfgs')
    decayed_classifier = DecayEstimator(base_classifier)
    test_fn(DecayEstimator.__name__, decayed_classifier)
Ejemplo n.º 8
0
import numpy as np
import pandas as pd
import pytest

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import general_checks, nonmeta_checks, select_tests, outlier_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, outlier_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_outliers_train",
            "check_sample_weights_list",
            "check_sample_weights_pandas_series",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected check on every (detector class, threshold method) combination.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    # Threshold value that goes with each thresholding method.
    method_thresholds = (("quantile", 0.999), ("stddev", 2))

    for detector_cls in (GMMOutlierDetector, BayesianGMMOutlierDetector):
        for method, threshold in method_thresholds:
            detector = detector_cls(threshold=threshold, method=method)
            test_fn(detector_cls.__name__ + "_" + method, detector)
Ejemplo n.º 9
0
from pandas.tests.extension.numpy_.test_numpy_nested import np
from sklearn import clone
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.utils import check_X_y

from sklego.common import flatten
from sklego.meta import EstimatorTransformer
from tests.conftest import transformer_checks, nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([transformer_checks, nonmeta_checks, general_checks]),
)
def test_estimator_checks(test_fn):
    """Run one transformer/non-meta/general check on EstimatorTransformer(LinearRegression()).

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    wrapped_regression = EstimatorTransformer(LinearRegression())
    test_fn(EstimatorTransformer.__name__, wrapped_regression)


def test_values_uniform(random_xy_dataset_clf):
    """EstimatorTransformer around a DummyClassifier must output a single column.

    :param random_xy_dataset_clf: fixture yielding an ``(X, y)`` pair.
    """
    raw_X, raw_y = random_xy_dataset_clf
    X, y = check_X_y(raw_X, raw_y)

    dummy = DummyClassifier(strategy='most_frequent')
    # The transformer gets a clone, so ``dummy`` itself is left unfitted.
    wrapped = EstimatorTransformer(clone(dummy))
    output = wrapped.fit(X, y).transform(X)

    expected_shape = (y.shape[0], 1)
    assert output.shape == expected_shape
Ejemplo n.º 10
0
from sklearn.utils.estimator_checks import check_transformers_unfitted

from sklego.common import flatten
from sklego.preprocessing import RandomAdder
from tests.conftest import nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            nonmeta_checks,
            # Transformer checks
            check_transformers_unfitted,
            # General checks
            estimator_checks.check_fit2d_predict1d,
            estimator_checks.check_fit2d_1sample,
            estimator_checks.check_fit2d_1feature,
            estimator_checks.check_fit1d,
            estimator_checks.check_get_params_invariance,
            estimator_checks.check_set_params,
            estimator_checks.check_dict_unchanged,
            estimator_checks.check_dont_overwrite_parameters,
        ]
    ),
)
def test_estimator_checks(test_fn):
    """Run one of the listed checks on a default RandomAdder.

    Checks that are intentionally not part of the list:

    * ``check_methods_subset_invariance``: since noise is added, the method is
      not invariant on a subset.
    * ``check_transformer_data_not_an_array``: tests with ``NotAnArray`` as X,
      for which there is no hashing function.
    * ``check_transformer_general``: tests with lists as X, for which there is
      no hashing function.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    test_fn(RandomAdder.__name__, RandomAdder())

Ejemplo n.º 11
0
import pytest
import numpy as np
from sklearn import clone
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.utils import check_X_y

from sklego.common import flatten
from sklego.meta import EstimatorTransformer
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize("test_fn", flatten([transformer_checks, general_checks]))
def test_estimator_checks(test_fn):
    """Run one transformer/general check on EstimatorTransformer(LinearRegression()).

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    transformer_under_test = EstimatorTransformer(LinearRegression())
    test_fn(EstimatorTransformer.__name__, transformer_under_test)


def test_values_uniform(random_xy_dataset_clf):
    """EstimatorTransformer output must match the wrapped DummyClassifier's predictions.

    :param random_xy_dataset_clf: fixture yielding an ``(X, y)`` pair.
    """
    raw_X, raw_y = random_xy_dataset_clf
    X, y = check_X_y(raw_X, raw_y)

    dummy = DummyClassifier(strategy="most_frequent")
    # The transformer works on a clone; ``dummy`` is fitted separately below.
    wrapped = EstimatorTransformer(clone(dummy))
    output = wrapped.fit(X, y).transform(X)

    n_samples = y.shape[0]
    assert output.shape == (n_samples, 1)

    reference = dummy.fit(X, y).predict(X)
    assert np.all(output == reference)
Ejemplo n.º 12
0
 flatten([
     # non-meta checks
     estimator_checks.check_estimators_dtypes,
     estimator_checks.check_fit_score_takes_y,
     estimator_checks.check_dtype_object,
     estimator_checks.check_sample_weights_pandas_series,
     estimator_checks.check_sample_weights_list,
     estimator_checks.check_sample_weights_invariance,
     estimator_checks.check_estimators_fit_returns_self,
     estimator_checks.check_complex_data,
     estimator_checks.check_estimators_empty_data_messages,
     estimator_checks.check_pipeline_consistency,
     estimator_checks.check_estimators_nan_inf,
     estimator_checks.check_estimators_overwrite_params,
     estimator_checks.check_estimator_sparse_data,
     estimator_checks.check_estimators_pickle,
     # general checks
     estimator_checks.check_fit2d_predict1d,
     estimator_checks.check_methods_subset_invariance,
     estimator_checks.check_fit2d_1sample,
     estimator_checks.check_fit2d_1feature,
     estimator_checks.check_fit1d,
     estimator_checks.check_get_params_invariance,
     estimator_checks.check_set_params,
     estimator_checks.check_dict_unchanged,
     estimator_checks.check_dont_overwrite_parameters,
     # outlier_checks
     # estimator_checks.check_outliers_fit_predict,
     # estimator_checks.check_outliers_train
     estimator_checks.check_classifier_data_not_an_array,
     estimator_checks.check_estimators_unfitted,
 ]),
Ejemplo n.º 13
0
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        transformer_checks,
        general_checks,
        # The entries below are the non-meta checks, spelled out one by one.
        estimator_checks.check_estimators_dtypes,
        estimator_checks.check_fit_score_takes_y,
        estimator_checks.check_dtype_object,
        estimator_checks.check_sample_weights_pandas_series,
        estimator_checks.check_sample_weights_list,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_estimators_fit_returns_self,
        estimator_checks.check_complex_data,
        estimator_checks.check_estimators_empty_data_messages,
        estimator_checks.check_pipeline_consistency,
        # estimator_checks.check_estimators_nan_inf is left out on purpose:
        # ColumnCapper works with nan/inf cells.
        estimator_checks.check_estimators_overwrite_params,
        estimator_checks.check_estimator_sparse_data,
        estimator_checks.check_estimators_pickle,
    ]),
)
def test_estimator_checks(test_fn):
    """Run one of the listed checks on a default ColumnCapper.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    capper = ColumnCapper()
    test_fn(ColumnCapper.__name__, capper)

Ejemplo n.º 14
0
from tests.conftest import nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        nonmeta_checks,
        check_shape_remains_same_regressor,
        # General checks
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit2d_1feature,
        estimator_checks.check_fit1d,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_set_params,
        estimator_checks.check_dict_unchanged,
        estimator_checks.check_dont_overwrite_parameters,
        # Regressor checks
        estimator_checks.check_regressor_data_not_an_array,
        estimator_checks.check_estimators_partial_fit_n_features,
        estimator_checks.check_regressors_no_decision_function,
        estimator_checks.check_supervised_y_2d,
        estimator_checks.check_supervised_y_no_nan,
        estimator_checks.check_regressors_int,
        estimator_checks.check_estimators_unfitted,
    ]))
def test_estimator_checks(test_fn):
    """Parametrized estimator checks for RandomRegressor.

    NOTE(review): only the construction of the ``strategy="normal"`` regressor
    is visible; the call to ``test_fn`` appears to be cut off -- confirm
    against the full test file.

    :param test_fn: check callable supplied by the parametrization above.
    """
    # Tests that are skipped:
    # 'check_methods_subset_invariance': Since we add noise, the method is not invariant on a subset
    # 'check_regressors_train': score is not always greater than 0.5 due to randomness
    regr_normal = RandomRegressor(strategy="normal")
import numpy as np
import pytest
from sklearn.cluster import DBSCAN
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge, LogisticRegression

from sklego.common import flatten
from sklego.meta import SubjectiveClassifier
from tests.conftest import general_checks, classifier_checks


@pytest.mark.parametrize("test_fn", flatten([general_checks, classifier_checks]))
def test_estimator_checks_classification(test_fn):
    """Run one general/classifier check on SubjectiveClassifier with a suitable prior.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    # Some of the sklearn checkers generate random y data with 3 classes, so
    # the prior needs to contain these classes.
    prior = {0: 0.7, 1: 0.2, 2: 0.1}
    if test_fn.__name__ == 'check_classifiers_classes':
        # Nonsensical prior whose only purpose is to make this sklearn check pass.
        prior = {'one': 0.1, 'two': 0.1, 'three': 0.1, -1: 0.1, 1: 0.6}

    subjective = SubjectiveClassifier(LogisticRegression(), prior)
    test_fn(SubjectiveClassifier.__name__, subjective)


@pytest.mark.parametrize(
Ejemplo n.º 16
0
import numpy as np
import pytest

from sklego.common import flatten
from sklego.linear_model import LowessRegression
from tests.conftest import nonmeta_checks, regressor_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([nonmeta_checks, general_checks, regressor_checks]),
)
def test_estimator_checks(test_fn):
    """Run one non-meta/general/regressor check on a default LowessRegression.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    test_fn(LowessRegression.__name__, LowessRegression())


def test_obvious_usecase():
    """LowessRegression must reproduce a constant target on its own training grid."""
    grid = np.linspace(0, 10, 100)
    X = grid.reshape(-1, 1)
    y = np.ones(grid.shape)

    fitted = LowessRegression().fit(X, y)
    close_enough = np.isclose(y, fitted.predict(X))
    assert close_enough.all()
Ejemplo n.º 17
0
import numpy as np
import pytest
from sklearn.model_selection import train_test_split

from sklego.common import flatten
from sklego.preprocessing import RandomAdder


from tests.conftest import select_tests, transformer_checks, nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, transformer_checks, nonmeta_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_methods_subset_invariance",
            "check_transformer_data_not_an_array",
            "check_transformer_general",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected check on a default RandomAdder.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    test_fn(RandomAdder.__name__, RandomAdder())


def test_dtype_regression(random_xy_dataset_regr):
    """RandomAdder output on the regression fixture must have a float64 dtype.

    :param random_xy_dataset_regr: fixture yielding an ``(X, y)`` pair.
    """
    X, y = random_xy_dataset_regr
    # ``np.float`` was just an alias of the builtin ``float`` (i.e. float64);
    # it was deprecated in NumPy 1.20 and removed in 1.24, so the explicit
    # dtype is used to keep the same comparison working on current NumPy.
    assert RandomAdder().fit(X, y).transform(X).dtype == np.float64
Ejemplo n.º 18
0
import pytest
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import LocalOutlierFactor

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector
from sklego.meta import OutlierClassifier

from tests.conftest import general_checks, select_tests


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks]),
        exclude=["check_sample_weights_invariance"],
    ),
)
def test_estimator_checks(test_fn):
    """Run one general check on OutlierClassifier wrapping a quantile GMM detector.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    detector = GMMOutlierDetector(threshold=0.999, method="quantile")
    wrapped_detector = OutlierClassifier(detector)
    test_fn('OutlierClassifier', wrapped_detector)


@pytest.fixture
def dataset():
    """Return a reproducible (2000, 2) array of standard-normal samples."""
    # Seed the global NumPy RNG so every test sees the same data.
    np.random.seed(42)
    samples = np.random.normal(0, 1, (2000, 2))
    return samples

Ejemplo n.º 19
0
import pytest
import numpy as np
from cvxpy import SolverError
from sklearn.linear_model import LogisticRegression

from sklego.common import flatten
from sklego.linear_model import FairClassifier
from sklego.metrics import p_percent_score
from tests.conftest import general_checks, nonmeta_checks, classifier_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten([general_checks, nonmeta_checks, classifier_checks]),
)
def test_standard_checks(test_fn):
    """Run one general/non-meta/classifier check on an unconstrained FairClassifier.

    The classifier is built with ``covariance_threshold=None`` and column 0 as
    the sensitive column, which is also used for training.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    settings = dict(
        covariance_threshold=None,
        C=1,
        penalty="none",
        sensitive_cols=[0],
        train_sensitive_cols=True,
    )
    fair_classifier = FairClassifier(**settings)
    test_fn(FairClassifier.__name__, fair_classifier)


def _test_same(dataset):
    """Helper taking an ``(X, y)`` pair; returns True early for single-column X.

    NOTE(review): only the early-return guard is visible; the comparison this
    helper presumably performs afterwards appears to be cut off -- confirm
    against the full test file.

    :param dataset: pair ``(X, y)``; ``X`` must expose ``shape``.
    """
    X, y = dataset
    if X.shape[1] == 1:
        # If we only have one column (which is also the sensitive one) we can't fit
        return True
Ejemplo n.º 20
0
import numpy as np
import pytest

from sklego.common import flatten
from sklego.mixture import GMMClassifier
from sklego.testing import check_shape_remains_same_classifier
from tests.conftest import nonmeta_checks, general_checks, classifier_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            nonmeta_checks,
            general_checks,
            classifier_checks,
            check_shape_remains_same_classifier,
        ]
    ),
)
def test_estimator_checks(test_fn):
    """Run one of the listed checks on a default GMMClassifier.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    test_fn(GMMClassifier.__name__, GMMClassifier())


def test_obvious_usecase():
    """Two clearly separated Gaussian clusters must be predicted exactly."""
    # The cluster centred on -10 is drawn before the one centred on +10.
    clusters = [
        np.random.normal(centre, 1, (100, 2))
        for centre in (-10, 10)
    ]
    X = np.concatenate(clusters)
    y = np.concatenate([np.zeros(100), np.ones(100)])

    model = GMMClassifier().fit(X, y)
    assert (model.predict(X) == y).all()


def test_value_error_threshold():
    X = np.concatenate([
        np.random.normal(-10, 1, (100, 2)),
Ejemplo n.º 21
0
import pytest
import numpy as np
import pandas as pd

from sklearn.utils.validation import FLOAT_DTYPES
from sklego.common import flatten
from sklego.preprocessing import ColumnCapper
from tests.conftest import select_tests, transformer_checks, general_checks, nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, transformer_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_estimators_nan_inf",
            "check_sample_weights_list",
            "check_sample_weights_pandas_series",
        ],
    ),
)
def test_estimator_checks(test_fn):
    """Run one selected check on a default ColumnCapper.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    capper = ColumnCapper()
    test_fn(ColumnCapper.__name__, capper)


def test_quantile_range():
    """Define helpers asserting that ColumnCapper rejects bad ``quantile_range`` values.

    NOTE(review): only the two helper definitions are visible; the calls that
    exercise them appear to be cut off -- confirm against the full test file.
    """
    def expect_type_error(quantile_range):
        # Constructing ColumnCapper with this argument must raise TypeError.
        with pytest.raises(TypeError):
            ColumnCapper(quantile_range)

    def expect_value_error(quantile_range):
        # Constructing ColumnCapper with this argument must raise ValueError.
        with pytest.raises(ValueError):
            ColumnCapper(quantile_range)
Ejemplo n.º 22
0

@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            # --- General checks ---
            # Left out because we only test for two classes:
            #   estimator_checks.check_fit2d_predict1d
            #   estimator_checks.check_methods_subset_invariance
            #   estimator_checks.check_dont_overwrite_parameters
            estimator_checks.check_fit2d_1sample,
            estimator_checks.check_fit2d_1feature,
            estimator_checks.check_fit1d,
            estimator_checks.check_get_params_invariance,
            estimator_checks.check_set_params,
            estimator_checks.check_dict_unchanged,
            # --- Classifier checks ---
            # Left out because we only test for two classes:
            #   estimator_checks.check_classifiers_classes
            #   estimator_checks.check_classifiers_train
            #   estimator_checks.check_supervised_y_2d
            estimator_checks.check_classifier_data_not_an_array,
            estimator_checks.check_classifiers_one_label,
            estimator_checks.check_estimators_partial_fit_n_features,
            estimator_checks.check_supervised_y_no_nan,
            estimator_checks.check_estimators_unfitted,
            estimator_checks.check_non_transformer_estimators_n_iter,
            estimator_checks.check_decision_proba_consistency,
        ]
    ),
)
def test_standard_checks(test_fn):
    """Run one of the listed checks on Thresholder(LogisticRegression(), threshold=0.5).

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    thresholder = Thresholder(LogisticRegression(), threshold=0.5)
    test_fn(Thresholder.__name__, thresholder)
Ejemplo n.º 23
0
import pytest

from sklego.common import flatten
from sklego.mixture import GMMClassifier, BayesianGMMClassifier
from sklego.testing import check_shape_remains_same_classifier
from tests.conftest import nonmeta_checks, general_checks, estimator_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            nonmeta_checks,
            general_checks,
            estimator_checks.check_classifier_data_not_an_array,
            estimator_checks.check_classifiers_one_label,
            estimator_checks.check_classifiers_classes,
            estimator_checks.check_estimators_partial_fit_n_features,
            estimator_checks.check_classifiers_train,
            estimator_checks.check_supervised_y_2d,
            estimator_checks.check_supervised_y_no_nan,
            estimator_checks.check_estimators_unfitted,
            # check_non_transformer_estimators_n_iter is left out: our method
            # does not have n_iter.
            estimator_checks.check_decision_proba_consistency,
            check_shape_remains_same_classifier,
        ]
    ),
)
def test_estimator_checks(test_fn):
    """Run one of the listed checks on each GMM-based classifier.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    for classifier_cls in (GMMClassifier, BayesianGMMClassifier):
        # Default-constructed instance, created just before its check runs.
        classifier = classifier_cls()
        test_fn(classifier_cls.__name__, classifier)


def test_obvious_usecase():
Ejemplo n.º 24
0
from sklego.preprocessing import InformationFilter
from tests.conftest import transformer_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            transformer_checks,
            general_checks,
            # The entries below are the non-meta checks, spelled out one by one.
            estimator_checks.check_estimators_dtypes,
            estimator_checks.check_fit_score_takes_y,
            estimator_checks.check_dtype_object,
            estimator_checks.check_sample_weights_pandas_series,
            estimator_checks.check_sample_weights_list,
            estimator_checks.check_sample_weights_invariance,
            estimator_checks.check_estimators_fit_returns_self,
            estimator_checks.check_complex_data,
            # estimator_checks.check_estimators_empty_data_messages is left
            # out: this won't work because we need to select a column.
            estimator_checks.check_pipeline_consistency,
            estimator_checks.check_estimators_nan_inf,
            estimator_checks.check_estimators_overwrite_params,
            estimator_checks.check_estimator_sparse_data,
            estimator_checks.check_estimators_pickle,
        ]
    ),
)
def test_estimator_checks(test_fn):
    """Run one of the listed checks on InformationFilter(columns=[0]).

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    information_filter = InformationFilter(columns=[0])
    test_fn(InformationFilter.__name__, information_filter)


def test_v_columns_orthogonal():
Ejemplo n.º 25
0
from sklego.common import flatten
from sklego.meta import GroupedEstimator
from sklego.datasets import load_chicken


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        estimator_checks.check_fit_score_takes_y,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_estimators_empty_data_messages,
        estimator_checks.check_estimators_nan_inf,
        estimator_checks.check_estimators_overwrite_params,
        estimator_checks.check_estimators_pickle,
        estimator_checks.check_fit2d_1sample,
        # estimator_checks.check_fit1d not tested because in 1d we cannot have both groups and data
        estimator_checks.check_dont_overwrite_parameters,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_sample_weights_list,
        estimator_checks.check_sample_weights_pandas_series,
        estimator_checks.check_set_params,
    ]),
)
def test_estimator_checks(test_fn):
    clf = GroupedEstimator(estimator=LinearRegression(),
                           groups=[0],
                           use_global_model=True)
    test_fn(GroupedEstimator.__name__ + "_fallback", clf)

    clf = GroupedEstimator(estimator=LinearRegression(),
Ejemplo n.º 26
0
import pytest
import numpy as np
from sklearn.linear_model import LogisticRegression

from sklego.common import flatten
from sklego.linear_model import EqualOpportunityClassifier
from sklego.metrics import equal_opportunity_score
from tests.conftest import general_checks, classifier_checks, select_tests, nonmeta_checks


@pytest.mark.parametrize(
    "test_fn",
    select_tests(
        flatten([general_checks, nonmeta_checks, classifier_checks]),
        exclude=[
            "check_sample_weights_invariance",
            "check_sample_weights_list",
            "check_sample_weights_pandas_series",
        ],
    ),
)
@pytest.mark.cvxpy
def test_standard_checks(test_fn):
    """Run one selected check on an unconstrained EqualOpportunityClassifier.

    The classifier is built with ``covariance_threshold=None`` and column 0 as
    the sensitive column, which is also used for training.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    settings = dict(
        covariance_threshold=None,
        positive_target=True,
        C=1,
        penalty="none",
        sensitive_cols=[0],
        train_sensitive_cols=True,
    )
    classifier = EqualOpportunityClassifier(**settings)
    test_fn(EqualOpportunityClassifier.__name__, classifier)

Ejemplo n.º 27
0
from sklego.common import flatten
from sklego.meta import GroupedEstimator
from sklego.datasets import load_chicken


@pytest.mark.parametrize(
    "test_fn",
    flatten([
        estimator_checks.check_fit_score_takes_y,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_estimators_empty_data_messages,
        estimator_checks.check_estimators_nan_inf,
        estimator_checks.check_estimators_overwrite_params,
        estimator_checks.check_estimators_pickle,
        estimator_checks.check_fit2d_predict1d,
        estimator_checks.check_fit2d_1sample,
        estimator_checks.check_fit1d,
        estimator_checks.check_dont_overwrite_parameters,
        estimator_checks.check_sample_weights_invariance,
        estimator_checks.check_get_params_invariance,
        estimator_checks.check_sample_weights_list,
        estimator_checks.check_sample_weights_pandas_series,
        estimator_checks.check_set_params,
    ]))
def test_estimator_checks(test_fn):
    clf = GroupedEstimator(estimator=LinearRegression(),
                           groups=[0],
                           use_fallback=True)
    test_fn(GroupedEstimator.__name__ + "_fallback", clf)

    clf = GroupedEstimator(estimator=LinearRegression(),
Ejemplo n.º 28
0
    """Test if fit_intercept and copy_X work."""
    X, y = _create_dataset(coefs, intercept, noise=2.0)
    imb = QuantileRegression(fit_intercept=False, copy_X=False)
    imb.fit(X, y)

    assert imb.intercept_ == 0.0


@pytest.mark.parametrize("test_fn", [check_shape_remains_same_regressor])
def test_quant(test_fn):
    """Run ``check_shape_remains_same_regressor`` on a default QuantileRegression.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    quantile_model = QuantileRegression()
    test_fn(QuantileRegression.__name__, quantile_model)


@pytest.mark.parametrize(
    "regr",
    [
        (QuantileRegression.__name__, QuantileRegression()),
        (
            QuantileRegression.__name__ + "_positive",
            QuantileRegression(positive=True),
        ),
        (
            QuantileRegression.__name__ + "_positive__no_intercept",
            QuantileRegression(positive=True, fit_intercept=False),
        ),
        (
            QuantileRegression.__name__ + "_no_intercept",
            QuantileRegression(fit_intercept=False),
        ),
        (
            QuantileRegression.__name__ + "_quantile",
            QuantileRegression(quantile=0.3),
        ),
    ],
)
@pytest.mark.parametrize(
    "test_fn",
    select_tests(flatten([general_checks, nonmeta_checks, regressor_checks])),
)
def test_estimator_checks(regr, test_fn):
    """Run one selected check on one QuantileRegression configuration.

    :param regr: ``(name, estimator)`` pair from the first parametrization.
    :param test_fn: check callable taking ``(name, estimator)``.
    """
    estimator_name, estimator = regr
    test_fn(estimator_name, estimator)
Ejemplo n.º 29
0
import numpy as np
import pandas as pd
import pytest
from sklearn.utils import estimator_checks

from sklego.common import flatten
from sklego.mixture import GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import nonmeta_checks, general_checks


@pytest.mark.parametrize(
    "test_fn",
    flatten(
        [
            nonmeta_checks,
            general_checks,
            # outlier checks
            estimator_checks.check_outliers_fit_predict,
            estimator_checks.check_classifier_data_not_an_array,
            estimator_checks.check_estimators_unfitted,
        ]
    ),
)
def test_estimator_checks(test_fn):
    """Run one of the listed checks on three GMM outlier-detector configurations.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    configurations = (
        (GMMOutlierDetector, '_quantile', {"threshold": 0.999, "method": "quantile"}),
        (GMMOutlierDetector, '_stddev', {"threshold": 2, "method": "stddev"}),
        (BayesianGMMOutlierDetector, '_quantile', {"threshold": 0.999, "method": "quantile"}),
    )
    for detector_cls, suffix, params in configurations:
        # Each detector is built only when its own check is about to run.
        detector = detector_cls(**params)
        test_fn(detector_cls.__name__ + suffix, detector)
from tests.conftest import nonmeta_checks, general_checks, transformer_checks


@pytest.fixture
def sample_matrix():
    """Return a reproducible (50, 10) matrix of standard-normal samples."""
    # Seed the global NumPy RNG so the matrix is identical on every run.
    np.random.seed(1313)
    matrix = np.random.normal(size=(50, 10))
    return matrix


@pytest.fixture
def sample_df(sample_matrix):
    """Return the ``sample_matrix`` fixture wrapped in a pandas DataFrame."""
    frame = pd.DataFrame(sample_matrix)
    return frame


@pytest.mark.parametrize(
    "test_fn",
    flatten([nonmeta_checks, general_checks, transformer_checks]),
)
def test_estimator_checks(test_fn):
    """Run one non-meta/general/transformer check on a default OrthogonalTransformer.

    :param test_fn: check callable taking ``(name, estimator)``.
    """
    orthogonalizer = OrthogonalTransformer()
    test_fn(OrthogonalTransformer.__name__, orthogonalizer)


def check_is_orthogonal(X, tolerance=10 ** -5):
    """
    Check if X is an column orthogonal matrix. If X is column orthogonal, then X.T * X equals the identity matrix
    :param X: Matrix to check
    :param tolerance: Tolerance for difference caused by rounding
    :raises: AssertionError if X is not orthogonal
    """
    diff_with_eye = np.dot(X.T, X) - np.eye(X.shape[1])

    if np.max(np.abs(diff_with_eye)) > tolerance: