Beispiel #1
0
def test_non_transformer_estimators_n_iter():
    # Yield check_non_transformer_estimators_n_iter for each regressor,
    # classifier and clusterer whose default-constructed instance has a
    # ``max_iter`` attribute, so that n_iter can be verified to be at
    # least 1.

    # These models are dependent on external solvers like libsvm and
    # accessing the iter parameter is non-trivial.
    external_solver = ['Ridge', 'SVR', 'NuSVR', 'NuSVC', 'RidgeClassifier',
                       'SVC', 'RandomizedLasso', 'LogisticRegressionCV']
    # Tested in test_transformer_n_iter instead, together with every name
    # listed in CROSS_DECOMPOSITION.
    covered_elsewhere = ['LinearSVC', 'LogisticRegression']

    for est_type in ['regressor', 'classifier', 'cluster']:
        for name, Estimator in all_estimators(type_filter=est_type):
            # LassoLars stops early for the default alpha=1.0 for
            # the iris dataset, so alpha is forced to 0 for it.
            init_kwargs = {'alpha': 0.} if name == 'LassoLars' else {}
            estimator = Estimator(**init_kwargs)

            if not hasattr(estimator, "max_iter"):
                continue
            if name in external_solver:
                continue
            if name in CROSS_DECOMPOSITION or name in covered_elsewhere:
                continue

            # Multitask models related to ENet cannot handle a mono-output
            # y; the last argument flags them by name.
            yield (check_non_transformer_estimators_n_iter,
                   name, estimator, 'Multi' in name)
Beispiel #2
0
def test_classifiers():
    # Yield the common classifier checks, in a fixed order, for every
    # classifier reported by all_estimators.
    always_run = (
        # test classifiers can handle non-array data
        check_classifier_data_not_an_array,
        # test classifiers trained on a single label always return this label
        check_classifiers_one_label,
        check_classifiers_classes,
        check_classifiers_pickle,
        check_estimators_partial_fit_n_features,
        # basic consistency testing
        check_classifiers_train,
    )

    # Classifiers excluded from the y.shape = (n_samples, 1) check.
    # TODO some complication with -1 label
    no_input_shape_check = ["MultinomialNB", "LabelPropagation",
                            "LabelSpreading"]
    # We don't raise a warning in these classifiers, as
    # the column y interface is used by the forests.
    no_input_shape_check += ["DecisionTreeClassifier", "ExtraTreeClassifier"]

    for name, Classifier in all_estimators(type_filter='classifier'):
        for check in always_run:
            yield check, name, Classifier

        if name not in no_input_shape_check:
            # test if classifiers can cope with y.shape = (n_samples, 1)
            yield check_classifiers_input_shapes, name, Classifier

        # test if NotFittedError is raised
        yield check_estimators_unfitted, name, Classifier
Beispiel #3
0
def test_non_transformer_estimators_n_iter():
    # For each non-transformer estimator type, instantiate every estimator
    # and, when the instance has a ``max_iter`` attribute and is not
    # excluded by name, yield the check that its n_iter is at least 1.
    for est_type in ['regressor', 'classifier', 'cluster']:
        for name, Estimator in all_estimators(type_filter=est_type):
            # LassoLars stops early for the default alpha=1.0 for
            # the iris dataset.
            estimator = (Estimator(alpha=0.) if name == 'LassoLars'
                         else Estimator())

            wants_check = (
                hasattr(estimator, "max_iter")
                # These models are dependent on external solvers like
                # libsvm and accessing the iter parameter is non-trivial.
                and name not in ['Ridge', 'SVR', 'NuSVR', 'NuSVC',
                                 'RidgeClassifier', 'SVC', 'RandomizedLasso',
                                 'LogisticRegressionCV']
                # Tested in test_transformer_n_iter below
                and name not in CROSS_DECOMPOSITION
                and name not in ['LinearSVC', 'LogisticRegression']
            )
            if wants_check:
                # Multitask models related to ENet cannot handle
                # if y is mono-output.
                multi_output = 'Multi' in name
                yield (check_non_transformer_estimators_n_iter, name,
                       estimator, multi_output)
Beispiel #4
0
def test_non_meta_estimators():
    # Input validation etc. for non-meta estimators: yields the generic
    # estimator checks for everything returned by all_estimators().
    candidates = all_estimators()

    # SpectralEmbedding is non-deterministic,
    # see issue #4236
    no_pipeline_check = CROSS_DECOMPOSITION + ['SpectralEmbedding']
    no_nan_inf_check = CROSS_DECOMPOSITION + ['Imputer']
    # FIXME!
    # in particular GaussianProcess!
    no_overwrite_check = CROSS_DECOMPOSITION + ['GaussianProcess']

    for name, Estimator in candidates:
        # Names ending in "HMM" or starting with "_" are not checked at all.
        if name.endswith("HMM") or name.startswith("_"):
            continue

        if name not in CROSS_DECOMPOSITION:
            for check in (check_estimators_dtypes,
                          check_fit_score_takes_y,
                          check_dtype_object,
                          # Check that all estimator yield informative
                          # messages when trained on empty datasets
                          check_estimators_empty_data_messages):
                yield check, name, Estimator

        if name not in no_pipeline_check:
            yield check_pipeline_consistency, name, Estimator

        if name not in no_nan_inf_check:
            # Test that all estimators check their input for NaN's and infs
            yield check_estimators_nan_inf, name, Estimator

        if name not in no_overwrite_check:
            yield check_estimators_overwrite_params, name, Estimator

        if hasattr(Estimator, 'sparsify'):
            yield check_sparsify_coefficients, name, Estimator

        yield check_estimator_sparse_data, name, Estimator
Beispiel #5
0
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()

    # Checks skipped for CROSS_DECOMPOSITION estimators only.
    unless_cross_decomposition = (
        check_estimators_dtypes,
        check_fit_score_takes_y,
        check_dtype_object,
        # Check that all estimator yield informative messages when
        # trained on empty datasets
        check_estimators_empty_data_messages,
    )
    # (check, names the check is skipped for), run in this order.
    guarded_checks = (
        # SpectralEmbedding is non-deterministic,
        # see issue #4236
        (check_pipeline_consistency,
         CROSS_DECOMPOSITION + ['SpectralEmbedding']),
        # Test that all estimators check their input for NaN's and infs
        (check_estimators_nan_inf,
         CROSS_DECOMPOSITION + ['Imputer']),
        # FIXME!
        # in particular GaussianProcess!
        (check_estimators_overwrite_params,
         CROSS_DECOMPOSITION + ['GaussianProcess']),
    )

    for name, Estimator in estimators:
        skipped_entirely = name.endswith("HMM") or name.startswith("_")
        if skipped_entirely:
            continue

        if name not in CROSS_DECOMPOSITION:
            for check in unless_cross_decomposition:
                yield check, name, Estimator

        for check, excluded_names in guarded_checks:
            if name not in excluded_names:
                yield check, name, Estimator

        if hasattr(Estimator, 'sparsify'):
            yield check_sparsify_coefficients, name, Estimator

        yield check_estimator_sparse_data, name, Estimator
Beispiel #6
0
def test_transformer_n_iter():
    # Yield check_transformer_n_iter for each transformer whose
    # default-constructed instance has a ``max_iter`` attribute.

    # Dependent on external solvers and hence accessing the iter
    # param is non-trivial.
    external_solver = ['Isomap', 'KernelPCA', 'LocallyLinearEmbedding',
                       'RandomizedLasso', 'LogisticRegressionCV']

    for name, Estimator in all_estimators(type_filter='transformer'):
        estimator = Estimator()
        if not hasattr(estimator, "max_iter"):
            continue
        if name in external_solver:
            continue
        yield check_transformer_n_iter, name, estimator
Beispiel #7
0
def test_all_estimators():
    # Test that estimators are default-constructible, clonable
    # and have working repr. Meta-estimators are included.
    discovered = all_estimators(include_meta_estimators=True)

    # Guard against the introspection silently finding nothing, which
    # would make the loop below pass vacuously.
    n_discovered = len(discovered)
    assert_greater(n_discovered, 0)

    for entry in discovered:
        name, Estimator = entry
        # some can just not be sensibly default constructed
        yield (check_parameters_default_constructible, name, Estimator)
Beispiel #8
0
def test_clustering():
    # test if clustering algorithms do something sensible
    # also test all shapes / shape errors
    feature_clusterers = ('WardAgglomeration', "FeatureAgglomeration")

    for name, Alg in all_estimators(type_filter='cluster'):
        # Run for every clusterer; presumably compares labels with predict
        # output (going by the check's name) -- TODO confirm.
        yield check_clusterer_compute_labels_predict, name, Alg

        if name in feature_clusterers:
            # this is clustering on the features
            # let's not test that here.
            continue

        yield check_clustering, name, Alg
        yield check_estimators_partial_fit_n_features, name, Alg
Beispiel #9
0
def test_clustering():
    # Yield the clustering checks for every estimator of type 'cluster':
    # that the algorithms do something sensible, plus shapes / shape errors.
    for name, Alg in all_estimators(type_filter='cluster'):
        checks = [check_clusterer_compute_labels_predict]

        # WardAgglomeration and FeatureAgglomeration cluster on the
        # features; let's not test that here.
        clusters_samples = name not in ('WardAgglomeration',
                                        "FeatureAgglomeration")
        if clusters_samples:
            checks.append(check_clustering)
            checks.append(check_estimators_partial_fit_n_features)

        for check in checks:
            yield check, name, Alg
Beispiel #10
0
def test_all_estimators():
    # Yield the default-constructibility check (default-constructible,
    # clonable, working repr) for every estimator, meta-estimators included.
    found = all_estimators(include_meta_estimators=True)

    # Meta sanity-check: an empty result would mean the estimator
    # introspection itself is broken.
    assert_greater(len(found), 0)

    check = check_parameters_default_constructible
    for name, Estimator in found:
        # some can just not be sensibly default constructed
        yield check, name, Estimator
Beispiel #11
0
def test_transformer_n_iter():
    # For every transformer, build a default instance and yield the
    # n_iter check when it has ``max_iter`` and is not excluded by name.

    # Dependent on external solvers and hence accessing the iter
    # param is non-trivial.
    external_solver = (
        'Isomap',
        'KernelPCA',
        'LocallyLinearEmbedding',
        'RandomizedLasso',
        'LogisticRegressionCV',
    )

    transformers = all_estimators(type_filter='transformer')
    for name, Estimator in transformers:
        estimator = Estimator()
        exposes_max_iter = hasattr(estimator, "max_iter")
        if exposes_max_iter and name not in external_solver:
            yield (check_transformer_n_iter, name, estimator)
Beispiel #12
0
def test_transformers():
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors

    # these don't actually fit the data, so don't raise errors
    non_fitting = ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']
    # Names for which the data-not-an-array check is skipped.
    no_array_check = non_fitting + ['PLSCanonical', 'PLSRegression',
                                    'CCA', 'PLSSVD']

    for name, Transformer in all_estimators(type_filter='transformer'):
        yield check_transformer_pickle, name, Transformer

        if name not in no_array_check:
            yield check_transformer_data_not_an_array, name, Transformer

        if name in non_fitting:
            continue

        # basic tests
        yield check_transformer, name, Transformer
        yield check_transformers_unfitted, name, Transformer
Beispiel #13
0
def test_regressors():
    # Yield the common regressor checks for every regressor.
    # TODO: test with intercept
    # TODO: test with multiple responses
    leading_checks = (
        # basic testing
        check_regressors_train,
        check_regressor_data_not_an_array,
        check_estimators_partial_fit_n_features,
        # Test that estimators can be pickled, and once pickled
        # give the same answer as before.
        check_regressors_pickle,
    )

    for name, Regressor in all_estimators(type_filter='regressor'):
        for check in leading_checks:
            yield check, name, Regressor

        # check that the regressor handles int input (CCA is exempt)
        if name != 'CCA':
            yield check_regressors_int, name, Regressor

        # Test if NotFittedError is raised
        yield check_estimators_unfitted, name, Regressor
Beispiel #14
0
def test_regressors():
    # Run the regressor checks, in a fixed order, against every regressor.
    # TODO: test with intercept
    # TODO: test with multiple responses
    regressors = all_estimators(type_filter='regressor')
    for name, Regressor in regressors:
        planned = [
            # basic testing
            check_regressors_train,
            check_regressor_data_not_an_array,
            check_estimators_partial_fit_n_features,
            # Test that estimators can be pickled, and once pickled
            # give the same answer as before.
            check_regressors_pickle,
        ]
        if not name == 'CCA':
            # check that the regressor handles int input
            planned.append(check_regressors_int)
        # Test if NotFittedError is raised
        planned.append(check_estimators_unfitted)

        for check in planned:
            yield check, name, Regressor
Beispiel #15
0
def test_class_weight_auto_linear_classifiers():
    # Yield check_class_weight_auto_linear_classifier for every linear
    # classifier (LinearClassifierMixin subclass) that has a
    # ``class_weight`` parameter, except LogisticRegressionCV.
    classifiers = all_estimators(type_filter='classifier')

    clean_warning_registry()
    linear_classifiers = []
    # Instantiating the classifiers happens under catch_warnings so any
    # warnings raised by the constructors are recorded, not shown.
    with warnings.catch_warnings(record=True):
        for name, clazz in classifiers:
            if 'class_weight' not in clazz().get_params().keys():
                continue
            if issubclass(clazz, LinearClassifierMixin):
                linear_classifiers.append((name, clazz))

    for name, Classifier in linear_classifiers:
        # Contrary to RidgeClassifierCV, LogisticRegressionCV use actual
        # CV folds and fit a model for each CV iteration before averaging
        # the coef. Therefore it is expected to not behave exactly as the
        # other linear model.
        if name != "LogisticRegressionCV":
            yield check_class_weight_auto_linear_classifier, name, Classifier
Beispiel #16
0
def test_transformers():
    # Yield the transformer checks for every transformer: that they do
    # something sensible on the training set, plus shapes / shape errors.
    transformers = all_estimators(type_filter='transformer')
    for name, Transformer in transformers:
        yield check_transformer_pickle, name, Transformer

        # these don't actually fit the data, so don't raise errors
        fits_nothing = name in ['AdditiveChi2Sampler', 'Binarizer',
                                'Normalizer']
        # The cross-decomposition estimators named here are additionally
        # exempt from the data-not-an-array check.
        array_check_exempt = fits_nothing or name in [
            'PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']

        if not array_check_exempt:
            yield check_transformer_data_not_an_array, name, Transformer

        if not fits_nothing:
            # basic tests
            yield check_transformer, name, Transformer
            yield check_transformers_unfitted, name, Transformer
Beispiel #17
0
def test_class_weight_classifiers():
    # test that class_weight works and that the semantics are consistent
    candidates = all_estimators(type_filter='classifier')

    clean_warning_registry()
    weighted = []
    # Constructors are called under catch_warnings so that any warnings
    # they emit are recorded rather than shown.
    with warnings.catch_warnings(record=True):
        for entry in candidates:
            if 'class_weight' in entry[1]().get_params().keys():
                weighted.append(entry)

    for name, Classifier in weighted:
        # NuSVC: the sparse version has a parameter that doesn't do anything
        # *NB: NaiveBayes classifiers have a somewhat different interface.
        # FIXME SOON!
        if name == "NuSVC" or name.endswith("NB"):
            continue
        yield check_class_weight_classifiers, name, Classifier
Beispiel #18
0
def test_class_weight_auto_classifiers():
    # Test that class_weight="auto" improves f1-score
    #
    # Skipped unconditionally: everything after the ``raise`` below is
    # unreachable and kept only as the basis for a rewrite.
    #
    # This test is broken; its success depends on:
    # * a rare fortuitous RNG seed for make_classification; and
    # * the use of binary F1 over a seemingly arbitrary positive class for two
    #   datasets, and weighted average F1 for the third.
    # Its expectations need to be clarified and reimplemented.
    raise SkipTest('This test requires redefinition')

    candidates = all_estimators(type_filter='classifier')

    clean_warning_registry()
    weighted = []
    with warnings.catch_warnings(record=True):
        for entry in candidates:
            if 'class_weight' in entry[1]().get_params().keys():
                weighted.append(entry)

    class_counts = [2, 3]
    class_weights = [[.8, .2], [.8, .1, .1]]
    for n_classes, weights in zip(class_counts, class_weights):
        # create unbalanced dataset
        X, y = make_classification(
            n_classes=n_classes, n_samples=200, n_features=10,
            weights=weights, random_state=0, n_informative=n_classes)
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.5, random_state=0)

        for name, Classifier in weighted:
            # the sparse version has a parameter that doesn't do anything
            if name == "NuSVC":
                continue
            # RidgeClassifier behaves unexpected
            # FIXME!
            if name.startswith("RidgeClassifier"):
                continue
            # NaiveBayes classifiers have a somewhat different interface.
            # FIXME SOON!
            if name.endswith("NB"):
                continue
            yield (check_class_weight_auto_classifiers, name, Classifier,
                   X_train, y_train, X_test, y_test, weights)
Beispiel #19
0
def test_class_weight_auto_linear_classifiers():
    # Select the classifiers that both take a ``class_weight`` parameter
    # and derive from LinearClassifierMixin, then yield
    # check_class_weight_auto_linear_classifier for each of them.
    def _is_weighted_linear(clazz):
        # Instantiate first, then test the class hierarchy.
        return ('class_weight' in clazz().get_params().keys()
                and issubclass(clazz, LinearClassifierMixin))

    classifiers = all_estimators(type_filter='classifier')

    clean_warning_registry()
    # Warnings emitted while instantiating are recorded, not shown.
    with warnings.catch_warnings(record=True):
        linear_classifiers = [pair for pair in classifiers
                              if _is_weighted_linear(pair[1])]

    for name, Classifier in linear_classifiers:
        # Contrary to RidgeClassifierCV, LogisticRegressionCV use actual
        # CV folds and fit a model for each CV iteration before averaging
        # the coef. Therefore it is expected to not behave exactly as the
        # other linear model.
        is_excluded = name == "LogisticRegressionCV"
        if not is_excluded:
            yield (check_class_weight_auto_linear_classifier, name,
                   Classifier)
Beispiel #20
0
def test_class_weight_classifiers():
    # Yield check_class_weight_classifiers (class_weight works and its
    # semantics are consistent) for each classifier that has a
    # ``class_weight`` parameter and is not excluded by name.
    def _takes_class_weight(entry):
        # entry is a (name, class) pair; the class is default-constructed.
        return 'class_weight' in entry[1]().get_params().keys()

    classifiers = all_estimators(type_filter='classifier')

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        classifiers = [c for c in classifiers if _takes_class_weight(c)]

    for name, Classifier in classifiers:
        if name == "NuSVC":
            # the sparse version has a parameter that doesn't do anything
            pass
        elif name.endswith("NB"):
            # NaiveBayes classifiers have a somewhat different interface.
            # FIXME SOON!
            pass
        else:
            yield check_class_weight_classifiers, name, Classifier
Beispiel #21
0
def test_classifiers():
    # Run the classifier checks against every classifier returned by
    # all_estimators(type_filter='classifier').

    # TODO some complication with -1 label
    minus_one_label_cases = ["MultinomialNB", "LabelPropagation",
                             "LabelSpreading"]
    # We don't raise a warning in these classifiers, as
    # the column y interface is used by the forests.
    column_y_users = ["DecisionTreeClassifier", "ExtraTreeClassifier"]

    classifiers = all_estimators(type_filter='classifier')
    for name, Classifier in classifiers:
        planned = [
            # test classifiers can handle non-array data
            check_classifier_data_not_an_array,
            # test classifiers trained on a single label always return this
            # label
            check_classifiers_one_label,
            check_classifiers_classes,
            check_classifiers_pickle,
            check_estimators_partial_fit_n_features,
            # basic consistency testing
            check_classifiers_train,
        ]

        runs_shape_check = (name not in minus_one_label_cases
                            and name not in column_y_users)
        if runs_shape_check:
            # test if classifiers can cope with y.shape = (n_samples, 1)
            planned.append(check_classifiers_input_shapes)

        # test if NotFittedError is raised
        planned.append(check_estimators_unfitted)

        for check in planned:
            yield check, name, Classifier
Beispiel #22
0
def test_class_weight_auto_classifiers():
    # Test that class_weight="auto" improves f1-score
    #
    # NOTE: the test raises SkipTest before doing anything, so the body
    # after the ``raise`` never runs; it is retained for reimplementation.
    #
    # This test is broken; its success depends on:
    # * a rare fortuitous RNG seed for make_classification; and
    # * the use of binary F1 over a seemingly arbitrary positive class for two
    #   datasets, and weighted average F1 for the third.
    # Its expectations need to be clarified and reimplemented.
    raise SkipTest('This test requires redefinition')

    classifiers = all_estimators(type_filter='classifier')

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        classifiers = [
            pair for pair in classifiers
            if 'class_weight' in pair[1]().get_params().keys()
        ]

    scenarios = zip([2, 3], [[.8, .2], [.8, .1, .1]])
    for n_classes, weights in scenarios:
        # create unbalanced dataset
        dataset_options = dict(n_classes=n_classes, n_samples=200,
                               n_features=10, weights=weights,
                               random_state=0, n_informative=n_classes)
        X, y = make_classification(**dataset_options)
        X = StandardScaler().fit_transform(X)
        split = train_test_split(X, y, test_size=.5, random_state=0)
        X_train, X_test, y_train, y_test = split

        for name, Classifier in classifiers:
            is_excluded = (
                # the sparse version has a parameter that doesn't do anything
                name == "NuSVC"
                # RidgeClassifier behaves unexpected
                # FIXME!
                or name.startswith("RidgeClassifier")
                # NaiveBayes classifiers have a somewhat different interface.
                # FIXME SOON!
                or name.endswith("NB")
            )
            if not is_excluded:
                yield (check_class_weight_auto_classifiers, name, Classifier,
                       X_train, y_train, X_test, y_test, weights)
Beispiel #23
0
def test_all_estimator_no_base_class():
    # test that all_estimators doesn't find abstract classes: no returned
    # name may start with "base" (case-insensitive).
    template = ("Base estimators such as {0} should not be included"
                " in all_estimators")
    for name, _ in all_estimators():
        looks_like_base = name.lower().startswith('base')
        assert_false(looks_like_base, msg=template.format(name))
Beispiel #24
0
def test_all_estimator_no_base_class():
    # Abstract base classes must not be reported by all_estimators; they
    # are recognised here by a name starting with "base" in any casing.
    for entry in all_estimators():
        name = entry[0]
        msg = ("Base estimators such as {0} should not be included"
               " in all_estimators").format(name)
        lowered = name.lower()
        assert_false(lowered.startswith('base'), msg=msg)