def test_get_params_invariance(): # Test for estimators that support get_params, that # get_params(deep=False) is a subset of get_params(deep=True) # Related to issue #4465 estimators = all_estimators(include_meta_estimators=False, include_other=True) for name, Estimator in estimators: if hasattr(Estimator, "get_params"): # If class is deprecated, ignore deprecated warnings if hasattr(Estimator.__init__, "deprecated_original"): with ignore_warnings(): yield _named_check(check_get_params_invariance, name), name, Estimator else: yield _named_check(check_get_params_invariance, name), name, Estimator
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: if issubclass(Estimator, BiclusterMixin): continue if name.startswith("_"): continue for check in _yield_all_checks(name, Estimator): if issubclass(Estimator, ProjectedGradientNMF): # The ProjectedGradientNMF class is deprecated with ignore_warnings(): yield _named_check(check, name), name, Estimator else: yield _named_check(check, name), name, Estimator
def test_non_transformer_estimators_n_iter(): # Test that all estimators of type which are non-transformer # and which have an attribute of max_iter, return the attribute # of n_iter atleast 1. for est_type in ['regressor', 'classifier', 'cluster']: regressors = all_estimators(type_filter=est_type) for name, Estimator in regressors: # LassoLars stops early for the default alpha=1.0 for # the iris dataset. if name == 'LassoLars': estimator = Estimator(alpha=0.) else: with ignore_warnings(category=DeprecationWarning): estimator = Estimator() if hasattr(estimator, "max_iter"): # These models are dependent on external solvers like # libsvm and accessing the iter parameter is non-trivial. if name in (['Ridge', 'SVR', 'NuSVR', 'NuSVC', 'RidgeClassifier', 'SVC', 'RandomizedLasso', 'LogisticRegressionCV']): continue # Tested in test_transformer_n_iter below elif (name in CROSS_DECOMPOSITION or name in ['LinearSVC', 'LogisticRegression']): continue else: # Multitask models related to ENet cannot handle # if y is mono-output. yield (_named_check( check_non_transformer_estimators_n_iter, name), name, estimator, 'Multi' in name)
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: if issubclass(Estimator, BiclusterMixin): continue if name.startswith("_"): continue estimator = Estimator() # check this on class yield _named_check(check_no_fit_attributes_set_in_init, name), name, Estimator for check in _yield_all_checks(name, estimator): set_checking_parameters(estimator) yield _named_check(check, name), name, estimator
def test_non_transformer_estimators_n_iter(): # Test that all estimators of type which are non-transformer # and which have an attribute of max_iter, return the attribute # of n_iter atleast 1. for est_type in ['regressor', 'classifier', 'cluster']: regressors = all_estimators(type_filter=est_type) for name, Estimator in regressors: # LassoLars stops early for the default alpha=1.0 for # the iris dataset. if name == 'LassoLars': estimator = Estimator(alpha=0.) else: with ignore_warnings(category=DeprecationWarning): estimator = Estimator() if hasattr(estimator, "max_iter"): # These models are dependent on external solvers like # libsvm and accessing the iter parameter is non-trivial. if name in ([ 'Ridge', 'SVR', 'NuSVR', 'NuSVC', 'RidgeClassifier', 'SVC', 'RandomizedLasso', 'LogisticRegressionCV' ]): continue # Tested in test_transformer_n_iter below elif (name in CROSS_DECOMPOSITION or name in ['LinearSVC', 'LogisticRegression']): continue else: # Multitask models related to ENet cannot handle # if y is mono-output. yield (_named_check( check_non_transformer_estimators_n_iter, name), name, estimator, 'Multi' in name)
def test_all_estimators(): estimators = all_estimators(include_meta_estimators=True) assert len(estimators) > 0 for name, Estimator in estimators: # some can just not be sensibly default constructed yield (_named_check(check_estimator, name), Estimator)
def test_get_params_invariance(): # Test for estimators that support get_params, that # get_params(deep=False) is a subset of get_params(deep=True) # Related to issue #4465 estimators = all_estimators(include_meta_estimators=False, include_other=True) for name, Estimator in estimators: if hasattr(Estimator, 'get_params'): # If class is deprecated, ignore deprecated warnings if hasattr(Estimator.__init__, "deprecated_original"): with ignore_warnings(): yield _named_check(check_get_params_invariance, name), name, Estimator else: yield _named_check(check_get_params_invariance, name), name, Estimator
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: if name == 'kmodes': for check in _yield_all_checks(name, Estimator): # Skip these if check.__name__ not in ('check_clustering', 'check_dtype_object'): yield _named_check(check, name), name, Estimator elif name == 'kprototypes': for check in _yield_all_checks(name, Estimator): # Only do these if check.__name__ in ('check_estimator_sparse_data', 'check_clusterer_compute_labels_predict', 'check_estimators_partial_fit_n_features'): yield _named_check(check, name), name, Estimator
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: if name.startswith("_"): continue for check in _yield_all_checks(name, Estimator): yield _named_check(check, name), name, Estimator
def test_averaging_multilabel_all_ones(): y_true = np.ones((20, 3)) y_pred = np.ones((20, 3)) y_score = np.ones((20, 3)) y_true_binarize = y_true y_pred_binarize = y_pred for name in METRICS_WITH_AVERAGING: yield (_named_check(check_averaging, name), name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: if issubclass(Estimator, BiclusterMixin): continue if name.startswith("_"): continue for check in _yield_all_checks(name, Estimator): yield _named_check(check, name), name, Estimator
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: estimator = Estimator() if name == 'kmodes': for check in _yield_all_checks(name, Estimator): # Skip these if check.__name__ not in ('check_clustering', 'check_dtype_object'): yield _named_check(check, name), name, estimator
def test_transformer_n_iter(): transformers = all_estimators(type_filter="transformer") for name, Estimator in transformers: if issubclass(Estimator, ProjectedGradientNMF): # The ProjectedGradientNMF class is deprecated with ignore_warnings(): estimator = Estimator() else: estimator = Estimator() # Dependent on external solvers and hence accessing the iter # param is non-trivial. external_solver = ["Isomap", "KernelPCA", "LocallyLinearEmbedding", "RandomizedLasso", "LogisticRegressionCV"] if hasattr(estimator, "max_iter") and name not in external_solver: if isinstance(estimator, ProjectedGradientNMF): # The ProjectedGradientNMF class is deprecated with ignore_warnings(): yield _named_check(check_transformer_n_iter, name), name, estimator else: yield _named_check(check_transformer_n_iter, name), name, estimator
def test_all_estimators(): # Test that estimators are default-constructible, cloneable # and have working repr. estimators = all_estimators(include_meta_estimators=True) # Meta sanity-check to make sure that the estimator introspection runs # properly assert_greater(len(estimators), 0) for name, Estimator in estimators: # some can just not be sensibly default constructed yield (_named_check(check_parameters_default_constructible, name), name, Estimator)
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: if name.startswith("_"): continue for check in _yield_all_checks(name, Estimator): yield _named_check(check, name), name, Estimator logger_name = Estimator().logger.name class_module_name = Estimator.__module__ assert logger_name == class_module_name
def test_averaging_multilabel(n_classes=5, n_samples=40): _, y = make_multilabel_classification( n_features=1, n_classes=n_classes, random_state=5, n_samples=n_samples, allow_unlabeled=False ) y_true = y[:20] y_pred = y[20:] y_score = check_random_state(0).normal(size=(20, n_classes)) y_true_binarize = y_true y_pred_binarize = y_pred for name in METRICS_WITH_AVERAGING + THRESHOLDED_METRICS_WITH_AVERAGING: yield (_named_check(check_averaging, name), name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
def test_averaging_multiclass(n_samples=50, n_classes=3): random_state = check_random_state(0) y_true = random_state.randint(0, n_classes, size=(n_samples,)) y_pred = random_state.randint(0, n_classes, size=(n_samples,)) y_score = random_state.uniform(size=(n_samples, n_classes)) lb = LabelBinarizer().fit(y_true) y_true_binarize = lb.transform(y_true) y_pred_binarize = lb.transform(y_pred) for name in METRICS_WITH_AVERAGING: yield (_named_check(check_averaging, name), name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
def test_sample_weight_invariance(n_samples=50): random_state = check_random_state(0) # binary random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples,)) for name in ALL_METRICS: if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or name in METRIC_UNDEFINED_BINARY): continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_score else: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_pred # multiclass random_state = check_random_state(0) y_true = random_state.randint(0, 5, size=(n_samples, )) y_pred = random_state.randint(0, 5, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples, 5)) for name in ALL_METRICS: if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or name in METRIC_UNDEFINED_BINARY_MULTICLASS): continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_score else: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_pred # multilabel indicator _, ya = make_multilabel_classification(n_features=1, n_classes=20, random_state=0, n_samples=100, allow_unlabeled=False) _, yb = make_multilabel_classification(n_features=1, n_classes=20, random_state=1, n_samples=100, allow_unlabeled=False) y_true = np.vstack([ya, yb]) y_pred = np.vstack([ya, ya]) y_score = random_state.randint(1, 4, size=y_true.shape) for name in (MULTILABELS_METRICS + THRESHOLDED_MULTILABEL_METRICS + MULTIOUTPUT_METRICS): if name in METRICS_WITHOUT_SAMPLE_WEIGHT: continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield (_named_check(check_sample_weight_invariance, name), name, metric, y_true, y_score) else: yield (_named_check(check_sample_weight_invariance, name), name, metric, y_true, y_pred)
def test_averaging_multiclass(n_samples=50, n_classes=3): random_state = check_random_state(0) y_true = random_state.randint(0, n_classes, size=(n_samples, )) y_pred = random_state.randint(0, n_classes, size=(n_samples, )) y_score = random_state.uniform(size=(n_samples, n_classes)) lb = LabelBinarizer().fit(y_true) y_true_binarize = lb.transform(y_true) y_pred_binarize = lb.transform(y_pred) for name in METRICS_WITH_AVERAGING: yield (_named_check(check_averaging, name), name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
def test_class_weight_balanced_linear_classifiers(): classifiers = all_estimators(type_filter="classifier") clean_warning_registry() with warnings.catch_warnings(record=True): linear_classifiers = [ (name, clazz) for name, clazz in classifiers if ("class_weight" in clazz().get_params().keys() and issubclass(clazz, LinearClassifierMixin)) ] for name, Classifier in linear_classifiers: yield _named_check(check_class_weight_balanced_linear_classifier, name), name, Classifier
def test_averaging_multilabel(n_classes=5, n_samples=40): _, y = make_multilabel_classification(n_features=1, n_classes=n_classes, random_state=5, n_samples=n_samples, allow_unlabeled=False) y_true = y[:20] y_pred = y[20:] y_score = check_random_state(0).normal(size=(20, n_classes)) y_true_binarize = y_true y_pred_binarize = y_pred for name in METRICS_WITH_AVERAGING + THRESHOLDED_METRICS_WITH_AVERAGING: yield (_named_check(check_averaging, name), name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score)
def test_transformer_n_iter(): transformers = all_estimators(type_filter='transformer') for name, Estimator in transformers: with ignore_warnings(category=DeprecationWarning): estimator = Estimator() # Dependent on external solvers and hence accessing the iter # param is non-trivial. external_solver = ['Isomap', 'KernelPCA', 'LocallyLinearEmbedding', 'RandomizedLasso', 'LogisticRegressionCV'] if hasattr(estimator, "max_iter") and name not in external_solver: yield _named_check( check_transformer_n_iter, name), name, estimator
def test_class_weight_balanced_linear_classifiers(): classifiers = all_estimators(type_filter='classifier') clean_warning_registry() with warnings.catch_warnings(record=True): linear_classifiers = [ (name, clazz) for name, clazz in classifiers if ('class_weight' in clazz().get_params().keys() and issubclass(clazz, LinearClassifierMixin)) ] for name, Classifier in linear_classifiers: yield _named_check(check_class_weight_balanced_linear_classifier, name), name, Classifier
def test_transformer_n_iter(): transformers = all_estimators(type_filter='transformer') for name, Estimator in transformers: with ignore_warnings(category=DeprecationWarning): estimator = Estimator() # Dependent on external solvers and hence accessing the iter # param is non-trivial. external_solver = [ 'Isomap', 'KernelPCA', 'LocallyLinearEmbedding', 'RandomizedLasso', 'LogisticRegressionCV' ] if hasattr(estimator, "max_iter") and name not in external_solver: yield _named_check(check_transformer_n_iter, name), name, estimator
def test_transformer_n_iter(): transformers = all_estimators(type_filter='transformer') for name, Estimator in transformers: if issubclass(Estimator, ProjectedGradientNMF): # The ProjectedGradientNMF class is deprecated with ignore_warnings(): estimator = Estimator() else: estimator = Estimator() # Dependent on external solvers and hence accessing the iter # param is non-trivial. external_solver = ['Isomap', 'KernelPCA', 'LocallyLinearEmbedding', 'RandomizedLasso', 'LogisticRegressionCV'] if hasattr(estimator, "max_iter") and name not in external_solver: if isinstance(estimator, ProjectedGradientNMF): # The ProjectedGradientNMF class is deprecated with ignore_warnings(): yield _named_check( check_transformer_n_iter, name), name, estimator else: yield _named_check( check_transformer_n_iter, name), name, estimator
def test_averaging_multilabel_all_zeroes(): y_true = np.zeros((20, 3)) y_pred = np.zeros((20, 3)) y_score = np.zeros((20, 3)) y_true_binarize = y_true y_pred_binarize = y_pred for name in METRICS_WITH_AVERAGING: yield (_named_check(check_averaging, name), name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score) # Test _average_binary_score for weight.sum() == 0 binary_metric = lambda y_true, y_score, average="macro": _average_binary_score( precision_score, y_true, y_score, average ) _check_averaging(binary_metric, y_true, y_pred, y_true_binarize, y_pred_binarize, is_multilabel=True)
def test_averaging_multilabel_all_zeroes(): y_true = np.zeros((20, 3)) y_pred = np.zeros((20, 3)) y_score = np.zeros((20, 3)) y_true_binarize = y_true y_pred_binarize = y_pred for name in METRICS_WITH_AVERAGING: yield (_named_check(check_averaging, name), name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score) # Test _average_binary_score for weight.sum() == 0 binary_metric = (lambda y_true, y_score, average="macro": _average_binary_score( precision_score, y_true, y_score, average)) _check_averaging(binary_metric, y_true, y_pred, y_true_binarize, y_pred_binarize, is_multilabel=True)
def test_non_transformer_estimators_n_iter(): # Test that all estimators of type which are non-transformer # and which have an attribute of max_iter, return the attribute # of n_iter atleast 1. for est_type in ["regressor", "classifier", "cluster"]: regressors = all_estimators(type_filter=est_type) for name, Estimator in regressors: # LassoLars stops early for the default alpha=1.0 for # the iris dataset. if name == "LassoLars": estimator = Estimator(alpha=0.0) else: estimator = Estimator() if hasattr(estimator, "max_iter"): # These models are dependent on external solvers like # libsvm and accessing the iter parameter is non-trivial. if name in ( [ "Ridge", "SVR", "NuSVR", "NuSVC", "RidgeClassifier", "SVC", "RandomizedLasso", "LogisticRegressionCV", ] ): continue # Tested in test_transformer_n_iter below elif name in CROSS_DECOMPOSITION or name in ["LinearSVC", "LogisticRegression"]: continue else: # Multitask models related to ENet cannot handle # if y is mono-output. yield ( _named_check(check_non_transformer_estimators_n_iter, name), name, estimator, "Multi" in name, )
def test_all_estimators(): estimators = all_estimators(include_meta_estimators=True) assert_greater(len(estimators), 0) for name, Estimator in estimators: # some can just not be sensibly default constructed yield (_named_check(check_estimator, name), Estimator)
def test_sample_weight_invariance(n_samples=50): random_state = check_random_state(0) # binary random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(n_samples, )) y_pred = random_state.randint(0, 2, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples, )) for name in ALL_METRICS: if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or name in METRIC_UNDEFINED_BINARY): continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_score else: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_pred # multiclass random_state = check_random_state(0) y_true = random_state.randint(0, 5, size=(n_samples, )) y_pred = random_state.randint(0, 5, size=(n_samples, )) y_score = random_state.random_sample(size=(n_samples, 5)) for name in ALL_METRICS: if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or name in METRIC_UNDEFINED_BINARY_MULTICLASS): continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_score else: yield _named_check(check_sample_weight_invariance, name), name,\ metric, y_true, y_pred # multilabel indicator _, ya = make_multilabel_classification(n_features=1, n_classes=20, random_state=0, n_samples=100, allow_unlabeled=False) _, yb = make_multilabel_classification(n_features=1, n_classes=20, random_state=1, n_samples=100, allow_unlabeled=False) y_true = np.vstack([ya, yb]) y_pred = np.vstack([ya, ya]) y_score = random_state.randint(1, 4, size=y_true.shape) for name in (MULTILABELS_METRICS + THRESHOLDED_MULTILABEL_METRICS + MULTIOUTPUT_METRICS): if name in METRICS_WITHOUT_SAMPLE_WEIGHT: continue metric = ALL_METRICS[name] if name in THRESHOLDED_METRICS: yield (_named_check(check_sample_weight_invariance, name), name, metric, y_true, y_score) else: yield (_named_check(check_sample_weight_invariance, name), name, metric, y_true, y_pred)