def make_paragraph_for_estimator_type(estimator_type):
    """Build a docutils list item enumerating estimators that allow NaN input.

    Iterates every estimator of ``estimator_type``, instantiates it, and keeps
    those whose ``allow_nan`` tag is truthy. Returns a one-element list holding
    the assembled node, or ``None`` when no matching estimator was found.
    """
    intro = nodes.list_item()
    intro += nodes.strong(text="Estimators that allow NaN values for type ")
    intro += nodes.literal(text=f"{estimator_type}")
    intro += nodes.strong(text=":\n")

    found_any = False
    bullet_list = nodes.bullet_list()
    for _, est_class in all_estimators(type_filter=estimator_type):
        # Estimators that cannot be instantiated raise SkipTest; ignore them.
        with suppress(SkipTest):
            instance = _construct_instance(est_class)
            if instance._get_tags().get("allow_nan"):
                module_name = ".".join(est_class.__module__.split(".")[:2])
                class_title = f"{est_class.__name__}"
                class_url = f"generated/{module_name}.{class_title}.html"
                entry = nodes.list_item()
                paragraph = nodes.paragraph()
                paragraph += nodes.reference(
                    class_title, text=class_title, internal=False, refuri=class_url
                )
                found_any = True
                entry += paragraph
                bullet_list += entry
    intro += bullet_list
    return [intro] if found_any else None
def _tested_estimators(type_filter=None):
    """Yield one constructed instance per estimator, optionally filtered by type.

    Estimators whose construction raises ``SkipTest`` are silently skipped.
    """
    for _, estimator_cls in all_estimators(type_filter=type_filter):
        try:
            instance = _construct_instance(estimator_cls)
        except SkipTest:
            continue
        yield instance
def _tested_estimators():
    """Yield one constructed instance per registered estimator.

    Estimators whose construction raises ``SkipTest`` are silently skipped.
    """
    for _, estimator_cls in all_estimators():
        try:
            instance = _construct_instance(estimator_cls)
        except SkipTest:
            continue
        yield instance
def test_fit_docstring_attributes(name, Estimator):
    """Check that every fitted attribute of ``Estimator`` is documented.

    Fits an instance on synthetic classification data, asserts that each
    attribute listed in the numpydoc ``Attributes`` section exists on the
    fitted estimator (unless conditional, i.e. described with "only"), and
    that no fitted attribute is missing from the docstring.
    """
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    if Estimator.__name__ in _DOCSTRING_IGNORES:
        return

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]

    # Pipeline needs pre-wired steps; everything else is default-constructed.
    if Estimator.__name__ == "Pipeline":
        est = _construct_compose_pipeline_instance(Estimator)
    else:
        est = _construct_instance(Estimator)

    X, y = make_classification(
        n_samples=20,
        n_features=3,
        n_redundant=0,
        n_classes=2,
        random_state=2,
    )
    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # Enable OOB bookkeeping so `oob_*` attributes get set when supported.
    if "oob_score" in est.get_params():
        est.set_params(oob_score=True)

    # Samplers expose fit_resample instead of fit.
    if is_sampler(est):
        est.fit_resample(X, y)
    else:
        est.fit(X, y)

    # Idiom fix: `set()` instead of `set([])`.
    skipped_attributes = set()

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    # `difference` already returns a set; no need to re-wrap in set().
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = undocumented_attrs.difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
def _tested_estimators():
    """Yield constructed estimator instances, excluding bicluster estimators.

    Estimators whose construction raises ``SkipTest`` are silently skipped.
    """
    for _, estimator_cls in all_estimators():
        if issubclass(estimator_cls, BiclusterMixin):
            continue
        try:
            instance = _construct_instance(estimator_cls)
        except SkipTest:
            continue
        yield instance
def _tested_estimators():
    """Yield seeded estimator instances; NearMiss is expanded per algorithm.

    Estimators whose construction raises ``SkipTest`` are silently skipped.
    For ``NearMiss``, one clone per ``version`` in (1, 2, 3) is yielded.
    """
    for _, estimator_cls in all_estimators():
        try:
            instance = _construct_instance(estimator_cls)
            set_random_state(instance)
        except SkipTest:
            continue
        if isinstance(instance, NearMiss):
            # For NearMiss, let's check the three algorithms
            for version in (1, 2, 3):
                yield clone(instance).set_params(version=version)
        else:
            yield instance
def test_fit_docstring_attributes(name, Estimator):
    """Check that every fitted attribute of ``Estimator`` is documented.

    Constructs an estimator (with special handling for search-CV, compose,
    vectorizer, and a few parameter quirks), fits it on suitable data, then
    asserts the documented ``Attributes`` exist after fit and that no fitted
    attribute is undocumented.

    Fix: the first vectorizer sample string was broken across a line in the
    source ("This is the " / "first document."); restored to the single
    literal "This is the first document." to make the list valid.
    """
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]

    if Estimator.__name__ in (
        "HalvingRandomSearchCV",
        "RandomizedSearchCV",
        "HalvingGridSearchCV",
        "GridSearchCV",
    ):
        est = _construct_searchcv_instance(Estimator)
    elif Estimator.__name__ in (
        "ColumnTransformer",
        "Pipeline",
        "FeatureUnion",
    ):
        est = _construct_compose_pipeline_instance(Estimator)
    elif Estimator.__name__ == "SparseCoder":
        est = _construct_sparse_coder(Estimator)
    else:
        est = _construct_instance(Estimator)

    if Estimator.__name__ == "SelectKBest":
        est.set_params(k=2)
    elif Estimator.__name__ == "DummyClassifier":
        est.set_params(strategy="stratified")
    elif Estimator.__name__ == "CCA" or Estimator.__name__.startswith("PLS"):
        # default = 2 is invalid for single target
        est.set_params(n_components=1)
    elif Estimator.__name__ in (
        "GaussianRandomProjection",
        "SparseRandomProjection",
    ):
        # default="auto" raises an error with the shape of `X`
        est.set_params(n_components=2)

    # FIXME: TO BE REMOVED in 1.4 (avoid FutureWarning)
    if Estimator.__name__ in (
        "OrthogonalMatchingPursuit",
        "OrthogonalMatchingPursuitCV",
        "Lars",
        "LarsCV",
        "LassoLars",
        "LassoLarsCV",
        "LassoLarsIC",
    ):
        est.set_params(normalize=False)

    # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
    if Estimator.__name__ == "TSNE":
        est.set_params(learning_rate=200.0, init="random")

    # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
    if Estimator.__name__ == "SequentialFeatureSelector":
        est.set_params(n_features_to_select="auto")

    # For PLS, TODO remove in 1.1
    skipped_attributes = {"x_scores_", "y_scores_"}

    # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
    if Estimator.__name__ == "FastICA":
        est.set_params(whiten="unit-variance")

    if Estimator.__name__.endswith("Vectorizer"):
        # Vectorizer require some specific input data
        if Estimator.__name__ in (
            "CountVectorizer",
            "HashingVectorizer",
            "TfidfVectorizer",
        ):
            X = [
                "This is the first document.",
                "This document is the second document.",
                "And this is the third one.",
                "Is this the first document?",
            ]
        elif Estimator.__name__ == "DictVectorizer":
            X = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
        y = None
    else:
        X, y = make_classification(
            n_samples=20,
            n_features=3,
            n_redundant=0,
            n_classes=2,
            random_state=2,
        )
        y = _enforce_estimator_tags_y(est, y)
        X = _enforce_estimator_tags_x(est, X)

    if "1dlabels" in est._get_tags()["X_types"]:
        est.fit(y)
    elif "2dlabels" in est._get_tags()["X_types"]:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
def test_fit_docstring_attributes(name, Estimator):
    """Verify the documented ``Attributes`` of ``Estimator`` exist after fit,
    and that every fitted attribute appears in the docstring."""
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc['Attributes']

    # Pick a constructor path matching the estimator family.
    if Estimator.__name__ in (
        "HalvingRandomSearchCV",
        "RandomizedSearchCV",
        "HalvingGridSearchCV",
        "GridSearchCV",
    ):
        est = _construct_searchcv_instance(Estimator)
    elif Estimator.__name__ in ("ColumnTransformer", "Pipeline", "FeatureUnion"):
        est = _construct_compose_pipeline_instance(Estimator)
    elif Estimator.__name__ == "SparseCoder":
        est = _construct_sparse_coder(Estimator)
    else:
        est = _construct_instance(Estimator)

    # Per-estimator parameter tweaks so fitting succeeds on the tiny dataset.
    if Estimator.__name__ == 'SelectKBest':
        est.set_params(k=2)
    elif Estimator.__name__ == 'DummyClassifier':
        est.set_params(strategy="stratified")
    elif Estimator.__name__ == 'CCA' or Estimator.__name__.startswith('PLS'):
        # default = 2 is invalid for single target
        est.set_params(n_components=1)
    elif Estimator.__name__ in ("GaussianRandomProjection",
                                "SparseRandomProjection"):
        # default="auto" raises an error with the shape of `X`
        est.set_params(n_components=2)

    # FIXME: TO BE REMOVED for 1.1 (avoid FutureWarning)
    if Estimator.__name__ == 'NMF':
        est.set_params(init='nndsvda')

    # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
    if Estimator.__name__ == 'TSNE':
        est.set_params(learning_rate=200.0, init='random')

    # For PLS, TODO remove in 1.1
    skipped_attributes = {"x_scores_", "y_scores_"}

    if Estimator.__name__.endswith("Vectorizer"):
        # Vectorizer require some specific input data
        if Estimator.__name__ in ("CountVectorizer",
                                  "HashingVectorizer",
                                  "TfidfVectorizer"):
            X = [
                "This is the first document.",
                "This document is the second document.",
                "And this is the third one.",
                "Is this the first document?",
            ]
        elif Estimator.__name__ == "DictVectorizer":
            X = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
        y = None
    else:
        X, y = make_classification(n_samples=20, n_features=3, n_redundant=0,
                                   n_classes=2, random_state=2)
        y = _enforce_estimator_tags_y(est, y)
        X = _enforce_estimator_tags_x(est, X)

    if '1dlabels' in est._get_tags()['X_types']:
        est.fit(y)
    elif '2dlabels' in est._get_tags()['X_types']:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    module = est.__module__.split(".")[1]
    if module in N_FEATURES_MODULES_TO_IGNORE:
        skipped_attributes.add("n_features_in_")

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = ' '.join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs, \
        "Undocumented attributes: {}".format(undocumented_attrs)
def test_fit_docstring_attributes(name, Estimator):
    """Verify documented ``Attributes`` exist after fit; skip or xfail
    estimators known to be hard to fit or under-documented."""
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc['Attributes']

    # Estimators that cannot be fit with the generic recipe below.
    IGNORED = {'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
               'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
               'MultiOutputClassifier', 'MultiOutputRegressor',
               'NoSampleWeightWrapper', 'OneVsOneClassifier',
               'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
               'RegressorChain', 'SelectFromModel', 'SparseCoder',
               'SparseRandomProjection', 'SpectralBiclustering',
               'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
               'VotingClassifier', 'VotingRegressor',
               'SequentialFeatureSelector', 'HalvingGridSearchCV',
               'HalvingRandomSearchCV'}
    if Estimator.__name__ in IGNORED or Estimator.__name__.startswith('_'):
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    if Estimator.__name__ in ("RandomizedSearchCV", "GridSearchCV"):
        est = _construct_searchcv_instance(Estimator)
    else:
        est = _construct_instance(Estimator)

    # Per-estimator parameter tweaks so fitting succeeds on the tiny dataset.
    if Estimator.__name__ == 'SelectKBest':
        est.k = 2
    if Estimator.__name__ == 'DummyClassifier':
        est.strategy = "stratified"
    if 'PLS' in Estimator.__name__ or 'CCA' in Estimator.__name__:
        # default = 2 is invalid for single target.
        est.n_components = 1

    # FIXME: TO BE REMOVED for 1.0 (avoid FutureWarning)
    if Estimator.__name__ == 'AffinityPropagation':
        est.random_state = 63

    # FIXME: TO BE REMOVED for 1.1 (avoid FutureWarning)
    if Estimator.__name__ == 'NMF':
        est.init = 'nndsvda'

    # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
    if Estimator.__name__ == 'TSNE':
        est.learning_rate = 200.0
        est.init = 'random'

    X, y = make_classification(n_samples=20, n_features=3, n_redundant=0,
                               n_classes=2, random_state=2)
    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    if '1dlabels' in est._get_tags()['X_types']:
        est.fit(y)
    elif '2dlabels' in est._get_tags()['X_types']:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    # For PLS, TODO remove in 1.1
    skipped_attributes = {'x_scores_', 'y_scores_'}

    module = est.__module__.split(".")[1]
    if module in N_FEATURES_MODULES_TO_IGNORE:
        skipped_attributes.add("n_features_in_")

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = ' '.join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    IGNORED = {'Birch', 'LarsCV', 'Lasso', 'OrthogonalMatchingPursuit'}
    if Estimator.__name__ in IGNORED:
        pytest.xfail(
            reason="Estimator has too many undocumented attributes.")

    fit_attr = [k for k in est.__dict__.keys()
                if k.endswith('_') and not k.startswith('_')]
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs, \
        "Undocumented attributes: {}".format(undocumented_attrs)
def test_fit_docstring_attributes(name, Estimator):
    """Verify documented ``Attributes`` exist after fit; skip or xfail
    estimators known to be hard to fit or under-documented."""
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc['Attributes']

    # Estimators that cannot be fit with the generic recipe below.
    IGNORED = {
        'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
        'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
        'GridSearchCV', 'MultiOutputClassifier', 'MultiOutputRegressor',
        'NoSampleWeightWrapper', 'OneVsOneClassifier', 'OutputCodeClassifier',
        'Pipeline', 'RFE', 'RFECV', 'RandomizedSearchCV', 'RegressorChain',
        'SelectFromModel', 'SparseCoder', 'SparseRandomProjection',
        'SpectralBiclustering', 'StackingClassifier', 'StackingRegressor',
        'TfidfVectorizer', 'VotingClassifier', 'VotingRegressor'
    }
    if Estimator.__name__ in IGNORED or Estimator.__name__.startswith('_'):
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    est = _construct_instance(Estimator)

    # Per-estimator parameter tweaks so fitting succeeds on the tiny dataset.
    if Estimator.__name__ == 'SelectKBest':
        est.k = 2
    if Estimator.__name__ == 'DummyClassifier':
        est.strategy = "stratified"

    # TO BE REMOVED for v0.25 (avoid FutureWarning)
    if Estimator.__name__ == 'AffinityPropagation':
        est.random_state = 63

    X, y = make_classification(n_samples=20, n_features=3, n_redundant=0,
                               n_classes=2, random_state=2)
    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    if '1dlabels' in est._get_tags()['X_types']:
        est.fit(y)
    elif '2dlabels' in est._get_tags()['X_types']:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    skipped_attributes = {'n_features_in_'}

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = ' '.join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    IGNORED = {
        'BayesianRidge', 'Birch', 'CCA', 'CategoricalNB', 'KernelCenterer',
        'LarsCV', 'Lasso', 'LassoLarsIC', 'MiniBatchKMeans',
        'OrthogonalMatchingPursuit', 'PLSCanonical', 'PLSSVD',
        'PassiveAggressiveClassifier'
    }
    if Estimator.__name__ in IGNORED:
        pytest.xfail(reason="Estimator has too many undocumented attributes.")

    fit_attr = [
        k for k in est.__dict__.keys()
        if k.endswith('_') and not k.startswith('_')
    ]
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs, \
        "Undocumented attributes: {}".format(undocumented_attrs)