def test_docstring_parameters():
    """Check the parameter docstrings of every public class and function.

    Iterates over ``PUBLIC_MODULES``, runs ``check_docstring_parameters`` on
    each locally defined, non-private, non-deprecated class ``__init__``,
    documented method and function, and raises one ``AssertionError`` that
    lists every reported problem.
    """
    # Skip the whole test when numpydoc is not installed.
    pytest.importorskip('numpydoc',
                        reason="numpydoc is required to test the docstrings")

    # XXX unreached code as of v0.22
    from numpydoc import docscrape

    errors = []
    for name in PUBLIC_MODULES:
        # We cannot always control the docstrings of this module.
        if name == 'sklearn.utils.fixes':
            continue

        with warnings.catch_warnings(record=True):
            module = importlib.import_module(name)

        # Keep only the classes defined in this module, not the imported ones.
        local_classes = [
            (cls_name, klass)
            for cls_name, klass in inspect.getmembers(module, inspect.isclass)
            if klass.__module__ == name
        ]
        for cls_name, klass in local_classes:
            if cls_name.startswith('_') or cls_name in _DOCSTRING_IGNORES:
                continue
            if inspect.isabstract(klass):
                continue

            # Any warning emitted while parsing the class docstring is fatal.
            with warnings.catch_warnings(record=True) as caught:
                class_doc = docscrape.ClassDoc(klass)
            if caught:
                raise RuntimeError('Error for __init__ of %s in %s:\n%s'
                                   % (klass, name, caught[0]))

            init = getattr(klass, '__init__', None)
            if _is_deprecated(init):
                continue

            class_errors = []
            if init is not None:
                class_errors.extend(
                    check_docstring_parameters(klass.__init__, class_doc))

            for method_name in class_doc.methods:
                method = getattr(klass, method_name)
                if _is_deprecated(method):
                    continue
                # Skip the docstring check of ``y`` when it defaults to None:
                # it is only part of the signature for API reasons.
                ignored = None
                if method_name in _METHODS_IGNORE_NONE_Y:
                    y_param = signature(method).parameters.get('y')
                    if y_param is not None and y_param.default is None:
                        ignored = ['y']  # ignore y for fit and score
                class_errors.extend(
                    check_docstring_parameters(method, ignore=ignored))

            errors.extend(class_errors)

        # Keep only the functions defined in this module.
        local_functions = [
            (func_name, func)
            for func_name, func
            in inspect.getmembers(module, inspect.isfunction)
            if func.__module__ == name
        ]
        for func_name, func in local_functions:
            # Don't test private methods / functions
            if func_name.startswith('_'):
                continue
            if func_name == "configuration" and name.endswith("setup"):
                continue
            qualified_name = _get_func_name(func)
            if any(d in qualified_name for d in _DOCSTRING_IGNORES):
                continue
            if _is_deprecated(func):
                continue
            errors.extend(check_docstring_parameters(func))

    if errors:
        raise AssertionError("Docstring Error:\n" + '\n'.join(errors))
def test_fit_docstring_attributes(name, Estimator):
    """Check that the documented attributes of ``Estimator`` exist after fit.

    The estimator is built with its default constructor, fitted on a small
    classification dataset, and then compared in both directions against the
    "Attributes" section of its class docstring.
    """
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    est_name = Estimator.__name__
    documented = docscrape.ClassDoc(Estimator)['Attributes']

    # Estimators that cannot be instantiated and fitted with defaults.
    not_fittable = {
        'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
        'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
        'GridSearchCV', 'MultiOutputClassifier', 'MultiOutputRegressor',
        'NoSampleWeightWrapper', 'OneVsOneClassifier', 'OneVsRestClassifier',
        'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
        'RandomizedSearchCV', 'RegressorChain', 'SelectFromModel',
        'SparseCoder', 'SparseRandomProjection', 'SpectralBiclustering',
        'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
        'VotingClassifier', 'VotingRegressor',
    }
    if est_name.startswith('_') or est_name in not_fittable:
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    est = Estimator()

    # Per-estimator attribute overrides applied before fitting.
    attribute_overrides = {
        'SelectKBest': ('k', 2),
        'DummyClassifier': ('strategy', "stratified"),
        # TO BE REMOVED for v0.25 (avoid FutureWarning)
        'AffinityPropagation': ('random_state', 63),
    }
    if est_name in attribute_overrides:
        setattr(est, *attribute_overrides[est_name])

    X, y = make_classification(
        n_samples=20,
        n_features=3,
        n_redundant=0,
        n_classes=2,
        random_state=2,
    )
    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # Label-only estimators are fitted on the targets instead of X.
    if '1dlabels' in est._get_tags()['X_types']:
        est.fit(y)
    elif '2dlabels' in est._get_tags()['X_types']:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    skipped_attributes = {'n_features_in_'}

    for attr in documented:
        if attr.name in skipped_attributes:
            continue
        desc = ' '.join(attr.desc).lower()
        # Attributes whose description contains the word "only" are present
        # only when some parameter is provided, so they are not required.
        if 'only ' in desc:
            continue
        assert hasattr(est, attr.name)

    # Estimators expected to fail the "everything is documented" check.
    known_undocumented = {
        'BayesianRidge', 'Birch', 'CCA', 'CategoricalNB', 'ElasticNet',
        'ElasticNetCV', 'GaussianProcessClassifier',
        'GradientBoostingRegressor', 'HistGradientBoostingClassifier',
        'HistGradientBoostingRegressor', 'IsolationForest',
        'KNeighborsClassifier', 'KNeighborsRegressor',
        'KNeighborsTransformer', 'KernelCenterer', 'KernelDensity',
        'LarsCV', 'Lasso', 'LassoLarsCV', 'LassoLarsIC',
        'LatentDirichletAllocation', 'LocalOutlierFactor', 'MDS',
        'MiniBatchKMeans', 'MLPClassifier', 'MLPRegressor',
        'MultiTaskElasticNet', 'MultiTaskElasticNetCV', 'MultiTaskLasso',
        'MultiTaskLassoCV', 'NearestNeighbors', 'NuSVR', 'OneClassSVM',
        'OrthogonalMatchingPursuit', 'PLSCanonical', 'PLSRegression',
        'PLSSVD', 'PassiveAggressiveClassifier', 'Perceptron', 'RBFSampler',
        'RadiusNeighborsClassifier', 'RadiusNeighborsRegressor',
        'RadiusNeighborsTransformer', 'RandomTreesEmbedding', 'SVR',
        'SkewedChi2Sampler',
    }
    if est_name in known_undocumented:
        pytest.xfail(reason="Classifier has too many undocumented attributes.")

    # Public instance attributes with a trailing underscore are treated as
    # fitted attributes.
    fitted = [
        key for key in vars(est)
        if key.endswith('_') and not key.startswith('_')
    ]
    documented_names = [attr.name for attr in documented]
    undocumented_attrs = set(fitted).difference(documented_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs, \
        "Undocumented attributes: {}".format(undocumented_attrs)
def test_fit_docstring_attributes(name, Estimator):
    """Check that fitted and documented attributes of ``Estimator`` agree.

    An instance is constructed (with dedicated helpers for search-CV,
    composition and sparse-coder estimators), tuned so that it fits on a tiny
    dataset, fitted, and then checked against the "Attributes" section of the
    class docstring in both directions.
    """
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    est_name = Estimator.__name__
    attributes = docscrape.ClassDoc(Estimator)["Attributes"]

    # Pick the construction helper matching the estimator family.
    search_cv_names = (
        "HalvingRandomSearchCV",
        "RandomizedSearchCV",
        "HalvingGridSearchCV",
        "GridSearchCV",
    )
    compose_names = ("ColumnTransformer", "Pipeline", "FeatureUnion")
    if est_name in search_cv_names:
        est = _construct_searchcv_instance(Estimator)
    elif est_name in compose_names:
        est = _construct_compose_pipeline_instance(Estimator)
    elif est_name == "SparseCoder":
        est = _construct_sparse_coder(Estimator)
    else:
        est = _construct_instance(Estimator)

    # Parameters whose defaults do not work with the data used below.
    if est_name == "SelectKBest":
        est.set_params(k=2)
    elif est_name == "DummyClassifier":
        est.set_params(strategy="stratified")
    elif est_name == "CCA" or est_name.startswith("PLS"):
        # default = 2 is invalid for single target
        est.set_params(n_components=1)
    elif est_name in ("GaussianRandomProjection", "SparseRandomProjection"):
        # default="auto" raises an error with the shape of `X`
        est.set_params(n_components=2)

    # Explicit parameter values that avoid a FutureWarning; every entry is
    # temporary and tagged with the release in which it can be dropped.
    future_warning_overrides = (
        # FIXME: TO BE REMOVED in 1.4 (avoid FutureWarning)
        (
            (
                "OrthogonalMatchingPursuit",
                "OrthogonalMatchingPursuitCV",
                "Lars",
                "LarsCV",
                "LassoLars",
                "LassoLarsCV",
                "LassoLarsIC",
            ),
            {"normalize": False},
        ),
        # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
        (
            ("TSNE",),
            {"learning_rate": 200.0, "init": "random", "perplexity": 2},
        ),
        # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
        (("SequentialFeatureSelector",), {"n_features_to_select": "auto"}),
        # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
        (("FastICA",), {"whiten": "unit-variance"}),
        # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
        (("MiniBatchDictionaryLearning",), {"batch_size": 5}),
        # TODO(1.4): TO BE REMOVED for 1.4 (avoid FutureWarning)
        (("KMeans", "MiniBatchKMeans"), {"n_init": "auto"}),
    )
    for estimator_names, params in future_warning_overrides:
        if est_name in estimator_names:
            est.set_params(**params)

    # In case we want to deprecate some attributes in the future.
    # NOTE: this is an empty dict literal; it is only used for membership
    # tests and as the argument of ``set.difference`` below.
    skipped_attributes = {}

    if est_name.endswith("Vectorizer"):
        # Vectorizers require some specific input data and no target.
        y = None
        if est_name == "DictVectorizer":
            X = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
        elif est_name in (
            "CountVectorizer",
            "HashingVectorizer",
            "TfidfVectorizer",
        ):
            X = [
                "This is the first document.",
                "This document is the second document.",
                "And this is the third one.",
                "Is this the first document?",
            ]
    else:
        X, y = make_classification(
            n_samples=20,
            n_features=3,
            n_redundant=0,
            n_classes=2,
            random_state=2,
        )
        y = _enforce_estimator_tags_y(est, y)
        X = _enforce_estimator_tags_x(est, X)

    # Label-only estimators are fitted on the targets instead of X.
    if "1dlabels" in est._get_tags()["X_types"]:
        est.fit(y)
    elif "2dlabels" in est._get_tags()["X_types"]:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        # Attributes whose description contains the word "only" are present
        # only when some parameter is provided, so they are not required.
        if "only " in " ".join(attr.desc).lower():
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fitted = _get_all_fitted_attributes(est)
    documented_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fitted).difference(documented_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
def test_docstring_parameters():
    """Check the parameter docstrings of every public class and function.

    Iterates over ``PUBLIC_MODULES`` (minus private and ignored modules), runs
    ``check_docstring_parameters`` on each locally defined, non-private,
    non-deprecated class ``__init__``, documented method and function, and
    reports all problems at once.

    Raises
    ------
    SkipTest
        If numpydoc cannot be imported or the interpreter is older than 3.5.
    RuntimeError
        If parsing a class docstring with numpydoc emits a warning.
    AssertionError
        If any docstring problem was found; the message lists the distinct
        problems in sorted order.
    """
    skip_reason = ("numpydoc is required to test the docstrings, "
                   "as well as python version >= 3.5")
    # Explicit comparison rather than ``assert``: assert statements are
    # removed under ``python -O``, which would silently disable this gate.
    if sys.version_info < (3, 5):
        raise SkipTest(skip_reason)
    try:
        import numpydoc  # noqa
    except ImportError:
        raise SkipTest(skip_reason)

    from numpydoc import docscrape

    incorrect = []
    for name in PUBLIC_MODULES:
        if name.startswith('_') or name.split(".")[1] in IGNORED_MODULES:
            continue
        with warnings.catch_warnings(record=True):
            module = importlib.import_module(name)

        # Exclude imported classes
        classes = [
            (cname, cls)
            for cname, cls in inspect.getmembers(module, inspect.isclass)
            if cls.__module__ == name
        ]
        for cname, cls in classes:
            if cname in _DOCSTRING_IGNORES or cname.startswith('_'):
                continue
            if isabstract(cls):
                continue

            # Any warning emitted while parsing the class docstring is fatal.
            with warnings.catch_warnings(record=True) as w:
                cdoc = docscrape.ClassDoc(cls)
            if w:
                raise RuntimeError('Error for __init__ of %s in %s:\n%s'
                                   % (cls, name, w[0]))

            cls_init = getattr(cls, '__init__', None)
            if _is_deprecated(cls_init):
                continue

            this_incorrect = []
            if cls_init is not None:
                this_incorrect += check_docstring_parameters(
                    cls.__init__, cdoc, class_name=cname)

            for method_name in cdoc.methods:
                method = getattr(cls, method_name)
                if _is_deprecated(method):
                    continue
                param_ignore = None
                # Now skip docstring test for y when y is None
                # by default for API reason
                if method_name in _METHODS_IGNORE_NONE_Y:
                    sig = signature(method)
                    if ('y' in sig.parameters and
                            sig.parameters['y'].default is None):
                        param_ignore = ['y']  # ignore y for fit and score
                this_incorrect += check_docstring_parameters(
                    method, ignore=param_ignore, class_name=cname)

            incorrect += this_incorrect

        # Exclude imported functions
        functions = [
            (fname, func)
            for fname, func in inspect.getmembers(module, inspect.isfunction)
            if func.__module__ == name
        ]
        for fname, func in functions:
            # Don't test private methods / functions
            if fname.startswith('_'):
                continue
            if fname == "configuration" and name.endswith("setup"):
                continue
            name_ = _get_func_name(func)
            if (not any(d in name_ for d in _DOCSTRING_IGNORES) and
                    not _is_deprecated(func)):
                incorrect += check_docstring_parameters(func)

    if incorrect:
        # De-duplicate and sort so the report is stable between runs.
        msg = '\n' + '\n'.join(sorted(set(incorrect)))
        raise AssertionError("Docstring Error: " + msg)
def test_fit_docstring_attributes(name, Estimator):
    """Check that fitted and documented attributes of ``Estimator`` agree.

    The estimator is constructed, adjusted so that it fits on a tiny
    classification dataset, fitted, and then compared in both directions
    against the "Attributes" section of its class docstring.
    """
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    est_name = Estimator.__name__
    documented = docscrape.ClassDoc(Estimator)['Attributes']

    # Estimators that cannot be fitted easily with this generic recipe.
    not_fittable = {
        'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
        'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
        'MultiOutputClassifier', 'MultiOutputRegressor',
        'NoSampleWeightWrapper', 'OneVsOneClassifier',
        'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
        'RegressorChain', 'SelectFromModel', 'SparseCoder',
        'SparseRandomProjection', 'SpectralBiclustering',
        'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
        'VotingClassifier', 'VotingRegressor', 'SequentialFeatureSelector',
        'HalvingGridSearchCV', 'HalvingRandomSearchCV',
    }
    if est_name.startswith('_') or est_name in not_fittable:
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    if est_name in ("RandomizedSearchCV", "GridSearchCV"):
        est = _construct_searchcv_instance(Estimator)
    else:
        est = _construct_instance(Estimator)

    # Per-estimator attribute overrides applied before fitting.
    attribute_overrides = {
        'SelectKBest': ('k', 2),
        'DummyClassifier': ('strategy', "stratified"),
        # FIXME: TO BE REMOVED for 1.0 (avoid FutureWarning)
        'AffinityPropagation': ('random_state', 63),
        # FIXME: TO BE REMOVED for 1.1 (avoid FutureWarning)
        'NMF': ('init', 'nndsvda'),
    }
    if est_name in attribute_overrides:
        setattr(est, *attribute_overrides[est_name])

    if 'PLS' in est_name or 'CCA' in est_name:
        est.n_components = 1  # default = 2 is invalid for single target.

    X, y = make_classification(
        n_samples=20,
        n_features=3,
        n_redundant=0,
        n_classes=2,
        random_state=2,
    )
    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # Label-only estimators are fitted on the targets instead of X.
    if '1dlabels' in est._get_tags()['X_types']:
        est.fit(y)
    elif '2dlabels' in est._get_tags()['X_types']:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    skipped_attributes = {
        'x_scores_',  # For PLS, TODO remove in 1.1
        'y_scores_',  # For PLS, TODO remove in 1.1
    }

    # The second component of the dotted module path selects whether
    # ``n_features_in_`` is exempt from the checks.
    submodule = est.__module__.split(".")[1]
    if submodule in N_FEATURES_MODULES_TO_IGNORE:
        skipped_attributes.add("n_features_in_")

    for attr in documented:
        if attr.name in skipped_attributes:
            continue
        # Attributes whose description contains the word "only" are present
        # only when some parameter is provided, so they are not required.
        if 'only ' in ' '.join(attr.desc).lower():
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    # Estimators expected to fail the "everything is documented" check.
    known_undocumented = {'Birch', 'LarsCV', 'Lasso',
                          'OrthogonalMatchingPursuit'}
    if est_name in known_undocumented:
        pytest.xfail(reason="Estimator has too many undocumented attributes.")

    # Public instance attributes with a trailing underscore are treated as
    # fitted attributes.
    fitted = [
        key for key in vars(est)
        if key.endswith('_') and not key.startswith('_')
    ]
    documented_names = [attr.name for attr in documented]
    undocumented_attrs = set(fitted).difference(documented_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs, \
        "Undocumented attributes: {}".format(undocumented_attrs)
def _parameter_descriptions(parsed_doc):
    """Map each name of the "Parameters" section to its joined description."""
    return {
        p.name.split(":")[0]: " ".join(p.desc).strip()
        for p in parsed_doc.get("Parameters")
    }


def _build_form_field(field_cls, name, description, default, ptype):
    """Create a ``field_cls`` labelled after ``name`` and tagged ``ptype``."""
    # "some_name" -> "Some Name"
    label = " ".join(name.split("_")).title()
    # Copy the shared defaults so each field owns its render options.
    render_kw = dict(DEFAULT_RENDER_KW)
    render_kw["data-ptype"] = ptype
    return field_cls(
        _(label),
        description=_(description),
        default=default,
        validators=list(DEFAULT_VALIDATORS),
        render_kw=render_kw,
    )


def make_InfectionCurveForm():
    """Build the ``InfectionCurveForm`` class from ``InfectionCurve``.

    The form gets:

    * a ``model`` select field with one choice per callable ``do_*``
      attribute found in ``vars(InfectionCurve)``;
    * one field per parameter of those methods (the first parameter of each
      method is skipped, and a name that is already a form field is not
      redefined), tagged ``data-ptype="model-param"``;
    * one field per attrs field of ``InfectionCurve``, tagged
      ``data-ptype="curve-param"``.

    Descriptions come from the "Parameters" sections of the method and class
    docstrings; field types are looked up in ``PYTYPE_TO_WTF`` and fall back
    to ``wtf.StringField``.

    Returns
    -------
    type
        A new ``fwtf.FlaskForm`` subclass named ``"InfectionCurveForm"``.
    """
    # every field of the form goes here
    form_fields = {}

    # collect the model choices and the methods that implement them
    models, methods = [], []
    for mname, method in vars(InfectionCurve).items():
        if mname.startswith("do_") and callable(method):
            label = _(mname.split("_", 1)[-1])
            models.append((mname, label))
            methods.append(method)

    # add the model select to the form; guard the default so a class without
    # any ``do_*`` method does not raise IndexError here
    form_fields["model"] = wtf.SelectField(
        _("Model"),
        choices=models,
        description=_("Compartimental model"),
        render_kw={"class": "custom-select custom-select-sm"},
        default=models[0][0] if models else None,
    )

    # now add the parameters of every method
    for method in methods:
        docs = _parameter_descriptions(docscrape.FunctionDoc(method))

        params = inspect.signature(method).parameters
        for idx, param in enumerate(params.values()):
            # idx 0 is the first positional parameter of the method
            if idx == 0 or param.name in form_fields:
                continue

            if param.default is inspect.Parameter.empty:
                # No default in the signature: ``Parameter.empty`` is a
                # class, so it must not be handed over as a field default.
                default, field_cls = None, wtf.StringField
            else:
                default = param.default
                # the field type is derived from the type of the default
                field_cls = PYTYPE_TO_WTF.get(type(default), wtf.StringField)

            form_fields[param.name] = _build_form_field(
                field_cls,
                param.name,
                docs.get(param.name),
                default,
                "model-param",
            )

    # extract the descriptions from the class documentation
    docs = _parameter_descriptions(docscrape.ClassDoc(InfectionCurve))

    # create one field per attrs attribute
    for aname, afield in attr.fields_dict(InfectionCurve).items():
        # ``attr.NOTHING`` marks "no default"; expose that as an empty field
        default = None if afield.default is attr.NOTHING else afield.default
        field_cls = PYTYPE_TO_WTF.get(afield.type, wtf.StringField)
        form_fields[aname] = _build_form_field(
            field_cls,
            aname,
            docs.get(aname),
            default,
            "curve-param",
        )

    # create the form class itself
    return type("InfectionCurveForm", (fwtf.FlaskForm, ), form_fields)