def test_docstring_parameters():
    """Check the docstrings of public classes, methods and functions.

    Every module named in ``PUBLIC_MODULES`` (except ``sklearn.utils.fixes``)
    is imported, and each class or function whose ``__module__`` is that
    module is handed to ``check_docstring_parameters``. The reported
    problems are gathered and raised together at the end.

    Raises
    ------
    RuntimeError
        If building the numpydoc ``ClassDoc`` of a class emits a warning.
    AssertionError
        If at least one docstring problem was reported.
    """
    # Skip test if numpydoc is not found
    pytest.importorskip('numpydoc',
                        reason="numpydoc is required to test the docstrings")

    # XXX unreached code as of v0.22
    from numpydoc import docscrape

    problems = []
    for name in PUBLIC_MODULES:
        # We cannot always control these docstrings
        if name == 'sklearn.utils.fixes':
            continue

        # Warnings emitted while importing are recorded and then dropped.
        with warnings.catch_warnings(record=True):
            module = importlib.import_module(name)

        # Keep only the classes defined in this module, not imported ones.
        own_classes = [
            member for member in inspect.getmembers(module, inspect.isclass)
            if member[1].__module__ == name
        ]
        for cname, cls in own_classes:
            if (cname in _DOCSTRING_IGNORES or cname.startswith('_')
                    or inspect.isabstract(cls)):
                continue

            with warnings.catch_warnings(record=True) as caught:
                cdoc = docscrape.ClassDoc(cls)
            if caught:
                raise RuntimeError('Error for __init__ of %s in %s:\n%s' %
                                   (cls, name, caught[0]))

            cls_init = getattr(cls, '__init__', None)
            if _is_deprecated(cls_init):
                # A deprecated constructor skips the whole class.
                continue
            if cls_init is not None:
                problems.extend(
                    check_docstring_parameters(cls.__init__, cdoc))

            for method_name in cdoc.methods:
                method = getattr(cls, method_name)
                if _is_deprecated(method):
                    continue
                # Now skip docstring test for y when y is None
                # by default for API reason
                ignored = None
                if method_name in _METHODS_IGNORE_NONE_Y:
                    y_param = signature(method).parameters.get('y')
                    if y_param is not None and y_param.default is None:
                        ignored = ['y']  # ignore y for fit and score
                problems.extend(
                    check_docstring_parameters(method, ignore=ignored))

        # Keep only the functions defined in this module, not imported ones.
        own_functions = [
            member for member in inspect.getmembers(module, inspect.isfunction)
            if member[1].__module__ == name
        ]
        for fname, func in own_functions:
            # Don't test private methods / functions
            if fname.startswith('_'):
                continue
            if fname == "configuration" and name.endswith("setup"):
                continue
            name_ = _get_func_name(func)
            if (any(d in name_ for d in _DOCSTRING_IGNORES)
                    or _is_deprecated(func)):
                continue
            problems.extend(check_docstring_parameters(func))

    if problems:
        raise AssertionError("Docstring Error:\n" + '\n'.join(problems))
def test_fit_docstring_attributes(name, Estimator):
    """Compare the documented attributes of an estimator with the fitted ones.

    The estimator is instantiated with its defaults (plus a few per-class
    overrides), fitted on a small ``make_classification`` dataset, and then
    checked in both directions: documented attributes must exist on the
    fitted estimator, and public attributes ending in ``_`` must be
    documented.

    Parameters
    ----------
    name : object
        Not used in the body of this test.
    Estimator : type
        Estimator class; instantiated without arguments.
    """
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    attributes = docscrape.ClassDoc(Estimator)['Attributes']
    est_name = Estimator.__name__

    not_easily_fitted = {
        'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
        'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
        'GridSearchCV', 'MultiOutputClassifier', 'MultiOutputRegressor',
        'NoSampleWeightWrapper', 'OneVsOneClassifier', 'OneVsRestClassifier',
        'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
        'RandomizedSearchCV', 'RegressorChain', 'SelectFromModel',
        'SparseCoder', 'SparseRandomProjection', 'SpectralBiclustering',
        'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
        'VotingClassifier', 'VotingRegressor'
    }
    if est_name in not_easily_fitted or est_name.startswith('_'):
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    est = Estimator()

    # Per-estimator attribute overrides applied before fitting.
    overrides = {
        'SelectKBest': ('k', 2),
        'DummyClassifier': ('strategy', "stratified"),
        # TO BE REMOVED for v0.25 (avoid FutureWarning)
        'AffinityPropagation': ('random_state', 63),
    }
    if est_name in overrides:
        param, value = overrides[est_name]
        setattr(est, param, value)

    X, y = make_classification(n_samples=20,
                               n_features=3,
                               n_redundant=0,
                               n_classes=2,
                               random_state=2)

    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # The 'X_types' tag decides what is passed to fit.
    if '1dlabels' in est._get_tags()['X_types']:
        fit_args = (y,)
    elif '2dlabels' in est._get_tags()['X_types']:
        fit_args = (np.c_[y, y],)
    else:
        fit_args = (X, y)
    est.fit(*fit_args)

    skipped = {'n_features_in_'}

    for documented in attributes:
        if documented.name in skipped:
            continue
        description = ' '.join(documented.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' in description:
            continue
        assert hasattr(est, documented.name)

    too_many_undocumented = {
        'BayesianRidge', 'Birch', 'CCA', 'CategoricalNB', 'ElasticNet',
        'ElasticNetCV', 'GaussianProcessClassifier',
        'GradientBoostingRegressor', 'HistGradientBoostingClassifier',
        'HistGradientBoostingRegressor', 'IsolationForest',
        'KNeighborsClassifier', 'KNeighborsRegressor', 'KNeighborsTransformer',
        'KernelCenterer', 'KernelDensity', 'LarsCV', 'Lasso', 'LassoLarsCV',
        'LassoLarsIC', 'LatentDirichletAllocation', 'LocalOutlierFactor',
        'MDS', 'MiniBatchKMeans', 'MLPClassifier', 'MLPRegressor',
        'MultiTaskElasticNet', 'MultiTaskElasticNetCV', 'MultiTaskLasso',
        'MultiTaskLassoCV', 'NearestNeighbors', 'NuSVR', 'OneClassSVM',
        'OrthogonalMatchingPursuit', 'PLSCanonical', 'PLSRegression', 'PLSSVD',
        'PassiveAggressiveClassifier', 'Perceptron', 'RBFSampler',
        'RadiusNeighborsClassifier', 'RadiusNeighborsRegressor',
        'RadiusNeighborsTransformer', 'RandomTreesEmbedding', 'SVR',
        'SkewedChi2Sampler'
    }
    if est_name in too_many_undocumented:
        pytest.xfail(reason="Classifier has too many undocumented attributes.")

    # Public instance attributes with a trailing underscore.
    fitted_names = {
        key for key in vars(est)
        if key.endswith('_') and not key.startswith('_')
    }
    documented_names = {documented.name for documented in attributes}
    undocumented_attrs = fitted_names - documented_names - skipped
    assert not undocumented_attrs,\
        "Undocumented attributes: {}".format(undocumented_attrs)
def test_fit_docstring_attributes(name, Estimator):
    """Compare the documented attributes of an estimator with the fitted ones.

    An instance is built through one of the ``_construct_*`` helpers, a few
    parameters are overridden per estimator, and the instance is fitted on
    either hand-written vectorizer input or a small ``make_classification``
    dataset. Two checks follow: documented attributes must exist on the
    fitted estimator, and every name returned by
    ``_get_all_fitted_attributes`` must be documented.

    Parameters
    ----------
    name : object
        Not used in the body of this test.
    Estimator : type
        Estimator class under test.

    Raises
    ------
    ValueError
        If the class name ends with "Vectorizer" but no sample input is
        defined for it here.
    AssertionError
        If a documented attribute is missing after fit, or a fitted
        attribute is undocumented.
    """
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]
    est_name = Estimator.__name__

    if est_name in (
            "HalvingRandomSearchCV",
            "RandomizedSearchCV",
            "HalvingGridSearchCV",
            "GridSearchCV",
    ):
        est = _construct_searchcv_instance(Estimator)
    elif est_name in (
            "ColumnTransformer",
            "Pipeline",
            "FeatureUnion",
    ):
        est = _construct_compose_pipeline_instance(Estimator)
    elif est_name == "SparseCoder":
        est = _construct_sparse_coder(Estimator)
    else:
        est = _construct_instance(Estimator)

    if est_name == "SelectKBest":
        est.set_params(k=2)
    elif est_name == "DummyClassifier":
        est.set_params(strategy="stratified")
    elif est_name == "CCA" or est_name.startswith("PLS"):
        # default = 2 is invalid for single target
        est.set_params(n_components=1)
    elif est_name in (
            "GaussianRandomProjection",
            "SparseRandomProjection",
    ):
        # default="auto" raises an error with the shape of `X`
        est.set_params(n_components=2)

    # FIXME: TO BE REMOVED in 1.4 (avoid FutureWarning)
    if est_name in (
            "OrthogonalMatchingPursuit",
            "OrthogonalMatchingPursuitCV",
            "Lars",
            "LarsCV",
            "LassoLars",
            "LassoLarsCV",
            "LassoLarsIC",
    ):
        est.set_params(normalize=False)

    # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
    if est_name == "TSNE":
        est.set_params(learning_rate=200.0, init="random", perplexity=2)

    # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
    if est_name == "SequentialFeatureSelector":
        est.set_params(n_features_to_select="auto")

    # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
    if est_name == "FastICA":
        est.set_params(whiten="unit-variance")

    # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
    if est_name == "MiniBatchDictionaryLearning":
        est.set_params(batch_size=5)

    # TODO(1.4): TO BE REMOVED for 1.4 (avoid FutureWarning)
    if est_name in ("KMeans", "MiniBatchKMeans"):
        est.set_params(n_init="auto")

    # In case we want to deprecate some attributes in the future.
    # An empty set (not ``{}``, which is a dict) so that names can later be
    # added with ``.add`` and the set operations below stay type-consistent.
    skipped_attributes = set()

    if est_name.endswith("Vectorizer"):
        # Vectorizer require some specific input data
        if est_name in (
                "CountVectorizer",
                "HashingVectorizer",
                "TfidfVectorizer",
        ):
            X = [
                "This is the first document.",
                "This document is the second document.",
                "And this is the third one.",
                "Is this the first document?",
            ]
        elif est_name == "DictVectorizer":
            X = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
        else:
            # Fail with an explicit message instead of leaving X unbound.
            raise ValueError(
                f"No sample input is defined for vectorizer {est_name!r}"
            )
        y = None
    else:
        X, y = make_classification(
            n_samples=20,
            n_features=3,
            n_redundant=0,
            n_classes=2,
            random_state=2,
        )

        y = _enforce_estimator_tags_y(est, y)
        X = _enforce_estimator_tags_x(est, X)

    # The "X_types" tag decides what is passed to fit.
    if "1dlabels" in est._get_tags()["X_types"]:
        est.fit(y)
    elif "2dlabels" in est._get_tags()["X_types"]:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = {attr.name for attr in attributes}
    undocumented_attrs = (
        set(fit_attr).difference(fit_attr_names).difference(skipped_attributes)
    )
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {est_name}: {undocumented_attrs}"
        )
# Example #4
# 0
def test_docstring_parameters():
    """Check the docstrings of public classes, methods and functions.

    Every module named in ``PUBLIC_MODULES`` is imported unless its name
    starts with ``_`` or its second dotted component is listed in
    ``IGNORED_MODULES``. Each class or function whose ``__module__`` is that
    module is handed to ``check_docstring_parameters``. The reported
    problems are de-duplicated, sorted and raised together at the end.

    Raises
    ------
    SkipTest
        If numpydoc cannot be imported or Python is older than 3.5.
    RuntimeError
        If building the numpydoc ``ClassDoc`` of a class emits a warning.
    AssertionError
        If at least one docstring problem was reported.
    """
    # Skip test if numpydoc is not found or if python version is < 3.5.
    # The version is compared explicitly rather than with ``assert``, which
    # is stripped when Python runs with -O.
    skip_reason = ("numpydoc is required to test the docstrings, "
                   "as well as python version >= 3.5")
    if sys.version_info < (3, 5):
        raise SkipTest(skip_reason)
    try:
        import numpydoc  # noqa
    except ImportError:
        raise SkipTest(skip_reason)

    from numpydoc import docscrape

    incorrect = []
    for name in PUBLIC_MODULES:
        if name.startswith('_'):
            continue
        # A name without a dot has no second component to look up, so it
        # cannot match IGNORED_MODULES (indexing [1] would raise IndexError).
        name_parts = name.split(".")
        if len(name_parts) > 1 and name_parts[1] in IGNORED_MODULES:
            continue
        # Warnings emitted while importing are recorded and then dropped.
        with warnings.catch_warnings(record=True):
            module = importlib.import_module(name)
        classes = inspect.getmembers(module, inspect.isclass)
        # Exclude imported classes
        classes = [cls for cls in classes if cls[1].__module__ == name]
        for cname, cls in classes:
            if cname in _DOCSTRING_IGNORES or cname.startswith('_'):
                continue
            if isabstract(cls):
                continue
            with warnings.catch_warnings(record=True) as w:
                cdoc = docscrape.ClassDoc(cls)
            if w:
                raise RuntimeError('Error for __init__ of %s in %s:\n%s' %
                                   (cls, name, w[0]))

            cls_init = getattr(cls, '__init__', None)

            if _is_deprecated(cls_init):
                # A deprecated constructor skips the whole class.
                continue

            this_incorrect = []
            if cls_init is not None:
                this_incorrect += check_docstring_parameters(cls.__init__,
                                                             cdoc,
                                                             class_name=cname)
            for method_name in cdoc.methods:
                method = getattr(cls, method_name)
                if _is_deprecated(method):
                    continue
                param_ignore = None
                # Now skip docstring test for y when y is None
                # by default for API reason
                if method_name in _METHODS_IGNORE_NONE_Y:
                    sig = signature(method)
                    if ('y' in sig.parameters
                            and sig.parameters['y'].default is None):
                        param_ignore = ['y']  # ignore y for fit and score
                result = check_docstring_parameters(method,
                                                    ignore=param_ignore,
                                                    class_name=cname)
                this_incorrect += result

            incorrect += this_incorrect

        functions = inspect.getmembers(module, inspect.isfunction)
        # Exclude imported functions
        functions = [fn for fn in functions if fn[1].__module__ == name]
        for fname, func in functions:
            # Don't test private methods / functions
            if fname.startswith('_'):
                continue
            if fname == "configuration" and name.endswith("setup"):
                continue
            name_ = _get_func_name(func)
            if (not any(d in name_ for d in _DOCSTRING_IGNORES)
                    and not _is_deprecated(func)):
                incorrect += check_docstring_parameters(func)

    if incorrect:
        # Report each distinct problem once, in a stable order.
        msg = '\n' + '\n'.join(sorted(set(incorrect)))
        raise AssertionError("Docstring Error: " + msg)
# Example #5
# 0
def test_fit_docstring_attributes(name, Estimator):
    """Compare the documented attributes of an estimator with the fitted ones.

    The estimator is built with ``_construct_searchcv_instance`` or
    ``_construct_instance``, given a few per-class attribute overrides, and
    fitted on a small ``make_classification`` dataset. Documented attributes
    must then exist on the fitted estimator, and public attributes ending in
    ``_`` must be documented.

    Parameters
    ----------
    name : object
        Not used in the body of this test.
    Estimator : type
        Estimator class under test.
    """
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    attributes = docscrape.ClassDoc(Estimator)['Attributes']
    est_name = Estimator.__name__

    not_easily_fitted = {
        'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
        'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
        'MultiOutputClassifier', 'MultiOutputRegressor',
        'NoSampleWeightWrapper', 'OneVsOneClassifier', 'OutputCodeClassifier',
        'Pipeline', 'RFE', 'RFECV', 'RegressorChain', 'SelectFromModel',
        'SparseCoder', 'SparseRandomProjection', 'SpectralBiclustering',
        'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
        'VotingClassifier', 'VotingRegressor', 'SequentialFeatureSelector',
        'HalvingGridSearchCV', 'HalvingRandomSearchCV'
    }
    if est_name in not_easily_fitted or est_name.startswith('_'):
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    build = (_construct_searchcv_instance
             if est_name in ("RandomizedSearchCV", "GridSearchCV")
             else _construct_instance)
    est = build(Estimator)

    # Per-estimator attribute overrides applied before fitting.
    overrides = {
        'SelectKBest': ('k', 2),
        'DummyClassifier': ('strategy', "stratified"),
        # FIXME: TO BE REMOVED for 1.0 (avoid FutureWarning)
        'AffinityPropagation': ('random_state', 63),
        # FIXME: TO BE REMOVED for 1.1 (avoid FutureWarning)
        'NMF': ('init', 'nndsvda'),
    }
    if est_name in overrides:
        param, value = overrides[est_name]
        setattr(est, param, value)

    if 'PLS' in est_name or 'CCA' in est_name:
        est.n_components = 1  # default = 2 is invalid for single target.

    X, y = make_classification(n_samples=20,
                               n_features=3,
                               n_redundant=0,
                               n_classes=2,
                               random_state=2)

    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # The 'X_types' tag decides what is passed to fit.
    if '1dlabels' in est._get_tags()['X_types']:
        fit_args = (y,)
    elif '2dlabels' in est._get_tags()['X_types']:
        fit_args = (np.c_[y, y],)
    else:
        fit_args = (X, y)
    est.fit(*fit_args)

    skipped = {
        'x_scores_',  # For PLS, TODO remove in 1.1
        'y_scores_',  # For PLS, TODO remove in 1.1
    }

    # Second dotted component of the estimator's module path.
    subpackage = est.__module__.split(".")[1]
    if subpackage in N_FEATURES_MODULES_TO_IGNORE:
        skipped.add("n_features_in_")

    for documented in attributes:
        if documented.name in skipped:
            continue
        description = ' '.join(documented.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' in description:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, documented.name)

    too_many_undocumented = {
        'Birch', 'LarsCV', 'Lasso', 'OrthogonalMatchingPursuit'
    }
    if est_name in too_many_undocumented:
        pytest.xfail(reason="Estimator has too many undocumented attributes.")

    # Public instance attributes with a trailing underscore.
    fitted_names = {
        key for key in vars(est)
        if key.endswith('_') and not key.startswith('_')
    }
    documented_names = {documented.name for documented in attributes}
    undocumented_attrs = fitted_names - documented_names - skipped
    assert not undocumented_attrs,\
        "Undocumented attributes: {}".format(undocumented_attrs)
# Example #6
# 0
def _docs_by_param_name(numpydoc_obj):
    """Map each entry of the "Parameters" section to its description.

    The key is the documented name up to the first ":"; the value is the
    description lines joined with single spaces and stripped.
    """
    return {
        p.name.split(":")[0]: " ".join(p.desc).strip()
        for p in numpydoc_obj.get("Parameters")
    }


def _make_curve_form_field(name, field_type, default, description, ptype):
    """Instantiate one form field with the shared validators and render options.

    ``ptype`` is stored under the "data-ptype" key of the field's
    ``render_kw``.
    """
    # "some_name" -> "Some Name"
    label = " ".join(name.split("_")).title()

    # Copy the shared defaults so no field mutates the module-level objects.
    render_kw = dict(DEFAULT_RENDER_KW)
    render_kw["data-ptype"] = ptype

    # NOTE(review): description is None when the docstring has no entry for
    # this name; it is still passed through ``_`` as before -- confirm that
    # the translation function accepts None.
    return field_type(
        _(label),
        description=_(description),
        default=default,
        validators=list(DEFAULT_VALIDATORS),
        render_kw=render_kw,
    )


def make_InfectionCurveForm():
    """Build the ``InfectionCurveForm`` class from ``InfectionCurve``.

    The form gets, in creation order:

    * a "model" select field with one choice per callable ``do_*`` member
      of ``InfectionCurve``;
    * one field per parameter of those methods (the first parameter of each
      method is skipped, as is any name that already has a field);
    * one field per attrs attribute of ``InfectionCurve``.

    Descriptions come from the numpydoc "Parameters" sections of the method
    and class docstrings. A parameter or attribute without a default gets
    ``None`` as its form default instead of the ``inspect`` / ``attr``
    "no value" sentinel.

    Returns
    -------
    type
        A new ``fwtf.FlaskForm`` subclass named "InfectionCurveForm".

    Raises
    ------
    IndexError
        If ``InfectionCurve`` has no callable ``do_*`` member, since the
        first model is used as the select default.
    """
    # all the form fields, keyed by field name
    form_fields = {}

    # collect the model choices and the methods implementing them
    models, methods = [], []
    for mname, method in vars(InfectionCurve).items():
        if mname.startswith("do_") and callable(method):
            label = _(mname.split("_", 1)[-1])
            models.append((mname, label))
            methods.append(method)

    # add the model select to the form
    form_fields["model"] = wtf.SelectField(
        _("Model"),
        choices=models,
        description=_("Compartimental model"),
        render_kw={"class": "custom-select custom-select-sm"},
        default=models[0][0],
    )

    # one field per distinct parameter across all the model methods
    for method in methods:
        docs = _docs_by_param_name(docscrape.FunctionDoc(method))

        params = inspect.signature(method).parameters
        for idx, param in enumerate(params.values()):
            # idx 0 is the method's first parameter; names that already have
            # a field (including "model") keep their first definition
            if idx == 0 or param.name in form_fields:
                continue

            if param.default is inspect.Parameter.empty:
                # No default means no value to infer a type from. The
                # sentinel is a class, hence callable, and must not be
                # handed to the field as its default.
                default = None
                field_type = wtf.StringField
            else:
                default = param.default
                # the field type is chosen from the type of the default
                field_type = PYTYPE_TO_WTF.get(type(default), wtf.StringField)

            form_fields[param.name] = _make_curve_form_field(
                param.name,
                field_type,
                default,
                docs.get(param.name),
                "model-param",
            )

    # one field per attribute declared on the class; a field created above
    # under the same name is overwritten
    docs = _docs_by_param_name(docscrape.ClassDoc(InfectionCurve))
    for aname, afield in attr.fields_dict(InfectionCurve).items():
        # attrs marks "no default" with the NOTHING sentinel
        default = None if afield.default is attr.NOTHING else afield.default

        # the field type is chosen from the declared attribute type
        field_type = PYTYPE_TO_WTF.get(afield.type, wtf.StringField)

        form_fields[aname] = _make_curve_form_field(
            aname,
            field_type,
            default,
            docs.get(aname),
            "curve-param",
        )

    # create the form class itself
    return type("InfectionCurveForm", (fwtf.FlaskForm, ), form_fields)