def test_meta_estimators_delegate_data_validation(estimator):
    """Meta-estimators must delegate data validation to their inner
    estimator(s): a non-tabular array-like must pass through untouched."""
    rng = np.random.RandomState(0)
    set_random_state(estimator)

    n_samples = 30
    categories = np.array(["aa", "bb", "cc"], dtype=object)
    X = rng.choice(categories, size=n_samples)
    y = (
        rng.normal(size=n_samples)
        if is_regressor(estimator)
        else rng.randint(3, size=n_samples)
    )

    # Lists (rather than ndarrays) exercise the generic array-like path.
    X = _enforce_estimator_tags_x(estimator, X).tolist()
    y = _enforce_estimator_tags_y(estimator, y).tolist()

    # Fit must not raise a data-validation error: X is a valid input for the
    # first step of the pipeline passed as base estimator to the
    # meta-estimator, which is the component responsible for validation.
    estimator.fit(X, y)

    # Data is not tabular, so the meta-estimator must not record a feature
    # count.
    assert not hasattr(estimator, "n_features_in_")
def test_fit_docstring_attributes(name, Estimator):
    """Check that fitted attributes agree with the docstring's "Attributes"
    section: every documented attribute exists after fitting, and every
    public fitted attribute is documented."""
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    if Estimator.__name__ in _DOCSTRING_IGNORES:
        return

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]

    if Estimator.__name__ == "Pipeline":
        est = _construct_compose_pipeline_instance(Estimator)
    else:
        est = _construct_instance(Estimator)

    X, y = make_classification(
        n_samples=20,
        n_features=3,
        n_redundant=0,
        n_classes=2,
        random_state=2,
    )
    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    if "oob_score" in est.get_params():
        est.set_params(oob_score=True)

    # Samplers expose `fit_resample` instead of plain `fit`.
    if is_sampler(est):
        est.fit_resample(X, y)
    else:
        est.fit(X, y)

    # Idiomatic empty set (was the non-idiomatic `set([])`).
    skipped_attributes = set()

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    # `set.difference` already returns a set; no need to re-wrap in `set()`.
    undocumented_attrs = undocumented_attrs.difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
def test_fit_docstring_attributes(name, Estimator):
    """Check that fitted attributes agree with the docstring's "Attributes"
    section: every documented attribute exists after fitting, and every
    public fitted attribute is documented."""
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]

    # Some estimators need a dedicated construction helper.
    if Estimator.__name__ in (
        "HalvingRandomSearchCV",
        "RandomizedSearchCV",
        "HalvingGridSearchCV",
        "GridSearchCV",
    ):
        est = _construct_searchcv_instance(Estimator)
    elif Estimator.__name__ in (
        "ColumnTransformer",
        "Pipeline",
        "FeatureUnion",
    ):
        est = _construct_compose_pipeline_instance(Estimator)
    elif Estimator.__name__ == "SparseCoder":
        est = _construct_sparse_coder(Estimator)
    else:
        est = _construct_instance(Estimator)

    # Parameter tweaks so the generic data below is a valid input.
    if Estimator.__name__ == "SelectKBest":
        est.set_params(k=2)
    elif Estimator.__name__ == "DummyClassifier":
        est.set_params(strategy="stratified")
    elif Estimator.__name__ == "CCA" or Estimator.__name__.startswith("PLS"):
        # default = 2 is invalid for single target
        est.set_params(n_components=1)
    elif Estimator.__name__ in (
        "GaussianRandomProjection",
        "SparseRandomProjection",
    ):
        # default="auto" raises an error with the shape of `X`
        est.set_params(n_components=2)

    # FIXME: TO BE REMOVED in 1.4 (avoid FutureWarning)
    if Estimator.__name__ in (
        "OrthogonalMatchingPursuit",
        "OrthogonalMatchingPursuitCV",
        "Lars",
        "LarsCV",
        "LassoLars",
        "LassoLarsCV",
        "LassoLarsIC",
    ):
        est.set_params(normalize=False)

    # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
    if Estimator.__name__ == "TSNE":
        est.set_params(learning_rate=200.0, init="random")

    # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
    if Estimator.__name__ == "SequentialFeatureSelector":
        est.set_params(n_features_to_select="auto")

    # For PLS, TODO remove in 1.1
    skipped_attributes = {"x_scores_", "y_scores_"}

    # FIXME: TO BE REMOVED for 1.3 (avoid FutureWarning)
    if Estimator.__name__ == "FastICA":
        est.set_params(whiten="unit-variance")

    if Estimator.__name__.endswith("Vectorizer"):
        # Vectorizer require some specific input data
        if Estimator.__name__ in (
            "CountVectorizer",
            "HashingVectorizer",
            "TfidfVectorizer",
        ):
            # NOTE: the first literal was broken across a line in the
            # original source; reconstructed as a single string (it matches
            # the identical corpus used by other variants of this test).
            X = [
                "This is the first document.",
                "This document is the second document.",
                "And this is the third one.",
                "Is this the first document?",
            ]
        elif Estimator.__name__ == "DictVectorizer":
            X = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
        y = None
    else:
        X, y = make_classification(
            n_samples=20,
            n_features=3,
            n_redundant=0,
            n_classes=2,
            random_state=2,
        )
        y = _enforce_estimator_tags_y(est, y)
        X = _enforce_estimator_tags_x(est, X)

    # Label-only estimators are fit on y (1d or 2d) rather than (X, y).
    if "1dlabels" in est._get_tags()["X_types"]:
        est.fit(y)
    elif "2dlabels" in est._get_tags()["X_types"]:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
def test_fit_docstring_attributes(name, Estimator):
    # Check that every attribute documented in the class docstring's
    # "Attributes" section exists after fitting, and that every public
    # fitted attribute is documented.
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    # Parse the "Attributes" section of the estimator's class docstring.
    doc = docscrape.ClassDoc(Estimator)
    attributes = doc['Attributes']

    # Estimators that cannot be constructed/fit with the generic recipe
    # below (meta-estimators, text vectorizers, ...).
    IGNORED = {
        'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
        'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
        'GridSearchCV', 'MultiOutputClassifier', 'MultiOutputRegressor',
        'NoSampleWeightWrapper', 'OneVsOneClassifier', 'OneVsRestClassifier',
        'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
        'RandomizedSearchCV', 'RegressorChain', 'SelectFromModel',
        'SparseCoder', 'SparseRandomProjection', 'SpectralBiclustering',
        'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
        'VotingClassifier', 'VotingRegressor'
    }
    if Estimator.__name__ in IGNORED or Estimator.__name__.startswith('_'):
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    est = Estimator()

    # Parameter tweaks so the 20x3 classification data below is valid input.
    if Estimator.__name__ == 'SelectKBest':
        est.k = 2

    if Estimator.__name__ == 'DummyClassifier':
        est.strategy = "stratified"

    X, y = make_classification(n_samples=20, n_features=3,
                               n_redundant=0, n_classes=2,
                               random_state=2)

    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # Label-only estimators are fit on y (1d or 2d) rather than (X, y).
    if '1dlabels' in _safe_tags(est, 'X_types'):
        est.fit(y)
    elif '2dlabels' in _safe_tags(est, 'X_types'):
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    skipped_attributes = {'n_features_in_'}

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = ' '.join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' not in desc:
            assert hasattr(est, attr.name)

    # Estimators known to expose fitted attributes that are not documented
    # yet; expected failure rather than a hard error.
    IGNORED = {
        'BayesianRidge', 'Birch', 'CCA', 'CategoricalNB', 'ElasticNet',
        'ElasticNetCV', 'GaussianProcessClassifier',
        'GradientBoostingRegressor', 'HistGradientBoostingClassifier',
        'HistGradientBoostingRegressor', 'IsolationForest',
        'KNeighborsClassifier', 'KNeighborsRegressor',
        'KNeighborsTransformer', 'KernelCenterer', 'KernelDensity',
        'LarsCV', 'Lasso', 'LassoLarsCV', 'LassoLarsIC',
        'LatentDirichletAllocation', 'LocalOutlierFactor', 'MDS',
        'MiniBatchKMeans', 'MLPClassifier', 'MLPRegressor',
        'MultiTaskElasticNet', 'MultiTaskElasticNetCV', 'MultiTaskLasso',
        'MultiTaskLassoCV', 'NearestNeighbors', 'NuSVR', 'OAS',
        'OneClassSVM', 'OrthogonalMatchingPursuit', 'PLSCanonical',
        'PLSRegression', 'PLSSVD', 'PassiveAggressiveClassifier',
        'Perceptron', 'RBFSampler', 'RadiusNeighborsClassifier',
        'RadiusNeighborsRegressor', 'RadiusNeighborsTransformer',
        'RandomTreesEmbedding', 'SVR', 'SkewedChi2Sampler'
    }
    if Estimator.__name__ in IGNORED:
        pytest.xfail(reason="Classifier has too many undocumented attributes.")

    # Public fitted attributes follow the trailing-underscore convention.
    fit_attr = [
        k for k in est.__dict__.keys()
        if k.endswith('_') and not k.startswith('_')
    ]
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs,\
        "Undocumented attributes: {}".format(undocumented_attrs)
def test_fit_docstring_attributes(name, Estimator):
    """Every attribute listed in the docstring's ``Attributes`` section must
    exist after ``fit``, and every public fitted attribute must be listed."""
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    cls_name = Estimator.__name__
    documented = docscrape.ClassDoc(Estimator)['Attributes']

    # Build an instance; some estimators need a dedicated constructor helper.
    if cls_name in (
        "HalvingRandomSearchCV",
        "RandomizedSearchCV",
        "HalvingGridSearchCV",
        "GridSearchCV",
    ):
        estimator = _construct_searchcv_instance(Estimator)
    elif cls_name in ("ColumnTransformer", "Pipeline", "FeatureUnion"):
        estimator = _construct_compose_pipeline_instance(Estimator)
    elif cls_name == "SparseCoder":
        estimator = _construct_sparse_coder(Estimator)
    else:
        estimator = _construct_instance(Estimator)

    # Per-estimator parameter tweaks so the generic data below is valid.
    if cls_name == 'SelectKBest':
        estimator.set_params(k=2)
    elif cls_name == 'DummyClassifier':
        estimator.set_params(strategy="stratified")
    elif cls_name == 'CCA' or cls_name.startswith('PLS'):
        # default = 2 is invalid for single target
        estimator.set_params(n_components=1)
    elif cls_name in ("GaussianRandomProjection", "SparseRandomProjection"):
        # default="auto" raises an error with the shape of `X`
        estimator.set_params(n_components=2)

    # FIXME: TO BE REMOVED for 1.1 (avoid FutureWarning)
    if cls_name == 'NMF':
        estimator.set_params(init='nndsvda')

    # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
    if cls_name == 'TSNE':
        estimator.set_params(learning_rate=200.0, init='random')

    # For PLS, TODO remove in 1.1
    skipped_attributes = {"x_scores_", "y_scores_"}

    if cls_name.endswith("Vectorizer"):
        # Vectorizer require some specific input data
        if cls_name in (
            "CountVectorizer",
            "HashingVectorizer",
            "TfidfVectorizer",
        ):
            X = [
                "This is the first document.",
                "This document is the second document.",
                "And this is the third one.",
                "Is this the first document?",
            ]
        elif cls_name == "DictVectorizer":
            X = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
        y = None
    else:
        X, y = make_classification(
            n_samples=20,
            n_features=3,
            n_redundant=0,
            n_classes=2,
            random_state=2,
        )
        y = _enforce_estimator_tags_y(estimator, y)
        X = _enforce_estimator_tags_x(estimator, X)

    # Label-only estimators are fit on labels rather than (X, y).
    if '1dlabels' in estimator._get_tags()['X_types']:
        estimator.fit(y)
    elif '2dlabels' in estimator._get_tags()['X_types']:
        estimator.fit(np.c_[y, y])
    else:
        estimator.fit(X, y)

    # `n_features_in_` is not yet mandated in some submodules.
    submodule = estimator.__module__.split(".")[1]
    if submodule in N_FEATURES_MODULES_TO_IGNORE:
        skipped_attributes.add("n_features_in_")

    for entry in documented:
        if entry.name in skipped_attributes:
            continue
        description = ' '.join(entry.desc).lower()
        # Attributes described with "only" exist conditionally on some
        # parameter; every other documented attribute must be present.
        if 'only ' in description:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(estimator, entry.name)

    fitted = _get_all_fitted_attributes(estimator)
    documented_names = [entry.name for entry in documented]
    undocumented_attrs = set(fitted).difference(documented_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs, \
        "Undocumented attributes: {}".format(undocumented_attrs)
def test_fit_docstring_attributes(name, Estimator):
    # Check that every attribute documented in the class docstring's
    # "Attributes" section exists after fitting, and that every public
    # fitted attribute is documented.
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    # Parse the "Attributes" section of the estimator's class docstring.
    doc = docscrape.ClassDoc(Estimator)
    attributes = doc['Attributes']

    # Estimators that cannot be constructed/fit with the generic recipe
    # below (meta-estimators, text vectorizers, ...).
    IGNORED = {'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
               'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
               'MultiOutputClassifier', 'MultiOutputRegressor',
               'NoSampleWeightWrapper', 'OneVsOneClassifier',
               'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
               'RegressorChain', 'SelectFromModel', 'SparseCoder',
               'SparseRandomProjection', 'SpectralBiclustering',
               'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
               'VotingClassifier', 'VotingRegressor',
               'SequentialFeatureSelector', 'HalvingGridSearchCV',
               'HalvingRandomSearchCV'}
    if Estimator.__name__ in IGNORED or Estimator.__name__.startswith('_'):
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    # Search estimators need a wrapped base estimator; everything else uses
    # the generic constructor helper.
    if Estimator.__name__ in ("RandomizedSearchCV", "GridSearchCV"):
        est = _construct_searchcv_instance(Estimator)
    else:
        est = _construct_instance(Estimator)

    # Parameter tweaks so the 20x3 classification data below is valid input.
    if Estimator.__name__ == 'SelectKBest':
        est.k = 2

    if Estimator.__name__ == 'DummyClassifier':
        est.strategy = "stratified"

    if 'PLS' in Estimator.__name__ or 'CCA' in Estimator.__name__:
        est.n_components = 1  # default = 2 is invalid for single target.

    # FIXME: TO BE REMOVED for 1.0 (avoid FutureWarning)
    if Estimator.__name__ == 'AffinityPropagation':
        est.random_state = 63

    # FIXME: TO BE REMOVED for 1.1 (avoid FutureWarning)
    if Estimator.__name__ == 'NMF':
        est.init = 'nndsvda'

    # FIXME: TO BE REMOVED for 1.2 (avoid FutureWarning)
    if Estimator.__name__ == 'TSNE':
        est.learning_rate = 200.0
        est.init = 'random'

    X, y = make_classification(n_samples=20, n_features=3,
                               n_redundant=0, n_classes=2,
                               random_state=2)

    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # Label-only estimators are fit on y (1d or 2d) rather than (X, y).
    if '1dlabels' in est._get_tags()['X_types']:
        est.fit(y)
    elif '2dlabels' in est._get_tags()['X_types']:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    skipped_attributes = {'x_scores_',  # For PLS, TODO remove in 1.1
                          'y_scores_'}  # For PLS, TODO remove in 1.1

    # `n_features_in_` is not yet mandated in some submodules.
    module = est.__module__.split(".")[1]
    if module in N_FEATURES_MODULES_TO_IGNORE:
        skipped_attributes.add("n_features_in_")

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = ' '.join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    # Estimators known to expose undocumented fitted attributes; expected
    # failure rather than a hard error.
    IGNORED = {'Birch', 'LarsCV', 'Lasso', 'OrthogonalMatchingPursuit'}
    if Estimator.__name__ in IGNORED:
        pytest.xfail(
            reason="Estimator has too many undocumented attributes.")

    # Public fitted attributes follow the trailing-underscore convention.
    fit_attr = [k for k in est.__dict__.keys()
                if k.endswith('_') and not k.startswith('_')]
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs,\
        "Undocumented attributes: {}".format(undocumented_attrs)
def test_fit_docstring_attributes(name, Estimator):
    # Check that every attribute documented in the class docstring's
    # "Attributes" section exists after fitting, and that every public
    # fitted attribute is documented.
    pytest.importorskip('numpydoc')
    from numpydoc import docscrape

    # Parse the "Attributes" section of the estimator's class docstring.
    doc = docscrape.ClassDoc(Estimator)
    attributes = doc['Attributes']

    # Estimators that cannot be constructed/fit with the generic recipe
    # below (meta-estimators, text vectorizers, ...).
    IGNORED = {
        'ClassifierChain', 'ColumnTransformer', 'CountVectorizer',
        'DictVectorizer', 'FeatureUnion', 'GaussianRandomProjection',
        'GridSearchCV', 'MultiOutputClassifier', 'MultiOutputRegressor',
        'NoSampleWeightWrapper', 'OneVsOneClassifier',
        'OutputCodeClassifier', 'Pipeline', 'RFE', 'RFECV',
        'RandomizedSearchCV', 'RegressorChain', 'SelectFromModel',
        'SparseCoder', 'SparseRandomProjection', 'SpectralBiclustering',
        'StackingClassifier', 'StackingRegressor', 'TfidfVectorizer',
        'VotingClassifier', 'VotingRegressor'
    }
    if Estimator.__name__ in IGNORED or Estimator.__name__.startswith('_'):
        pytest.skip("Estimator cannot be fit easily to test fit attributes")

    est = _construct_instance(Estimator)

    # Parameter tweaks so the 20x3 classification data below is valid input.
    if Estimator.__name__ == 'SelectKBest':
        est.k = 2

    if Estimator.__name__ == 'DummyClassifier':
        est.strategy = "stratified"

    # TO BE REMOVED for v0.25 (avoid FutureWarning)
    if Estimator.__name__ == 'AffinityPropagation':
        est.random_state = 63

    X, y = make_classification(n_samples=20, n_features=3,
                               n_redundant=0, n_classes=2,
                               random_state=2)

    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    # Label-only estimators are fit on y (1d or 2d) rather than (X, y).
    if '1dlabels' in est._get_tags()['X_types']:
        est.fit(y)
    elif '2dlabels' in est._get_tags()['X_types']:
        est.fit(np.c_[y, y])
    else:
        est.fit(X, y)

    skipped_attributes = {'n_features_in_'}

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = ' '.join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if 'only ' in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    # Estimators known to expose undocumented fitted attributes; expected
    # failure rather than a hard error.
    IGNORED = {
        'BayesianRidge', 'Birch', 'CCA', 'CategoricalNB', 'KernelCenterer',
        'LarsCV', 'Lasso', 'LassoLarsIC', 'MiniBatchKMeans',
        'OrthogonalMatchingPursuit', 'PLSCanonical', 'PLSSVD',
        'PassiveAggressiveClassifier'
    }
    if Estimator.__name__ in IGNORED:
        pytest.xfail(reason="Estimator has too many undocumented attributes.")

    # Public fitted attributes follow the trailing-underscore convention.
    fit_attr = [
        k for k in est.__dict__.keys()
        if k.endswith('_') and not k.startswith('_')
    ]
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    assert not undocumented_attrs,\
        "Undocumented attributes: {}".format(undocumented_attrs)