Example #1
def test_sparsify_estimators():
    """Test if predict with sparsified estimators works.

    Tests regression, binary classification, and multi-class classification.
    """
    estimators = all_estimators()
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
    y = [1, 1, 1, 2, 2, 2]

    # test regression and binary classification
    for name, Estimator in estimators:
        if not hasattr(Estimator, 'sparsify'):
            continue
        yield check_sparsify_binary_classifier, name, Estimator, X, y

    # test multiclass classification
    classifiers = all_estimators(type_filter='classifier')
    y[-1] = 3  # make multi-class
    for name, Classifier in classifiers:
        if not hasattr(Classifier, 'sparsify'):
            continue
        yield check_sparsify_multiclass_classifier, name, Classifier, X, y
Example #2
def test_sparsify_estimators():
    """Test if predict with sparsified estimators works.

    Tests regression, binary classification, and multi-class classification.
    """
    estimators = all_estimators()
    X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
    y = [1, 1, 1, 2, 2, 2]

    # test regression and binary classification
    for name, Estimator in estimators:
        if not hasattr(Estimator, 'sparsify'):
            continue

        est = Estimator()

        est.fit(X, y)
        pred_orig = est.predict(X)

        # test sparsify with dense inputs
        est.sparsify()
        assert_true(sparse.issparse(est.coef_))
        pred = est.predict(X)
        assert_array_equal(pred, pred_orig)

        # pickle and unpickle with sparse coef_
        est = pickle.loads(pickle.dumps(est))
        assert_true(sparse.issparse(est.coef_))
        pred = est.predict(X)
        assert_array_equal(pred, pred_orig)


    # test multiclass classification
    classifiers = all_estimators(type_filter='classifier')
    y[-1] = 3  # make multi-class
    for name, Classifier in classifiers:
        if not hasattr(Classifier, 'sparsify'):
            continue
        est = Classifier()

        est.fit(X, y)
        pred_orig = est.predict(X)

        # test sparsify with dense inputs
        est.sparsify()
        assert_true(sparse.issparse(est.coef_))
        pred = est.predict(X)
        assert_array_equal(pred, pred_orig)

        # pickle and unpickle with sparse coef_
        est = pickle.loads(pickle.dumps(est))
        assert_true(sparse.issparse(est.coef_))
        pred = est.predict(X)
        assert_array_equal(pred, pred_orig)
Example #3
File: init.py, Project: hjanime/VisTrails
def discover_supervised():
    classifiers = all_estimators(type_filter="classifier")
    regressors = all_estimators(type_filter="regressor")
    classes = []
    for name, Est in classifiers + regressors:
        if issubclass(Est, ClassifierMixin):
            namespace = "classifiers"
        else:
            namespace = "regressors"
        classes.append(make_module(name, Est, namespace, supervised=True))
    return classes
Example #4
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.endswith("HMM") or name.startswith("_"):
            continue
        if name not in CROSS_DECOMPOSITION:
            yield check_estimators_dtypes, name, Estimator
            yield check_fit_score_takes_y, name, Estimator
            yield check_dtype_object, name, Estimator

            # Check that all estimators yield informative messages when
            # trained on empty datasets
            yield check_estimators_empty_data_messages, name, Estimator

        if name not in CROSS_DECOMPOSITION + ['SpectralEmbedding']:
            # SpectralEmbedding is non-deterministic,
            # see issue #4236
            yield check_pipeline_consistency, name, Estimator

        if name not in CROSS_DECOMPOSITION + ['Imputer']:
            # Test that all estimators check their input for NaN's and infs
            yield check_estimators_nan_inf, name, Estimator

        if name not in CROSS_DECOMPOSITION + ['GaussianProcess']:
            # FIXME!
            # in particular GaussianProcess!
            yield check_estimators_overwrite_params, name, Estimator
        if hasattr(Estimator, 'sparsify'):
            yield check_sparsify_coefficients, name, Estimator

        yield check_estimator_sparse_data, name, Estimator
Example #5
def _tested_non_meta_estimators():
    for name, Estimator in all_estimators():
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        yield name, Estimator
Example #6
def test_class_weight_classifiers():
    # test that class_weight works and that the semantics are consistent
    classifiers = all_estimators(type_filter="classifier")

    with warnings.catch_warnings(record=True):
        classifiers = [c for c in classifiers if "class_weight" in c[1]().get_params().keys()]

    for n_centers in [2, 3]:
        # create a very noisy dataset
        X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
        for name, Classifier in classifiers:
            if name == "NuSVC":
                # the sparse version has a parameter that doesn't do anything
                continue
            if name.endswith("NB"):
                # NaiveBayes classifiers have a somewhat different interface.
                # FIXME SOON!
                continue
            if n_centers == 2:
                class_weight = {0: 1000, 1: 0.0001}
            else:
                class_weight = {0: 1000, 1: 0.0001, 2: 0.0001}

            with warnings.catch_warnings(record=True):
                classifier = Classifier(class_weight=class_weight)
            if hasattr(classifier, "n_iter"):
                classifier.set_params(n_iter=100)

            set_random_state(classifier)
            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)
            assert_greater(np.mean(y_pred == 0), 0.9)
Example #7
def test_regressors_int():
    # test if regressors can cope with integer labels (by converting them to
    # float)
    regressors = all_estimators(type_filter="regressor")
    X, _ = _boston_subset()
    X = X[:50]
    rnd = np.random.RandomState(0)
    y = rnd.randint(3, size=X.shape[0])
    for name, Regressor in regressors:
        if name in dont_test or name in ("CCA",):
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            # separate estimators to control random seeds
            regressor_1 = Regressor()
            regressor_2 = Regressor()
        set_random_state(regressor_1)
        set_random_state(regressor_2)

        if name in ("_PLS", "PLSCanonical", "PLSRegression"):
            y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        # fit
        regressor_1.fit(X, y_)
        pred1 = regressor_1.predict(X)
        regressor_2.fit(X, y_.astype(np.float))
        pred2 = regressor_2.predict(X)
        assert_array_almost_equal(pred1, pred2, 2, name)
Example #8
def test_classifiers_classes():
    # test if classifiers can cope with non-consecutive classes
    estimators = all_estimators()
    classifiers = [(name, E) for name, E in estimators if issubclass(E,
        ClassifierMixin)]
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    y = 2 * y + 1
    # TODO: make work with next line :)
    #y = y.astype(np.str)
    for name, Clf in classifiers:
        if Clf in dont_test or Clf in meta_estimators:
            continue
        if Clf in [MultinomialNB, BernoulliNB]:
            # TODO also test these!
            continue

        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            clf = Clf()
        # fit
        clf.fit(X, y)
        y_pred = clf.predict(X)
        # training set performance
        assert_array_equal(np.unique(y), np.unique(y_pred))
        assert_greater(zero_one_score(y, y_pred), 0.78)
Example #9
def test_estimators_sparse_data():
    # All estimators should either deal with sparse data, or raise an
    # intelligible error message
    rng = np.random.RandomState(0)
    X = rng.rand(40, 10)
    X[X < .8] = 0
    X = sparse.csr_matrix(X)
    y = (4 * rng.rand(40)).astype(np.int)
    estimators = all_estimators()
    estimators = [(name, E) for name, E in estimators
                        if issubclass(E, (ClassifierMixin, RegressorMixin))]
    for name, Clf in estimators:
        if Clf in dont_test or Clf in meta_estimators:
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            clf = Clf()
        # fit
        try:
            clf.fit(X, y)
        except TypeError as e:
            if 'sparse' not in repr(e):
                print("Estimator %s doesn't seem to fail gracefully on "
                      "sparse data" % name)
                traceback.print_exc(file=sys.stdout)
                raise e
        except Exception as exc:
            print("Estimator %s doesn't seem to fail gracefully on "
                  "sparse data" % name)
            traceback.print_exc(file=sys.stdout)
            raise exc
Example #10
def test_transformers_data_not_an_array():
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors
    transformers = all_estimators(type_filter='transformer')
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non-negative data, for things
    # like NMF
    X -= X.min() - .1

    for name, Transformer in transformers:
        # XXX: some transformers are transforming the input
        # data. This is a bug that we'll fix later. Right now we copy
        # the data each time
        this_X = NotAnArray(X.copy())
        this_y = NotAnArray(np.asarray(y))
        if name in dont_test:
            continue
        # these don't actually fit the data:
        if name in ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']:
            continue
        # And these want multivariate output
        if name in ('PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'):
            continue
        yield check_transformer, name, Transformer, this_X, this_y
Example #11
def test_regressors_int():
    # test if regressors can cope with integer labels (by converting them to
    # float)
    regressors = all_estimators(type_filter='regressor')
    boston = load_boston()
    X, y = boston.data, boston.target
    X, y = shuffle(X, y, random_state=0)
    X = StandardScaler().fit_transform(X)
    y = np.random.randint(2, size=X.shape[0])
    for name, Reg in regressors:
        if Reg in dont_test or Reg in (CCA,):
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            # separate estimators to control random seeds
            reg1 = Reg()
            reg2 = Reg()
        set_random_state(reg1)
        set_random_state(reg2)

        if Reg in (_PLS, PLSCanonical, PLSRegression):
            y_ = np.vstack([y, 2 * y + np.random.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        # fit
        reg1.fit(X, y_)
        pred1 = reg1.predict(X)
        reg2.fit(X, y_.astype(np.float))
        pred2 = reg2.predict(X)
        assert_array_almost_equal(pred1, pred2, 2, name)
Example #12
def test_classifiers_classes():
    # test if classifiers can cope with non-consecutive classes
    classifiers = all_estimators(type_filter='classifier')
    X, y = make_blobs(random_state=12345)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    y = 2 * y + 1
    classes = np.unique(y)
    # TODO: make work with next line :)
    #y = y.astype(np.str)
    for name, Clf in classifiers:
        if Clf in dont_test:
            continue
        if Clf in [MultinomialNB, BernoulliNB]:
            # TODO also test these!
            continue

        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            clf = Clf()
        # fit
        clf.fit(X, y)
        y_pred = clf.predict(X)
        # training set performance
        assert_array_equal(np.unique(y), np.unique(y_pred))
        assert_greater(zero_one_score(y, y_pred), 0.78,
                       "accuracy of %s not greater than 0.78" % str(Clf))
        assert_array_equal(
            clf.classes_, classes,
            "Unexpected classes_ attribute for %r" % clf)
Example #13
def inspect(afilter='classifier', parameter='sample_weight'):
    """Help you inspect some of the parameters and options you may want to choose."""
    import inspect
    from sklearn.utils.testing import all_estimators
    for name, clf in all_estimators(type_filter=afilter):
        if parameter in inspect.getargspec(clf().fit)[0]:
            print(name)
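
A quick usage sketch (hypothetical call; which names get printed depends on your scikit-learn version):

inspect(afilter='regressor', parameter='sample_weight')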
Example #14
def test_estimators_nan_inf():
    # Test that all estimators check their input for NaN's and infs
    estimators = all_estimators(type_filter=['classifier', 'regressor',
                                             'transformer', 'cluster'])
    for name, Estimator in estimators:
        if name not in CROSS_DECOMPOSITION + ['Imputer']:
            yield check_estimators_nan_inf, name, Estimator
Example #15
def test_estimators_nan_inf():
    # Test that all estimators check their input for NaN's and infs
    rnd = np.random.RandomState(0)
    X_train_finite = rnd.uniform(size=(10, 3))
    X_train_nan = rnd.uniform(size=(10, 3))
    X_train_nan[0, 0] = np.nan
    X_train_inf = rnd.uniform(size=(10, 3))
    X_train_inf[0, 0] = np.inf
    y = np.ones(10)
    y[:5] = 0
    estimators = all_estimators()
    estimators = [(name, E) for name, E in estimators
                  if (issubclass(E, ClassifierMixin) or
                      issubclass(E, RegressorMixin) or
                      issubclass(E, TransformerMixin) or
                      issubclass(E, ClusterMixin))]
    for X_train in [X_train_nan, X_train_inf]:
        for name, Estimator in estimators:
            if name in dont_test:
                continue
            if name in ('PLSCanonical', 'PLSRegression', 'CCA',
                        'PLSSVD', 'Imputer'):  # Imputer accepts nan
                continue
            yield (check_estimators_nan_inf, name, Estimator, X_train,
                   X_train_finite,
                   multioutput_estimator_convert_y_2d(name, y))
Example #16
def test_class_weight_classifiers():
    # test that class_weight works and that the semantics are consistent
    classifiers = all_estimators(type_filter='classifier')

    with warnings.catch_warnings(record=True):
        classifiers = [c for c in classifiers
                       if 'class_weight' in c[1]().get_params().keys()]

    for n_centers in [2, 3]:
        # create a very noisy dataset
        X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                            random_state=0)
        for name, Classifier in classifiers:
            if name == "NuSVC":
                # the sparse version has a parameter that doesn't do anything
                continue
            if name.endswith("NB"):
                # NaiveBayes classifiers have a somewhat different interface.
                # FIXME SOON!
                continue
            check_class_weight_classifiers.description =\
                "check_class_weight_classifiers(%s, %d)" % (name, n_centers)
            yield (check_class_weight_classifiers, name, Classifier, X_train,
                   y_train, X_test, y_test)
Example #17
def test_class_weight_auto_classifiers():
    # test that class_weight="auto" improves f1-score
    classifiers = all_estimators(type_filter='classifier')

    with warnings.catch_warnings(record=True):
        classifiers = [c for c in classifiers
                       if 'class_weight' in c[1]().get_params().keys()]

    for n_classes, weights in zip([2, 3], [[.8, .2], [.8, .1, .1]]):
        # create unbalanced dataset
        X, y = make_classification(n_classes=n_classes, n_samples=200,
                                   n_features=10, weights=weights,
                                   random_state=0, n_informative=n_classes)
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                            random_state=0)
        for name, Classifier in classifiers:
            if name == "NuSVC":
                # the sparse version has a parameter that doesn't do anything
                continue

            if name.startswith("RidgeClassifier"):
                # RidgeClassifier behaves unexpected
                # FIXME!
                continue

            if name.endswith("NB"):
                # NaiveBayes classifiers have a somewhat different interface.
                # FIXME SOON!
                continue
            check_class_weight_auto_classifiers.description =\
                "check_class_weight_auto_classifiers(%s, %d)" % (name, n_classes)
            yield (check_class_weight_auto_classifiers, name, Classifier,
                   X_train, y_train, X_test, y_test, weights)
Example #18
def test_transformers_sparse_data():
    # All estimators should either deal with sparse data, or raise an
    # intelligible error message
    rng = np.random.RandomState(0)
    X = rng.rand(40, 10)
    X[X < 0.8] = 0
    X = sparse.csr_matrix(X)
    y = (4 * rng.rand(40)).astype(np.int)
    estimators = all_estimators()
    estimators = [(name, E) for name, E in estimators if issubclass(E, TransformerMixin)]
    for name, Trans in estimators:
        if Trans in dont_test or Trans in meta_estimators:
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            if Trans in [Scaler, StandardScaler]:
                trans = Trans(with_mean=False)
            else:
                trans = Trans()
        # fit
        try:
            trans.fit(X, y)
        except TypeError as e:
            if "sparse" not in repr(e):
                print("Estimator %s doesn't seem to fail gracefully on "
                      "sparse data" % name)
                traceback.print_exc(file=sys.stdout)
                raise e
        except Exception as exc:
            print("Estimator %s doesn't seem to fail gracefully on "
                  "sparse data" % name)
            traceback.print_exc(file=sys.stdout)
            raise exc
Example #19
def test_regressors_train():
    estimators = all_estimators()
    regressors = [(name, E) for name, E in estimators if issubclass(E,
        RegressorMixin)]
    boston = load_boston()
    X, y = boston.data, boston.target
    X, y = shuffle(X, y, random_state=0)
    # TODO: test with intercept
    # TODO: test with multiple responses
    X = Scaler().fit_transform(X)
    y = Scaler().fit_transform(y)
    for name, Reg in regressors:
        if Reg in dont_test or Reg in meta_estimators:
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            reg = Reg()
        if hasattr(reg, 'alpha'):
            reg.set_params(alpha=0.01)

        # raises error on malformed input for fit
        assert_raises(ValueError, reg.fit, X, y[:-1])
        # fit
        reg.fit(X, y)
        reg.predict(X)
        assert_greater(reg.score(X, y), 0.5)
Example #20
def test_non_transformer_estimators_n_iter():
    # Test that all non-transformer estimators that expose a max_iter
    # attribute report an n_iter attribute of at least 1 after fitting.
    for est_type in ['regressor', 'classifier', 'cluster']:
        regressors = all_estimators(type_filter=est_type)
        for name, Estimator in regressors:
            # LassoLars stops early for the default alpha=1.0 for
            # the iris dataset.
            if name == 'LassoLars':
                estimator = Estimator(alpha=0.)
            else:
                estimator = Estimator()
            if hasattr(estimator, "max_iter"):
                # These models are dependent on external solvers like
                # libsvm and accessing the iter parameter is non-trivial.
                if name in (['Ridge', 'SVR', 'NuSVR', 'NuSVC',
                             'RidgeClassifier', 'SVC', 'RandomizedLasso',
                             'LogisticRegressionCV']):
                    continue

                # Tested in test_transformer_n_iter below
                elif (name in CROSS_DECOMPOSITION or
                      name in ['LinearSVC', 'LogisticRegression']):
                    continue

                else:
                    # Multitask models related to ENet cannot handle
                    # mono-output y.
                    yield (check_non_transformer_estimators_n_iter,
                           name, estimator, 'Multi' in name)
Example #21
def test_class_weight_auto_classifiers():
    """Test that class_weight="auto" improves f1-score"""

    # This test is broken; its success depends on:
    # * a rare fortuitous RNG seed for make_classification; and
    # * the use of binary F1 over a seemingly arbitrary positive class for two
    #   datasets, and weighted average F1 for the third.
    # Its expectations need to be clarified and reimplemented.
    raise SkipTest("This test requires redefinition")

    classifiers = all_estimators(type_filter="classifier")

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        classifiers = [c for c in classifiers if "class_weight" in c[1]().get_params().keys()]

    for n_classes, weights in zip([2, 3], [[0.8, 0.2], [0.8, 0.1, 0.1]]):
        # create unbalanced dataset
        X, y = make_classification(
            n_classes=n_classes, n_samples=200, n_features=10, weights=weights, random_state=0, n_informative=n_classes
        )
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
        for name, Classifier in classifiers:
            if (
                name != "NuSVC"
                # the sparse version has a parameter that doesn't do anything
                and not name.startswith("RidgeClassifier")
                # RidgeClassifier behaves unexpected
                # FIXME!
                and not name.endswith("NB")
            ):
                # NaiveBayes classifiers have a somewhat different interface.
                # FIXME SOON!
                yield (check_class_weight_auto_classifiers, name, Classifier, X_train, y_train, X_test, y_test, weights)
Example #22
def test_transformers_pickle():
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors
    transformers = all_estimators(type_filter='transformer')
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    n_samples, n_features = X.shape
    X = StandardScaler().fit_transform(X)
    X -= X.min()

    succeeded = True

    for name, Transformer in transformers:
        if name in dont_test:
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            transformer = Transformer()
        if not hasattr(transformer, 'transform'):
            continue
        set_random_state(transformer)
        if hasattr(transformer, 'compute_importances'):
            transformer.compute_importances = True

        if name == "SelectKBest":
            # SelectKBest has a default of k=10
            # which is more features than we have.
            transformer.k = 1
        elif name in ['GaussianRandomProjection', 'SparseRandomProjection']:
            # Due to the jl lemma and very few samples, the number
            # of components of the random matrix projection will be greater
            # than the number of features.
            # So we impose a smaller number (avoid "auto" mode)
            transformer.n_components = 1

        # fit
        if name in ('PLSCanonical', 'PLSRegression', 'CCA',
                    'PLSSVD'):
            random_state = np.random.RandomState(seed=12345)
            y_ = np.vstack([y, 2 * y + random_state.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        transformer.fit(X, y_)
        X_pred = transformer.fit(X, y_).transform(X)
        pickled_transformer = pickle.dumps(transformer)
        unpickled_transformer = pickle.loads(pickled_transformer)
        pickled_X_pred = unpickled_transformer.transform(X)

        try:
            assert_array_almost_equal(pickled_X_pred, X_pred)
        except Exception as exc:
            succeeded = False
            print ("Transformer %s doesn't predict the same value "
                   "after pickling" % name)
            raise exc

    assert_true(succeeded)
Example #23
def test_classifiers_train():
    # test if classifiers do something sensible on training set
    # also test all shapes / shape errors
    estimators = all_estimators()
    classifiers = [(name, E) for name, E in estimators if issubclass(E,
        ClassifierMixin)]
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=7)
    n_samples, n_features = X.shape
    n_labels = len(np.unique(y))
    X = Scaler().fit_transform(X)
    for name, Clf in classifiers:
        if Clf in dont_test or Clf in meta_estimators:
            continue
        if Clf in [MultinomialNB, BernoulliNB]:
            # TODO also test these!
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            clf = Clf()
        # raises error on malformed input for fit
        assert_raises(ValueError, clf.fit, X, y[:-1])

        # fit
        clf.fit(X, y)
        y_pred = clf.predict(X)
        assert_equal(y_pred.shape, (n_samples,))
        # training set performance
        assert_greater(zero_one_score(y, y_pred), 0.78)

        # raises error on malformed input for predict
        assert_raises(ValueError, clf.predict, X.T)
        if hasattr(clf, "decision_function"):
            try:
                # decision_function agrees with predict:
                decision = clf.decision_function(X)
                assert_equal(decision.shape, (n_samples, n_labels))
                # raises error on malformed input
                assert_raises(ValueError, clf.decision_function, X.T)
                if not isinstance(clf, BaseLibSVM):
                    # the one-vs-one scheme of LibSVM works differently
                    assert_array_equal(np.argmax(decision, axis=1), y_pred)
                # raises error on malformed input for decision_function
                assert_raises(ValueError, clf.decision_function, X.T)
            except NotImplementedError:
                pass
        if hasattr(clf, "predict_proba"):
            try:
                # predict_proba agrees with predict:
                y_prob = clf.predict_proba(X)
                assert_equal(y_prob.shape, (n_samples, n_labels))
                # raises error on malformed input
                assert_raises(ValueError, clf.predict_proba, X.T)
                assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
                # raises error on malformed input for predict_proba
                assert_raises(ValueError, clf.predict_proba, X.T)
            except NotImplementedError:
                pass
Example #24
def test_all_estimators():
    # Test that estimators are default-constructible, clonable
    # and have working repr.
    estimators = all_estimators(include_meta_estimators=True)
    classifier = LDA()

    for name, Estimator in estimators:
        # some can just not be sensibly default constructed
        if name in dont_test:
            continue
        # test default-constructibility
        # get rid of deprecation warnings
        with warnings.catch_warnings(record=True):
            if name in meta_estimators:
                estimator = Estimator(classifier)
            else:
                estimator = Estimator()
            # test cloning
            clone(estimator)
            # test __repr__
            repr(estimator)
            # test that set_params returns self
            assert_true(isinstance(estimator.set_params(), Estimator))

            # test if init does nothing but set parameters
            # this is important for grid_search etc.
            # We get the default parameters from init and then
            # compare these against the actual values of the attributes.

            # this comes from getattr. Gets rid of deprecation decorator.
            init = getattr(estimator.__init__, 'deprecated_original',
                           estimator.__init__)
            try:
                args, varargs, kws, defaults = inspect.getargspec(init)
            except TypeError:
                # init is not a python function.
                # true for mixins
                continue
            params = estimator.get_params()
            if name in meta_estimators:
                # they need a non-default argument
                args = args[2:]
            else:
                args = args[1:]
            if args:
                # non-empty list
                assert_equal(len(args), len(defaults))
            else:
                continue
            for arg, default in zip(args, defaults):
                if arg not in params.keys():
                    # deprecated parameter, not in get_params
                    assert_true(default is None)
                    continue

                if isinstance(params[arg], np.ndarray):
                    assert_array_equal(params[arg], default)
                else:
                    assert_equal(params[arg], default)
Example #25
def test_cluster_overwrite_params():
    # test whether any clusterer overwrites its init parameters during fit
    clusterers = all_estimators(type_filter="cluster")
    X, y = make_blobs(random_state=0, n_samples=9)
    # some want non-negative input; shift X accordingly
    X -= X.min()
    for name, Clustering in clusterers:
        yield check_cluster_overwrite_params, name, Clustering, X, y
Example #26
def test_estimators_sparse_data():
    # All estimators should either deal with sparse data or raise an
    # exception with type TypeError and an intelligible error message
    estimators = all_estimators()
    estimators = [(name, Estimator) for name, Estimator in estimators
                  if issubclass(Estimator, (ClassifierMixin, RegressorMixin))]
    for name, Estimator in estimators:
        yield check_regressors_classifiers_sparse_data, name, Estimator
Example #27
def _tested_linear_classifiers():
    classifiers = all_estimators(type_filter='classifier')

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        for name, clazz in classifiers:
            if ('class_weight' in clazz().get_params().keys() and
                    issubclass(clazz, LinearClassifierMixin)):
                yield name, clazz
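
A minimal sketch (assuming pytest, plus the check_class_weight_balanced_linear_classifier helper imported in Example #31) of how this generator can drive a parametrized test:

import pytest

@pytest.mark.parametrize('name, Classifier',
                         list(_tested_linear_classifiers()))
def test_class_weight_balanced_linear(name, Classifier):
    # each yielded (name, class) pair becomes one test case
    check_class_weight_balanced_linear_classifier(name, Classifier)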
Example #28
def test_classifiers_data_not_an_array():
    classifiers = all_estimators(type_filter="classifier")
    X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1]])
    y = [1, 1, 1, 2, 2, 2]

    for name, Classifier in classifiers:
        if name in dont_test:
            continue
        yield (check_estimators_data_not_an_array, name, Classifier, X, multioutput_estimator_convert_y_2d(name, y))
Example #29
def test_estimators_overwrite_params():
    # test whether any estimator overwrites its init parameters during fit
    for est_type in ["classifier", "regressor", "transformer"]:
        estimators = all_estimators(type_filter=est_type)
        for name, Estimator in estimators:
            if name not in ["CCA", "_CCA", "PLSCanonical", "PLSRegression", "PLSSVD", "GaussianProcess"]:
                # FIXME!
                # in particular GaussianProcess!
                yield check_estimators_overwrite_params, name, Estimator
Example #30
def test_get_params_invariance():
    # Test for estimators that support get_params, that
    # get_params(deep=False) is a subset of get_params(deep=True)
    # Related to issue #4465

    estimators = all_estimators(include_meta_estimators=False, include_other=True)
    for name, Estimator in estimators:
        if hasattr(Estimator, 'get_params'):
            yield check_get_params_invariance, name, Estimator
Example #31
from sklearn.utils import IS_PYPY
from sklearn.utils.estimator_checks import (
    _yield_all_checks, _safe_tags, set_checking_parameters,
    check_parameters_default_constructible, check_no_attributes_set_in_init,
    check_class_weight_balanced_linear_classifier)


def test_all_estimator_no_base_class():
    # test that all_estimators doesn't find abstract classes.
    for name, Estimator in all_estimators():
        msg = ("Base estimators such as {0} should not be included"
               " in all_estimators").format(name)
        assert not name.lower().startswith('base'), msg


@pytest.mark.parametrize('name, Estimator', all_estimators())
def test_parameters_default_constructible(name, Estimator):
    # Test that estimators are default-constructible
    check_parameters_default_constructible(name, Estimator)


def _tested_estimators():
    for name, Estimator in all_estimators():
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        # FIXME _skip_test should be used here (if we could)

        required_parameters = getattr(Estimator, "_required_parameters", [])
        if len(required_parameters):
Example #32
import warnings

warnings.filterwarnings('ignore')
#1 Data

iris = pd.read_csv('./data/csv/iris.csv', header=0)

x = iris.iloc[:, 0:4]
y = iris.iloc[:, 4]

print(x)
print(y)

#x_train, x_test, y_train, y_test = train_test_split(x,y, test_size =0.2, random_state=6)

kfold = KFold(n_splits=5, shuffle=True)

warnings.filterwarnings('ignore')
allAlgorithms = all_estimators(type_filter='classifier')  # check all classification models

for (name, algorithm) in allAlgorithms:
    model = algorithm()
    scores = cross_val_score(model, x, y, cv=kfold)

    print(name, "의 정답률 = ")
    print(scores)
    #model.fit(x, y)

import sklearn
print(sklearn.__version__)
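
A more defensive variant of the loop above (a sketch: on newer scikit-learn versions some classifiers cannot be constructed without arguments, so a try/except guard, as in Example #39, keeps the sweep running):

for (name, algorithm) in allAlgorithms:
    try:
        model = algorithm()
        scores = cross_val_score(model, x, y, cv=kfold)
        print(name, "accuracy =")
        print(scores)
    except Exception as e:
        print(name, "skipped:", e)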
Example #33
def test_estimators_nan_inf():
    # Test that all estimators check their input for NaN's and infs
    rnd = np.random.RandomState(0)
    X_train_finite = rnd.uniform(size=(10, 3))
    X_train_nan = rnd.uniform(size=(10, 3))
    X_train_nan[0, 0] = np.nan
    X_train_inf = rnd.uniform(size=(10, 3))
    X_train_inf[0, 0] = np.inf
    y = np.ones(10)
    y[:5] = 0
    estimators = all_estimators()
    estimators = [
        (name, E) for name, E in estimators
        if (issubclass(E, ClassifierMixin) or issubclass(E, RegressorMixin)
            or issubclass(E, TransformerMixin) or issubclass(E, ClusterMixin))
    ]
    error_string_fit = "Estimator doesn't check for NaN and inf in fit."
    error_string_predict = ("Estimator doesn't check for NaN and inf in"
                            " predict.")
    error_string_transform = ("Estimator doesn't check for NaN and inf in"
                              " transform.")
    for X_train in [X_train_nan, X_train_inf]:
        for name, Estimator in estimators:
            if name in dont_test:
                continue
            if name in ('_PLS', 'PLSCanonical', 'PLSRegression', 'CCA',
                        'PLSSVD'):
                continue

            # catch deprecation warnings
            with warnings.catch_warnings(record=True):
                estimator = Estimator()
                if name in [
                        'GaussianRandomProjection', 'SparseRandomProjection'
                ]:
                    # Due to the jl lemma and very few samples, the number
                    # of components of the random matrix projection will be
                    # greater
                    # than the number of features.
                    # So we impose a smaller number (avoid "auto" mode)
                    estimator = Estimator(n_components=1)

                set_random_state(estimator, 1)
                # try to fit
                try:
                    if issubclass(Estimator, ClusterMixin):
                        estimator.fit(X_train)
                    else:
                        estimator.fit(X_train, y)
                except ValueError as e:
                    if 'inf' not in repr(e) and 'NaN' not in repr(e):
                        print(error_string_fit, Estimator, e)
                        traceback.print_exc(file=sys.stdout)
                        raise e
                except Exception as exc:
                    print(error_string_fit, Estimator, exc)
                    traceback.print_exc(file=sys.stdout)
                    raise exc
                else:
                    raise AssertionError(error_string_fit, Estimator)
                # actually fit
                if issubclass(Estimator, ClusterMixin):
                    # clustering algorithms fit on X alone; all other
                    # estimators support fitting with an (optional) y
                    estimator.fit(X_train_finite)
                else:
                    estimator.fit(X_train_finite, y)

                # predict
                if hasattr(estimator, "predict"):
                    try:
                        estimator.predict(X_train)
                    except ValueError as e:
                        if 'inf' not in repr(e) and 'NaN' not in repr(e):
                            print(error_string_predict, Estimator, e)
                            traceback.print_exc(file=sys.stdout)
                            raise e
                    except Exception as exc:
                        print(error_string_predict, Estimator, exc)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        raise AssertionError(error_string_predict, Estimator)

                # transform
                if hasattr(estimator, "transform"):
                    try:
                        estimator.transform(X_train)
                    except ValueError as e:
                        if 'inf' not in repr(e) and 'NaN' not in repr(e):
                            print(error_string_transform, Estimator, e)
                            traceback.print_exc(file=sys.stdout)
                            raise e
                    except Exception as exc:
                        print(error_string_transform, Estimator, exc)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        raise AssertionError(error_string_transform, Estimator)
Example #34
def test_transformers_pickle():
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors
    transformers = all_estimators(type_filter='transformer')
    X, y = make_blobs(n_samples=30,
                      centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0,
                      n_features=2,
                      cluster_std=0.1)
    n_samples, n_features = X.shape
    X = StandardScaler().fit_transform(X)
    X -= X.min()

    succeeded = True

    for name, Transformer in transformers:
        if name in dont_test:
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            transformer = Transformer()
        if not hasattr(transformer, 'transform'):
            continue
        set_random_state(transformer)
        if hasattr(transformer, 'compute_importances'):
            transformer.compute_importances = True

        if name == "SelectKBest":
            # SelectKBest has a default of k=10
            # which is more features than we have.
            transformer.k = 1
        elif name in ['GaussianRandomProjection', 'SparseRandomProjection']:
            # Due to the jl lemma and very few samples, the number
            # of components of the random matrix projection will be greater
            # than the number of features.
            # So we impose a smaller number (avoid "auto" mode)
            transformer.n_components = 1

        # fit
        if name in ('_PLS', 'PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'):
            random_state = np.random.RandomState(seed=12345)
            y_ = np.vstack([y, 2 * y + random_state.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        transformer.fit(X, y_)
        X_pred = transformer.fit(X, y_).transform(X)
        pickled_transformer = pickle.dumps(transformer)
        unpickled_transformer = pickle.loads(pickled_transformer)
        pickled_X_pred = unpickled_transformer.transform(X)

        try:
            assert_array_almost_equal(pickled_X_pred, X_pred)
        except Exception as exc:
            succeeded = False
            print("Transformer %s doesn't predict the same value "
                  "after pickling" % name)
            raise exc

    assert_true(succeeded)
Example #35
def test_classifiers_train():
    # test if classifiers do something sensible on training set
    # also test all shapes / shape errors
    classifiers = all_estimators(type_filter='classifier')
    X_m, y_m = make_blobs(random_state=0)
    X_m, y_m = shuffle(X_m, y_m, random_state=7)
    X_m = StandardScaler().fit_transform(X_m)
    # generate binary problem from multi-class one
    y_b = y_m[y_m != 2]
    X_b = X_m[y_m != 2]
    for (X, y) in [(X_m, y_m), (X_b, y_b)]:
        # do it once with binary, once with multiclass
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, n_features = X.shape
        for name, Classifier in classifiers:
            if name in dont_test:
                continue
            if name in ['MultinomialNB', 'BernoulliNB']:
                # TODO also test these!
                continue
            # catch deprecation warnings
            with warnings.catch_warnings(record=True):
                classifier = Classifier()
            # raises error on malformed input for fit
            assert_raises(ValueError, classifier.fit, X, y[:-1])

            # fit
            classifier.fit(X, y)
            assert_true(hasattr(classifier, "classes_"))
            y_pred = classifier.predict(X)
            assert_equal(y_pred.shape, (n_samples, ))
            # training set performance
            assert_greater(accuracy_score(y, y_pred), 0.85)

            # raises error on malformed input for predict
            assert_raises(ValueError, classifier.predict, X.T)
            if hasattr(classifier, "decision_function"):
                try:
                    # decision_function agrees with predict:
                    decision = classifier.decision_function(X)
                    if n_classes == 2:
                        assert_equal(decision.ravel().shape, (n_samples, ))
                        dec_pred = (decision.ravel() > 0).astype(np.int)
                        assert_array_equal(dec_pred, y_pred)
                    if (n_classes == 3
                            and not isinstance(classifier, BaseLibSVM)):
                        # the one-vs-one scheme of LibSVM works differently
                        assert_equal(decision.shape, (n_samples, n_classes))
                        assert_array_equal(np.argmax(decision, axis=1), y_pred)

                    # raises error on malformed input
                    assert_raises(ValueError, classifier.decision_function,
                                  X.T)
                    # raises error on malformed input for decision_function
                    assert_raises(ValueError, classifier.decision_function,
                                  X.T)
                except NotImplementedError:
                    pass
            if hasattr(classifier, "predict_proba"):
                try:
                    # predict_proba agrees with predict:
                    y_prob = classifier.predict_proba(X)
                    assert_equal(y_prob.shape, (n_samples, n_classes))
                    assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
                    # check that probas for all classes sum to one
                    assert_array_almost_equal(np.sum(y_prob, axis=1),
                                              np.ones(n_samples))
                    # raises error on malformed input
                    assert_raises(ValueError, classifier.predict_proba, X.T)
                    # raises error on malformed input for predict_proba
                    assert_raises(ValueError, classifier.predict_proba, X.T)
                except NotImplementedError:
                    pass
Example #36
                    str(e) for e in descriptions)
            else:
                completeDescription = str(type_) + "\n \n" + " ".join(
                    str(e) for e in descriptions)

            # add into the dict at the key (name of the params)
            # a tuple (instance of type, default value of params, description)
            dico[name] = (type_map.get(types.group()), classifierTemp[name],
                          completeDescription)
    return dico


# ----------------------------------------------------------------------------------------------------------------
# ---------------------------------------- retrieves all estimators ----------------------------------------------
# ----------------------------------------------------------------------------------------------------------------
for name, class_ in all_estimators():

    # Retrieves the type of the current estimator
    typeclass = str(getattr(class_, "_estimator_type", None))

    # Skip private estimators and keep only classifiers
    if "_" not in name and typeclass == "classifier":

        # Retrieve the module path of the current estimator from scikit-learn
        modulePath = str(class_).split("'")[1]

        # If the classifier's name appears in the module path,
        # it is removed from the path
        if name in modulePath:
            # Strip the name from the module path and store a just-in-time
            # import under the key (the classifier's name)
            dictEstimator[name] = getattr(
Example #37
from sklearn.metrics import (
    roc_auc_score,
    f1_score,
    r2_score,
    mean_squared_error,
)
import warnings
import xgboost

# import catboost
import lightgbm

warnings.filterwarnings("ignore")
pd.set_option("display.precision", 2)
pd.set_option("display.float_format", lambda x: "%.2f" % x)

CLASSIFIERS = [est for est in all_estimators() if issubclass(est[1], ClassifierMixin)]
REGRESSORS = [est for est in all_estimators() if issubclass(est[1], RegressorMixin)]

removed_classifiers = [
    ("CheckingClassifier", sklearn.utils._mocking.CheckingClassifier),
    ("ClassifierChain", sklearn.multioutput.ClassifierChain),
    ("ComplementNB", sklearn.naive_bayes.ComplementNB),
    (
        "GradientBoostingClassifier",
        sklearn.ensemble.gradient_boosting.GradientBoostingClassifier,
    ),
    (
        "GaussianProcessClassifier",
        sklearn.gaussian_process.gpc.GaussianProcessClassifier,
    ),
    (
Example #38
from sklearn.compose import ColumnTransformer
from sklearn.utils.testing import all_estimators
from sklearn.base import RegressorMixin
from sklearn.base import ClassifierMixin
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, f1_score, r2_score, mean_squared_error
import warnings
import xgboost
# import catboost
import lightgbm

warnings.filterwarnings("ignore")
pd.set_option("display.precision", 2)
pd.set_option("display.float_format", lambda x: '%.2f' % x)

CLASSIFIERS = [
    est for est in all_estimators() if issubclass(est[1], ClassifierMixin)
]
REGRESSORS = [
    est for est in all_estimators() if issubclass(est[1], RegressorMixin)
]

removed_classifiers = [
    ('ClassifierChain', sklearn.multioutput.ClassifierChain),
    ('ComplementNB', sklearn.naive_bayes.ComplementNB),
    ('GradientBoostingClassifier',
     sklearn.ensemble.gradient_boosting.GradientBoostingClassifier),
    ('GaussianProcessClassifier',
     sklearn.gaussian_process.gpc.GaussianProcessClassifier),
    ('HistGradientBoostingClassifier', sklearn.ensemble.
     _hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier),
    ('MLPClassifier',
Example #39
from sklearn.model_selection import KFold
import warnings
from sklearn.model_selection import cross_val_score

# Load the iris data
iris_data = pd.read_csv("iris.csv", encoding="utf-8")

# Split the iris data into labels and input features
y = iris_data.loc[:, "Name"]
x = iris_data.loc[:, ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]]

# Split into training and test sets
# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, train_size = 0.8, shuffle = True)

# Get all classifier algorithms
allAlgorithms = all_estimators(type_filter="classifier")
# warnings.simplefilter("error")

# Object for K-fold cross-validation
kfold_cv = KFold(n_splits=5, shuffle=True)
warnings.filterwarnings('ignore')

for (name, algorithm) in allAlgorithms:
    try:
        # Create an object for each algorithm
        if name == "LinearSVC":
            clf = algorithm(max_iter=10000)
        else:
            clf = algorithm()

        # Only consider classes that have a score method
Example #40
def test_all_estimator_no_base_class():
    # test that all_estimators doesn't find abstract classes.
    for name, Estimator in all_estimators():
        msg = ("Base estimators such as {0} should not be included"
               " in all_estimators").format(name)
        assert_false(name.lower().startswith('base'), msg=msg)
Example #41
def test_all_estimator_no_base_class():
    # test that all_estimators doesn't find abstract classes.
    for name, Estimator in all_estimators():
        msg = ("Base estimators such as {0} should not be included"
               " in all_estimators").format(name)
        assert not name.lower().startswith('base'), msg


def test_all_estimators():
    estimators = all_estimators(include_meta_estimators=True)

    # Meta sanity-check to make sure that the estimator introspection runs
    # properly
    assert_greater(len(estimators), 0)


@pytest.mark.parametrize('name, Estimator',
                         all_estimators(include_meta_estimators=True))
def test_parameters_default_constructible(name, Estimator):
    # Test that estimators are default-constructible
    check_parameters_default_constructible(name, Estimator)


def _tested_non_meta_estimators():
    for name, Estimator in all_estimators():
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        yield name, Estimator


def _generate_checks_per_estimator(check_generator, estimators):
Example #42
def test_estimators_sparse_data():
    # All estimators should either deal with sparse data or raise an
    # exception with type TypeError and an intelligible error message
    estimators = all_estimators(type_filter=['classifier', 'regressor'])
    for name, Estimator in estimators:
        yield check_regressors_classifiers_sparse_data, name, Estimator
Example #43
start = time.time()

# Load the data
dataset = pd.read_csv(
    "C:/Users/yunom/Desktop/Output_python/Dataset_amedas.csv")
dataset = dataset.drop('Unnamed: 0', axis=1)

# Split the data into labels and input features
target_col = 'MORE_AMP'
feature_col = dataset.columns[1:]
feature_col = np.array(feature_col)
y = np.array(dataset[target_col])
x = np.array(dataset[feature_col])

# Get all regressor algorithms
allAlgorithms = all_estimators(type_filter="regressor")

list_name = []
for i in range(100):
    list_rmse_1 = []
    list_rmse_2 = []
    list_score_train = []
    list_score_test = []
    list_stdev = []
    # Split into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=i)

    start = time.time()
Example #44
def get_model_configs(
    my_models: Union[str, List[str]],
    class_key="CLASS",
    fit_key="FIT",
    meta_key="META",
) -> Union[dict, List[dict]]:
    """build sklearn model configuration parameters
    
    Take (full) class name of an scikit-learn model 
    and retrieve its `class` and `fit` parameters and
    their default values.
    
    Also returns some useful metadata values for the class
    """
    # get a list of all sklearn estimators
    estimators = all_estimators()

    def _get_estimator(pkg_class):
        """find a specific class in a list of sklearn estimators"""
        my_class = pkg_class.split('.')[-1]
        return list(filter(lambda x: x[0] == my_class, estimators))[0]

    # find estimators corresponding to my_models list
    my_estimators = []
    my_models = [my_models] if isinstance(my_models, str) else my_models
    for model in my_models:
        estimator_name, estimator_class = _get_estimator(model)
        my_estimators.append((estimator_name, estimator_class))

    # get class and fit specs
    estimator_specs = []
    for an_estimator in my_estimators:
        estimator_specs.append((
            an_estimator[0],  # model only name
            getfullargspec(an_estimator[1]),  # class params
            getfullargspec(an_estimator[1].fit),  # fit params
            an_estimator[1]))  # package.module.model

    model_configs = []

    for estimator in estimator_specs:
        model_json = {class_key: {}, fit_key: {}}
        fit_params = {}

        for i, key in enumerate(model_json.keys()):
            f = estimator[i + 1]
            args_paired = []
            defs_paired = []

            # reverse the args since there are fewer defaults than args
            args = f.args
            args.reverse()
            n_args = len(args)

            defs = f.defaults
            if defs is None:
                defs = [defs]
            defs = list(defs)
            defs.reverse()
            n_defs = len(defs)

            n_smallest = min(n_args, n_defs)
            n_largest = max(n_args, n_defs)

            # build 2 lists that can be concatenated
            for ix in range(n_smallest):
                if args[ix] != "self":
                    args_paired.append(args[ix])
                    defs_paired.append(defs[ix])

            for ix in range(n_smallest, n_largest):
                if ix != 0 and args[ix] != "self":
                    args_paired.append(args[ix])
                    defs_paired.append(None)
            # concatenate lists into appropriate structure
            model_json[key] = dict(
                zip(reversed(args_paired), reversed(defs_paired)))

        model_json[meta_key] = {}
        model_json[meta_key]['sklearn_version'] = skversion
        model_json[meta_key]['class'] = '.'.join(
            [estimator[3].__module__, estimator[0]])
        model_configs.append(model_json)
    if len(model_configs) == 1:
        # do we want to log this modified model as an artifact?
        return model_configs[0]
    else:
        # do we want to log this modified model as an artifact?
        return model_configs
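
A hypothetical usage sketch (the model name is illustrative; the keys follow the class_key/fit_key/meta_key defaults above):

config = get_model_configs("sklearn.linear_model.LogisticRegression")
print(config["META"]["class"])   # fully qualified class path
print(config["CLASS"].get("C"))  # default value of the C constructor parameter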
Example #45
def test_all_estimators():
    estimators = all_estimators(include_meta_estimators=True)

    # Meta sanity-check to make sure that the estimator introspection runs
    # properly
    assert_greater(len(estimators), 0)
Example #46
File: init.py, Project: hjanime/VisTrails
def discover_clustering():
    return [
        make_module(name, Est, "clustering")
        for (name, Est) in all_estimators(type_filter="cluster")
    ]
Example #47
def test_transformers():
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors
    transformers = all_estimators(type_filter='transformer')
    X, y = make_blobs(n_samples=30,
                      centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0,
                      n_features=2,
                      cluster_std=0.1)
    n_samples, n_features = X.shape
    X = StandardScaler().fit_transform(X)
    X -= X.min()

    succeeded = True

    for name, Transformer in transformers:
        if name in dont_test:
            continue
        # these don't actually fit the data:
        if name in ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']:
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            transformer = Transformer()
        set_random_state(transformer)
        if hasattr(transformer, 'compute_importances'):
            transformer.compute_importances = True

        if name == 'SelectKBest':
            # SelectKBest has a default of k=10
            # which is more features than we have.
            transformer.k = 1
        elif name in ['GaussianRandomProjection', 'SparseRandomProjection']:
            # Due to the jl lemma and very few samples, the number
            # of components of the random matrix projection will be greater
            # than the number of features.
            # So we impose a smaller number (avoid "auto" mode)
            transformer.n_components = 1
        elif name == "MiniBatchDictionaryLearning":
            transformer.set_params(n_iter=5)  # default = 1000

        elif name == "KernelPCA":
            transformer.remove_zero_eig = False

        # fit

        if name in ('_PLS', 'PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'):
            random_state = np.random.RandomState(seed=12345)
            y_ = np.vstack([y, 2 * y + random_state.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        try:
            transformer.fit(X, y_)
            X_pred = transformer.fit_transform(X, y=y_)
            if isinstance(X_pred, tuple):
                for x_pred in X_pred:
                    assert_equal(x_pred.shape[0], n_samples)
            else:
                assert_equal(X_pred.shape[0], n_samples)
        except Exception as e:
            print(transformer)
            print(e)
            print()
            succeeded = False
            continue

        if hasattr(transformer, 'transform'):
            if name in ('_PLS', 'PLSCanonical', 'PLSRegression', 'CCA',
                        'PLSSVD'):
                X_pred2 = transformer.transform(X, y_)
                X_pred3 = transformer.fit_transform(X, y=y_)
            else:
                X_pred2 = transformer.transform(X)
                X_pred3 = transformer.fit_transform(X, y=y_)
            if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple):
                for x_pred, x_pred2, x_pred3 in zip(X_pred, X_pred2, X_pred3):
                    assert_array_almost_equal(
                        x_pred, x_pred2, 2,
                        "fit_transform not correct in %s" % Transformer)
                    assert_array_almost_equal(
                        x_pred3, x_pred2, 2,
                        "fit_transform not correct in %s" % Transformer)
            else:
                assert_array_almost_equal(
                    X_pred, X_pred2, 2,
                    "fit_transform not correct in %s" % Transformer)
                assert_array_almost_equal(
                    X_pred3, X_pred2, 2,
                    "fit_transform not correct in %s" % Transformer)

            # raises error on malformed input for transform
            assert_raises(ValueError, transformer.transform, X.T)
    assert_true(succeeded)