def test_graphviz_errors(): # Check for errors of export_graphviz clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2) # Check not-fitted decision tree error out = StringIO() assert_raises(NotFittedError, export_graphviz, clf, out) clf.fit(X, y) # Check if it errors when length of feature_names # mismatches with number of features message = ("Length of feature_names, " "1 does not match number of features, 2") assert_raise_message(ValueError, message, export_graphviz, clf, None, feature_names=["a"]) message = ("Length of feature_names, " "3 does not match number of features, 2") assert_raise_message(ValueError, message, export_graphviz, clf, None, feature_names=["a", "b", "c"]) # Check class_names error out = StringIO() assert_raises(IndexError, export_graphviz, clf, out, class_names=[]) # Check precision error out = StringIO() assert_raises_regex(ValueError, "should be greater or equal", export_graphviz, clf, out, precision=-1) assert_raises_regex(ValueError, "should be an integer", export_graphviz, clf, out, precision="1")
def test_enet_l1_ratio(): # Test that an error message is raised if an estimator that # uses _alpha_grid is called with l1_ratio=0 msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. " "Please supply a grid by providing your estimator with the " "appropriate `alphas=` argument.") X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T y = np.array([12, 10, 11, 21, 5]) assert_raise_message(ValueError, msg, ElasticNetCV( l1_ratio=0, random_state=42).fit, X, y) assert_raise_message(ValueError, msg, MultiTaskElasticNetCV( l1_ratio=0, random_state=42).fit, X, y[:, None]) # Test that l1_ratio=0 is allowed if we supply a grid manually alphas = [0.1, 10] estkwds = {'alphas': alphas, 'random_state': 42} est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds) est = ElasticNetCV(l1_ratio=0, **estkwds) with ignore_warnings(): est_desired.fit(X, y) est.fit(X, y) assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5) est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds) est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds) with ignore_warnings(): est.fit(X, y[:, None]) est_desired.fit(X, y[:, None]) assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
def test_check_solver_option(LR): X, y = iris.data, iris.target msg = ('Logistic Regression supports only liblinear, newton-cg, ' 'lbfgs, sag and saga solvers, got wrong_name') lr = LR(solver="wrong_name") assert_raise_message(ValueError, msg, lr.fit, X, y) msg = "multi_class should be either multinomial or ovr, got wrong_name" lr = LR(solver='newton-cg', multi_class="wrong_name") assert_raise_message(ValueError, msg, lr.fit, X, y) # only 'liblinear' solver msg = "Solver liblinear does not support a multinomial backend." lr = LR(solver='liblinear', multi_class='multinomial') assert_raise_message(ValueError, msg, lr.fit, X, y) # all solvers except 'liblinear' for solver in ['newton-cg', 'lbfgs', 'sag']: msg = ("Solver %s supports only l2 penalties, got l1 penalty." % solver) lr = LR(solver=solver, penalty='l1') assert_raise_message(ValueError, msg, lr.fit, X, y) for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']: msg = ("Solver %s supports only dual=False, got dual=True" % solver) lr = LR(solver=solver, dual=True) assert_raise_message(ValueError, msg, lr.fit, X, y)
def test_pls_errors(): d = load_linnerud() X = d.data Y = d.target for clf in [pls_.PLSCanonical(), pls_.PLSRegression(), pls_.PLSSVD()]: clf.n_components = 4 assert_raise_message(ValueError, "Invalid number of components", clf.fit, X, Y)
def test_assert_raise_message(): def _raise_ValueError(message): raise ValueError(message) def _no_raise(): pass assert_raise_message(ValueError, "test", _raise_ValueError, "test") assert_raises(AssertionError, assert_raise_message, ValueError, "something else", _raise_ValueError, "test") assert_raises(ValueError, assert_raise_message, TypeError, "something else", _raise_ValueError, "test") assert_raises(AssertionError, assert_raise_message, ValueError, "test", _no_raise) # multiple exceptions in a tuple assert_raises(AssertionError, assert_raise_message, (ValueError, AttributeError), "test", _no_raise)
def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test): dense_svm.fit(X_train.toarray(), y_train) if sparse.isspmatrix(X_test): X_test_dense = X_test.toarray() else: X_test_dense = X_test sparse_svm.fit(X_train, y_train) assert sparse.issparse(sparse_svm.support_vectors_) assert sparse.issparse(sparse_svm.dual_coef_) assert_array_almost_equal(dense_svm.support_vectors_, sparse_svm.support_vectors_.toarray()) assert_array_almost_equal(dense_svm.dual_coef_, sparse_svm.dual_coef_.toarray()) if dense_svm.kernel == "linear": assert sparse.issparse(sparse_svm.coef_) assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray()) assert_array_almost_equal(dense_svm.support_, sparse_svm.support_) assert_array_almost_equal(dense_svm.predict(X_test_dense), sparse_svm.predict(X_test)) assert_array_almost_equal(dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test)) assert_array_almost_equal(dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test_dense)) if isinstance(dense_svm, svm.OneClassSVM): msg = "cannot use sparse input in 'OneClassSVM' trained on dense data" else: assert_array_almost_equal(dense_svm.predict_proba(X_test_dense), sparse_svm.predict_proba(X_test), 4) msg = "cannot use sparse input in 'SVC' trained on dense data" if sparse.isspmatrix(X_test): assert_raise_message(ValueError, msg, dense_svm.predict, X_test)
def test_multilabel_binarizer_given_classes(): inp = [(2, 3), (1,), (1, 2)] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]]) # fit_transform() mlb = MultiLabelBinarizer(classes=[1, 3, 2]) assert_array_equal(mlb.fit_transform(inp), indicator_mat) assert_array_equal(mlb.classes_, [1, 3, 2]) # fit().transform() mlb = MultiLabelBinarizer(classes=[1, 3, 2]) assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) assert_array_equal(mlb.classes_, [1, 3, 2]) # ensure works with extra class mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2]) assert_array_equal(mlb.fit_transform(inp), np.hstack(([[0], [0], [0]], indicator_mat))) assert_array_equal(mlb.classes_, [4, 1, 3, 2]) # ensure fit is no-op as iterable is not consumed inp = iter(inp) mlb = MultiLabelBinarizer(classes=[1, 3, 2]) assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) # ensure a ValueError is thrown if given duplicate classes err_msg = "The classes argument contains duplicate classes. Remove " \ "these duplicates before passing them to MultiLabelBinarizer." mlb = MultiLabelBinarizer(classes=[1, 3, 2, 3]) assert_raise_message(ValueError, err_msg, mlb.fit, inp)
def test_k_means_function(): # test calling the k_means function directly # catch output old_stdout = sys.stdout sys.stdout = StringIO() try: cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters, sample_weight=None, verbose=True) finally: sys.stdout = old_stdout centers = cluster_centers assert_equal(centers.shape, (n_clusters, n_features)) labels = labels assert_equal(np.unique(labels).shape[0], n_clusters) # check that the labels assignment are perfect (up to a permutation) assert_equal(v_measure_score(true_labels, labels), 1.0) assert_greater(inertia, 0.0) # check warning when centers are passed assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters, sample_weight=None, init=centers) # to many clusters desired assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1, sample_weight=None) # kmeans for algorithm='elkan' raises TypeError on sparse matrix assert_raise_message(TypeError, "algorithm='elkan' not supported for " "sparse input X", k_means, X=X_csr, n_clusters=2, sample_weight=None, algorithm="elkan")
def check_dtype_object(name, Estimator): # check that estimators treat dtype object as numeric if possible rng = np.random.RandomState(0) X = rng.rand(40, 10).astype(object) y = (X[:, 0] * 4).astype(np.int) y = multioutput_estimator_convert_y_2d(name, y) with warnings.catch_warnings(): estimator = Estimator() set_fast_parameters(estimator) estimator.fit(X, y) if hasattr(estimator, "predict"): estimator.predict(X) if hasattr(estimator, "transform"): estimator.transform(X) try: estimator.fit(X, y.astype(object)) except Exception as e: if "Unknown label type" not in str(e): raise X[0, 0] = {'foo': 'bar'} assert_raise_message(TypeError, "string or a number", estimator.fit, X, y)
def test_nmf_negative_beta_loss(): # Test that an error is raised if beta_loss < 0 and X contains zeros. # Test that the output has not NaN values when the input contains zeros. n_samples = 6 n_features = 5 n_components = 3 rng = np.random.mtrand.RandomState(42) X = rng.randn(n_samples, n_features) np.clip(X, 0, None, out=X) X_csr = sp.csr_matrix(X) def _assert_nmf_no_nan(X, beta_loss): W, H, _ = non_negative_factorization( X, init='random', n_components=n_components, solver='mu', beta_loss=beta_loss, random_state=0, max_iter=1000) assert not np.any(np.isnan(W)) assert not np.any(np.isnan(H)) msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge." for beta_loss in (-0.6, 0.): assert_raise_message(ValueError, msg, _assert_nmf_no_nan, X, beta_loss) _assert_nmf_no_nan(X + 1e-9, beta_loss) for beta_loss in (0.2, 1., 1.2, 2., 2.5): _assert_nmf_no_nan(X, beta_loss) _assert_nmf_no_nan(X_csr, beta_loss)
def test_step_name_validation(): bad_steps1 = [('a__q', Mult(2)), ('b', Mult(3))] bad_steps2 = [('a', Mult(2)), ('a', Mult(3))] for cls, param in [(Pipeline, 'steps'), (FeatureUnion, 'transformer_list')]: # we validate in construction (despite scikit-learn convention) bad_steps3 = [('a', Mult(2)), (param, Mult(3))] for bad_steps, message in [ (bad_steps1, "Step names must not contain __: got ['a__q']"), (bad_steps2, "Names provided are not unique: ['a', 'a']"), (bad_steps3, "Step names conflict with constructor " "arguments: ['%s']" % param), ]: # three ways to make invalid: # - construction assert_raise_message(ValueError, message, cls, **{param: bad_steps}) # - setattr est = cls(**{param: [('a', Mult(1))]}) setattr(est, param, bad_steps) assert_raise_message(ValueError, message, est.fit, [[1]], [1]) assert_raise_message(ValueError, message, est.fit_transform, [[1]], [1]) # - set_params est = cls(**{param: [('a', Mult(1))]}) est.set_params(**{param: bad_steps}) assert_raise_message(ValueError, message, est.fit, [[1]], [1]) assert_raise_message(ValueError, message, est.fit_transform, [[1]], [1])
def test_raises_value_error_if_sample_weights_greater_than_1d(): # Sample weights must be either scalar or 1D n_sampless = [2, 3] n_featuress = [3, 2] rng = np.random.RandomState(42) for n_samples, n_features in zip(n_sampless, n_featuress): X = rng.randn(n_samples, n_features) y = rng.randn(n_samples) sample_weights_OK = rng.randn(n_samples) ** 2 + 1 sample_weights_OK_1 = 1.0 sample_weights_OK_2 = 2.0 sample_weights_not_OK = sample_weights_OK[:, np.newaxis] sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :] ridge = Ridge(alpha=1) # make sure the "OK" sample weights actually work ridge.fit(X, y, sample_weights_OK) ridge.fit(X, y, sample_weights_OK_1) ridge.fit(X, y, sample_weights_OK_2) def fit_ridge_not_ok(): ridge.fit(X, y, sample_weights_not_OK) def fit_ridge_not_ok_2(): ridge.fit(X, y, sample_weights_not_OK_2) assert_raise_message(ValueError, "Sample weights must be 1D array or scalar", fit_ridge_not_ok) assert_raise_message(ValueError, "Sample weights must be 1D array or scalar", fit_ridge_not_ok_2)
def test_set_pipeline_step_none(): # Test setting Pipeline steps to None X = np.array([[1]]) y = np.array([1]) mult2 = Mult(mult=2) mult3 = Mult(mult=3) mult5 = Mult(mult=5) def make(): return Pipeline([("m2", mult2), ("m3", mult3), ("last", mult5)]) pipeline = make() exp = 2 * 3 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) pipeline.set_params(m3=None) exp = 2 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) assert_dict_equal( pipeline.get_params(deep=True), {"steps": pipeline.steps, "m2": mult2, "m3": None, "last": mult5, "m2__mult": 2, "last__mult": 5}, ) pipeline.set_params(m2=None) exp = 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) # for other methods, ensure no AttributeErrors on None: other_methods = ["predict_proba", "predict_log_proba", "decision_function", "transform", "score"] for method in other_methods: getattr(pipeline, method)(X) pipeline.set_params(m2=mult2) exp = 2 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) pipeline = make() pipeline.set_params(last=None) # mult2 and mult3 are active exp = 6 assert_array_equal([[exp]], pipeline.fit(X, y).transform(X)) assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) assert_raise_message(AttributeError, "'NoneType' object has no attribute 'predict'", getattr, pipeline, "predict") # Check None step at construction time exp = 2 * 5 pipeline = Pipeline([("m2", mult2), ("m3", None), ("last", mult5)]) assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]]))
def test_binary_clf_curve(): rng = check_random_state(404) y_true = rng.randint(0, 3, size=10) y_pred = rng.rand(10) msg = "multiclass format is not supported" assert_raise_message(ValueError, msg, precision_recall_curve, y_true, y_pred)
def test_logistic_regressioncv_class_weights(): X, y = make_classification(n_samples=20, n_features=20, n_informative=10, n_classes=3, random_state=0) msg = ("In LogisticRegressionCV the liblinear solver cannot handle " "multiclass with class_weight of type dict. Use the lbfgs, " "newton-cg or sag solvers or set class_weight='balanced'") clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2}, solver='liblinear') assert_raise_message(ValueError, msg, clf_lib.fit, X, y) y_ = y.copy() y_[y == 2] = 1 clf_lib.fit(X, y_) assert_array_equal(clf_lib.classes_, [0, 1]) # Test for class_weight=balanced X, y = make_classification(n_samples=20, n_features=20, n_informative=10, random_state=0) clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False, class_weight='balanced') clf_lbf.fit(X, y) clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False, class_weight='balanced') clf_lib.fit(X, y) clf_sag = LogisticRegressionCV(solver='sag', fit_intercept=False, class_weight='balanced', max_iter=2000) clf_sag.fit(X, y) assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4) assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4) assert_array_almost_equal(clf_lib.coef_, clf_sag.coef_, decimal=4)
def test_ovo_one_class(): # Test error for OvO with one class X = np.eye(4) y = np.array(['a'] * 4) ovo = OneVsOneClassifier(LinearSVC()) assert_raise_message(ValueError, "when only one class", ovo.fit, X, y)
def test_ovo_float_y(): # Test that the OvO errors on float targets X = iris.data y = iris.data[:, 0] ovo = OneVsOneClassifier(LinearSVC()) assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y)
def test_grid_search_groups(): # Check if ValueError (when groups is None) propagates to GridSearchCV # And also check if groups is correctly passed to the cv object rng = np.random.RandomState(0) X, y = make_classification(n_samples=15, n_classes=2, random_state=0) groups = rng.randint(0, 3, 15) clf = LinearSVC(random_state=0) grid = {'C': [1]} group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(), GroupShuffleSplit()] for cv in group_cvs: gs = GridSearchCV(clf, grid, cv=cv) assert_raise_message(ValueError, "The groups parameter should not be None", gs.fit, X, y) gs.fit(X, y, groups=groups) non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()] for cv in non_group_cvs: gs = GridSearchCV(clf, grid, cv=cv) # Should not raise an error gs.fit(X, y)
def test_n_iter(): """Check value of n_iter.""" X = np.array([[1], [2], [6], [8], [10]]) y = np.array([1, 2, 6, 8, 10]) clf = BayesianRidge(n_iter=0) msg = "n_iter should be greater than or equal to 1." assert_raise_message(ValueError, msg, clf.fit, X, y)
def test_sample_weight(): """Tests sample_weight parameter of VotingClassifier""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) clf3 = SVC(probability=True, random_state=123) eclf1 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('svc', clf3)], voting='soft').fit(X, y, sample_weight=np.ones((len(y),))) eclf2 = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('svc', clf3)], voting='soft').fit(X, y) assert_array_equal(eclf1.predict(X), eclf2.predict(X)) assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) sample_weight = np.random.RandomState(123).uniform(size=(len(y),)) eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft') eclf3.fit(X, y, sample_weight) clf1.fit(X, y, sample_weight) assert_array_equal(eclf3.predict(X), clf1.predict(X)) assert_array_equal(eclf3.predict_proba(X), clf1.predict_proba(X)) clf4 = KNeighborsClassifier() eclf3 = VotingClassifier(estimators=[ ('lr', clf1), ('svc', clf3), ('knn', clf4)], voting='soft') msg = ('Underlying estimator \'knn\' does not support sample weights.') assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight)
def test_notfitted(): eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), ('lr2', LogisticRegression())], voting='soft') msg = ("This VotingClassifier instance is not fitted yet. Call \'fit\'" " with appropriate arguments before using this method.") assert_raise_message(NotFittedError, msg, eclf.predict_proba, X)
def test_calinski_harabaz_score(): rng = np.random.RandomState(seed=0) # Assert message when there is only one label assert_raise_message(ValueError, "Number of labels is", calinski_harabaz_score, rng.rand(10, 2), np.zeros(10)) # Assert message when all point are in different clusters assert_raise_message(ValueError, "Number of labels is", calinski_harabaz_score, rng.rand(10, 2), np.arange(10)) # Assert the value is 1. when all samples are equals assert_equal(1., calinski_harabaz_score(np.ones((10, 2)), [0] * 5 + [1] * 5)) # Assert the value is 0. when all the mean cluster are equal assert_equal(0., calinski_harabaz_score([[-1, -1], [1, 1]] * 10, [0] * 10 + [1] * 10)) # General case (with non numpy arrays) X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 + [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5) labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10 assert_almost_equal(calinski_harabaz_score(X, labels), 45 * (40 - 4) / (5 * (4 - 1)))
def test_string_attribute(monkeypatch, gzip_response): data_id = 40945 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # single column test assert_raise_message(ValueError, 'STRING attributes are not yet supported', fetch_openml, data_id=data_id, cache=False)
def test_error_on_y_transform(): data = patsy.demo_data("x1", "x2", "x3", "y") est = PatsyTransformer("y ~ x1 + x2") msg = ("encountered outcome variables for a model" " that does not expect them") assert_raise_message(patsy.PatsyError, msg, est.fit, data) assert_raise_message(patsy.PatsyError, msg, est.fit_transform, data)
def test_fetch_nonexiting(monkeypatch, gzip_response): # there is no active version of glass2 data_id = 40675 _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # Note that we only want to search by name (not data id) assert_raise_message(ValueError, "No active dataset glass2 found", fetch_openml, name='glass2', cache=False)
def test_classification_inf_nan_input(metric): # Classification metrics all raise a mixed input exception for y_true, y_score in invalids: assert_raise_message(ValueError, "Classification metrics can't handle a mix " "of binary and continuous targets", metric, y_true, y_score)
def test_classification_inf_nan_input(metric): # Classification metrics all raise a mixed input exception for y_true, y_score in invalids: assert_raise_message(ValueError, "Input contains NaN, infinity or a " "value too large", metric, y_true, y_score)
def test_score(): covar_type = 'full' rng = np.random.RandomState(0) rand_data = RandomData(rng, scale=7) n_components = rand_data.n_components X = rand_data.X[covar_type] # Check the error message if we don't call fit gmm1 = GaussianMixture(n_components=n_components, n_init=1, max_iter=1, reg_covar=0, random_state=rng, covariance_type=covar_type) assert_raise_message(NotFittedError, "This GaussianMixture instance is not fitted " "yet. Call 'fit' with appropriate arguments " "before using this method.", gmm1.score, X) # Check score value with warnings.catch_warnings(): warnings.simplefilter("ignore", ConvergenceWarning) gmm1.fit(X) gmm_score = gmm1.score(X) gmm_score_proba = gmm1.score_samples(X).mean() assert_almost_equal(gmm_score, gmm_score_proba) # Check if the score increase gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type).fit(X) assert_greater(gmm2.score(X), gmm1.score(X))
def test_n_components(): rng = np.random.RandomState(42) X = np.arange(12).reshape(4, 3) y = [1, 1, 2, 2] init = rng.rand(X.shape[1] - 1, 3) # n_components = X.shape[1] != transformation.shape[0] n_components = X.shape[1] nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components) assert_raise_message(ValueError, 'The preferred dimensionality of the ' 'projected space `n_components` ({}) does not match ' 'the output dimensionality of the given ' 'linear transformation `init` ({})!' .format(n_components, init.shape[0]), nca.fit, X, y) # n_components > X.shape[1] n_components = X.shape[1] + 2 nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components) assert_raise_message(ValueError, 'The preferred dimensionality of the ' 'projected space `n_components` ({}) cannot ' 'be greater than the given data ' 'dimensionality ({})!' .format(n_components, X.shape[1]), nca.fit, X, y) # n_components < X.shape[1] nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity') nca.fit(X, y)
def test_check_solver_option(): X, y = iris.data, iris.target for LR in [LogisticRegression, LogisticRegressionCV]: msg = ("Logistic Regression supports only liblinear, newton-cg and" " lbfgs solvers, got wrong_name") lr = LR(solver="wrong_name") assert_raise_message(ValueError, msg, lr.fit, X, y) msg = "multi_class should be either multinomial or ovr, got wrong_name" lr = LR(solver='newton-cg', multi_class="wrong_name") assert_raise_message(ValueError, msg, lr.fit, X, y) # all solver except 'newton-cg' and 'lfbgs' for solver in ['liblinear']: msg = ("Solver %s does not support a multinomial backend." % solver) lr = LR(solver=solver, multi_class='multinomial') assert_raise_message(ValueError, msg, lr.fit, X, y) # all solvers except 'liblinear' for solver in ['newton-cg', 'lbfgs']: msg = ("Solver %s supports only l2 penalties, got l1 penalty." % solver) lr = LR(solver=solver, penalty='l1') assert_raise_message(ValueError, msg, lr.fit, X, y) msg = ("Solver %s supports only dual=False, got dual=True" % solver) lr = LR(solver=solver, dual=True) assert_raise_message(ValueError, msg, lr.fit, X, y)
def assert_raises_on_all_points_same_cluster(func): """Assert message when all point are in different clusters""" rng = np.random.RandomState(seed=0) assert_raise_message(ValueError, "Number of labels is", func, rng.rand(10, 2), np.arange(10))
def test_fast_dot(): """Check fast dot blas wrapper function""" rng = np.random.RandomState(42) A = rng.random_sample([2, 10]) B = rng.random_sample([2, 10]) try: linalg.get_blas('gemm') has_blas = True except: has_blas = False if has_blas: # test dispatch to np.dot with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always', NonBLASDotWarning) # maltyped data for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]: fast_dot(A.astype(dt1), B.astype(dt2).T) assert_true(type(w.pop(-1)) == NonBLASDotWarning) # malformed data # ndim == 0 E = np.empty(0) fast_dot(E, E) assert_true(type(w.pop(-1)) == NonBLASDotWarning) ## ndim == 1 fast_dot(A, A[0]) assert_true(type(w.pop(-1)) == NonBLASDotWarning) ## ndim > 2 fast_dot(A.T, np.array([A, A])) assert_true(type(w.pop(-1)) == NonBLASDotWarning) ## min(shape) == 1 fast_dot(A, A[0, :][None, :]) assert_true(type(w.pop(-1)) == NonBLASDotWarning) # test for matrix mismatch error msg = ('Invalid array shapes: A.shape[%d] should be the same as ' 'B.shape[0]. Got A.shape=%r B.shape=%r' % (A.ndim - 1, A.shape, A.shape)) assert_raise_message(msg, fast_dot, A, A) # test cov-like use case + dtypes my_assert = assert_array_almost_equal for dtype in ['f8', 'f4']: A = A.astype(dtype) B = B.astype(dtype) # col < row C = np.dot(A.T, A) C_ = fast_dot(A.T, A) my_assert(C, C_) C = np.dot(A.T, B) C_ = fast_dot(A.T, B) my_assert(C, C_) C = np.dot(A, B.T) C_ = fast_dot(A, B.T) my_assert(C, C_) # test square matrix * rectangular use case A = rng.random_sample([2, 2]) for dtype in ['f8', 'f4']: A = A.astype(dtype) B = B.astype(dtype) C = np.dot(A, B) C_ = fast_dot(A, B) my_assert(C, C_) C = np.dot(A.T, B) C_ = fast_dot(A.T, B) my_assert(C, C_) if has_blas: for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]: assert_raises(ValueError, fast_dot, x, x.T)
def test_auc_score_non_binary_class(): # Test that roc_auc_score function returns an error when trying # to compute AUC for non-binary class values. rng = check_random_state(404) y_pred = rng.rand(10) # y_true contains only one class value y_true = np.zeros(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = -np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) # y_true contains three different class values y_true = rng.randint(0, 3, size=10) assert_raise_message(ValueError, "multiclass format is not supported", roc_auc_score, y_true, y_pred) clean_warning_registry() with warnings.catch_warnings(record=True): rng = check_random_state(404) y_pred = rng.rand(10) # y_true contains only one class value y_true = np.zeros(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) y_true = -np.ones(10, dtype="int") assert_raise_message(ValueError, "ROC AUC score is not defined", roc_auc_score, y_true, y_pred) # y_true contains three different class values y_true = rng.randint(0, 3, size=10) assert_raise_message(ValueError, "multiclass format is not supported", roc_auc_score, y_true, y_pred)
def test_bayesian_mixture_precisions_prior_initialisation(): rng = np.random.RandomState(0) n_samples, n_features = 10, 2 X = rng.rand(n_samples, n_features) # Check raise message for a bad value of degrees_of_freedom_prior bad_degrees_of_freedom_prior_ = n_features - 1. bgmm = BayesianGaussianMixture( degrees_of_freedom_prior=bad_degrees_of_freedom_prior_, random_state=rng) assert_raise_message( ValueError, "The parameter 'degrees_of_freedom_prior' should be " "greater than %d, but got %.3f." % (n_features - 1, bad_degrees_of_freedom_prior_), bgmm.fit, X) # Check correct init for a given value of degrees_of_freedom_prior degrees_of_freedom_prior = rng.rand() + n_features - 1. bgmm = BayesianGaussianMixture( degrees_of_freedom_prior=degrees_of_freedom_prior, random_state=rng).fit(X) assert_almost_equal(degrees_of_freedom_prior, bgmm.degrees_of_freedom_prior_) # Check correct init for the default value of degrees_of_freedom_prior degrees_of_freedom_prior_default = n_features bgmm = BayesianGaussianMixture( degrees_of_freedom_prior=degrees_of_freedom_prior_default, random_state=rng).fit(X) assert_almost_equal(degrees_of_freedom_prior_default, bgmm.degrees_of_freedom_prior_) # Check correct init for a given value of covariance_prior covariance_prior = { 'full': np.cov(X.T, bias=1) + 10, 'tied': np.cov(X.T, bias=1) + 5, 'diag': np.diag(np.atleast_2d(np.cov(X.T, bias=1))) + 3, 'spherical': rng.rand() } bgmm = BayesianGaussianMixture(random_state=rng) for cov_type in ['full', 'tied', 'diag', 'spherical']: bgmm.covariance_type = cov_type bgmm.covariance_prior = covariance_prior[cov_type] bgmm.fit(X) assert_almost_equal(covariance_prior[cov_type], bgmm.covariance_prior_) # Check raise message for a bad spherical value of covariance_prior bad_covariance_prior_ = -1. bgmm = BayesianGaussianMixture(covariance_type='spherical', covariance_prior=bad_covariance_prior_, random_state=rng) assert_raise_message( ValueError, "The parameter 'spherical covariance_prior' " "should be greater than 0., but got %.3f." % bad_covariance_prior_, bgmm.fit, X) # Check correct init for the default value of covariance_prior covariance_prior_default = { 'full': np.atleast_2d(np.cov(X.T)), 'tied': np.atleast_2d(np.cov(X.T)), 'diag': np.var(X, axis=0, ddof=1), 'spherical': np.var(X, axis=0, ddof=1).mean() } bgmm = BayesianGaussianMixture(random_state=0) for cov_type in ['full', 'tied', 'diag', 'spherical']: bgmm.covariance_type = cov_type bgmm.fit(X) assert_almost_equal(covariance_prior_default[cov_type], bgmm.covariance_prior_)
def test_gaussian_mixture_attributes(): # test bad parameters rng = np.random.RandomState(0) X = rng.rand(10, 2) n_components_bad = 0 gmm = GaussianMixture(n_components=n_components_bad) assert_raise_message( ValueError, "Invalid value for 'n_components': %d " "Estimation requires at least one component" % n_components_bad, gmm.fit, X) # covariance_type should be in [spherical, diag, tied, full] covariance_type_bad = 'bad_covariance_type' gmm = GaussianMixture(covariance_type=covariance_type_bad) assert_raise_message( ValueError, "Invalid value for 'covariance_type': %s " "'covariance_type' should be in " "['spherical', 'tied', 'diag', 'full']" % covariance_type_bad, gmm.fit, X) tol_bad = -1 gmm = GaussianMixture(tol=tol_bad) assert_raise_message( ValueError, "Invalid value for 'tol': %.5f " "Tolerance used by the EM must be non-negative" % tol_bad, gmm.fit, X) reg_covar_bad = -1 gmm = GaussianMixture(reg_covar=reg_covar_bad) assert_raise_message( ValueError, "Invalid value for 'reg_covar': %.5f " "regularization on covariance must be " "non-negative" % reg_covar_bad, gmm.fit, X) max_iter_bad = 0 gmm = GaussianMixture(max_iter=max_iter_bad) assert_raise_message( ValueError, "Invalid value for 'max_iter': %d " "Estimation requires at least one iteration" % max_iter_bad, gmm.fit, X) n_init_bad = 0 gmm = GaussianMixture(n_init=n_init_bad) assert_raise_message( ValueError, "Invalid value for 'n_init': %d " "Estimation requires at least one run" % n_init_bad, gmm.fit, X) init_params_bad = 'bad_method' gmm = GaussianMixture(init_params=init_params_bad) assert_raise_message( ValueError, "Unimplemented initialization method '%s'" % init_params_bad, gmm.fit, X) # test good parameters n_components, tol, n_init, max_iter, reg_covar = 2, 1e-4, 3, 30, 1e-1 covariance_type, init_params = 'full', 'random' gmm = GaussianMixture(n_components=n_components, tol=tol, n_init=n_init, max_iter=max_iter, reg_covar=reg_covar, covariance_type=covariance_type, init_params=init_params).fit(X) assert_equal(gmm.n_components, n_components) assert_equal(gmm.covariance_type, covariance_type) assert_equal(gmm.tol, tol) assert_equal(gmm.reg_covar, reg_covar) assert_equal(gmm.max_iter, max_iter) assert_equal(gmm.n_init, n_init) assert_equal(gmm.init_params, init_params)
def test_max_iter_error(): km = KMeans(max_iter=-1) assert_raise_message(ValueError, 'Number of iterations should be', km.fit, X)
def test_set_pipeline_step_none(): # Test setting Pipeline steps to None X = np.array([[1]]) y = np.array([1]) mult2 = Mult(mult=2) mult3 = Mult(mult=3) mult5 = Mult(mult=5) def make(): return Pipeline([('m2', mult2), ('m3', mult3), ('last', mult5)]) pipeline = make() exp = 2 * 3 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) pipeline.set_params(m3=None) exp = 2 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) assert_dict_equal( pipeline.get_params(deep=True), { 'steps': pipeline.steps, 'm2': mult2, 'm3': None, 'last': mult5, 'memory': None, 'm2__mult': 2, 'last__mult': 5, }) pipeline.set_params(m2=None) exp = 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) # for other methods, ensure no AttributeErrors on None: other_methods = [ 'predict_proba', 'predict_log_proba', 'decision_function', 'transform', 'score' ] for method in other_methods: getattr(pipeline, method)(X) pipeline.set_params(m2=mult2) exp = 2 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) pipeline = make() pipeline.set_params(last=None) # mult2 and mult3 are active exp = 6 assert_array_equal([[exp]], pipeline.fit(X, y).transform(X)) assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) assert_raise_message(AttributeError, "'NoneType' object has no attribute 'predict'", getattr, pipeline, 'predict') # Check None step at construction time exp = 2 * 5 pipeline = Pipeline([('m2', mult2), ('m3', None), ('last', mult5)]) assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]]))
def test_error(): # Test for appropriate exception on errors msg = "Penalty term must be positive" assert_raise_message(ValueError, msg, LogisticRegression(C=-1).fit, X, Y1) assert_raise_message(ValueError, msg, LogisticRegression(C="test").fit, X, Y1) for LR in [LogisticRegression, LogisticRegressionCV]: msg = "Tolerance for stopping criteria must be positive" assert_raise_message(ValueError, msg, LR(tol=-1).fit, X, Y1) assert_raise_message(ValueError, msg, LR(tol="test").fit, X, Y1) msg = "Maximum number of iteration must be positive" assert_raise_message(ValueError, msg, LR(max_iter=-1).fit, X, Y1) assert_raise_message(ValueError, msg, LR(max_iter="test").fit, X, Y1)
def test_parameter_checking(): A = np.ones((2, 2)) name = 'spam' msg = "Invalid solver parameter: got 'spam' instead of one of" assert_raise_message(ValueError, msg, NMF(solver=name).fit, A) msg = "Invalid init parameter: got 'spam' instead of one of" assert_raise_message(ValueError, msg, NMF(init=name).fit, A) msg = "Invalid beta_loss parameter: got 'spam' instead of one" assert_raise_message(ValueError, msg, NMF(solver='mu', beta_loss=name).fit, A) msg = "Invalid beta_loss parameter: solver 'cd' does not handle " msg += "beta_loss = 1.0" assert_raise_message(ValueError, msg, NMF(solver='cd', beta_loss=1.0).fit, A) msg = "Negative values in data passed to" assert_raise_message(ValueError, msg, NMF().fit, -A) assert_raise_message(ValueError, msg, nmf._initialize_nmf, -A, 2, 'nndsvd') clf = NMF(2, tol=0.1).fit(A) assert_raise_message(ValueError, msg, clf.transform, -A)
def assert_raises_on_only_one_label(func): """Assert message when there is only one label""" rng = np.random.RandomState(seed=0) assert_raise_message(ValueError, "Number of labels is", func, rng.rand(10, 2), np.zeros(10))
def test_empty_extract(): # Test extract where fit() has not yet been run. msg = ("This OPTICS instance is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.") clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10) assert_raise_message(ValueError, msg, clust.extract_dbscan, 0.01)
def test_check_classification_targets(): # Test that check_classification_target return correct type. #5782 y = np.array([0.0, 1.1, 2.0, 3.0]) msg = type_of_target(y) assert_raise_message(ValueError, msg, check_classification_targets, y)
def test_check_X_y_informative_error(): X = np.ones((2, 2)) y = None assert_raise_message(ValueError, "y cannot be None", check_X_y, X, y)
def test_non_negative_factorization_checking(): A = np.ones((2, 2)) # Test parameters checking is public function nnmf = non_negative_factorization assert_no_warnings(nnmf, A, A, A, np.int64(1)) msg = ("Number of components must be a positive integer; " "got (n_components=1.5)") assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5) msg = ("Number of components must be a positive integer; " "got (n_components='2')") assert_raise_message(ValueError, msg, nnmf, A, A, A, '2') msg = "Negative values in data passed to NMF (input H)" assert_raise_message(ValueError, msg, nnmf, A, A, -A, 2, 'custom') msg = "Negative values in data passed to NMF (input W)" assert_raise_message(ValueError, msg, nnmf, A, -A, A, 2, 'custom') msg = "Array passed to NMF (input H) is full of zeros" assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom') msg = "Invalid regularization parameter: got 'spam' instead of one of" assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom', True, 'cd', 2., 1e-4, 200, 0., 0., 'spam')
def test_grid_search_param_grid_includes_sequence_of_a_zero_length(): clf = MockClassifier() assert_raise_message( ValueError, "Parameter values for parameter (C) need to be a non-empty sequence.", GridSearchCV, clf, {'C': []})
def test_check_array_min_samples_and_features_messages(): # empty list is considered 2D by default: msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, [[]]) # If considered a 1D collection when ensure_2d=False, then the minimum # number of samples will break: msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False) # Invalid edge case when checking the default minimum sample of a scalar msg = "Singleton array array(42) cannot be considered a valid collection." assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False) # But this works if the input data is forced to look like a 2 array with # one sample and one feature: X_checked = assert_warns(DeprecationWarning, check_array, [42], ensure_2d=True) assert_array_equal(np.array([[42]]), X_checked) # Simulate a model that would need at least 2 samples to be well defined X = np.ones((1, 10)) y = np.ones(1) msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2) # The same message is raised if the data has 2 dimensions even if this is # not mandatory assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2, ensure_2d=False) # Simulate a model that would require at least 3 features (e.g. SelectKBest # with k=3) X = np.ones((10, 2)) y = np.ones(2) msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3) # Only the feature check is enabled whenever the number of dimensions is 2 # even if allow_nd is enabled: assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3, allow_nd=True) # Simulate a case where a pipeline stage as trimmed all the features of a # 2D dataset. X = np.empty(0).reshape(10, 0) y = np.ones(10) msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_X_y, X, y) # nd-data is not checked for any minimum number of features by default: X = np.ones((10, 0, 28, 28)) y = np.ones(10) X_checked, y_checked = check_X_y(X, y, allow_nd=True) assert_array_equal(X, X_checked) assert_array_equal(y, y_checked)
def test_predictproba_hardvoting(): eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), ('lr2', LogisticRegression())], voting='hard') msg = "predict_proba is not available when voting='hard'" assert_raise_message(AttributeError, msg, eclf.predict_proba, X)
def test_check_array(): # accept_sparse == False # raise error on sparse inputs X = [[1, 2], [3, 4]] X_csr = sp.csr_matrix(X) assert_raises(TypeError, check_array, X_csr) # ensure_2d=False X_array = check_array([0, 1, 2], ensure_2d=False) assert X_array.ndim == 1 # ensure_2d=True with 1d array assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', check_array, [0, 1, 2], ensure_2d=True) # ensure_2d=True with scalar array assert_raise_message(ValueError, 'Expected 2D array, got scalar array instead', check_array, 10, ensure_2d=True) # don't allow ndim > 3 X_ndim = np.arange(8).reshape(2, 2, 2) assert_raises(ValueError, check_array, X_ndim) check_array(X_ndim, allow_nd=True) # doesn't raise # dtype and order enforcement. X_C = np.arange(4).reshape(2, 2).copy("C") X_F = X_C.copy("F") X_int = X_C.astype(np.int) X_float = X_C.astype(np.float) Xs = [X_C, X_F, X_int, X_float] dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object] orders = ['C', 'F', None] copys = [True, False] for X, dtype, order, copy in product(Xs, dtypes, orders, copys): X_checked = check_array(X, dtype=dtype, order=order, copy=copy) if dtype is not None: assert X_checked.dtype == dtype else: assert X_checked.dtype == X.dtype if order == 'C': assert X_checked.flags['C_CONTIGUOUS'] assert not X_checked.flags['F_CONTIGUOUS'] elif order == 'F': assert X_checked.flags['F_CONTIGUOUS'] assert not X_checked.flags['C_CONTIGUOUS'] if copy: assert X is not X_checked else: # doesn't copy if it was already good if (X.dtype == X_checked.dtype and X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS'] and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']): assert X is X_checked # allowed sparse != None X_csc = sp.csc_matrix(X_C) X_coo = X_csc.tocoo() X_dok = X_csc.todok() X_int = X_csc.astype(np.int) X_float = X_csc.astype(np.float) Xs = [X_csc, X_coo, X_dok, X_int, X_float] accept_sparses = [['csr', 'coo'], ['coo', 'dok']] for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses, copys): with warnings.catch_warnings(record=True) as w: X_checked = check_array(X, dtype=dtype, accept_sparse=accept_sparse, copy=copy) if (dtype is object or sp.isspmatrix_dok(X)) and len(w): message = str(w[0].message) messages = ["object dtype is not supported by sparse matrices", "Can't check dok sparse matrix for nan or inf."] assert message in messages else: assert len(w) == 0 if dtype is not None: assert X_checked.dtype == dtype else: assert X_checked.dtype == X.dtype if X.format in accept_sparse: # no change if allowed assert X.format == X_checked.format else: # got converted assert X_checked.format == accept_sparse[0] if copy: assert X is not X_checked else: # doesn't copy if it was already good if X.dtype == X_checked.dtype and X.format == X_checked.format: assert X is X_checked # other input formats # convert lists to arrays X_dense = check_array([[1, 2], [3, 4]]) assert isinstance(X_dense, np.ndarray) # raise on too deep lists assert_raises(ValueError, check_array, X_ndim.tolist()) check_array(X_ndim.tolist(), allow_nd=True) # doesn't raise # convert weird stuff to arrays X_no_array = NotAnArray(X_dense) result = check_array(X_no_array) assert isinstance(result, np.ndarray) # deprecation warning if string-like array with dtype="numeric" expected_warn_regex = r"converted to decimal numbers if dtype='numeric'" X_str = [['11', '12'], ['13', 'xx']] for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]: with pytest.warns(FutureWarning, match=expected_warn_regex): check_array(X, dtype="numeric") # deprecation warning if byte-like array with dtype="numeric" X_bytes = [[b'a', b'b'], [b'c', b'd']] for X in [X_bytes, np.array(X_bytes, dtype='V1')]: with pytest.warns(FutureWarning, match=expected_warn_regex): check_array(X, dtype="numeric")
def check_sample_weight_invariance(name, metric, y1, y2): rng = np.random.RandomState(0) sample_weight = rng.randint(1, 10, size=len(y1)) # check that unit weights gives the same score as no weight unweighted_score = metric(y1, y2, sample_weight=None) assert_almost_equal( unweighted_score, metric(y1, y2, sample_weight=np.ones(shape=len(y1))), err_msg="For %s sample_weight=None is not equivalent to " "sample_weight=ones" % name) # check that the weighted and unweighted scores are unequal weighted_score = metric(y1, y2, sample_weight=sample_weight) assert_not_equal(unweighted_score, weighted_score, msg="Unweighted and weighted scores are unexpectedly " "equal (%f) for %s" % (weighted_score, name)) # check that sample_weight can be a list weighted_score_list = metric(y1, y2, sample_weight=sample_weight.tolist()) assert_almost_equal( weighted_score, weighted_score_list, err_msg=("Weighted scores for array and list " "sample_weight input are not equal (%f != %f) for %s") % (weighted_score, weighted_score_list, name)) # check that integer weights is the same as repeated samples repeat_weighted_score = metric(np.repeat(y1, sample_weight, axis=0), np.repeat(y2, sample_weight, axis=0), sample_weight=None) assert_almost_equal( weighted_score, repeat_weighted_score, err_msg="Weighting %s is not equal to repeating samples" % name) # check that ignoring a fraction of the samples is equivalent to setting # the corresponding weights to zero sample_weight_subset = sample_weight[1::2] sample_weight_zeroed = np.copy(sample_weight) sample_weight_zeroed[::2] = 0 y1_subset = y1[1::2] y2_subset = y2[1::2] weighted_score_subset = metric(y1_subset, y2_subset, sample_weight=sample_weight_subset) weighted_score_zeroed = metric(y1, y2, sample_weight=sample_weight_zeroed) assert_almost_equal( weighted_score_subset, weighted_score_zeroed, err_msg=("Zeroing weights does not give the same result as " "removing the corresponding samples (%f != %f) for %s" % (weighted_score_zeroed, weighted_score_subset, name))) if not name.startswith('unnormalized'): # check that the score is invariant under scaling of the weights by a # common factor for scaling in [2, 0.3]: assert_almost_equal(weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), err_msg="%s sample_weight is not invariant " "under scaling" % name) # Check that if number of samples in y_true and sample_weight are not # equal, meaningful error is raised. error_message = ("Found input variables with inconsistent numbers of " "samples: [{}, {}, {}]".format( _num_samples(y1), _num_samples(y2), _num_samples(sample_weight) * 2)) assert_raise_message(ValueError, error_message, metric, y1, y2, sample_weight=np.hstack( [sample_weight, sample_weight]))
def test_grid_search_incorrect_param_grid(): clf = MockClassifier() assert_raise_message( ValueError, "Parameter values for parameter (C) need to be a sequence.", GridSearchCV, clf, {'C': 1})
def test_check_array_accept_sparse_type_exception(): X = [[1, 2], [3, 4]] X_csr = sp.csr_matrix(X) invalid_type = SVR() msg = ("A sparse matrix was passed, but dense data is required. " "Use X.toarray() to convert to a dense numpy array.") assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=False) with pytest.warns(DeprecationWarning): assert_raise_message(TypeError, msg, check_array, X_csr, accept_sparse=None) msg = ("Parameter 'accept_sparse' should be a string, " "boolean or list of strings. You provided 'accept_sparse={}'.") assert_raise_message(ValueError, msg.format(invalid_type), check_array, X_csr, accept_sparse=invalid_type) msg = ("When providing 'accept_sparse' as a tuple or list, " "it must contain at least one string value.") assert_raise_message(ValueError, msg.format([]), check_array, X_csr, accept_sparse=[]) assert_raise_message(ValueError, msg.format(()), check_array, X_csr, accept_sparse=()) assert_raise_message(TypeError, "SVR", check_array, X_csr, accept_sparse=[invalid_type]) # Test deprecation of 'None' assert_warns(DeprecationWarning, check_array, X, accept_sparse=None)
def test_estimator_init(): eclf = VotingClassifier(estimators=[]) msg = ('Invalid `estimators` attribute, `estimators` should be' ' a list of (string, estimator) tuples') assert_raise_message(AttributeError, msg, eclf.fit, X, y) clf = LogisticRegression(random_state=1) eclf = VotingClassifier(estimators=[('lr', clf)], voting='error') msg = ('Voting must be \'soft\' or \'hard\'; got (voting=\'error\')') assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('lr', clf)], weights=[1, 2]) msg = ('Number of classifiers and weights must be equal' '; got 2 weights, 1 estimators') assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('lr', clf), ('lr', clf)], weights=[1, 2]) msg = "Names provided are not unique: ['lr', 'lr']" assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('lr__', clf)]) msg = "Estimator names must not contain __: got ['lr__']" assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('estimators', clf)]) msg = "Estimator names conflict with constructor arguments: ['estimators']" assert_raise_message(ValueError, msg, eclf.fit, X, y)
def test_regression_thresholded_inf_nan_input(metric): for y_true, y_score in invalids: assert_raise_message(ValueError, "contains NaN, infinity", metric, y_true, y_score)
def test_label_encoder_str_bad_shape(dtype): le = LabelEncoder() le.fit(np.array(["apple", "orange"], dtype=dtype)) msg = "bad input shape" assert_raise_message(ValueError, msg, le.transform, "apple")
def test_fast_mcd_on_invalid_input(): X = np.arange(100) assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', fast_mcd, X)
def test_classification_inf_nan_input(metric): # Classification metrics all raise a mixed input exception for y_true, y_score in invalids: assert_raise_message( ValueError, "Input contains NaN, infinity or a " "value too large", metric, y_true, y_score)
def test_get_values_invalid(): assert_raise_message( TypeError, "Expected h5py.Dataset or tables.CArray. " "Got <class 'str'>", scprep.io.hdf5.get_values, 'invalid')
def test_mcd_class_on_invalid_input(): X = np.arange(100) mcd = MinCovDet() assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', mcd.fit, X)
def test_get_node_invalid(): assert_raise_message( TypeError, "Expected h5py.File, tables.File, h5py.Group or " "tables.Group. Got <class 'str'>", scprep.io.hdf5.get_node, 'invalid', 'node')
def test_estimate_bandwidth_with_sparse_matrix(): # Test estimate_bandwidth with sparse matrix X = sparse.lil_matrix((1000, 1000)) msg = "A sparse matrix was passed, but dense data is required." assert_raise_message(TypeError, msg, estimate_bandwidth, X, 200)