def test_fastica_convergence_fail(): # Test the FastICA algorithm on very simple data # (see test_non_square_fastica). # Ensure a ConvergenceWarning raised if the tolerance is sufficiently low. rng = np.random.RandomState(0) n_samples = 1000 # Generate two sources: t = np.linspace(0, 100, n_samples) s1 = np.sin(t) s2 = np.ceil(np.sin(np.pi * t)) s = np.c_[s1, s2].T center_and_norm(s) s1, s2 = s # Mixing matrix mixing = rng.randn(6, 2) m = np.dot(mixing, s) # Do fastICA with tolerance 0. to ensure failing convergence ica = FastICA(algorithm="parallel", n_components=2, random_state=rng, max_iter=2, tol=0.) assert_warns(ConvergenceWarning, ica.fit, m.T)
def test_ovr_always_present(): # Test that ovr works with classes that are always present or absent. # Note: tests is the case where _ConstantPredictor is utilised X = np.ones((10, 2)) X[:5, :] = 0 # Build an indicator matrix where two features are always on. # As list of lists, it would be: [[int(i >= 5), 2, 3] for i in range(10)] y = np.zeros((10, 3)) y[5:, 0] = 1 y[:, 1] = 1 y[:, 2] = 1 ovr = OneVsRestClassifier(LogisticRegression()) assert_warns(UserWarning, ovr.fit, X, y) y_pred = ovr.predict(X) assert_array_equal(np.array(y_pred), np.array(y)) y_pred = ovr.decision_function(X) assert np.unique(y_pred[:, -2:]) == 1 y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.ones(X.shape[0])) # y has a constantly absent label y = np.zeros((10, 2)) y[5:, 0] = 1 # variable label ovr = OneVsRestClassifier(LogisticRegression()) assert_warns(UserWarning, ovr.fit, X, y) y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.zeros(X.shape[0]))
def test_fastica_nowhiten(): m = [[0, 1], [1, 0]] # test for issue #697 ica = FastICA(n_components=1, whiten=False, random_state=0) assert_warns(UserWarning, ica.fit, m) assert hasattr(ica, 'mixing_')
def test_identical_regressors(): newX = X.copy() newX[:, 1] = newX[:, 0] gamma = np.zeros(n_features) gamma[0] = gamma[1] = 1. newy = np.dot(newX, gamma) assert_warns(RuntimeWarning, orthogonal_mp, newX, newy, 2)
def check_oob_score(name, X, y, n_estimators=20): # Check that oob prediction is a good estimation of the generalization # error. # Proper behavior est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=n_estimators, bootstrap=True) n_samples = X.shape[0] est.fit(X[:n_samples // 2, :], y[:n_samples // 2]) test_score = est.score(X[n_samples // 2:, :], y[n_samples // 2:]) if name in FOREST_CLASSIFIERS: assert abs(test_score - est.oob_score_) < 0.1 else: assert test_score > est.oob_score_ assert est.oob_score_ > .8 # Check warning if not enough estimators with np.errstate(divide="ignore", invalid="ignore"): est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=1, bootstrap=True) assert_warns(UserWarning, est.fit, X, y)
def test_enet_coordinate_descent(klass, n_classes, kwargs): """Test that a warning is issued if model does not converge""" clf = klass(max_iter=2, **kwargs) n_samples = 5 n_features = 2 X = np.ones((n_samples, n_features)) * 1e50 y = np.ones((n_samples, n_classes)) if klass == Lasso: y = y.ravel() assert_warns(ConvergenceWarning, clf.fit, X, y)
def test_ridge_regression_convergence_fail(): rng = np.random.RandomState(0) y = rng.randn(5) X = rng.randn(5, 10) assert_warns(ConvergenceWarning, ridge_regression, X, y, alpha=1.0, solver="sparse_cg", tol=0., max_iter=None, verbose=1)
def test_lda_priors(): # Test priors (negative priors) priors = np.array([0.5, -0.5]) clf = LinearDiscriminantAnalysis(priors=priors) msg = "priors must be non-negative" assert_raise_message(ValueError, msg, clf.fit, X, y) # Test that priors passed as a list are correctly handled (run to see if # failure) clf = LinearDiscriminantAnalysis(priors=[0.5, 0.5]) clf.fit(X, y) # Test that priors always sum to 1 priors = np.array([0.5, 0.6]) prior_norm = np.array([0.45, 0.55]) clf = LinearDiscriminantAnalysis(priors=priors) assert_warns(UserWarning, clf.fit, X, y) assert_array_almost_equal(clf.priors_, prior_norm, 2)
def test_covariance(): # Tests Covariance module on a simple dataset. # test covariance fit from data cov = EmpiricalCovariance() cov.fit(X) emp_cov = empirical_covariance(X) assert_array_almost_equal(emp_cov, cov.covariance_, 4) assert_almost_equal(cov.error_norm(emp_cov), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0) assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0) assert_almost_equal(cov.error_norm(emp_cov, squared=False), 0) with pytest.raises(NotImplementedError): cov.error_norm(emp_cov, norm='foo') # Mahalanobis distances computation test mahal_dist = cov.mahalanobis(X) assert np.amin(mahal_dist) > 0 # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) cov = EmpiricalCovariance() cov.fit(X_1d) assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4) assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0) assert_almost_equal( cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0) # test with one sample # Create X with 1 sample and 5 features X_1sample = np.arange(5).reshape(1, 5) cov = EmpiricalCovariance() assert_warns(UserWarning, cov.fit, X_1sample) assert_array_almost_equal(cov.covariance_, np.zeros(shape=(5, 5), dtype=np.float64)) # test integer type X_integer = np.asarray([[0, 1], [1, 0]]) result = np.asarray([[0.25, -0.25], [-0.25, 0.25]]) assert_array_almost_equal(empirical_covariance(X_integer), result) # test centered case cov = EmpiricalCovariance(assume_centered=True) cov.fit(X) assert_array_equal(cov.location_, np.zeros(X.shape[1]))
def test_unreachable_accuracy(): assert_array_almost_equal(orthogonal_mp(X, y, tol=0), orthogonal_mp(X, y, n_nonzero_coefs=n_features)) assert_array_almost_equal( assert_warns(RuntimeWarning, orthogonal_mp, X, y, tol=0, precompute=True), orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_features))
def check_warm_start_equal_n_estimators(name): # Test if warm start with equal n_estimators does nothing and returns the # same forest and raises a warning. X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, random_state=1) clf.fit(X, y) clf_2 = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, random_state=1) clf_2.fit(X, y) # Now clf_2 equals clf. clf_2.set_params(random_state=2) assert_warns(UserWarning, clf_2.fit, X, y) # If we had fit the trees again we would have got a different forest as we # changed the random state. assert_array_equal(clf.apply(X), clf_2.apply(X))
def check_class_weight_errors(name): # Test if class_weight raises errors and warnings when expected. ForestClassifier = FOREST_CLASSIFIERS[name] _y = np.vstack((y, np.array(y) * 2)).T # Invalid preset string clf = ForestClassifier(class_weight='the larch', random_state=0) assert_raises(ValueError, clf.fit, X, y) assert_raises(ValueError, clf.fit, X, _y) # Warning warm_start with preset clf = ForestClassifier(class_weight='balanced', warm_start=True, random_state=0) assert_warns(UserWarning, clf.fit, X, y) assert_warns(UserWarning, clf.fit, X, _y) # Not a list or preset for multi-output clf = ForestClassifier(class_weight=1, random_state=0) assert_raises(ValueError, clf.fit, X, _y) # Incorrect length list for multi-output clf = ForestClassifier(class_weight=[{-1: 0.5, 1: 1.}], random_state=0) assert_raises(ValueError, clf.fit, X, _y)