import numpy as np
import pytest
from numpy.testing import (assert_allclose, assert_almost_equal,
                           assert_array_almost_equal, assert_array_equal)

from mrex.datasets import make_blobs
from mrex.discriminant_analysis import LinearDiscriminantAnalysis
# NOTE: the import path below assumes mrex mirrors scikit-learn's layout for
# its testing helpers.
from mrex.utils.testing import (assert_raise_message, assert_raises,
                                assert_warns)


def test_lda_numeric_consistency_float32_float64():
    for (solver, shrinkage) in solver_shrinkage:
        clf_32 = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        clf_32.fit(X.astype(np.float32), y.astype(np.float32))
        clf_64 = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        clf_64.fit(X.astype(np.float64), y.astype(np.float64))

        # Check value consistency between types
        rtol = 1e-6
        assert_allclose(clf_32.coef_, clf_64.coef_, rtol=rtol)
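# These tests reference shared fixtures (X, y, X1, y3, solver_shrinkage) that
# are defined at module level but not shown in this excerpt. A minimal sketch
# of what they look like, mirroring the upstream scikit-learn test suite --
# the exact values are assumptions, not part of this excerpt:
X = np.array([[-2, -1], [-1, -1], [-1, -2],
              [1, 1], [1, 2], [2, 1]], dtype='f')
y = np.array([1, 1, 1, 2, 2, 2])
y3 = np.array([1, 1, 2, 2, 3, 3])  # labels LDA should *not* separate
X1 = np.array([[-2], [-1], [-1], [1], [1], [2]], dtype='f')  # 1D data
# (solver, shrinkage) pairs exercised by the consistency tests.
solver_shrinkage = [('svd', None), ('lsqr', None), ('eigen', None),
                    ('lsqr', 'auto'), ('lsqr', 0), ('lsqr', 0.43),
                    ('eigen', 'auto'), ('eigen', 0), ('eigen', 0.43)]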
# `solver` is expected to be supplied via parametrization over the three
# available solvers.
@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
def test_raises_value_error_on_same_number_of_classes_and_samples(solver):
    """
    Tests that if the number of samples equals the number of classes, a
    ValueError is raised.
    """
    X = np.array([[0.5, 0.6], [0.6, 0.5]])
    y = np.array(["a", "b"])

    clf = LinearDiscriminantAnalysis(solver=solver)
    with pytest.raises(ValueError, match="The number of samples must be more"):
        clf.fit(X, y)
def test_lda_transform():
    # Test LDA transform.
    clf = LinearDiscriminantAnalysis(solver="svd", n_components=1)
    X_transformed = clf.fit(X, y).transform(X)
    assert X_transformed.shape[1] == 1
    clf = LinearDiscriminantAnalysis(solver="eigen", n_components=1)
    X_transformed = clf.fit(X, y).transform(X)
    assert X_transformed.shape[1] == 1

    clf = LinearDiscriminantAnalysis(solver="lsqr", n_components=1)
    clf.fit(X, y)
    msg = "transform not implemented for 'lsqr'"
    assert_raise_message(NotImplementedError, msg, clf.transform, X)
def test_lda_priors():
    # Negative priors must be rejected.
    priors = np.array([0.5, -0.5])
    clf = LinearDiscriminantAnalysis(priors=priors)
    msg = "priors must be non-negative"
    assert_raise_message(ValueError, msg, clf.fit, X, y)

    # Smoke test: priors passed as a list are handled correctly.
    clf = LinearDiscriminantAnalysis(priors=[0.5, 0.5])
    clf.fit(X, y)

    # Priors that do not sum to 1 are normalized, with a warning.
    priors = np.array([0.5, 0.6])
    prior_norm = np.array([0.45, 0.55])
    clf = LinearDiscriminantAnalysis(priors=priors)
    assert_warns(UserWarning, clf.fit, X, y)
    assert_array_almost_equal(clf.priors_, prior_norm, 2)
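# For reference, the normalization asserted above is simply division by the
# sum of the priors. A minimal sketch (hypothetical helper, assumes the
# module-level X, y fixtures; the leading underscore keeps pytest from
# collecting it as a test):
def _demo_prior_normalization():
    priors = np.array([0.5, 0.6])  # sums to 1.1, so fit() rescales
    clf = LinearDiscriminantAnalysis(priors=priors).fit(X, y)
    # priors_ ends up as [0.5, 0.6] / 1.1 ~= [0.4545, 0.5455]
    assert_array_almost_equal(clf.priors_, priors / priors.sum(), 6)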
def test_lda_predict():
    # Test LDA classification.
    # This checks that LDA implements fit and predict and returns correct
    # values for simple toy data.
    for test_case in solver_shrinkage:
        solver, shrinkage = test_case
        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        y_pred = clf.fit(X, y).predict(X)
        assert_array_equal(y_pred, y, 'solver %s' % solver)

        # Assert that it works with 1D data
        y_pred1 = clf.fit(X1, y).predict(X1)
        assert_array_equal(y_pred1, y, 'solver %s' % solver)

        # Test probability estimates
        y_proba_pred1 = clf.predict_proba(X1)
        assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y,
                           'solver %s' % solver)
        y_log_proba_pred1 = clf.predict_log_proba(X1)
        assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1,
                                  8, 'solver %s' % solver)

        # Primarily test for commit 2f34950 -- "reuse" of priors
        y_pred3 = clf.fit(X, y3).predict(X)
        # LDA shouldn't be able to separate those
        assert np.any(y_pred3 != y3), 'solver %s' % solver

    # Test invalid shrinkages
    clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=-0.2231)
    assert_raises(ValueError, clf.fit, X, y)
    clf = LinearDiscriminantAnalysis(solver="eigen", shrinkage="dummy")
    assert_raises(ValueError, clf.fit, X, y)
    clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto")
    assert_raises(NotImplementedError, clf.fit, X, y)
    # Test unknown solver
    clf = LinearDiscriminantAnalysis(solver="dummy")
    assert_raises(ValueError, clf.fit, X, y)
def test_lda_explained_variance_ratio():
    # Test that the normalized eigenvalues sum to 1. Also test that the
    # explained_variance_ratio_ computed by the eigen solver matches the
    # one computed by the svd solver.
    state = np.random.RandomState(0)
    X = state.normal(loc=0, scale=100, size=(40, 20))
    y = state.randint(0, 3, size=(40,))

    clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen")
    clf_lda_eigen.fit(X, y)
    assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3)
    # With 3 classes there are at most n_classes - 1 = 2 discriminant axes.
    assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), (
        "Unexpected length for explained_variance_ratio_")

    clf_lda_svd = LinearDiscriminantAnalysis(solver="svd")
    clf_lda_svd.fit(X, y)
    assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3)
    assert clf_lda_svd.explained_variance_ratio_.shape == (2,), (
        "Unexpected length for explained_variance_ratio_")

    assert_array_almost_equal(clf_lda_svd.explained_variance_ratio_,
                              clf_lda_eigen.explained_variance_ratio_)
def test_lda_scaling():
    # Test if classification works correctly with differently scaled features.
    n = 100
    rng = np.random.RandomState(1234)
    # use uniform distribution of features to make sure there is absolutely no
    # overlap between classes.
    x1 = rng.uniform(-1, 1, (n, 3)) + [-10, 0, 0]
    x2 = rng.uniform(-1, 1, (n, 3)) + [10, 0, 0]
    x = np.vstack((x1, x2)) * [1, 100, 10000]
    y = [-1] * n + [1] * n

    for solver in ('svd', 'lsqr', 'eigen'):
        clf = LinearDiscriminantAnalysis(solver=solver)
        # should be able to separate the data perfectly
        assert clf.fit(x, y).score(x, y) == 1.0, ('using solver: %s' % solver)
def test_lda_coefs():
    # Test if the coefficients of the solvers are approximately the same.
    n_features = 2
    n_classes = 2
    n_samples = 1000
    X, y = make_blobs(n_samples=n_samples, n_features=n_features,
                      centers=n_classes, random_state=11)

    clf_lda_svd = LinearDiscriminantAnalysis(solver="svd")
    clf_lda_lsqr = LinearDiscriminantAnalysis(solver="lsqr")
    clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen")

    clf_lda_svd.fit(X, y)
    clf_lda_lsqr.fit(X, y)
    clf_lda_eigen.fit(X, y)

    assert_array_almost_equal(clf_lda_svd.coef_, clf_lda_lsqr.coef_, 1)
    assert_array_almost_equal(clf_lda_svd.coef_, clf_lda_eigen.coef_, 1)
    assert_array_almost_equal(clf_lda_eigen.coef_, clf_lda_lsqr.coef_, 1)
import matplotlib.pyplot as plt

from mrex import datasets
from mrex.decomposition import PCA
from mrex.discriminant_analysis import LinearDiscriminantAnalysis

iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained for each component
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))

plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2

for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8, lw=lw,
                label=target_name)
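# The excerpt cuts off after the PCA scatter loop. A sketch of the usual
# continuation, mirroring the upstream scikit-learn PCA-vs-LDA example (the
# legend/title wording is an assumption): add a legend and title to the PCA
# figure, then draw the LDA projection X_r2 in a second figure.
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')

plt.figure()
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r2[y == i, 0], X_r2[y == i, 1], alpha=.8, color=color,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('LDA of IRIS dataset')

plt.show()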
# `data_type`/`expected_type` are expected to be supplied via parametrization;
# the pairs below are an assumption mirroring the upstream scikit-learn test
# (float inputs keep their dtype, integer inputs are promoted to float64).
@pytest.mark.parametrize("data_type, expected_type",
                         [(np.float32, np.float32),
                          (np.float64, np.float64),
                          (np.int32, np.float64),
                          (np.int64, np.float64)])
def test_lda_dtype_match(data_type, expected_type):
    for (solver, shrinkage) in solver_shrinkage:
        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        clf.fit(X.astype(data_type), y.astype(data_type))
        assert clf.coef_.dtype == expected_type
def plot_lda_cov(lda, splot):
    plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')


def plot_qda_cov(qda, splot):
    plot_ellipse(splot, qda.means_[0], qda.covariance_[0], 'red')
    plot_ellipse(splot, qda.means_[1], qda.covariance_[1], 'blue')


plt.figure(figsize=(10, 8), facecolor='white')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant '
             'Analysis', y=0.98, fontsize=15)
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
    # Linear Discriminant Analysis
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    y_pred = lda.fit(X, y).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    plt.axis('tight')

    # Quadratic Discriminant Analysis
    qda = QuadraticDiscriminantAnalysis(store_covariance=True)
    y_pred = qda.fit(X, y).predict(X)
    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    plt.axis('tight')
plt.tight_layout()
plt.subplots_adjust(top=0.92)
plt.show()