def test_lda_orthogonality():
    """Check that the SVD solver yields whitened, axis-aligned components.

    Four class means are laid out in a kite shape.  After fitting, the
    longer diagonal (classes 0 and 3) is expected on the first component
    and the shorter diagonal (classes 1 and 2) on the second one.
    """
    class_means = np.array([[0, 0, -1], [0, 2, 0], [0, -2, 0], [0, 0, 5]])

    # Perfectly symmetric offsets around each mean, so the estimated class
    # means coincide exactly with ``class_means``.
    offsets = np.array([[0.1, 0, 0], [-0.1, 0, 0],
                        [0, 0.1, 0], [0, -0.1, 0],
                        [0, 0, 0.1], [0, 0, -0.1]])

    # Every mean combined with every offset, flattened to (n_samples, 3).
    X = (class_means[:, None, :] + offsets[None, :, :]).reshape((-1, 3))
    y = np.repeat(np.arange(class_means.shape[0]), offsets.shape[0])

    lda = LinearDiscriminantAnalysis(solver="svd").fit(X, y)
    projected_means = lda.transform(class_means)

    # Unit vectors along the two diagonals of the kite, in transformed space.
    long_axis = projected_means[3] - projected_means[0]
    short_axis = projected_means[2] - projected_means[1]
    long_axis = long_axis / np.sqrt(np.sum(long_axis ** 2))
    short_axis = short_axis / np.sqrt(np.sum(short_axis ** 2))

    # The within-class covariance must be the identity after the transform.
    assert_almost_equal(np.cov(lda.transform(offsets).T), np.eye(2))

    # Classes 0 and 3 lie along the first component ...
    assert_almost_equal(np.abs(np.dot(long_axis[:2], [1, 0])), 1.0)

    # ... and classes 1 and 2 along the second one.
    assert_almost_equal(np.abs(np.dot(short_axis[:2], [0, 1])), 1.0)
def test_lda_store_covariance():
    """Check when ``covariance_`` is set and that its value is correct.

    * For the 'lsqr' and 'eigen' solvers, ``store_covariance`` has no
      effect: ``covariance_`` is always available after ``fit``.
    * For the 'svd' solver, ``covariance_`` is only set when
      ``store_covariance=True`` is requested.
    """
    expected_covariance = np.array([[0.422222, 0.088889],
                                    [0.088889, 0.533333]])

    # Test for solver 'lsqr' and 'eigen'
    for solver in ('lsqr', 'eigen'):
        clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6)
        assert hasattr(clf, 'covariance_')

        # Test the actual attribute:
        clf = LinearDiscriminantAnalysis(solver=solver,
                                         store_covariance=True).fit(X6, y6)
        assert hasattr(clf, 'covariance_')
        assert_array_almost_equal(clf.covariance_, expected_covariance)

    # Test for SVD solver, the default is to not set the covariance_
    # attribute
    clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6)
    assert not hasattr(clf, 'covariance_')

    # Test the actual attribute.  The solver is spelled out explicitly here:
    # relying on the loop variable above would silently re-test 'eigen' and
    # leave the SVD code path unchecked.
    clf = LinearDiscriminantAnalysis(solver='svd',
                                     store_covariance=True).fit(X6, y6)
    assert hasattr(clf, 'covariance_')
    assert_array_almost_equal(clf.covariance_, expected_covariance)
def test_lda_dimension_warning(n_classes, n_features):
    """Check the warnings emitted when ``n_components`` is too large.

    ``n_classes`` and ``n_features`` are supplied by the caller (presumably
    a pytest parametrization that is outside this view).
    """
    # FIXME: Future warning to be removed in 0.23
    n_samples = 10
    rng = check_random_state(0)
    X = rng.randn(n_samples, n_features)

    # Build n_samples labels by cycling through range(n_classes) and
    # truncating the result.
    n_repeats = n_samples // n_classes + 1
    y = np.tile(range(n_classes), n_repeats)[:n_samples]

    max_components = min(n_features, n_classes - 1)

    # Both messages only depend on the test parameters, not on the
    # n_components value under test.
    msg = ("n_components cannot be larger than min(n_features, "
           "n_classes - 1). Using min(n_features, "
           "n_classes - 1) = min(%d, %d - 1) = %d components." %
           (n_features, n_classes, max_components))
    future_msg = ("In version 0.23, setting n_components > min("
                  "n_features, n_classes - 1) will raise a "
                  "ValueError. You should set n_components to None"
                  " (default), or a value smaller or equal to "
                  "min(n_features, n_classes - 1).")

    # n_components <= min(n_classes - 1, n_features): fit stays silent.
    valid_values = (max_components - 1, None, max_components)
    for n_components in valid_values:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    # n_components > min(n_classes - 1, n_features): fit must warn.  One
    # value is just above the limit, the other exceeds both n_features and
    # n_classes - 1, so the check holds for any oversized value.
    oversized_values = (max_components + 1,
                        max(n_features, n_classes - 1) + 1)
    for n_components in oversized_values:
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
        assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)
def test_raises_value_error_on_same_number_of_classes_and_samples(solver):
    """
    Fitting with exactly one sample per class must raise a ValueError
    whose message starts with "The number of samples must be more".
    """
    # Two samples, two distinct labels: n_samples == n_classes.
    samples = np.array([[0.5, 0.6], [0.6, 0.5]])
    labels = np.array(["a", "b"])
    estimator = LinearDiscriminantAnalysis(solver=solver)

    expected = "The number of samples must be more"
    with pytest.raises(ValueError, match=expected):
        estimator.fit(samples, labels)
def test_lda_transform():
    """Check ``transform`` for every solver.

    'svd' and 'eigen' must project onto the requested single component;
    'lsqr' must refuse to transform with a NotImplementedError.
    """
    for solver in ("svd", "eigen"):
        lda = LinearDiscriminantAnalysis(solver=solver, n_components=1)
        projected = lda.fit(X, y).transform(X)
        assert projected.shape[1] == 1

    lda = LinearDiscriminantAnalysis(solver="lsqr", n_components=1)
    lda.fit(X, y)
    assert_raise_message(NotImplementedError,
                         "transform not implemented for 'lsqr'",
                         lda.transform, X)
def test_set_random_state():
    """Check ``set_random_state`` with and without a ``random_state``."""
    # Linear Discriminant Analysis doesn't have random state: smoke test
    set_random_state(LinearDiscriminantAnalysis(), 3)

    # An estimator that does expose random_state must receive the value.
    tree = DecisionTreeClassifier()
    set_random_state(tree, 3)
    assert tree.random_state == 3
def test_lda_scaling():
    """Check that classification copes with very differently scaled features.

    Two uniformly distributed clouds are shifted apart along the first axis
    so that they cannot overlap, then the columns are rescaled by 1, 100 and
    10000.  Every solver is expected to separate them perfectly.
    """
    n_per_class = 100
    rng = np.random.RandomState(1234)

    # Uniform noise guarantees there is absolutely no overlap between the
    # two classes.
    negative_cloud = rng.uniform(-1, 1, (n_per_class, 3)) + [-10, 0, 0]
    positive_cloud = rng.uniform(-1, 1, (n_per_class, 3)) + [10, 0, 0]

    column_scales = [1, 100, 10000]
    data = np.vstack((negative_cloud, positive_cloud)) * column_scales
    labels = [-1] * n_per_class + [1] * n_per_class

    for solver in ('svd', 'lsqr', 'eigen'):
        accuracy = LinearDiscriminantAnalysis(solver=solver).fit(
            data, labels).score(data, labels)
        # should be able to separate the data perfectly
        assert accuracy == 1.0, ('using covariance: %s' % solver)
def test_lda_numeric_consistency_float32_float64():
    """Check that float32 and float64 fits give matching coefficients.

    Runs over every (solver, shrinkage) pair in ``solver_shrinkage`` and
    compares ``coef_`` with a relative tolerance of 1e-6.
    """
    rtol = 1e-6
    for solver, shrinkage in solver_shrinkage:
        coefs = []
        # float32 first, then float64, each with a fresh estimator.
        for dtype in (np.float32, np.float64):
            estimator = LinearDiscriminantAnalysis(solver=solver,
                                                   shrinkage=shrinkage)
            estimator.fit(X.astype(dtype), y.astype(dtype))
            coefs.append(estimator.coef_)

        coef_32, coef_64 = coefs
        # Check value consistency between types
        assert_allclose(coef_32, coef_64, rtol=rtol)
def test_set_random_states():
    """Check ``_set_random_states`` on plain and nested estimators."""
    # Linear Discriminant Analysis doesn't have random state: smoke test
    _set_random_states(LinearDiscriminantAnalysis(), random_state=17)

    first = Perceptron(random_state=None)
    assert first.random_state is None
    # Passing random_state=None must still assign an integer seed.
    _set_random_states(first, None)
    assert isinstance(first.random_state, int)

    # A fixed random_state must give a reproducible seed across estimators.
    _set_random_states(first, 3)
    assert isinstance(first.random_state, int)
    second = Perceptron(random_state=None)
    _set_random_states(second, 3)
    assert first.random_state == second.random_state

    # Nested random_state parameters.
    def make_steps():
        selector = SelectFromModel(Perceptron(random_state=None))
        classifier = Perceptron(random_state=None)
        return [('sel', selector), ('clf', classifier)]

    est1 = Pipeline(make_steps())
    _set_random_states(est1, 3)
    assert isinstance(est1.steps[0][1].estimator.random_state, int)
    assert isinstance(est1.steps[1][1].random_state, int)

    reference = est1.get_params()
    sel_key = 'sel__estimator__random_state'
    clf_key = 'clf__random_state'
    assert reference[sel_key] != reference[clf_key]

    # Seeds assigned to several random_state parameters must not depend on
    # the iteration order of get_params().
    class AlphaParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            items = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(items))

    class RevParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            items = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(items, reverse=True))

    for pipeline_cls in (AlphaParamPipeline, RevParamPipeline):
        est2 = pipeline_cls(make_steps())
        _set_random_states(est2, 3)
        assert est1.get_params()[sel_key] == est2.get_params()[sel_key]
        assert est1.get_params()[clf_key] == est2.get_params()[clf_key]
def test_lda_priors():
    """Check validation and normalisation of the ``priors`` parameter."""
    # Negative priors are rejected at fit time.
    negative_priors = np.array([0.5, -0.5])
    lda = LinearDiscriminantAnalysis(priors=negative_priors)
    assert_raise_message(ValueError, "priors must be non-negative",
                         lda.fit, X, y)

    # Priors given as a plain list are accepted (run to see if failure).
    lda = LinearDiscriminantAnalysis(priors=[0.5, 0.5])
    lda.fit(X, y)

    # Priors that do not sum to 1 trigger a warning and are renormalised.
    unnormalised_priors = np.array([0.5, 0.6])
    expected_priors = np.array([0.45, 0.55])
    lda = LinearDiscriminantAnalysis(priors=unnormalised_priors)
    assert_warns(UserWarning, lda.fit, X, y)
    assert_array_almost_equal(lda.priors_, expected_priors, 2)
def test_lda_explained_variance_ratio():
    """Check ``explained_variance_ratio_`` for the eigen and svd solvers.

    For each solver the ratios must sum to 1 and have shape (2,); the two
    solvers must also agree with each other.
    """
    state = np.random.RandomState(0)
    X = state.normal(loc=0, scale=100, size=(40, 20))
    y = state.randint(0, 3, size=(40, ))

    ratios = {}
    for solver in ("eigen", "svd"):
        lda = LinearDiscriminantAnalysis(solver=solver)
        lda.fit(X, y)
        ratio = lda.explained_variance_ratio_
        assert_almost_equal(ratio.sum(), 1.0, 3)
        assert ratio.shape == (2, ), (
            "Unexpected length for explained_variance_ratio_")
        ratios[solver] = ratio

    assert_array_almost_equal(ratios["svd"], ratios["eigen"])
X, y = make_blobs(n_samples=n_samples, n_features=1, centers=[[-2], [2]]) # add non-discriminative features if n_features > 1: X = np.hstack([X, np.random.randn(n_samples, n_features - 1)]) return X, y acc_clf1, acc_clf2 = [], [] n_features_range = range(1, n_features_max + 1, step) for n_features in n_features_range: score_clf1, score_clf2 = 0, 0 for _ in range(n_averages): X, y = generate_data(n_train, n_features) clf1 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y) clf2 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None).fit(X, y) X, y = generate_data(n_test, n_features) score_clf1 += clf1.score(X, y) score_clf2 += clf2.score(X, y) acc_clf1.append(score_clf1 / n_averages) acc_clf2.append(score_clf2 / n_averages) features_samples_ratio = np.array(n_features_range) / n_train plt.plot(features_samples_ratio, acc_clf1, linewidth=2,
import matplotlib.pyplot as plt from mrex import datasets from mrex.decomposition import PCA from mrex.discriminant_analysis import LinearDiscriminantAnalysis iris = datasets.load_iris() X = iris.data y = iris.target target_names = iris.target_names pca = PCA(n_components=2) X_r = pca.fit(X).transform(X) lda = LinearDiscriminantAnalysis(n_components=2) X_r2 = lda.fit(X, y).transform(X) # Percentage of variance explained for each components print('explained variance ratio (first two components): %s' % str(pca.explained_variance_ratio_)) plt.figure() colors = ['navy', 'turquoise', 'darkorange'] lw = 2 for color, i, target_name in zip(colors, [0, 1, 2], target_names): plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8,
def test_lda_predict_proba(solver, n_classes):
    """Check ``predict_proba`` against the closed-form LDA posterior.

    Gaussian blobs sharing one covariance matrix are sampled, an LDA model
    is fitted on them, and its predicted probabilities for one sample are
    compared with the posterior computed directly from the generating
    parameters.

    ``solver`` and ``n_classes`` are supplied by the caller (presumably a
    pytest parametrization that is outside this view).
    """
    def generate_dataset(n_samples, centers, covariances, random_state=None):
        """Generate a multivariate normal data given some centers and
        covariances"""
        rng = check_random_state(random_state)
        n_per_class = n_samples // len(centers)
        X = np.vstack([
            rng.multivariate_normal(mean, cov, size=n_per_class)
            for mean, cov in zip(centers, covariances)
        ])
        y = np.hstack([[clazz] * n_per_class
                       for clazz in range(len(centers))])
        return X, y

    blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
    # One identical covariance matrix per blob (used as ``covariances``).
    blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
    X, y = generate_dataset(n_samples=90000, centers=blob_centers,
                            covariances=blob_stds, random_state=42)
    lda = LinearDiscriminantAnalysis(solver=solver, store_covariance=True,
                                     shrinkage=None).fit(X, y)

    # check that the empirical means and covariances are close enough to the
    # one used to generate the data
    assert_allclose(lda.means_, blob_centers, atol=1e-1)
    assert_allclose(lda.covariance_, blob_stds[0], atol=1)

    # implement the method to compute the probability given in The Elements
    # of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression
    # or LDA?"), using the last class as the reference class
    precision = linalg.inv(blob_stds[0])
    alpha_k = []
    alpha_k_0 = []
    for clazz in range(len(blob_centers) - 1):
        alpha_k.append(
            np.dot(precision,
                   (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis]))
        alpha_k_0.append(
            np.dot(
                -0.5 *
                (blob_centers[clazz] + blob_centers[-1])[np.newaxis, :],
                alpha_k[-1]))

    sample = np.array([[-22, 22]])

    def discriminant_func(sample, coef, intercept, clazz):
        return np.exp(intercept[clazz] + np.dot(sample, coef[clazz]))

    # Each score is a (1, 1) array; ``.item()`` extracts the scalar
    # explicitly, because implicit float() conversion of arrays with
    # ndim > 0 is deprecated in recent NumPy.  Computing the scores once
    # also avoids re-evaluating them for every class.
    scores = [
        discriminant_func(sample, alpha_k, alpha_k_0, clazz).item()
        for clazz in range(n_classes - 1)
    ]
    normalizer = 1 + sum(scores)
    prob = np.array([score / normalizer for score in scores])
    prob_ref = 1 - np.sum(prob)

    # check the consistency of the computed probability
    # all probabilities should sum to one
    prob_ref_2 = 1 / normalizer
    assert prob_ref == pytest.approx(prob_ref_2)

    # check that the probability of LDA are close to the theoretical
    # probabilities
    assert_allclose(lda.predict_proba(sample),
                    np.hstack([prob, prob_ref])[np.newaxis],
                    atol=1e-2)
# Split into train/test X_train, X_test, y_train, y_test = \ train_test_split(X, y, test_size=0.5, stratify=y, random_state=random_state) dim = len(X[0]) n_classes = len(np.unique(y)) # Reduce dimension to 2 with PCA pca = make_pipeline(StandardScaler(), PCA(n_components=2, random_state=random_state)) # Reduce dimension to 2 with LinearDiscriminantAnalysis lda = make_pipeline(StandardScaler(), LinearDiscriminantAnalysis(n_components=2)) # Reduce dimension to 2 with NeighborhoodComponentAnalysis nca = make_pipeline(StandardScaler(), NeighborhoodComponentsAnalysis(n_components=2, random_state=random_state)) # Use a nearest neighbor classifier to evaluate the methods knn = KNeighborsClassifier(n_neighbors=n_neighbors) # Make a list of the methods to be compared dim_reduction_methods = [('PCA', pca), ('LDA', lda), ('NCA', nca)] # plt.figure() for i, (name, model) in enumerate(dim_reduction_methods): plt.figure()
def test_lda_predict():
    """Check fit/predict on toy data and rejection of bad configurations.

    Every (solver, shrinkage) pair in ``solver_shrinkage`` must classify
    the toy data correctly; invalid shrinkage values and an unknown solver
    must raise at fit time.
    """
    for solver, shrinkage in solver_shrinkage:
        err_msg = 'solver %s' % solver
        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)

        predicted = clf.fit(X, y).predict(X)
        assert_array_equal(predicted, y, err_msg)

        # Assert that it works with 1D data
        predicted_1d = clf.fit(X1, y).predict(X1)
        assert_array_equal(predicted_1d, y, err_msg)

        # Test probability estimates
        proba_1d = clf.predict_proba(X1)
        assert_array_equal((proba_1d[:, 1] > 0.5) + 1, y, err_msg)
        log_proba_1d = clf.predict_log_proba(X1)
        assert_array_almost_equal(np.exp(log_proba_1d), proba_1d, 8,
                                  err_msg)

        # Primarily test for commit 2f34950 -- "reuse" of priors
        predicted_3 = clf.fit(X, y3).predict(X)
        # LDA shouldn't be able to separate those
        assert np.any(predicted_3 != y3), err_msg

    # Invalid shrinkages, followed by an unknown solver: each configuration
    # must fail at fit time with the listed exception type.
    invalid_configs = (
        ({"solver": "lsqr", "shrinkage": -0.2231}, ValueError),
        ({"solver": "eigen", "shrinkage": "dummy"}, ValueError),
        ({"solver": "svd", "shrinkage": "auto"}, NotImplementedError),
        ({"solver": "dummy"}, ValueError),
    )
    for params, expected_error in invalid_configs:
        clf = LinearDiscriminantAnalysis(**params)
        assert_raises(expected_error, clf.fit, X, y)
def test_lda_coefs():
    """Check that all three solvers give approximately equal coefficients.

    The comparison uses one decimal of precision on a two-class,
    two-feature blob dataset.
    """
    X, y = make_blobs(n_samples=1000, n_features=2, centers=2,
                      random_state=11)

    fitted = {}
    for solver in ("svd", "lsqr", "eigen"):
        fitted[solver] = LinearDiscriminantAnalysis(solver=solver).fit(X, y)

    # Pairwise comparison of the coefficients.
    for left, right in (("svd", "lsqr"), ("svd", "eigen"),
                        ("eigen", "lsqr")):
        assert_array_almost_equal(fitted[left].coef_,
                                  fitted[right].coef_, 1)
def test_lda_dtype_match(data_type, expected_type):
    """Check that ``coef_`` has ``expected_type`` for ``data_type`` input.

    The check runs for every (solver, shrinkage) pair listed in
    ``solver_shrinkage``.
    """
    for solver, shrinkage in solver_shrinkage:
        estimator = LinearDiscriminantAnalysis(solver=solver,
                                               shrinkage=shrinkage)
        estimator.fit(X.astype(data_type), y.astype(data_type))
        coef_dtype = estimator.coef_.dtype
        assert coef_dtype == expected_type
plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red') plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue') def plot_qda_cov(qda, splot): plot_ellipse(splot, qda.means_[0], qda.covariance_[0], 'red') plot_ellipse(splot, qda.means_[1], qda.covariance_[1], 'blue') plt.figure(figsize=(10, 8), facecolor='white') plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis', y=0.98, fontsize=15) for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): # Linear Discriminant Analysis lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True) y_pred = lda.fit(X, y).predict(X) splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1) plot_lda_cov(lda, splot) plt.axis('tight') # Quadratic Discriminant Analysis qda = QuadraticDiscriminantAnalysis(store_covariance=True) y_pred = qda.fit(X, y).predict(X) splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2) plot_qda_cov(qda, splot) plt.axis('tight') plt.tight_layout() plt.subplots_adjust(top=0.92) plt.show()