def test_pca_check_projection():
    """Check that PCA projects a new sample onto the expected component.

    A 2-component PCA is fitted on low-amplitude noise whose first ten
    rows are shifted by the vector (3, 4, 5). A fresh noisy sample drawn
    around that same vector is then transformed and rescaled to unit
    length; the test asserts that the absolute value of its first
    coordinate equals 1 to one decimal place.
    """
    n_samples, n_features = 100, 3
    offset = np.array([3, 4, 5])

    # Small-amplitude noise, with the first ten samples shifted by `offset`
    data = randn(n_samples, n_features) * .1
    data[:10] += offset

    pca = PCA(n_comp=2)
    pca.fit(data)

    # One new noisy sample centred on `offset`
    new_sample = 0.1 * randn(1, n_features) + offset
    projected = pca.transform(new_sample)

    # Rescale to unit Euclidean length so the coordinates are comparable to 1
    projected /= np.sqrt((projected ** 2).sum())

    np.testing.assert_almost_equal(np.abs(projected[0, 0]), 1.0, decimal=1)
# Split samples and labels at the same index: rows before `split` are used
# for training, the remaining rows for testing
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

################################################################################
# Unsupervised feature extraction / dimensionality reduction: compute a PCA
# (eigenfaces) on the face dataset, treated as an unlabeled dataset

n_components = 150

print("Extracting the top %d eigenfaces" % n_components)
pca = PCA(
    n_comp=n_components,
    whiten=True,
    do_fast_svd=True,
).fit(X_train)

# One (64, 64) array per extracted component
eigenfaces = pca.components_.T.reshape((n_components, 64, 64))

# Project the input data on the eigenfaces orthonormal basis
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

################################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")

# Candidate values explored by the grid search for the RBF-kernel SVC
param_grid = dict(
    C=[1, 5, 10, 50, 100],
    gamma=[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
)
clf = GridSearchCV(
    SVC(kernel='rbf'),
    param_grid,
    fit_params={'class_weight': 'auto'},
)
clf = clf.fit(X_train_pca, y_train)

print("Best estimator found by grid search:")