# Eigenfaces feature extraction on the "Labeled Faces in the Wild" dataset:
# load the faces, keep a single stratified train / test split and project
# both halves onto the leading principal components ("eigenfaces").

from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.datasets import fetch_lfw_people
from scikits.learn.grid_search import GridSearchCV
from scikits.learn.decomposition import RandomizedPCA
from scikits.learn.svm import SVC

# Download the data, if not already on disk and load it as numpy arrays
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# reshape the data using the traditional (n_samples, n_features) shape
faces = lfw_people.data
n_samples, h, w = faces.shape
X = faces.reshape((n_samples, h * w))
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names

# split into a training and testing set
# Only the first (train, test) pair produced by the k=4 stratified folds is
# consumed.  The builtin next() is used instead of the iterator's .next()
# method because the latter only exists on Python 2.
train, test = next(iter(StratifiedKFold(y, k=4)))
X_train, X_test = X[train], X[test]
y_train, y_test = y[train], y[test]

# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
n_components = 150
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)

# view each principal component as an (h, w) image
eigenfaces = pca.components_.reshape((n_components, h, w))

# project the train and test samples with the PCA fitted on the training set
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
# Opening part of the face recognition example: configure logging, load the
# "Labeled Faces in the Wild" dataset and report its size.

# logging is configured below, so it has to be imported in this script
import logging

from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.datasets import fetch_lfw_people
from scikits.learn.grid_search import GridSearchCV
from scikits.learn.metrics import classification_report
from scikits.learn.metrics import confusion_matrix
from scikits.learn.pca import RandomizedPCA
from scikits.learn.svm import SVC

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

################################################################################
# Download the data, if not already on disk and load it as numpy arrays

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# reshape the data using the traditional (n_samples, n_features) shape
faces = lfw_people.data
n_samples, h, w = faces.shape
X = faces.reshape((n_samples, h * w))
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

# Single-argument, parenthesised print calls emit the same text whether they
# are parsed as the Python 2 print statement or the Python 3 print function.
print("Total dataset size:")
print("n_samples: %d" % n_samples)
""" Stripped-down version of the face recognition example by Olivier Grisel http://scikit-learn.org/dev/auto_examples/applications/face_recognition.html ## original shape of images: 50, 37 """ import numpy as np from scikits.learn import cross_val, datasets, decomposition, svm # .. # .. load data .. lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70, resize=0.4) faces = np.reshape(lfw_people.data, (lfw_people.target.shape[0], -1)) train, test = iter(cross_val.StratifiedKFold(lfw_people.target, k=4)).next() X_train, X_test = faces[train], faces[test] y_train, y_test = lfw_people.target[train], lfw_people.target[test] # .. # .. dimension reduction .. pca = decomposition.RandomizedPCA(n_components=150, whiten=True) pca.fit(X_train) X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) # .. # .. classification .. clf = svm.SVC(C=5., gamma=0.001) clf.fit(X_train_pca, y_train)