# facerecognition.py
# Face recognition on the Labeled Faces in the Wild dataset: PCA "eigenfaces"
# for feature extraction, followed by an RBF-kernel SVM classifier.
import pylab as pl
import numpy as np
from sklearn.datasets import fetch_lfw_people
# Load the Labeled Faces in the Wild data, restricted via
# min_faces_per_person=70 and rescaled via resize=0.4.
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
X, y = lfw_people.data, lfw_people.target
print(y)
names = lfw_people.target_names

# X is 2-D: one row per sample, one column per feature.
n_samples, n_features = X.shape
# `images` is 3-D; its last two axes are the height and width that are later
# used to reshape flat feature rows back into pictures.
_, h, w = lfw_people.images.shape
n_classes = len(names)

print("n_samples: {}".format(n_samples))
print("n_features: {}".format(n_features))
print("n_classes: {}".format(n_classes))
def plot_gallery(images, titles, h, w, n_row=3, n_col=6):
    """Plot a gallery of portraits on an ``n_row`` x ``n_col`` grid.

    Args:
        images: Indexable collection of images; each entry is reshaped to
            ``(h, w)`` before being drawn.
        titles: Indexable collection of title strings, parallel to ``images``.
        h: Height used to reshape every image.
        w: Width used to reshape every image.
        n_row: Number of grid rows.
        n_col: Number of grid columns.

    At most ``n_row * n_col`` images are shown. If fewer images or titles are
    supplied, the remaining grid cells are simply left empty.
    """
    pl.figure(figsize=(1.7 * n_col, 2.3 * n_row))
    pl.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    # Never index past the data: a short input used to raise IndexError
    # after the figure had already been partially drawn.
    n_shown = min(n_row * n_col, len(images), len(titles))
    for i in range(n_shown):
        pl.subplot(n_row, n_col, i + 1)
        pl.imshow(images[i].reshape((h, w)), cmap=pl.cm.gray)
        pl.title(titles[i], size=12)
        # Hide the axis ticks; they carry no information for a portrait.
        pl.xticks(())
        pl.yticks(())
# Show the first faces of the dataset, each titled with the person's name.
plot_gallery(X, names[y], h, w)

# Bar chart of how many pictures are available for each person.
pl.figure(figsize=(14, 3))
y_unique = np.unique(y)
# For every distinct label, `y == label` is a boolean mask over the samples;
# summing the mask counts its True entries, i.e. the number of samples that
# carry that label. The result is one count per unique label.
counts = [np.sum(y == label) for label in y_unique]
pl.xticks(y_unique, names[y_unique])
locs, labels = pl.xticks()
pl.setp(labels, rotation=45, size=20)
_ = pl.bar(y_unique, counts)
# Hold out 25% of the samples for the final evaluation.
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and has since
# been removed; `train_test_split` lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
"""
Unsupervised Feature Extraction with Randomized PCA (Principal Component Analysis)
Linear Dimensionality Reduction using approximated Singular Value Decomposition
of the data and keeping only the most significant singular vectors to project the
data to a lower dimensional space.
"""
# `RandomizedPCA` was deprecated in scikit-learn 0.18 and later removed; the
# same randomized SVD is selected with `PCA(svd_solver='randomized')`.
from sklearn.decomposition import PCA
n_components = 150
print("Extracting the top {} eigenfaces from {} faces".format(n_components, X_train.shape[0]))
pca = PCA(n_components=n_components, svd_solver='randomized', whiten=True)
pca.fit(X_train)
# Each principal component has one value per pixel, so it can be viewed as an
# (h, w) image: an "eigenface".
eigenfaces = pca.components_.reshape((n_components, h, w))
eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
plot_gallery(eigenfaces, eigenface_titles, h, w)
# Projecting the input data on the eigenfaces orthonormal basis
X_train_pca = pca.transform(X_train)
# Train and tune an RBF-kernel SVM on the PCA-projected training data.
from sklearn.svm import SVC
# class_weight='auto' was deprecated in favour of 'balanced' and is no longer
# accepted by current scikit-learn releases.
svm = SVC(kernel='rbf', class_weight='balanced')

# `sklearn.cross_validation` and `sklearn.grid_search` were merged into
# `sklearn.model_selection` (scikit-learn 0.18) and the old modules removed.
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import cross_val_score

# The splitter no longer receives the labels up front (they are supplied when
# the data is split) and `n_iter` is now called `n_splits`.
cv = StratifiedShuffleSplit(n_splits=3, test_size=0.20)

# The target is multiclass, so plain 'f1' (binary only) is rejected; use the
# support-weighted average instead. `cv` is passed explicitly so this baseline
# is evaluated on the same kind of splits as the grid search below.
svm_cv_scores = cross_val_score(svm, X_train_pca, y_train,
                                scoring='f1_weighted', cv=cv, n_jobs=2)
# In a script a bare expression displays nothing, so print the results.
print("Cross-validation f1 scores: {}".format(svm_cv_scores))
print("mean: {:.3f}, std: {:.3f}".format(svm_cv_scores.mean(), svm_cv_scores.std()))

# Exhaustive search over the regularisation strength C and kernel width gamma.
param_grid = {
    'C': [1e3, 5e3, 1e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
clf = GridSearchCV(svm, param_grid, scoring='f1_weighted', cv=cv, n_jobs=2)
clf = clf.fit(X_train_pca, y_train)
print("Best parameters found by grid search:")
print(clf.best_params_)
print("Best parameters validation score: {:.3f}".format(clf.best_score_))
# Project the held-out faces onto the same eigenface basis that was fitted on
# the training set, then classify them with the tuned model.
# (The method is `transform`; the previous `transforom` raised AttributeError.)
X_test_pca = pca.transform(X_test)
y_pred = clf.predict(X_test_pca)
def title(y_pred, y_test, target_names, i):
    """Build the two-line caption for sample ``i``.

    The caption shows the predicted and the true name, each shortened to the
    text after its last space (the whole name when it contains no space).
    """
    def short_name(label):
        # Keep only what follows the final space of the full name.
        return target_names[label].rsplit(' ', 1)[-1]

    caption_fields = (short_name(y_pred[i]), short_name(y_test[i]))
    return 'predicted: %s\ntrue: %s' % caption_fields
# One caption per test sample, then display the first test faces with their
# predicted/true captions and show every figure created so far.
prediction_titles = [
    title(y_pred, y_test, names, idx) for idx in range(len(y_pred))
]
plot_gallery(X_test, prediction_titles, h, w)
pl.show()