-
Notifications
You must be signed in to change notification settings - Fork 1
/
SVM_FaceRecognition.py
185 lines (109 loc) · 5.95 KB
/
SVM_FaceRecognition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 15 09:30:53 2017
@author: 100419
"""
from __future__ import print_function
from time import time #步骤计时,,T
import logging #打印一些程序進展狀況
import matplotlib.pyplot as plt #打印預測的人臉
from sklearn.cross_validation import train_test_split #
from sklearn.datasets import fetch_lfw_people
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC
print(__doc__)
#display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') #程序中進展的信息打印出來
#=======================================================================
# Download the data, if not already on disk and load it as numpy arrays
lfw_people=fetch_lfw_people(min_faces_per_person=70,resize=0.4) #數據下載名人庫
# Introspect the images arrays to fine the shapes(for ploting)
n_samples, h, w = lfw_people.images.shape #返回 number of images
#print(h)
#print(w)
#print(n_samples)
# for machine learning we use the 2 data directly (as relative pixel
# positions info is ignored by this model)
X=lfw_people.data #每一行是一個實例,每一列是個特徵向量
#print(X)
n_features = X.shape[1] #每一個向量(圖,人臉)的維度,或number of 特徵值Shape[1]為列數
# the labe to predict is the id of the person
y=lfw_people.target #class label,對應的人臉
#print(y)
target_names=lfw_people.target_names #對應的人名
#print(target_names)
n_classes=target_names.shape[0] #人名List中有多少行,即有多少人(類)要進行區分
print("Total dataset size:")
print("n_samples: %d" % n_samples)
print("n_features: %d" % n_features)
print("n_classes: %d" % n_classes)
#=================================================================================
# Split into a training set and a test set using a stratified k fold
# split into a training and test set
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.25)
#==============================================================================
#降data的維度
#Compute a PCA(eigenfaces) on the face dataset (treated as unlabeled dataset)
#: unuervised feature extrction/dimensionality reduction
n_components=150 #組成元素的數量
print("Extracting the top %d eigenfaces from %d faces" %(n_components, X_train.shape[0]))
t0=time()
#print(t0)
pca=RandomizedPCA(n_components=n_components,whiten=True).fit(X_train) #隨機隆維
print("done in %0.3fs" % (time()-t0))
eigenfaces=pca.components_.reshape((n_components,h,w)) #從人臉提取一些特徵點,叫做eigenface
print("Projecting te input data on the eigenfaces orthonormal basis")
t0=time()
#print(t0)
X_train_pca=pca.transform(X_train) #針對X_train執行降維動作
X_test_pca=pca.transform(X_test) #針對X_test執行降維動作
print("done in %0.3fs" %(time()-t0))
#===========================================================================================
# Train a SCM classification model
print("fitting the classifier to the training set")
t0=time()
param_grid ={'C':[1e3, 5e3, 1e4, 5e4, 1e5], 'gamma':[0.0001,0.0005,0.001,0.005,0.01,0.1],} #嚐試不同參數。共30種組合
# C:float,optional(default=1.0), Penalty parameter C of the error term 對錯誤進行懲罰,權重
# gamma:float, optional(default=0.0) Kernel coefficient for 'rbf", "poly" and "sigmoid"
# If gamma is 0.0 then 1/n_features will be used instead 多少特徵點會被使用將有一個比例
clf=GridSearchCV(SVC(kernel='rbf',class_weight='balanced'),param_grid) #GridSearchCV可試著不同組合
clf=clf.fit(X_train_pca, y_train) #建模,找出使邊界條件最大的超平面。測試集根據超平面來分類
print("done in %0.3fs" % (time()-t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_) #打印出試出來的最好的Estimator的參數是多少
#===========================================================================================
#Quantitative evaluation of the model quality on the test set
print("Predicting people's names on the test set")
t0=time()
y_pred=clf.predict(X_test_pca) #測試集分類預測
print("done in %0.3fs" % (time()-t0))
print(classification_report(y_test,y_pred,target_names=target_names))
print(confusion_matrix(y_test,y_pred,labels=range(n_classes))) #橫軸(行)為class label,縱軸為預測值
#==================================================================================
#Qualitative evaluation of the predictions using matplotlib
def plot_gallery(images,titles,h,w,n_row=3,n_col=4):
#====Helper fuction to plot a allaery of portraits
plt.figure(figsize=(1.8*n_col,2.4*n_row))
plt.subplots_adjust(bottom=0,left=0.1,right=.99,top=.90,hspace=.35)
for i in range(n_row*n_col):
plt.subplot(n_row,n_col, i+1)
plt.imshow(images[i].reshape((h,w)),cmap=plt.cm.gray)
plt.title(titles[i],size=12)
plt.xticks(())
plt.yticks(())
#=============================================================================
# plot the result of the prediction on a partion of the test set
def title(y_pred,y_test,target_names,i):
pred_name=target_names[y_pred[i]].rsplit(' ',1)[-1]
true_name=target_names[y_test[i]].rsplit(' ',1)[-1]
#print('predicted: %s\ntrue: %s' )
return (pred_name,true_name)
prediction_titles=[title(y_pred,y_test,target_names,i) for i in range(y_pred.shape[0])]
plot_gallery(X_test, prediction_titles,h,w)
# plot the allery of the most significative eigenfaces
eigenface_titles=["eigenface %d" % i for i in range(eigenfaces.shape[0])]
plot_gallery(eigenfaces,eigenface_titles,h,w)
plt.show()