def plot_nmf_faces():
    """Reconstruct three face images using more and more non-negative components.

    Loads the face data via the module-level ``load_people`` and
    ``load_train_test_faces`` helpers, hands the train/test matrices to
    ``mglearn.plots.plot_nmf_faces`` and then sets the figure's super-title.
    """
    # Original author's note: this figure takes a long time to compute,
    # so be patient.
    # The shape of a single image is needed to un-flatten the data rows.
    image_shape = load_people().images[0].shape

    # Only the feature matrices are used here; the labels are ignored.
    X_train, X_test, _y_train, _y_test = load_train_test_faces()

    mglearn.plots.plot_nmf_faces(X_train, X_test, image_shape=image_shape)
    plt.suptitle("图3-14 利用越来越多的非负分量对三张人脸图像进行重建")
def knn_classify_pca_faces():
    """Classify faces with 1-NN on PCA-whitened data and plot the PCA results.

    Steps, in order:

    1. Print how many photos each person in the dataset has.
    2. Fit a whitened 75-component PCA on the training set, train a
       1-nearest-neighbour classifier on the projected data and print the
       component shape, projected-data shape and test-set accuracy.
    3. Draw the first 15 principal components as images.
    4. Let ``mglearn.plots.plot_pca_faces`` draw its reconstruction figure.
    5. Scatter-plot the training data on the first two principal components.
    """
    people = load_people()
    X_train, X_test, y_train, y_test = load_train_test_faces()

    # Count how many times each target (person) occurs.
    photo_counts = np.bincount(people.target)

    # Print each name with its count, three entries per output row.
    print('{0:25} {1:5}'.format("姓名", "照片数目"))
    for position, (name, count) in enumerate(
            zip(people.target_names, photo_counts), start=1):
        print("{0:25} {1:3}".format(name, count), end='\t\t')
        if position % 3 == 0:
            print()
    # Terminate the last (possibly partial) row.
    print()

    # 2) Train and test KNN on the PCA-whitened data.
    # Results recorded by the original author (the shapes mention 100
    # components while the code below uses 75 -- presumably from an earlier
    # run; TODO confirm):
    #   X_train_pca.shape: (1341, 100)
    #   Test set score of 1-nn: 0.35
    #   pca.components_.shape: (100, 5655)
    from sklearn.decomposition import PCA
    pca = PCA(n_components=75, whiten=True, random_state=seed)
    pca.fit(X_train)
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)

    from sklearn.neighbors import KNeighborsClassifier
    classifier = KNeighborsClassifier(n_neighbors=1)
    classifier.fit(X_train_pca, y_train)

    separator = '=' * 20
    print(separator)
    print("-- 使用KNN训练和测试经过PCA白化的数据 --")
    print('PCA主成分的形状: {}'.format(pca.components_.shape))
    print('经过PCA白化的数据的形状: {}'.format(X_train_pca.shape))
    accuracy = classifier.score(X_test_pca, y_test)
    print('PCA白化的数据经过KNN训练后测试集的精度: {:.2f}'.format(accuracy))

    # Show the leading components as images; zip stops after the 15 axes.
    image_shape = people.images[0].shape
    _, axes = plt.subplots(
        3, 5,
        figsize=(20, 10),
        subplot_kw={'xticks': (), 'yticks': ()},
    )
    for number, (ax, component) in enumerate(
            zip(axes.ravel(), pca.components_), start=1):
        ax.imshow(component.reshape(image_shape), cmap='viridis')
        ax.set_title('{}.component'.format(number))
    plt.suptitle("图3-9:人脸数据集的前15个主成分的成分向量")

    # Figure 3-10: face photo = Σ_(i=0)^(n) x_i * components_i,
    # i.e. every photo is a weighted sum of the principal components.
    mglearn.plots.plot_pca_faces(X_train, X_test, image_shape)
    plt.suptitle("图3-11 利用越来越多的主成分对三张人脸图像进行重建")

    # New figure: training samples on the first two principal components,
    # marked by class label.
    plt.figure()
    first_pc = X_train_pca[:, 0]
    second_pc = X_train_pca[:, 1]
    mglearn.discrete_scatter(first_pc, second_pc, y_train)
    plt.xlabel('第一个主成分')
    plt.ylabel('第二个主成分')
    plt.suptitle("两个主成分的散点图\n观察数据聚类的效果(数据不可分)")
def _plot_faces_with_large_nmf_coefficient(X_train, X_train_nmf, component,
                                           image_shape, title):
    """Show the training faces with the largest coefficient for one NMF component.

    Parameters
    ----------
    X_train :
        Training samples; row ``i`` is reshaped to ``image_shape`` for display.
    X_train_nmf :
        NMF coefficients of the training samples; column ``component`` is
        used for the ranking.
    component : int
        Zero-based column index into ``X_train_nmf``.
    image_shape : tuple
        Shape each flattened sample is reshaped to before ``imshow``.
    title : str
        Super-title of the new figure.
    """
    # Sample indices ordered by descending coefficient for this component.
    ranked = np.argsort(X_train_nmf[:, component])[::-1]
    _, axes = plt.subplots(2, 5, figsize=(20, 10),
                           subplot_kw={'xticks': (), 'yticks': ()})
    plt.suptitle(title)
    # zip stops once the 2x5 axes are used up, so only the 10 top-ranked
    # faces are drawn.
    for index, ax in zip(ranked, axes.ravel()):
        ax.imshow(X_train[index].reshape(image_shape))


def knn_classify_nmf_faces():
    """Compare 1-NN accuracy on raw and NMF-transformed faces, then plot NMF results.

    Steps, in order:

    1. Train a 1-nearest-neighbour classifier on the raw training data and
       print the data shape and test-set accuracy.
    2. Fit a 100-component NMF, transform train/test data, train a second
       1-NN classifier on the transformed data and print the component shape,
       transformed-data shape and test-set accuracy.
    3. Draw the first 15 NMF components as images.
    4. For coefficient columns 3 and 7, draw the 10 training faces with the
       largest coefficient.
    """
    people = load_people()
    image_shape = people.images[0].shape
    X_train, X_test, y_train, y_test = load_train_test_faces()

    from sklearn.neighbors import KNeighborsClassifier

    # Baseline: 1-NN on the untransformed data.
    knn_raw = KNeighborsClassifier(n_neighbors=1)
    knn_raw.fit(X_train, y_train)
    print('=' * 20)
    print("-- 使用KNN训练和测试原始数据 --")
    print('原始数据的形状: {}'.format(X_train.shape))
    print('原始数据经过KNN训练后测试集的精度: {:.2f}'.format(knn_raw.score(X_test, y_test)))

    # Original author's note: with too few components accuracy is poor, but
    # adding components does not necessarily improve accuracy and does
    # increase computation time.
    from sklearn.decomposition import NMF
    nmf = NMF(n_components=100, max_iter=200, random_state=seed)
    nmf.fit(X_train)
    X_train_nmf = nmf.transform(X_train)
    X_test_nmf = nmf.transform(X_test)

    knn_nmf = KNeighborsClassifier(n_neighbors=1)
    knn_nmf.fit(X_train_nmf, y_train)
    print('=' * 20)
    print("-- 使用KNN训练和测试经过NMF处理的数据 --")
    print('NMF成分的形状: {}'.format(nmf.components_.shape))
    print('经过NMF的数据的形状: {}'.format(X_train_nmf.shape))
    print('NMF的数据经过KNN训练后测试集的精度: {:.2f}'.format(knn_nmf.score(X_test_nmf, y_test)))

    # Show the first components as images; zip stops after the 15 axes.
    # NOTE(review): the title speaks of a 15-component NMF, but the model
    # above is fitted with 100 components and only the first 15 are shown.
    _, axes = plt.subplots(3, 5, figsize=(20, 10),
                           subplot_kw={'xticks': (), 'yticks': ()})
    plt.suptitle("图3-15 使用15个分量的NMF在人脸数据集上找到的15个分量")
    for number, (component, ax) in enumerate(
            zip(nmf.components_, axes.ravel()), start=1):
        ax.imshow(component.reshape(image_shape), cmap='viridis')
        ax.set_title('{}.component'.format(number))

    # Original author's note: the faces that rank high for a given component
    # share some common traits.
    # NOTE(review): both figures below carry the number 图3-16 -- the second
    # one is probably meant to have its own figure number; confirm.
    # NOTE(review): 3 and 7 are zero-based column indices, whereas the
    # component images above are captioned starting from 1 -- confirm which
    # component the titles are meant to refer to.
    _plot_faces_with_large_nmf_coefficient(
        X_train, X_train_nmf, 3, image_shape,
        "图3-16 第3个分量的系数较大的人脸")
    _plot_faces_with_large_nmf_coefficient(
        X_train, X_train_nmf, 7, image_shape,
        "图3-16 第7个分量的系数较大的人脸")
def knn_classify_original_faces():
    """Train 1-NN on the raw face data and print its test-set accuracy.

    The data comes from the module-level ``load_train_test_faces`` helper.
    Prints a separator, a heading, the shape of the training matrix and the
    accuracy of the classifier on the test split.
    """
    X_train, X_test, y_train, y_test = load_train_test_faces()

    # 1) Train and test KNN on the untransformed data.
    # Results recorded by the original author:
    #   5655 = 87 * 65
    #   X_train.shape: (1341, 5655)
    #   Test set score of 1-knn: 0.27
    from sklearn.neighbors import KNeighborsClassifier
    classifier = KNeighborsClassifier(n_neighbors=1)
    classifier.fit(X_train, y_train)

    separator = '=' * 20
    print(separator)
    print("-- 使用KNN训练和测试原始数据 --")
    print('原始数据的形状: {}'.format(X_train.shape))
    accuracy = classifier.score(X_test, y_test)
    print('原始数据经过KNN训练后测试集的精度: {:.2f}'.format(accuracy))
def kmeans_vector_quantization():
    """Compare k-means cluster centres with PCA and NMF components on the faces.

    Fits k-means (100 clusters), PCA (100 components) and NMF (100
    components) on the training data, then draws two figures:

    * a 3 x 15 grid showing, per column, one k-means centre, one PCA
      component and one NMF component;
    * a 4 x 5 grid showing, per column, one test image and its
      reconstruction by k-means, PCA and NMF.
    """
    people = load_people()
    image_shape = people.images[0].shape
    X_train, X_test, y_train, y_test = load_train_test_faces()

    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=100, random_state=seed)
    kmeans.fit(X_train)

    from sklearn.decomposition import PCA
    pca = PCA(n_components=100, random_state=seed)
    pca.fit(X_train)

    from sklearn.decomposition import NMF
    nmf = NMF(n_components=100, random_state=seed)
    nmf.fit(X_train)

    no_ticks = {'xticks': (), 'yticks': ()}

    # --- Figure 1: centres / components side by side --------------------
    fig, axes = plt.subplots(3, 15, figsize=(20, 10), subplot_kw=no_ticks)
    fig.suptitle('图3-30:对比K均值的簇中心与PCA和NMF找到的分量')
    # Original author's note: k-means finds what the images have in common,
    # PCA finds the features along which the images vary most, and NMF finds
    # the basic building elements of the images.
    columns = zip(axes.T, kmeans.cluster_centers_, pca.components_, nmf.components_)
    for column, center, pca_component, nmf_component in columns:
        kmeans_ax, pca_ax, nmf_ax = column
        kmeans_ax.imshow(center.reshape(image_shape))
        pca_ax.imshow(pca_component.reshape(image_shape), cmap='viridis')
        nmf_ax.imshow(nmf_component.reshape(image_shape))

    # Label each row on its left-most axes.
    for row, label in zip(axes, ('kmeans', 'pca', 'nmf')):
        row[0].set_ylabel(label)

    # --- Reconstructions of the test images ------------------------------
    # k-means: replace every sample by the centre of its assigned cluster.
    assigned_clusters = kmeans.predict(X_test)
    X_reconstructed_kmeans = kmeans.cluster_centers_[assigned_clusters]
    # PCA: project onto the components and map back.
    X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test))
    # NMF: coefficients times components.
    nmf_coefficients = nmf.transform(X_test)
    X_reconstructed_nmf = np.dot(nmf_coefficients, nmf.components_)

    # --- Figure 2: original vs. reconstructions --------------------------
    fig, axes = plt.subplots(4, 5, figsize=(20, 10), subplot_kw=no_ticks)
    fig.suptitle('图3-31:利用100个分量(或簇中心)的K均值、PCA和NMF的图像重建的对比——K均值的每张图像仅使用了一个簇中心')
    # Original author's note: the k-means reconstruction is worse than the
    # other two because k-means uses an average, whereas each class has its
    # own traits rather than the average of all traits.
    versions = zip(X_test, X_reconstructed_kmeans, X_reconstructed_pca, X_reconstructed_nmf)
    for column, images in zip(axes.T, versions):
        # Rows top to bottom: original, k-means, PCA, NMF.
        for ax, image in zip(column, images):
            ax.imshow(image.reshape(image_shape))

    for row, label in zip(axes, ('original', 'kmeans', 'pca', 'nmf')):
        row[0].set_ylabel(label)