def replaceNanWithMeam():
    """Load ``./data/secom.data`` and overwrite each NaN with its column mean.

    The file is read through ``pca.loadDataSet`` using a single space as the
    delimiter. For every column, the mean of the non-NaN entries is computed
    and written into the rows of that column that hold NaN.

    Returns:
        The loaded data with the NaN entries replaced in place.

    Note:
        The ``.A`` attribute is used on column slices, so the loader is
        presumably returning a ``numpy.matrix`` -- confirm against ``pca``.
    """
    dataset = pca.loadDataSet("./data/secom.data", " ")
    for col in range(shape(dataset)[1]):
        # Boolean (m, 1) ndarray flagging the NaN cells of this column.
        nan_mask = isnan(dataset[:, col].A)
        valid_rows = nonzero(~nan_mask)[0]
        missing_rows = nonzero(nan_mask)[0]
        dataset[missing_rows, col] = mean(dataset[valid_rows, col])
    return dataset
def replaceNanWithMean():
    """Return the contents of ``secom.data`` with NaNs replaced by column means.

    The data is loaded with ``pca.loadDataSet('secom.data', ' ')``. Each
    feature (column) is processed independently: the mean is taken over the
    rows that are not NaN and then assigned to the rows that are NaN.

    Returns:
        The loaded data, modified in place.
    """
    secom = pca.loadDataSet('secom.data', ' ')
    featureCount = shape(secom)[1]
    for featIdx in range(featureCount):
        # Rows of this feature that hold a real number (not NaN).
        knownRows = nonzero(~isnan(secom[:, featIdx].A))[0]
        columnMean = mean(secom[knownRows, featIdx])
        # Rows of this feature that are NaN are overwritten with the mean.
        unknownRows = nonzero(isnan(secom[:, featIdx].A))[0]
        secom[unknownRows, featIdx] = columnMean
    return secom
def replaceNanWithMean():
    """Impute missing values in ``secom.data`` with per-column means.

    Loads the space-delimited file via ``pca.loadDataSet`` and, column by
    column, assigns the mean of the non-NaN rows to the NaN rows.

    Returns:
        The loaded data after in-place replacement of its NaN entries.
    """

    def _rows(data, col, nan_wanted):
        # Row indices of `col` that are NaN (nan_wanted=True) or not NaN.
        flags = isnan(data[:, col].A)
        if not nan_wanted:
            flags = ~flags
        return nonzero(flags)[0]

    values = pca.loadDataSet('secom.data', ' ')
    for col in range(shape(values)[1]):
        fill = mean(values[_rows(values, col, False), col])  # mean of real numbers
        values[_rows(values, col, True), col] = fill  # NaN cells take the mean
    return values
def replaceNanWithMean(
        fileName='/Users/weiwenjing/Desktop/M.L./machinelearninginaction/Ch13/secom.data',
        delim=' '):
    """Load a data file and replace every NaN with the mean of its column.

    The original implementation hard-coded an absolute path inside one
    user's home directory, so it could only run on that machine. The path
    and delimiter are now parameters; their defaults reproduce the original
    call exactly, so existing zero-argument callers are unaffected.

    Args:
        fileName: Path handed to ``pca.loadDataSet``.
        delim: Field delimiter handed to ``pca.loadDataSet``.

    Returns:
        The loaded data with NaN entries replaced in place.

    Note:
        ``.A`` is used on column slices, so the loader presumably returns a
        ``numpy.matrix`` -- confirm against ``pca``.
    """
    datMat = pca.loadDataSet(fileName, delim)
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        # Computed once per column: True where the cell is NaN.
        nanMask = isnan(datMat[:, i].A)
        # Mean over the values that are not NaN (i.e. real numbers).
        meanVal = mean(datMat[nonzero(~nanMask)[0], i])
        # Set the NaN values to that mean.
        datMat[nonzero(nanMask)[0], i] = meanVal
    return datMat
def replaceNanWithMean():
    """Load ``secom.data`` and replace every NaN with the mean of its column.

    The file is read with ``pca.loadDataSet('secom.data', ' ')``. For each
    column the mean of the non-NaN rows is computed and assigned to the NaN
    rows.

    Returns:
        The loaded data with NaN entries replaced in place.
    """
    datMat = pca.loadDataSet('secom.data', ' ')
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        # Build the NaN mask once, on the plain ndarray (`.A`). The original
        # called isnan() on the matrix slice for the assignment but on `.A`
        # for the mean; using the same ndarray mask for both keeps the row
        # indices returned by nonzero() consistent.
        nanMask = isnan(datMat[:, i].A)
        # Mean of the values that are not NaN.
        meanVal = mean(datMat[nonzero(~nanMask)[0], i])
        datMat[nonzero(nanMask)[0], i] = meanVal
    return datMat
import numpy as np from numpy import * import matplotlib.pyplot as plt def is_num(str): try: if str == 'NaN': return False float(str) return True except ValueError: return False datastring = pca.loadDataSet('imports-85.data', ',') relist = [[], [], [ 'alfa-romero', 'audi', 'bmw', 'chevrolet', 'dodge', 'honda', 'isuzu', 'jaguar', 'mazda', 'mercedes-benz', 'mercury', 'mitsubishi', 'nissan', 'peugot', 'plymouth', 'porsche', 'renault', 'saab', 'subaru', 'toyota', 'volkswagen', 'volvo' ], ['diesel', 'gas'], ['std', 'turbo'], ['four', 'two'], ['hardtop', 'wagon', 'sedan', 'hatchback', 'convertible'], ['4wd', 'fwd', 'rwd'], ['front', 'rear'], [], [], [], [], [], ['dohc', 'dohcv', 'l', 'ohc', 'ohcf', 'ohcv', 'rotor'], [ '', '', 'two', 'three', 'four', 'five', 'six', '', 'eight', '', '', '', 'twelve' ], [], ['1bbl', '2bbl', '4bbl', 'idi', 'mfi', 'mpfi', 'spdi',
ycord1.append(y) elif groupNum == 2: x = r0 + 0.0 y = 1.0 * r1 + x xcord2.append(x) ycord2.append(y) fw.write("%f\t%f\t%d\n" % (x, y, groupNum)) fw.close() fig = plt.figure() ax = fig.add_subplot(211) ax.scatter(xcord0, ycord0, marker='^', s=90) ax.scatter(xcord1, ycord1, marker='o', s=50, c='red') ax.scatter(xcord2, ycord2, marker='v', s=50, c='yellow') ax = fig.add_subplot(212) myDat = pca.loadDataSet('../data/ch13/testSet3.txt') lowDDat, reconDat = pca.pca(myDat[:, 0:2], 1) label0Mat = lowDDat[np.nonzero( myDat[:, 2] == 0)[0], :2][0] # get the items with label 0 label1Mat = lowDDat[np.nonzero( myDat[:, 2] == 1)[0], :2][0] # get the items with label 1 label2Mat = lowDDat[np.nonzero( myDat[:, 2] == 2)[0], :2][0] # get the items with label 2 ax.scatter(label0Mat[:, 0].tolist(), np.zeros(np.shape(label0Mat)[0]), marker='^', s=90) ax.scatter(label1Mat[:, 0].tolist(), np.zeros(np.shape(label1Mat)[0]), marker='o', s=50,
x = r0 + 8.0 y = 1.0*r1 + x xcord1.append(x) ycord1.append(y) elif groupNum == 2: x = r0 + 0.0 y = 1.0*r1 + x xcord2.append(x) ycord2.append(y) fw.write("%f\t%f\t%d\n" % (x, y, groupNum)) fw.close() fig = plt.figure() ax = fig.add_subplot(211) ax.scatter(xcord0,ycord0, marker='^', s=90) ax.scatter(xcord1,ycord1, marker='o', s=50, c='red') ax.scatter(xcord2,ycord2, marker='v', s=50, c='yellow') ax = fig.add_subplot(212) myDat = pca.loadDataSet('testSet3.txt') lowDDat,reconDat = pca.pca(myDat[:,0:2],1) label0Mat = lowDDat[nonzero(myDat[:,2]==0)[0],:2][0] #get the items with label 0 label1Mat = lowDDat[nonzero(myDat[:,2]==1)[0],:2][0] #get the items with label 1 label2Mat = lowDDat[nonzero(myDat[:,2]==2)[0],:2][0] #get the items with label 2 #ax.scatter(label0Mat[:,0],label0Mat[:,1], marker='^', s=90) #ax.scatter(label1Mat[:,0],label1Mat[:,1], marker='o', s=50, c='red') #ax.scatter(label2Mat[:,0],label2Mat[:,1], marker='v', s=50, c='yellow') ax.scatter(label0Mat[:,0],zeros(shape(label0Mat)[0]), marker='^', s=90) #ax.scatter(label1Mat[:,0],zeros(shape(label1Mat)[0]), marker='o', s=50, c='red') #ax.scatter(label2Mat[:,0],zeros(shape(label2Mat)[0]), marker='v', s=50, c='yellow') plt.show()
#!/usr/bin/env python3
#-*- coding: UTF-8 -*-
from numpy import *
import pca
import matplotlib
import matplotlib.pyplot as plt
from pylab import *

# Load the sample data and report its shape.
dataMat = pca.loadDataSet('testSet.txt')
print(shape(dataMat))

# --- Figure 1: keep a single principal component -------------------------
lowDMat, reconMat = pca.pca(dataMat, 1)
print(shape(lowDMat))
fig = plt.figure(1)
ax = fig.add_subplot(111)
# Original samples (triangles). The y values must come from dataMat as well;
# the earlier code paired dataMat's x column with reconMat's y column, so
# the "original" points were drawn at the wrong heights.
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='^', s=90)
# Reconstructed samples (red circles).
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')
show()

# --- Figure 2: keep two principal components -----------------------------
lowDMat, reconMat = pca.pca(dataMat, 2)
print(shape(lowDMat))
fig2 = plt.figure(2)
ax = fig2.add_subplot(111)
# Same fix as in figure 1: plot the original data against itself.
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='^', s=90)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')
show()

# --- NaN-imputed data set: centre it and compute the covariance ----------
dataMat = pca.replaceNanWithMean()
meanVals = mean(dataMat, axis=0)        # per-column means
meanRemoved = dataMat - meanVals
covMat = cov(meanRemoved, rowvar=0)     # rowvar=0: columns are the variables
'''
Created on Jun 1, 2011

@author: Peter
'''
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import pca

# Load the data and project it onto a single principal component.
dataMat = pca.loadDataSet('testSet.txt')
lowDMat, reconMat = pca.pca(dataMat, 1)

fig = plt.figure()
ax = fig.add_subplot(111)
# scatter() expects 1-D x/y. Column slices such as dataMat[:, 0] are passed
# through asarray(...).ravel() so that a 2-D (m, 1) column -- which is what a
# numpy.matrix slice yields -- becomes a flat array of length m.
# NOTE(review): assumes pca.loadDataSet / pca.pca return 2-D matrix-like data.
ax.scatter(asarray(dataMat[:, 0]).ravel(), asarray(dataMat[:, 1]).ravel(),
           marker='^', s=90)
ax.scatter(asarray(reconMat[:, 0]).ravel(), asarray(reconMat[:, 1]).ravel(),
           marker='o', s=50, c='red')
plt.show()
# coding=utf-8
from numpy import shape, zeros
import matplotlib.pyplot as plt
import pca

# Load the data and run PCA keeping two components.
sampMtx = pca.loadDataSet("testSet3.txt")
lowDDataMat, reconMat = pca.pca(sampMtx, 2)

# Single-argument print(...) calls behave the same under Python 2 and 3,
# unlike the Python 2-only print statements used previously.
print(shape(lowDDataMat))
print(lowDDataMat)
print('-------')
print(reconMat)

fig = plt.figure()

# Top-left panel: original samples with the reconstruction drawn on top in red.
# Subplot positions are given as integers; string specs such as '221' are
# not accepted by current matplotlib releases.
ax = fig.add_subplot(221)
ax.scatter(sampMtx[:, 0].flatten().A[0], sampMtx[:, 1].flatten().A[0], s=10)
y = zeros(shape(lowDDataMat))
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=10, c='red')

# Top-right panel: every value of the low-dimensional data plotted at y = 0.
ax2 = fig.add_subplot(222)
ax2.scatter(lowDDataMat[:].flatten().A[0], y[:], s=10, c='green')
print('变换后的坐标')

# Bottom-left panel: the reconstruction on its own.
ax = fig.add_subplot(223)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0], s=10)

# plt.show() runs the GUI event loop and blocks until the window is closed;
# Figure.show() does not, so in a plain script the window could vanish at once.
plt.show()
'''
Created on Jun 1, 2011

@author: Peter
'''
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import pca
import os

# Data directory, resolved relative to the current working directory.
# Built with os.path.join instead of manual '/' concatenation.
homedir = os.path.join(os.getcwd(), 'machinelearninginaction', 'ch13')

# Load the data and project it onto a single principal component.
dataMat = pca.loadDataSet(os.path.join(homedir, 'testSet.txt'))
lowDMat, reconMat = pca.pca(dataMat, 1)

fig = plt.figure()
ax = fig.add_subplot(111)
# scatter() expects 1-D x/y. Column slices are flattened explicitly so that a
# 2-D (m, 1) column -- which is what a numpy.matrix slice yields -- becomes a
# flat array of length m.
# NOTE(review): assumes pca.loadDataSet / pca.pca return 2-D matrix-like data.
ax.scatter(asarray(dataMat[:, 0]).ravel(), asarray(dataMat[:, 1]).ravel(),
           marker='^', s=90)
ax.scatter(asarray(reconMat[:, 0]).ravel(), asarray(reconMat[:, 1]).ravel(),
           marker='o', s=50, c='red')
plt.show()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time    : 2019/3/1 19:18
# @Author  : Arrow and Bullet
# @FileName: run.py
# @Software: PyCharm
# @Blog    :https://blog.csdn.net/qq_41800366
import pca
from numpy import *
import matplotlib
import matplotlib.pyplot as plt

# Load the data and run PCA keeping two components.
dataMat = pca.loadDataSet("./data/testSet.txt")
lowDMat, reconMat = pca.pca(dataMat, 2)
m, n = shape(lowDMat)
print(m, n)

fig = plt.figure()
ax = fig.add_subplot(111)
# Original samples (triangles) and reconstructed samples (red circles).
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker="^", s=90)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker="o", s=50, c="red")

# plt.show() runs the GUI event loop and blocks until the window is closed;
# Figure.show() does not, so in a plain script the window could vanish at once.
plt.show()
def test1():
    """Run a one-component PCA on ``testSet.txt`` and pass the result to ``pca.plot2``.

    The data is loaded with ``pca.loadDataSet``; the low-dimensional output of
    ``pca.pca`` is not used, only the reconstruction is forwarded together
    with the original data.
    """
    samples = pca.loadDataSet('testSet.txt')
    # pca.plot1(samples)  # left disabled, as in the original
    _, reconstructed = pca.pca(samples, 1)
    pca.plot2(samples, reconstructed)
""" 把数据集中所有的NAN替换为平均值 :param datMat: 带有NaN的数据集 :return: 替换后的数据集 """ numFeast = shape(datMat)[1] for i in range(numFeast): #对value不为NaN的求均值 meanVal = mean(datMat[nonzero(~isnan(datMat[:, i].A))[0], i]) #将value为NaN的值赋值为均值 datMat[nonzero(isnan(datMat[:,i].A))[0],i] = meanVal return datMat if __name__ == '__main__': #加载数据 NonedatMat = pca.loadDataSet(r'C:\Users\v_wangdehong\PycharmProjects\MachineLearning_V\12.PCA\data\secom.data', ' ') #替换数据集中所有的NAN datMat = replaceNanWithMean(NonedatMat) #去除均值 meanVals = mean(datMat,axis=0) meanRemoved = datMat - meanVals #计算协方差矩阵 covMat = cov(meanRemoved,rowvar=0) #对该矩阵进行特征值分析 eigVals,eigVects = linalg.eig(mat(covMat)) print(eigVals) print(eigVects) """ 我们会看到一大堆值,但是其中很多值都是0,这就意味着这些特征都是其他特征的副本,也就是说,它们可以通过其他特征表示,而本身没有提供额外信息。 """ lowDat,reconMat = pca.pca(datMat,40)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author:yiluzhang
import pca


def main():
    """Load ``testSet.txt`` and run ``pca.pca`` on it with a second argument of 1."""
    data_mat = pca.loadDataSet("testSet.txt")
    low_mat, recon_mat = pca.pca(data_mat, 1)
    return low_mat, recon_mat


if __name__ == "__main__":
    main()
# -*- coding: utf-8 -*-
"""
Created on Wed May 17 14:26:16 2017

@author: 凯风
"""
import pca
try:
    # The `imp` module was removed in Python 3.12; importlib.reload is the
    # supported replacement on Python 3.4+.
    from importlib import reload
except ImportError:
    # Older interpreters without importlib.reload.
    from imp import reload
import numpy as np
import matplotlib.pyplot as plt

# Pick up any edits made to pca since it was first imported.
reload(pca)

dataMat = pca.loadDataSet('testSet.txt')    # load the data; this data set is two-dimensional
lowDMat, reconMat = pca.pca(dataMat, 1)     # reduce the dimensionality
# The result of this expression is discarded when run as a script; it only
# echoes the shape in an interactive session.
np.shape(lowDMat)

# Plot the original samples (triangles) and the reconstruction (red circles).
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='^', s=90)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')

# Dimensionality reduction on the 500-dimensional data
def test_2():
    """Run ``pca.pca`` on ``testSet.txt`` with a second argument of 2 and print both results.

    Prints the first returned value, a ``---`` separator line, then the
    second returned value.
    """
    dataMat = pca.loadDataSet('testSet.txt')
    lowDMat, recomMat = pca.pca(dataMat, 2)
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print(lowDMat)
    print("---")
    print(recomMat)
def test_590():
    """Run ``pca.pca`` on ``secom.data`` with a second argument of 2 and print both results.

    Prints the first returned value, a ``---`` separator line, then the
    second returned value.
    """
    # NOTE(review): the file is loaded with loadDataSet's default delimiter
    # and without any NaN handling -- confirm that this matches the format of
    # secom.data and what pca.pca expects.
    dataMat = pca.loadDataSet('secom.data')
    lowDMat, recomMat = pca.pca(dataMat, 2)
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print(lowDMat)
    print("---")
    print(recomMat)