Beispiel #1
0
def test2():
    """Print the covariance eigenvalues of the cleaned data and plot the top eight.

    The data comes from ``pca.replaceNanWithMean()``. Each column is centered,
    the covariance matrix is computed, and its eigenvalues are printed as
    returned by ``linalg.eig``. The eight largest eigenvalues are then plotted.
    """
    dataMat = pca.replaceNanWithMean()
    # Center every column before computing the covariance.
    meanRemoved = dataMat - mean(dataMat, axis=0)
    covMat = cov(meanRemoved, rowvar=0)  # rowvar=0: each column is a variable
    eigVals, _ = linalg.eig(mat(covMat))  # eigenvectors are not needed here
    print(eigVals)

    # linalg.eig gives no guarantee about the order of the eigenvalues, so
    # sort them from largest to smallest before taking the leading eight.
    sortedEigVals = eigVals[eigVals.argsort()[::-1]]

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(sortedEigVals[:8])
    plt.show()
#coding:utf-8

from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import pca

dataMat = pca.replaceNanWithMean()

# Quick hack: the steps below are copied from pca.pca().
meanVals = mean(dataMat, axis=0)
meanRemoved = dataMat - meanVals  # center each column
covMat = cov(meanRemoved, rowvar=0)
eigVals, eigVects = linalg.eig(mat(covMat))

# argsort orders ascending; reversing yields indices from largest to smallest.
eigValInd = argsort(eigVals)[::-1]
sortedEigVals = eigVals[eigValInd]
total = sum(sortedEigVals)
# Share of the total variance carried by each eigenvalue, in percent.
varPercentage = sortedEigVals / total * 100

# Plot the variance share of the first 20 principal components.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(range(1, 21), varPercentage[:20], marker='^')
ax.set_xlabel('Principal Component Number')
ax.set_ylabel('Percentage of Variance')
plt.show()
Beispiel #3
0
import matplotlib.pyplot as plt
from pylab import *

dataMat = pca.loadDataSet('testSet.txt')
print(shape(dataMat))

# Reduce to one component and plot the original points against their
# reconstruction.
lowDMat, reconMat = pca.pca(dataMat, 1)
print(shape(lowDMat))
fig = plt.figure(1)
ax = fig.add_subplot(111)
# Original data: both coordinates come from dataMat. The y values were
# previously taken from reconMat, which mixed the two data sets.
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='^', s=90)
# Reconstructed data.
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')
show()

# Same comparison, keeping two components.
lowDMat, reconMat = pca.pca(dataMat, 2)
print(shape(lowDMat))
fig2 = plt.figure(2)
ax = fig2.add_subplot(111)
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='^', s=90)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')
show()

# Eigenvalues of the covariance matrix of the NaN-replaced data set.
dataMat = pca.replaceNanWithMean()
meanVals = mean(dataMat, axis=0)
meanRemoved = dataMat - meanVals  # center each column
covMat = cov(meanRemoved, rowvar=0)
eigVals, eigVects = linalg.eig(mat(covMat))
print(eigVals)  # eigenvalues


Beispiel #4
0
"""

import pca
from imp import reload
import numpy as np
import matplotlib.pyplot as plt

reload(pca)
dataMat = pca.loadDataSet('testSet.txt')  # load the data; this data set is two-dimensional
lowDMat, reconMat = pca.pca(dataMat, 1)  # reduce dimensionality
# A bare np.shape(...) expression shows nothing when run as a script, so
# print the shape explicitly.
print(np.shape(lowDMat))

# Plot the original points (triangles) and the reconstructed points (red circles).
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(dataMat[:, 0].flatten().A[0],
           dataMat[:, 1].flatten().A[0],
           marker='^',
           s=90)
ax.scatter(reconMat[:, 0].flatten().A[0],
           reconMat[:, 1].flatten().A[0],
           marker='o',
           s=50,
           c='red')

# Reduce dimensionality on the 500-dimensional data.
reload(pca)
dataMat = pca.replaceNanWithMean()  # get the processed data
# Per the original author's note: this figure shows that about 20 features
# already cover most of the variance in the data set, and can be used to
# choose the dimensionality-reduction parameter.
pca.createFig()
lowDMat, reconMat = pca.pca(dataMat, 20)  # reduce dimensionality