Ejemplo n.º 1
0
def replaceNanWithMeam():
    """Load ``./data/secom.data`` and fill NaN entries with column means.

    The file is read through ``pca.loadDataSet`` using a single space as the
    delimiter.  For every column, the mean of the non-NaN entries is written
    into the rows where that column is NaN.

    The loaded object is modified in place and also returned.  It must
    expose ``.A`` on column slices (as numpy matrices do).

    Note: the function name is spelled ``Meam`` (sic); it is kept unchanged
    so existing callers keep working.
    """
    secom = pca.loadDataSet("./data/secom.data", " ")
    for col in range(shape(secom)[1]):
        # Boolean mask of the missing entries in this column.
        missing = isnan(secom[:, col].A)
        presentRows = nonzero(~missing)[0]
        # The right-hand side is evaluated first, so the mean only ever
        # sees the values that were valid before the fill.
        secom[nonzero(missing)[0], col] = mean(secom[presentRows, col])
    return secom
Ejemplo n.º 2
0
def replaceNanWithMean():
    """Return the ``secom.data`` data set with NaNs replaced by column means.

    Data is loaded with ``pca.loadDataSet('secom.data', ' ')``.  Each column
    is processed independently: the mean is taken over the rows that are not
    NaN and then stored into the rows that are NaN.  The loaded object is
    updated in place and returned.
    """
    matrix = pca.loadDataSet('secom.data', ' ')
    columnCount = shape(matrix)[1]
    column = 0
    while column < columnCount:
        nanFlags = isnan(matrix[:, column].A)
        # Rows holding real numbers vs. rows holding NaN for this column.
        numericRows = nonzero(~nanFlags)[0]
        nanRows = nonzero(nanFlags)[0]
        columnMean = mean(matrix[numericRows, column])
        matrix[nanRows, column] = columnMean
        column += 1
    return matrix
Ejemplo n.º 3
0
def replaceNanWithMean():
    """Load ``secom.data`` (space-delimited) and impute NaNs per column.

    Every NaN in a column is overwritten with the mean of that column's
    non-NaN values.  The object returned by ``pca.loadDataSet`` is mutated
    in place and handed back to the caller.
    """
    loaded = pca.loadDataSet('secom.data', ' ')

    def fillColumn(idx):
        # Mean over the valid rows first, then write it into the NaN rows.
        isMissing = isnan(loaded[:, idx].A)
        fillValue = mean(loaded[nonzero(~isMissing)[0], idx])
        loaded[nonzero(isMissing)[0], idx] = fillValue

    for featureIdx in range(shape(loaded)[1]):
        fillColumn(featureIdx)
    return loaded
Ejemplo n.º 4
0
def replaceNanWithMean(
        fileName='/Users/weiwenjing/Desktop/M.L./machinelearninginaction/Ch13/secom.data',
        delim=' '):
    """Load a delimited data file and replace NaNs with column means.

    Args:
        fileName: Path handed to ``pca.loadDataSet``.  Defaults to the
            location that was previously hard-coded, so existing calls with
            no arguments behave as before.
        delim: Field delimiter handed to ``pca.loadDataSet`` (default: a
            single space).

    Returns:
        The loaded data (modified in place) in which every NaN entry of a
        column has been overwritten with the mean of that column's non-NaN
        entries.  A column that is entirely NaN is left untouched, since it
        has no valid values to average.
    """
    datMat = pca.loadDataSet(fileName, delim)
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        # Compute the NaN mask once and use it for both the mean and the fill.
        nanMask = isnan(datMat[:, i].A)
        if not nanMask.any():
            continue  # nothing to fill in this column
        if nanMask.all():
            continue  # no valid values: avoid mean() of an empty selection
        validRows = nonzero(~nanMask)[0]
        datMat[nonzero(nanMask)[0], i] = mean(datMat[validRows, i])
    return datMat
Ejemplo n.º 5
0
def replaceNanWithMean():
    """Load ``secom.data`` and replace NaN entries with their column mean.

    The file is read with ``pca.loadDataSet('secom.data', ' ')``.  For each
    column the mean of the non-NaN values is computed and written into the
    rows where the column is NaN.  The loaded object is modified in place
    and returned.
    """
    datMat = pca.loadDataSet('secom.data', ' ')
    numFeat = shape(datMat)[1]

    for i in range(numFeat):
        # Build the NaN mask once, on the plain-array view (``.A``), and use
        # it for both steps.  Previously the assignment used a mask built
        # without ``.A`` while the mean used one built with it.
        nanMask = isnan(datMat[:, i].A)

        # Mean of the non-NaN values in this column.
        meanVal = mean(datMat[nonzero(~nanMask)[0], i])

        datMat[nonzero(nanMask)[0], i] = meanVal

    return datMat
Ejemplo n.º 6
0
import numpy as np
from numpy import *
import matplotlib.pyplot as plt


def is_num(str):
    """Return True when *str* can be parsed as a real (non-NaN) number.

    The value is parsed with ``float``.  Anything ``float`` rejects (for
    example ``'abc'``, ``''``, ``'?'`` or ``None``) yields False.  Any
    spelling that parses to NaN (``'NaN'``, ``'nan'``, ``' NaN '``) also
    yields False, not just the exact string ``'NaN'``.

    Args:
        str: The value to test (usually a text field from a data file).
            The parameter name shadows the builtin but is kept for
            backward compatibility with keyword callers.

    Returns:
        bool: True if the value converts to a float that is not NaN.
    """
    import math  # local import keeps the block self-contained

    try:
        parsed = float(str)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric inputs such as None.
        return False
    return not math.isnan(parsed)


datastring = pca.loadDataSet('imports-85.data', ',')
relist = [[], [],
          [
              'alfa-romero', 'audi', 'bmw', 'chevrolet', 'dodge', 'honda',
              'isuzu', 'jaguar', 'mazda', 'mercedes-benz', 'mercury',
              'mitsubishi', 'nissan', 'peugot', 'plymouth', 'porsche',
              'renault', 'saab', 'subaru', 'toyota', 'volkswagen', 'volvo'
          ], ['diesel', 'gas'], ['std', 'turbo'], ['four', 'two'],
          ['hardtop', 'wagon', 'sedan', 'hatchback', 'convertible'],
          ['4wd', 'fwd', 'rwd'], ['front', 'rear'], [], [], [], [], [],
          ['dohc', 'dohcv', 'l', 'ohc', 'ohcf', 'ohcv', 'rotor'],
          [
              '', '', 'two', 'three', 'four', 'five', 'six', '', 'eight', '',
              '', '', 'twelve'
          ], [],
          ['1bbl', '2bbl', '4bbl', 'idi', 'mfi', 'mpfi', 'spdi',
Ejemplo n.º 7
0
        ycord1.append(y)
    elif groupNum == 2:
        x = r0 + 0.0
        y = 1.0 * r1 + x
        xcord2.append(x)
        ycord2.append(y)
    fw.write("%f\t%f\t%d\n" % (x, y, groupNum))

# Close the output file that the preceding code wrote the samples to.
fw.close()
fig = plt.figure()
# Upper panel (slot 1 of a 2-row, 1-column grid): the three coordinate lists
# collected above, drawn with a different marker per list.
ax = fig.add_subplot(211)
ax.scatter(xcord0, ycord0, marker='^', s=90)
ax.scatter(xcord1, ycord1, marker='o', s=50, c='red')
ax.scatter(xcord2, ycord2, marker='v', s=50, c='yellow')
# Lower panel (slot 2): the data after running pca.pca with 1 as the second
# argument on the first two columns; column 2 is compared against 0/1/2
# below to select rows.
ax = fig.add_subplot(212)
myDat = pca.loadDataSet('../data/ch13/testSet3.txt')
lowDDat, reconDat = pca.pca(myDat[:, 0:2], 1)
# NOTE(review): each selection ends with a trailing `[0]`. Depending on what
# the fancy-indexing expression returns in the numpy version in use, this may
# keep only the first selected row -- confirm this is intended.
label0Mat = lowDDat[np.nonzero(
    myDat[:, 2] == 0)[0], :2][0]  # get the items with label 0
label1Mat = lowDDat[np.nonzero(
    myDat[:, 2] == 1)[0], :2][0]  # get the items with label 1
label2Mat = lowDDat[np.nonzero(
    myDat[:, 2] == 2)[0], :2][0]  # get the items with label 2
# Plot the first column of the selection along a horizontal line at y = 0.
ax.scatter(label0Mat[:, 0].tolist(),
           np.zeros(np.shape(label0Mat)[0]),
           marker='^',
           s=90)
ax.scatter(label1Mat[:, 0].tolist(),
           np.zeros(np.shape(label1Mat)[0]),
           marker='o',
           s=50,
Ejemplo n.º 8
0
        x = r0 + 8.0
        y = 1.0*r1 + x
        xcord1.append(x)
        ycord1.append(y)
    elif groupNum == 2:
        x = r0 + 0.0
        y = 1.0*r1 + x
        xcord2.append(x)
        ycord2.append(y)
    fw.write("%f\t%f\t%d\n" % (x, y, groupNum))

# Close the output file that the preceding code wrote the samples to.
fw.close()
fig = plt.figure()
# Upper panel (slot 1 of a 2-row, 1-column grid): the three coordinate lists
# collected above, drawn with a different marker per list.
ax = fig.add_subplot(211)
ax.scatter(xcord0,ycord0, marker='^', s=90)
ax.scatter(xcord1,ycord1, marker='o', s=50,  c='red')
ax.scatter(xcord2,ycord2, marker='v', s=50,  c='yellow')
# Lower panel (slot 2): the data after running pca.pca with 1 as the second
# argument on the first two columns; column 2 is compared against 0/1/2
# below to select rows.
ax = fig.add_subplot(212)
myDat = pca.loadDataSet('testSet3.txt')
lowDDat,reconDat = pca.pca(myDat[:,0:2],1)
# NOTE(review): each selection ends with a trailing `[0]`. Depending on what
# the fancy-indexing expression returns in the numpy version in use, this may
# keep only the first selected row -- confirm this is intended.
label0Mat = lowDDat[nonzero(myDat[:,2]==0)[0],:2][0] #get the items with label 0
label1Mat = lowDDat[nonzero(myDat[:,2]==1)[0],:2][0] #get the items with label 1
label2Mat = lowDDat[nonzero(myDat[:,2]==2)[0],:2][0] #get the items with label 2
# Disabled alternative: scatter the selections using their second column as y.
#ax.scatter(label0Mat[:,0],label0Mat[:,1], marker='^', s=90)
#ax.scatter(label1Mat[:,0],label1Mat[:,1], marker='o', s=50,  c='red')
#ax.scatter(label2Mat[:,0],label2Mat[:,1], marker='v', s=50,  c='yellow')
# Only the label-0 selection is drawn, along a horizontal line at y = 0;
# the equivalent calls for labels 1 and 2 are commented out.
ax.scatter(label0Mat[:,0],zeros(shape(label0Mat)[0]), marker='^', s=90)
#ax.scatter(label1Mat[:,0],zeros(shape(label1Mat)[0]), marker='o', s=50,  c='red')
#ax.scatter(label2Mat[:,0],zeros(shape(label2Mat)[0]), marker='v', s=50,  c='yellow')
plt.show()
Ejemplo n.º 9
0
Archivo: 13.py Proyecto: niumeng07/ML
#!/usr/bin/env python3
#-*- coding: UTF-8 -*-
from numpy import *
import pca
import matplotlib
import matplotlib.pyplot as plt
from pylab import *

# Load the sample data and report its dimensions.
dataMat = pca.loadDataSet('testSet.txt')
print(shape(dataMat))

# --- Figure 1: pca.pca called with 1 as the second argument ---------------
lowDMat, reconMat = pca.pca(dataMat, 1)
print(shape(lowDMat))
fig = plt.figure(1)
ax = fig.add_subplot(111)
# Original points: both coordinates must come from dataMat.  (The y values
# were previously taken from reconMat, which mixed the two data sets.)
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='^', s=90)
# Reconstructed points, overlaid in red.
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')
show()

# --- Figure 2: pca.pca called with 2 as the second argument ---------------
lowDMat, reconMat = pca.pca(dataMat, 2)
print(shape(lowDMat))
fig2 = plt.figure(2)
ax = fig2.add_subplot(111)
# Same fix as above: plot the original data against its own second column.
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='^', s=90)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')
show()

# Switch to the data set returned by pca.replaceNanWithMean(), subtract the
# per-column mean and compute the covariance with columns as variables.
dataMat = pca.replaceNanWithMean()
meanVals = mean(dataMat, axis=0)
meanRemoved = dataMat - meanVals
covMat = cov(meanRemoved, rowvar=0)
Ejemplo n.º 10
0
'''
Created on Jun 1, 2011

@author: Peter
'''
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import pca

# Load the sample points and run pca.pca with 1 as the second argument.
dataMat = pca.loadDataSet('testSet.txt')
lowDMat, reconMat = pca.pca(dataMat, 1)

# Draw the original points (triangles) and the reconstructed points (red
# circles) on a single set of axes.
fig = plt.figure()
ax = fig.add_subplot(111)
for points, style in (
        (dataMat, dict(marker='^', s=90)),
        (reconMat, dict(marker='o', s=50, c='red')),
):
    ax.scatter(points[:, 0], points[:, 1], **style)
plt.show()
Ejemplo n.º 11
0
# coding=utf-8
from numpy import shape, zeros
import matplotlib.pyplot as plt
import pca

# Load the samples and run pca.pca with 2 as the second argument.
sampMtx = pca.loadDataSet("testSet3.txt")
lowDDataMat, reconMat = pca.pca(sampMtx, 2)
# Single-argument print(...) calls behave the same under Python 2 and are
# valid syntax under Python 3 (the bare print statements were not).
print(shape(lowDDataMat))
print(lowDDataMat)
print('-------')
print(reconMat)
fig = plt.figure()
# Subplot positions are given as integers; string specs such as '221' are
# rejected by newer matplotlib releases.
ax = fig.add_subplot(221)
# Panel 1: original samples with the reconstruction overlaid in red.
ax.scatter(sampMtx[:, 0].flatten().A[0], sampMtx[:, 1].flatten().A[0], s=10)
y = zeros(shape(lowDDataMat))
ax.scatter(reconMat[:, 0].flatten().A[0],
           reconMat[:, 1].flatten().A[0],
           marker='o',
           s=10,
           c='red')
# Panel 2: the flattened low-dimensional values drawn along y = 0.
ax2 = fig.add_subplot(222)
ax2.scatter(lowDDataMat[:].flatten().A[0], y[:], s=10, c='green')

# The message below reads "coordinates after the transformation".
print('变换后的坐标')
# Panel 3: the reconstructed samples on their own.
ax = fig.add_subplot(223)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0], s=10)

# plt.show() blocks until the window is closed; fig.show() returns
# immediately, so in a plain script the window could vanish at exit.
plt.show()
'''
Created on Jun 1, 2011

@author: Peter
'''
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import pca
import os

# Directory holding the chapter data, relative to the current working dir.
homedir = ''.join([os.getcwd(), '/machinelearninginaction/ch13/'])

# Load the sample points and run pca.pca with 1 as the second argument.
dataMat = pca.loadDataSet(homedir + 'testSet.txt')
lowDMat, reconMat = pca.pca(dataMat, 1)

fig = plt.figure()
ax = fig.add_subplot(111)
# Original points as triangles.
origX, origY = dataMat[:, 0], dataMat[:, 1]
ax.scatter(origX, origY, marker='^', s=90)
# Reconstructed points as red circles.
reconX, reconY = reconMat[:, 0], reconMat[:, 1]
ax.scatter(reconX, reconY, marker='o', s=50, c='red')
plt.show()
Ejemplo n.º 13
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time    : 2019/3/1 19:18
# @Author  : Arrow and Bullet
# @FileName: run.py
# @Software: PyCharm
# @Blog    :https://blog.csdn.net/qq_41800366
import pca
from numpy import *
import matplotlib
import matplotlib.pyplot as plt

# Load the sample points.
dataMat = pca.loadDataSet("./data/testSet.txt")


# Run pca.pca with 2 as the second argument and report the result's shape.
lowDMat, reconMat = pca.pca(dataMat, 2)
m, n = shape(lowDMat)

print(m, n)
fig = plt.figure()
ax = fig.add_subplot(111)
# Original points (triangles) with the reconstruction overlaid (red circles).
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0], marker="^", s=90)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0], marker="o", s=50, c="red")
# plt.show() blocks until the window is closed.  fig.show() returns
# immediately, so when run as a plain script the window could disappear as
# soon as the interpreter exits.
plt.show()
Ejemplo n.º 14
0
def test1():
    """Run ``pca.pca`` on ``testSet.txt`` and plot the result.

    Loads the samples, calls ``pca.pca`` with 1 as the second argument and
    passes the original data together with the second returned value to
    ``pca.plot2``.  Returns None.
    """
    samples = pca.loadDataSet('testSet.txt')
    # The raw-data plot (pca.plot1) is intentionally left disabled.
    result = pca.pca(samples, 1)
    _, rebuilt = result
    pca.plot2(samples, rebuilt)
Ejemplo n.º 15
0
    """
    把数据集中所有的NAN替换为平均值
    :param datMat:  带有NaN的数据集
    :return:        替换后的数据集
    """
    numFeast = shape(datMat)[1]
    for i in range(numFeast):
        #对value不为NaN的求均值
        meanVal = mean(datMat[nonzero(~isnan(datMat[:, i].A))[0], i])
        #将value为NaN的值赋值为均值
        datMat[nonzero(isnan(datMat[:,i].A))[0],i] = meanVal
    return datMat

if __name__ == '__main__':
    # Load the data set (it may still contain NaN entries at this point).
    NonedatMat = pca.loadDataSet(
        r'C:\Users\v_wangdehong\PycharmProjects\MachineLearning_V\12.PCA\data\secom.data',
        ' ')
    # Replace every NaN in the data set.
    datMat = replaceNanWithMean(NonedatMat)
    # Remove the per-column mean.
    meanVals = mean(datMat, axis=0)
    meanRemoved = datMat - meanVals
    # Covariance matrix, treating columns as variables.
    covMat = cov(meanRemoved, rowvar=0)
    # Eigen-decomposition of the covariance matrix.
    eigVals, eigVects = linalg.eig(mat(covMat))
    for eigResult in (eigVals, eigVects):
        print(eigResult)
    # Original author's note: the output is a long list of values, many of
    # which are 0.  That means those features are copies of other features,
    # i.e. they can be expressed through other features and carry no extra
    # information of their own.
    lowDat, reconMat = pca.pca(datMat, 40)
Ejemplo n.º 16
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author:yiluzhang

import pca

if __name__ == "__main__":
    # Load the samples, then run pca.pca with 1 as the second argument and
    # unpack its two results.
    data_mat = pca.loadDataSet("testSet.txt")
    pca_result = pca.pca(data_mat, 1)
    low_mat, recon_mat = pca_result
Ejemplo n.º 17
0
# -*- coding: utf-8 -*-
"""
Created on Wed May 17 14:26:16 2017

@author: 凯风
"""

import pca
from imp import reload
import numpy as np
import matplotlib.pyplot as plt

# Re-import pca so that edits made to the module are picked up.
reload(pca)
# Read the data, then run pca.pca with 1 as the second argument
# (per the original author's comments: this data set is two-dimensional,
# and this step is the dimensionality reduction).
dataMat = pca.loadDataSet('testSet.txt')
lowDMat, reconMat = pca.pca(dataMat, 1)
np.shape(lowDMat)

# Plot: original points as triangles, reconstructed points as red circles.
fig = plt.figure()
ax = fig.add_subplot(111)
for mtx, opts in ((dataMat, {'marker': '^', 's': 90}),
                  (reconMat, {'marker': 'o', 's': 50, 'c': 'red'})):
    xs = mtx[:, 0].flatten().A[0]
    ys = mtx[:, 1].flatten().A[0]
    ax.scatter(xs, ys, **opts)

# Perform dimensionality reduction on the 500-dimensional data
Ejemplo n.º 18
0
def test_2():
    """Run ``pca.pca`` on ``testSet.txt`` and print both results.

    Loads the samples, calls ``pca.pca`` with 2 as the second argument and
    prints the first returned value, a ``---`` separator and the second
    returned value.  Returns None.
    """
    dataMat = pca.loadDataSet('testSet.txt')
    lowDMat, recomMat = pca.pca(dataMat, 2)
    # Single-argument print(...) produces the same output under Python 2 and
    # is valid syntax under Python 3, unlike the bare print statement.
    print(lowDMat)
    print("---")
    print(recomMat)
Ejemplo n.º 19
0
def test_590():
    """Run ``pca.pca`` on ``secom.data`` and print both results.

    Loads the file, calls ``pca.pca`` with 2 as the second argument and
    prints the first returned value, a ``---`` separator and the second
    returned value.  Returns None.
    """
    # NOTE(review): no delimiter is passed here and NaNs are not replaced
    # before calling pca.pca -- confirm pca.loadDataSet's default delimiter
    # suits this file and that NaN entries are acceptable.
    dataMat = pca.loadDataSet('secom.data')
    lowDMat, recomMat = pca.pca(dataMat, 2)
    # Single-argument print(...) produces the same output under Python 2 and
    # is valid syntax under Python 3, unlike the bare print statement.
    print(lowDMat)
    print("---")
    print(recomMat)