Example #1
0
def test1():
    """Exercise the kMeans helpers on ``testSet.txt`` and hand the result to ``kMeans.plot1``.

    Prints, in order: the minimum of the first column, the value returned by
    ``kMeans.randCent(..., 2)``, the value returned by ``kMeans.distEclud``
    for the first two rows, and the centroids returned by
    ``kMeans.biKmeans(..., 4)``.
    """
    points = mat(kMeans.loadDataSet('testSet.txt'))

    first_column = points[:, 0]
    print(min(first_column))
    print(kMeans.randCent(points, 2))

    row_a, row_b = points[0], points[1]
    print(kMeans.distEclud(row_a, row_b))

    # Alternative that was tried here: kMeans.kMeans(points, 3).
    # Only the first element of the returned pair is used below.
    centers, _ = kMeans.biKmeans(points, 4)
    print(centers)

    kMeans.plot1(points, centers)
Example #2
0
'''
Created on 2016. 2. 9.

@author: TaijinKim
'''

import kMeans
from numpy import *

# Read the sample rows and wrap them in a NumPy matrix.
raw_rows = kMeans.loadDataSet('../data/testSet.txt')
dataMat = mat(raw_rows)

# Show what kMeans.randCent returns when asked for 2 centroids.
initial_centroids = kMeans.randCent(dataMat, 2)
print(initial_centroids)

# Checks that are currently switched off:
#   print(min(dataMat[:, 0]))
#   print(min(dataMat[:, 1]))
#   print(max(dataMat[:, 1]))
#   print(max(dataMat[:, 0]))
#   print(kMeans.distEclud(dataMat[0], dataMat[1]))
#   myCentroids, clustAssing = kMeans.kMeans(dataMat, 4)
Example #3
0
#!/usr/bin/env python
__coding__ = "utf-8"
__author__ = "Ng WaiMing"

from kMeans import kMeans
from kMeans import loadDataSet
from kMeans import randCent
from kMeans import distEclud
from kMeans import biKmeans
from numpy import *

if __name__ == '__main__':
    # ---- plain k-means on testSet.txt ----
    dataMat = mat(loadDataSet('testSet.txt'))

    # Column extrema, reported in the order min/min/max/max for columns 0/1/0/1.
    column_reports = (
        ('min(dataMat[:, 0])', min, 0),
        ('min(dataMat[:, 1])', min, 1),
        ('max(dataMat[:, 0])', max, 0),
        ('max(dataMat[:, 1])', max, 1),
    )
    for label, reducer, col in column_reports:
        print(label, reducer(dataMat[:, col]), '\n')

    print(randCent(dataMat, 2), '\n')

    first_row, second_row = dataMat[0], dataMat[1]
    print(distEclud(first_row, second_row))

    centroids, clusterAssment = kMeans(dataMat, 4)
    print('centroids:\n', centroids, '\n')
    print('clusterAssment:\n', clusterAssment, '\n')

    # ---- bisecting k-means on testSet2.txt ----
    dataMat3 = mat(loadDataSet('testSet2.txt'))
    centList, myNewAssments = biKmeans(dataMat3, 3)
    print('centList: \n', centList, '\n')

    # Disabled map-clustering demo:
    #   fileName = '../../../../data/k-means/places.txt'
    #   imgName = '../../../../data/k-means/Portland.png'
    #   kMeans.clusterClubs(fileName=fileName, imgName=imgName, numClust=5)
Example #4
0
File: 10.py Project: niumeng07/ML
#!/usr/bin/env python
#-*- coding: UTF-8 -*-

import kMeans
from numpy import *

def _draw(figure_no, layers, bound):
    """Open figure ``figure_no``, plot each ``(matrix, style)`` layer in order,
    and fix both axes to ``[-bound, bound]``. Returns the figure object."""
    figure = plt.figure(figure_no)
    for pts, style in layers:
        plt.plot(pts[:, 0], pts[:, 1], style)
    plt.axis([-bound, bound, -bound, bound])
    return figure


# ---- plain k-means on testSet.txt ----
dataMat = mat(kMeans.loadDataSet('testSet.txt'))
kMeansRandCenter = kMeans.randCent(dataMat, 2)  # two centres
print(kMeansRandCenter)

centroids, clusterAssment = kMeans.kMeans(dataMat, 5)

import matplotlib.pyplot as plt

# Centroids are drawn first (red), then the data points (blue).
fig = _draw(1, ((centroids, 'ro'), (dataMat, 'bo')), 8)
# plt.show()

# Result of this call is not used.
kMeans.binaryKeans(dataMat, 3)

# ---- bisecting variant on testSet2.txt ----
dataMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, Assments = kMeans.binaryKeans(dataMat3, 3)
print("centList:", centList)
print("Assments:", Assments)

# Here the data points go first (blue), then the centroids (red).
fig = _draw(2, ((dataMat3, 'bo'), (centList, 'ro')), 10)
# plt.show()
Example #5
0
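#       for each centroid
#           compute the distance between the centroid and the data point
#       assign the data point to the cluster closest to it
#   for each cluster, compute the mean of all its points and use that mean as the centroid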

import kMeans
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
import time
# Hand-rolled k-means assignment loop over testSet2.txt.
# NOTE(review): this snippet is cut off after the assignment step; the
# centroid-update part of the loop is not fully visible here.
st = time.time()  # timestamp taken before the loop; presumably for timing -- its use is not visible here
k = 3
dataMat = mat(kMeans.loadDataSet('testSet2.txt'))
# Old/new label vectors start out different (all 0 vs. all 1) so the loop
# condition below is true on the first pass.
oldClassLabel = zeros(len(dataMat), int)
newClassLabel = ones(len(dataMat), int)
center = kMeans.randCent(dataMat, k)
dist = []
m = 0
while newClassLabel.tolist().__eq__(
        oldClassLabel.tolist()) != True:  # keep partitioning while the new labels differ from the old labels
    m += 1  # iteration counter
    for di in range(len(dataMat)):  # for every data point in the data set
        dist = []
        for ci in range(len(center)):  # for every centroid, compute the distance from this point to it
            dist.append(kMeans.distEclud(dataMat[di],
                                         center[ci]))  # dist collects this point's distance to every centroid
        distsort = array(dist).argsort()  # sort the distances; yields indices in ascending order
        # NOTE(review): this copy runs once per point, so after the inner loop
        # oldClassLabel can differ from newClassLabel only at the last index --
        # confirm whether the snapshot was meant to be taken once per pass.
        oldClassLabel = newClassLabel.copy()  # must be a copy: plain '=' would alias the array and both would change together
        newClassLabel[di] = distsort[0]  # first index after sorting = nearest centroid
    for j in range(k):
        x = mean(array(dataMat)[kMeans.find_all_index(newClassLabel, j),
import kMeans
from numpy import *

# Exploratory run of the kMeans helpers on the two sample data sets.
# Every print uses the single-argument call form, which produces the same
# output under Python 2 and Python 3 (the bare print statement is a
# SyntaxError on Python 3).
datMat = mat(kMeans.loadDataSet('testSet.txt'))

# Extrema of the two columns.
print(min(datMat[:, 0]))
print(min(datMat[:, 1]))
print(max(datMat[:, 0]))
print(max(datMat[:, 1]))

# Outputs of the two support functions.
print(kMeans.randCent(datMat, 2))
print(kMeans.distEclud(datMat[0], datMat[1]))

# kMeans.kMeans with k = 4; the results are kept but not printed.
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)
# print(myCentroids); print(clustAssing)

# kMeans.biKmeans with k = 3 on the second data set.
datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
print(centList)
Example #7
0
import kMeans
from numpy import *
# Load the first sample set as a matrix.
first_rows = kMeans.loadDataSet('testSet.txt')
dataMat = mat(first_rows)

# Request two centroids from kMeans.randCent; kept for inspection only.
randMat = kMeans.randCent(dataMat, 2)

# kMeans.kMeans with k = 4; `res` holds whatever that call returns.
res = kMeans.kMeans(dataMat, 4)

# kMeans.biKmeans with k = 3 on the second sample set; the return value is
# discarded (it was once meant to be unpacked as centList, myNewAssments).
second_rows = kMeans.loadDataSet('testSet2.txt')
dataMat3 = mat(second_rows)
kMeans.biKmeans(dataMat3, 3)
Example #8
0
import kMeans
from numpy import *

# Load the txt data.
datMat = mat(kMeans.loadDataSet('data2.txt'))

# Per the original author's note, these columns of datMat hold the
# semi-major axis, eccentricity and orbital inclination.
orbit_cols = slice(2, 5)
datMat[0, orbit_cols]

# Distance between the first two rows, using kMeans.distdeltaV.
row0 = datMat[0, orbit_cols]
row1 = datMat[1, orbit_cols]
delta_v = kMeans.distdeltaV(row0, row1)

# Randomly generate k centroids (k = 4 here).
centroids = kMeans.randCent(datMat[:, orbit_cols], 4)

# k-means clustering with kMeans.distdeltaV as the distance function.
myCentroids, clustAssing = kMeans.kMeans(
    datMat[:, orbit_cols], 5, kMeans.distdeltaV)

# Bisecting k-means clustering with the same distance function.
centList, myNewAssments = kMeans.biKmeans(
    datMat[:, orbit_cols], 5, kMeans.distdeltaV)

# Plot, using the bisecting k-means assignments.
kMeans.showCluster_SRQ(datMat[:, orbit_cols], myNewAssments)
Example #9
0
Created on Sun May 14 11:57:07 2017

@author: 凯风
"""

import kMeans
import numpy as np
from imp import reload

# ---- support-function checks ----
# The bare expressions in this script only display a value when the lines are
# run interactively.
reload(kMeans)
datMat = np.mat(kMeans.loadDataSet('testSet.txt'))
for column in (0, 1):
    min(datMat[:, column])
    max(datMat[:, column])
kMeans.randCent(datMat, 2)  # check whether the initial centroids fall inside the value range
first_row, second_row = datMat[0], datMat[1]
kMeans.distEclud(first_row, second_row)

# ---- k-means on real data ----
reload(kMeans)
datMat = np.mat(kMeans.loadDataSet('testSet.txt'))
# Author's note: the result is not necessarily the global optimum.
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)


# ---- bisecting k-means ----
reload(kMeans)
datMat3 = np.mat(kMeans.loadDataSet('testSet2.txt'))
# Author's note: this still cannot guarantee a global optimum, only a local one.
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
centList
myNewAssments
import kMeans
from numpy import *

# Walk through the kMeans helpers on testSet.txt and testSet2.txt.
# The Python 2 print statements are replaced by single-argument print(...)
# calls, which run unchanged (and print the same text) on Python 2 and 3.
datMat = mat(kMeans.loadDataSet('testSet.txt'))

# Minimum and maximum of each of the two columns.
print(min(datMat[:, 0]))
print(min(datMat[:, 1]))
print(max(datMat[:, 0]))
print(max(datMat[:, 1]))

# Values returned by the two support functions.
print(kMeans.randCent(datMat, 2))
print(kMeans.distEclud(datMat[0], datMat[1]))

# Results of kMeans.kMeans (k = 4) are stored but not printed.
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)
# print(myCentroids); print(clustAssing)

# kMeans.biKmeans (k = 3) on the second data set; only the first result is shown.
datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
print(centList)
import kMeans
import os
import sys
from numpy import *

# Locate the sample file relative to this script instead of the working directory.
script_dir = os.path.dirname(__file__)
project_path = os.path.abspath(script_dir)
text_path = os.path.join(project_path, "../chapter10/testSet.txt")
datMat = mat(kMeans.loadDataSet(text_path))

# Column extrema, printed in the order: min col 0, min col 1, max col 1, max col 0.
for reducer, column in ((min, 0), (min, 1), (max, 1), (max, 0)):
    print(reducer(datMat[:, column]))

# Value returned by kMeans.randCent for k = 2.
two_centroids = kMeans.randCent(datMat, 2)
print(two_centroids)

# Value returned by kMeans.distEclud for the first two rows.
row_a, row_b = datMat[0], datMat[1]
print(kMeans.distEclud(row_a, row_b))
Example #12
0
from time import sleep
import urllib
import json
# Driver for sections 10.1 and 10.3: plain and bisecting k-means.
#
# The original used Python 2 print statements, which are a SyntaxError on
# Python 3. Each "label, value" print is now a single %-formatted string
# passed to print(...), which emits the same text on Python 2 and Python 3.
import sys

# homedir = os.getcwd() + '/machinelearninginaction/ch10/'  # absolute path
homedir = ''  # relative path

# 10.1 k-means clustering algorithm
datMat = mat(kMeans.loadDataSet(homedir + 'testSet.txt'))
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)

print("datMat: %s" % (datMat,))
print("min(datMat[:,0]): %s" % (min(datMat[:, 0]),))
print("min(datMat[:,1]): %s" % (min(datMat[:, 1]),))
print("max(datMat[:,0]): %s" % (max(datMat[:, 0]),))
print("max(datMat[:,1]): %s" % (max(datMat[:, 1]),))
print("randCent(datMat,2): %s" % (kMeans.randCent(datMat, 2),))
print("distEclud( datMat[ 0], datMat[ 1]): %s" % (
    kMeans.distEclud(datMat[0], datMat[1]),))
print("myCentroids: %s" % (myCentroids,))
print("clustAssing: %s" % (clustAssing,))
# The original had two placeholder `print ":",` statements here. With the
# trailing comma they wrote ":" without a newline, space-separated from the
# next printed item; the same characters are written directly to keep the
# output stream unchanged.
sys.stdout.write(": : ")

# 10.3 bisecting k-means algorithm
datMat3 = mat(kMeans.loadDataSet(homedir + 'testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
print("datMat3: %s" % (datMat3,))
print("centList: %s" % (centList,))
print("myNewAssments: %s" % (myNewAssments,))

#10.4.1 Yahoo!PlaceFinder API
import kMeans
from numpy import *

# One import serves all three reload() calls below.
from importlib import reload

# ---- build the matrix ----
# The bare expressions in this script only display a value when the lines are
# run interactively.
datMat = mat(kMeans.loadDataSet('testSet.txt'))
for reducer, column in ((min, 0), (min, 1), (max, 1), (max, 0)):
    reducer(datMat[:, column])

# ---- support functions ----
kMeans.randCent(datMat, 2)
first_row, second_row = datMat[0], datMat[1]
kMeans.distEclud(first_row, second_row)

# ---- clustering ----
reload(kMeans)
datMat = mat(kMeans.loadDataSet('testSet.txt'))
myCentroids, clustAssing = kMeans.kMeans(datMat, 4)

# ---- bisecting k-means ----
reload(kMeans)
datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
centList, myNewAssments = kMeans.biKmeans(datMat3, 3)
centList

# ---- example: kMeans.geoGrab lookup ----
reload(kMeans)
geoResults = kMeans.geoGrab('1 VA Center', 'Augusta, ME')
result_set = geoResults['ResultSet']
result_set['Error']
def test1():
    """Print the outputs of two kMeans support functions for ``testSet.txt``.

    Loads the file through ``kMeans.loadDataSet``, wraps it in ``np.mat`` and
    prints the value returned by ``kMeans.randCent(dataMat, 2)`` followed by
    the value returned by ``kMeans.distEclud`` for the first two rows.
    """
    dataMat = np.mat(kMeans.loadDataSet('testSet.txt'))
    # Single-argument print(...) is valid on both Python 2 and Python 3; the
    # bare print statement used previously is a SyntaxError on Python 3.
    print(kMeans.randCent(dataMat, 2))
    print(kMeans.distEclud(dataMat[0], dataMat[1]))