Beispiel #1
0
        distances = distances[:-DiscardLast]
    elif type(DiscardLast) is float:
        distances = distances[:-int(DiscardLast * len(distances))]
    callDerivative = lambda value: list(
        map(lambda x: x[1] - x[0], zip(value, value[0] + value[:-1])))
    distancesDD = callDerivative(callDerivative(distances))
    mx = max(distancesDD)
    for i in range(len(distancesDD) - 1, -1, -1):
        if distancesDD[i] == mx:
            return i, distances[i]
    return None


# DBSCAN parameter sweep: for every candidate min_samples value, take the eps
# returned by getEsp, cluster a fresh copy of the data and print one summary
# line with the three quality scores and the label distribution.
# Narrower sweeps tried earlier: range(145, 200, 5) and range(155, 165, 1);
# per the original note, 160 makes the silhouette score max.
for kn in range(90, 280, 10):
    # getEsp returns a pair; its second element is used as the DBSCAN eps.
    eps = getEsp(kn, DiscardLast=0.1)[1]

    # Cluster a copy of testK (reloading via ReadData('data.csv') is disabled).
    test = testK.Copy()
    result = test.DBSCAN(eps=eps, min_samples=kn)
    result.ShowLabelInfo(output=False)

    # Scores are requested in the same order as before:
    # Silhouette, then Davies-Bouldin, then Calinski-Harabasz.
    sil, dav, cal = (
        result.getScore(method=name)
        for name in ('Silhouette', 'DaviesBouldin', 'CalinskiHarabasz')
    )

    # 'Num' entry of the label distribution; printed as total (CNT),
    # number of entries (LB) and the raw values (LC).
    label_sizes = result.distributionInfo['Num']

    print(
        'K:', kn,
        'ESP:' + format(eps, '.4f'),
        'SIL:' + format(sil, '.4f'),
        'CAL:' + format(cal, '.4f'),
        'DAV:' + format(dav, '.4f'),
        'CNT:', sum(label_sizes),
        'LB:', len(label_sizes),
        'LC', label_sizes,
    )
from Data import Data
import matplotlib.pyplot
import time

# Elbow-method sweep: run KMeans for each k in [1, TestUpperBound), print the
# clustering time and quality scores, and plot the SSE (inertia) against k.
SSE = []  # sum of the squared errors, one entry per k
TestUpperBound = 15  # exclusive upper bound of the k sweep
test = Data()
test.ReadData('data.csv')
# NOTE: a PCA reduction (test = test.pca(0.95)) was tried here and is disabled.

# Single definition of the k values, shared by the loop and the plot's x axis
# so the two can never get out of sync.
x = range(1, TestUpperBound)

for k in x:
    # perf_counter is monotonic and high-resolution, so the measured interval
    # is not distorted by system clock adjustments the way time.time() can be.
    start = time.perf_counter()
    testK = test.Copy()  # cluster a copy so every k starts from the same data
    result = testK.KMeans(k)
    result.ShowLabelInfo(output=False)
    time1 = time.perf_counter() - start  # covers Copy + KMeans + ShowLabelInfo only

    # Scoring is deliberately outside the timed section.
    silScore = result.getScore(method='Silhouette')
    calScore = result.getScore(method='CalinskiHarabasz')
    davScore = result.getScore(method='DaviesBouldin')

    print(
        'TIME:{:.2f}'.format(time1),
        'K:', k,
        'SIL:{:.4f}'.format(silScore),
        'CAL:{:.4f}'.format(calScore),
        'DAV:{:.4f}'.format(davScore),
        'CNT:', sum(result.distributionInfo['Num']),
        'LC', result.distributionInfo['Num'],
    )
    # inertia_ of the fitted model kept in testK.midResult is the SSE for this k.
    SSE.append(testK.midResult.inertia_)

# Elbow plot: SSE versus k.
matplotlib.pyplot.figure(figsize=(5, 5))
matplotlib.pyplot.xlabel('k')
matplotlib.pyplot.ylabel('SSE')
matplotlib.pyplot.plot(x, SSE, 'o-')
matplotlib.pyplot.show()