'''
Created on 11-30-2015

@author: Wuga

Run 2-cluster K-means ten times on the data set returned by
fileoperator.GiveMeData() and report the purity of the run whose
predicted labels differ least (in squared difference) from dataset.label.
'''
import numpy as np
import fileoperator
import similarity
import sys
import evaluation
import cluster

# Number of clusters requested from K-means and passed to the purity measure;
# both calls must agree, so it is named once here.
N_CLUSTERS = 2

dataset = fileoperator.GiveMeData()
avepurity = []  # purity of each run, in run order
SSE = []        # squared label difference of each run, in run order
for _ in range(10):
    centroids = cluster.Kmeans(dataset.features, N_CLUSTERS)
    labels = cluster.getLabels(dataset.features, centroids)
    avepurity.append(evaluation.Purity(dataset.label, labels, N_CLUSTERS))
    # np.sum reduces over every axis, so each run contributes exactly one
    # scalar regardless of whether the label arrays are 1-D or column vectors.
    # NOTE(review): this is the squared difference between the reference
    # labels and the predicted cluster ids, not a within-cluster distance.
    # Cluster ids presumably need not line up with the reference labels --
    # confirm this is the intended selection criterion.
    SSE.append(np.sum((dataset.label - labels) ** 2))

# Index of the run with the smallest error (first one on ties).
best_run = int(np.argmin(SSE))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Best Sum Square Error:{0}'.format(SSE[best_run]))
print('Best Purity of Kmean with 10 iterations: {0}'.format(avepurity[best_run]))
'''
Created on 11-30-2015

@author: Wuga

Increase the number of columns requested from reduction.PCA one at a time
until the information it reports as kept reaches 90%, clustering the
reduced data with 2-cluster K-means at each step, then report the column
count, the purity of the last clustering and the information kept.
'''
import fileoperator
import cluster
import evaluation
import reduction

# Fraction of information (as reported by reduction.PCA) that must be kept
# before the sweep stops.
INFO_TARGET = 0.9
# Number of clusters requested from K-means and passed to the purity measure.
N_CLUSTERS = 2

dataset = fileoperator.GiveMeData()
m, n = dataset.features.shape  # m rows, n feature columns
infosaved = 0
iteration = 1  # number of columns requested on the next pass
purity = 0

# Stop as soon as the target is met. The second condition bounds the sweep
# by the number of feature columns so the loop cannot run forever if the
# target is never reached.
# NOTE(review): assumes reduction.PCA cannot usefully return more columns
# than the data has features -- confirm against reduction.PCA.
while infosaved < INFO_TARGET and iteration <= n:
    newdata, infosaved = reduction.PCA(dataset, iteration)
    # Force an (m, iteration) matrix so a single column is still 2-D.
    newdata = newdata.reshape(m, iteration)
    centroids = cluster.Kmeans(newdata, N_CLUSTERS)
    labels = cluster.getLabels(newdata, centroids)
    purity = evaluation.Purity(dataset.label, labels, N_CLUSTERS)
    iteration += 1

# iteration was incremented after the last pass, hence the -1.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Number of Columns:{0}'.format(iteration - 1))
print('Purity of Kmean: {0}'.format(purity))
print('Information reserved:{0}'.format(infosaved))