'''
Created on 11-30-2015

@author: Wuga
'''

import numpy as np
import fileoperator
import similarity
import sys
import evaluation
import cluster

# Experiment: run 2-cluster k-means on the raw features ten times and report
# the purity of the run that has the lowest error score.
dataset = fileoperator.GiveMeData()
avepurity = []  # one evaluation.Purity result per run
SSE = []        # one error score per run (see NOTE below)
for _ in range(10):
    centroids = cluster.Kmeans(dataset.features, 2)
    labels = cluster.getLabels(dataset.features, centroids)
    avepurity.append(evaluation.Purity(dataset.label, labels, 2))
    # NOTE(review): this score is the squared difference between the
    # reference labels and the assigned cluster labels, not a sum of squared
    # distances to the centroids. If cluster ids are assigned arbitrarily the
    # score depends on which cluster happens to be numbered first -- confirm
    # that this is the intended selection criterion.
    SSE.append(sum((dataset.label - labels) ** 2))
# argsort orders the runs by ascending score, so index[0] is the lowest one.
index = np.asarray(SSE).argsort()
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('Best Sum Square Error:{0}'.format(SSE[index[0]]))
print('Best Purity of Kmean with 10 iterations: {0}'.format(avepurity[index[0]]))
'''
Created on 11-30-2015

@author: Wuga
'''

import fileoperator
import cluster
import evaluation
import reduction


# Experiment: increase the number of PCA components until reduction.PCA
# reports at least 0.9 for its second return value, then run 2-cluster
# k-means on the reduced data and report its purity.
dataset = fileoperator.GiveMeData()
m, n = dataset.features.shape
infosaved = 0
iteration = 1
purity = 0
# Stop once all n columns have been used: the reshape below needs exactly
# `iteration` columns, so asking for more than n cannot succeed.
while infosaved < 0.9 and iteration <= n:
    newdata, infosaved = reduction.PCA(dataset, iteration)
    iteration += 1
# `iteration` is now one past the number of components actually used.
if iteration > 1:
    # Cluster only the final projection; results for the intermediate
    # dimensionalities were never read, so computing them was wasted work.
    newdata = newdata.reshape(m, iteration - 1)
    centroids = cluster.Kmeans(newdata, 2)
    labels = cluster.getLabels(newdata, centroids)
    purity = evaluation.Purity(dataset.label, labels, 2)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('Number of Columns:{0}'.format(iteration - 1))
print('Purity of Kmean: {0}'.format(purity))
print('Information reserved:{0}'.format(infosaved))