return sum(s)/len(s) #----------------------------------------------------------- def get_s1(): file_name = 'datasets/s1.txt' with open(file_name) as f: #header = f.readline() points = [] for line in f: items = line.strip().split(' ') r = [ float(items[0]), float(items[1]), ] points.append( Point(r) ) #random.shuffle(points) return points #----------------------------------------------------------- points = get_s1() #print(points) Point.set_features(0,1) for k in range(3, 4): model = KMeans(points, 15, 0.01) model.cluster() # model.show() print("Done") print('k = ', k, 'silhouette = ', silhouette(model.points, model.clusters))
# Md Lutfar Rahman
# [email protected]
# DataMining Assignment 4
"""Cluster the user points from ``UserMatrix`` and report centroid densities."""

import random

from kmeans import Point, Cluster, KMeans
from UserMatrix import UserMatrix

# Number passed to KMeans as its second argument.
k = 3

userMat = UserMatrix()
points = userMat.userpoints

# One feature index per entry of userMat.movieIds: 0, 1, ..., len - 1.
fet = [*range(len(userMat.movieIds))]
Point.set_features(*fet)

model = KMeans(points, k, 0.001)
model.cluster()

# An empty line is printed before each of the two density reports.
print('')
model.getIntraCentriodDensity()
print('')
model.getInterCentroidDensity()
#-----------------------------------------------------------
def get_iris_data():
    """Load ``datasets/iris.csv`` as a shuffled list of points.

    The first line of the file is treated as a header and discarded. Every
    other non-blank line is split on commas; the first four fields are
    converted to ``float`` and the fifth is kept as a string. Each row
    ``[f0, f1, f2, f3, label]`` is wrapped in a ``Point``.

    Returns:
        list: the ``Point`` objects, shuffled in place with
        ``random.shuffle``.

    Raises:
        ValueError: if one of the first four fields is not a number.
        IndexError: if a non-blank line has fewer than five fields.
    """
    file_name = 'datasets/iris.csv'
    points = []
    with open(file_name) as f:
        next(f, None)  # discard the header line (no-op on an empty file)
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank (e.g. trailing) line: float('') would raise otherwise.
                continue
            items = stripped.split(',')
            row = [float(value) for value in items[:4]]
            row.append(items[4])
            points.append(Point(row))
    random.shuffle(points)
    return points


#-----------------------------------------------------------
points = get_iris_data()
Point.set_features(0, 1, 2, 3)

# Cluster with each k from 2 to 9 and print the silhouette value for each.
for k in range(2, 10):
    model = KMeans(points, k, 0.01)
    model.cluster()
    print('k = ', k, 'silhouette = ', silhouette(model.points, model.clusters))