def main(self, args):
    """Run DBSCAN over the three benchmark datasets using hand-tuned epsilons.

    Each dataset is shuffled with a fixed seed for reproducibility; the
    epsilon values were chosen manually from the k-distance curve
    (self.getEpsilon(dataSet) is the automatic alternative).
    """
    seed = 71
    configs = [("dataset1.txt", 0.49), ("dataset2.txt", 0.6), ("dataset3.txt", 0.2)]
    for index, (filename, epsilon) in enumerate(configs, start=1):
        prefix = "" if index == 1 else "\n"
        print(prefix + "For dataset%d" % index)
        dataSet = KMeans.readDataSet(filename)
        random.Random(seed).shuffle(dataSet)
        self.noOfLabels = DataPoints.getNoOFLabels(dataSet)
        self.getEpsilonFromCurve(dataSet)
        # set e manually according to the curve
        self.e = epsilon
        print("Esp :" + str(self.e))
        self.dbscan(dataSet, index)
def main():
    """Parse CLI options, cluster the CSV dataset with KMeans and plot the result."""
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str, help="path to dataset")
    parser.add_argument('--k', type=int, default=3, help="quantity of clusters (default 3)")
    parser.add_argument('--it', type=int, default=100, help="max iterations (default 100)")
    parser.add_argument('--tol', type=float, default=0.001, help="tolerance (default 0.001)")
    args = parser.parse_args()

    # Load the CSV, fill missing values and keep only numeric columns.
    csvManager = CSVManager()
    df = csvManager.replaceNan(csvManager.read(args.path))
    matrix = csvManager.convertCSVToMatrix(csvManager.deleteObjectColumns(df))

    kmeans = KMeans(args.k, args.it, args.tol)
    kmeans.fit(matrix)

    # Centroids as black circles, points as colored crosses (one color per cluster).
    for key in kmeans.centroids:
        center = kmeans.centroids[key]
        plt.scatter(center[0], center[1], marker="o", color="k", s=150, linewidths=5)
    for label in kmeans.classifications:
        color = randomColor()
        for point in kmeans.classifications[label]:
            plt.scatter(point[0], point[1], marker="x", color=color, s=60, linewidths=2)
    plt.show()
def main(self, args):
    """Fit a Gaussian mixture model on each of the three benchmark datasets.

    For every dataset: K is taken from the labels present in the file, the
    responsibility matrix W (points x components) and the component weights
    w are zero-initialized, then GMM() runs the actual fitting.
    """
    for index, filename in enumerate(["dataset1.txt", "dataset2.txt", "dataset3.txt"], start=1):
        header = ("" if index == 1 else "\n\n\n") + "For dataset%d" % index
        print(header)
        self.dataSet = KMeans.readDataSet(filename)
        self.K = DataPoints.getNoOFLabels(self.dataSet)
        self.W = [[0.0] * self.K for _ in range(len(self.dataSet))]
        self.w = [0.0] * self.K
        self.GMM()
def main(self, args):
    """Run DBSCAN over the three benchmark datasets with an auto-derived epsilon.

    Each dataset is shuffled with a fixed seed so runs are reproducible;
    epsilon comes from self.getEpsilon for every dataset.
    """
    seed = 71
    for index, filename in enumerate(["dataset1.txt", "dataset2.txt", "dataset3.txt"], start=1):
        prefix = "" if index == 1 else "\n"
        print(prefix + "For dataset%d" % index)
        dataSet = KMeans.readDataSet(filename)
        random.Random(seed).shuffle(dataSet)
        self.noOfLabels = DataPoints.getNoOFLabels(dataSet)
        self.e = self.getEpsilon(dataSet)
        print("Esp :" + str(self.e))
        self.dbscan(dataSet)
def fit(self, data):
    """Fit cluster centroids to *data* by refining a KMeans solution with PSO.

    A plain KMeans run seeds one PSO particle; the swarm then optimizes the
    flattened centroid coordinates against self.__objective_function. The
    best particle is reshaped into self.centroids (dict: index -> centroid).
    """
    # Seed the swarm with the centroids found by a plain KMeans run.
    kmeans = KMeans(n_clusters=self.n_clusters)
    kmeans.fit(data)
    seed_positions = np.array([kmeans.centroids[key] for key in kmeans.centroids]).ravel()

    self.dim = data.shape[1]
    # One particle dimension per centroid coordinate.
    self.pso = PSO(dim=self.dim * self.n_clusters, minf=0, maxf=1,
                   swarm_size=self.swarm_size, n_iter=self.n_iter,
                   w=self.w, lb_w=self.lb_w, c1=self.c1, c2=self.c2)
    self.pso.set_candidate(seed_positions)
    self.pso.optimize(self.__objective_function, customizable=True,
                      dim=self.dim, n_clusters=self.n_clusters, data=data)

    # Reshape the best particle back into one centroid per row.
    best = self.pso.global_optimum.pos.reshape((self.n_clusters, self.dim))
    self.centroids = {index: best[index] for index in range(len(best))}
def main():
    """Run KMeans for k in 2..4 on the CSV at argv[1], plotting and saving each result."""
    path = sys.argv[1]
    csvManager = CSVManager()
    df = csvManager.replaceNan(csvManager.read(path))
    formattedCSV = csvManager.deleteObjectColumns(df)
    matrix = csvManager.convertCSVToMatrix(formattedCSV)
    try:
        for k in range(2, 5):
            kmeans = KMeans(k)
            kmeans.fit(matrix)
            # Centroids as black circles, members as colored crosses.
            for key in kmeans.centroids:
                plt.scatter(kmeans.centroids[key][0], kmeans.centroids[key][1],
                            marker="o", color="k", s=150, linewidths=5)
            for label in kmeans.classifications:
                color = randomColor()
                for point in kmeans.classifications[label]:
                    plt.scatter(point[0], point[1], marker="x", color=color,
                                s=60, linewidths=2)
            plt.show()
            confusionMatrix, purity = kmeans.purity()
            saveData(confusionMatrix, purity, path, k)
    except Exception:
        print("An empty cluster was found, please run the program again. This program does not handle empty clusters")
def main():
    """Compute the Simplified Silhouette (SSWC) for k in 2..4 on the CSV at argv[1].

    Writes one 'K = k; SSWC = value' line per k to result/result.txt. Prints
    a message and aborts if KMeans produces an empty cluster.
    """
    path = sys.argv[1]
    csvManager = CSVManager()
    df = csvManager.read(path)
    df = csvManager.replaceNan(df)
    # Bug fix: deleteObjectColumns was called twice on the same frame;
    # the second call was a redundant duplicate.
    formattedCSV = csvManager.deleteObjectColumns(df)
    matrix = csvManager.convertCSVToMatrix(formattedCSV)
    try:
        with open('result/result.txt', 'w') as file:
            res = ''
            for k in range(2, 5):
                kmeans = KMeans(k)
                kmeans.fit(matrix)
                simplifiedSilhouette = SimplifiedSilhouette(formattedCSV, kmeans)
                sswc = simplifiedSilhouette.calculate()
                res += 'K = ' + str(k) + '; ' + 'SSWC = ' + str(sswc) + '\n'
            # Write once, after all k values are collected.
            file.write(res)
    except Exception:
        print("An empty cluster was found, please run the program again. This program does not handle empty clusters")
def test_KMeans_convert():
    """KMeans exposes a working `convert` abstract-method implementation."""
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    model = KMeans(frame)
    assert model.convert(3)
def train_KMeans_train():
    """KMeans exposes a working `train` abstract-method implementation.

    NOTE(review): the name does not start with ``test_``, so pytest will not
    collect it — likely a typo for ``test_KMeans_train``; confirm before renaming.
    """
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    model = KMeans(frame)
    assert model.train()
def test_KMeans_init():
    """A KMeans instance stores the dataframe it was constructed with."""
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    model = KMeans(frame)
    assert frame.equals(model.getData())
def test_KMeans_distance():
    """distance() matches sqrt(sum of squared differences / n) for two series."""
    frame = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    model = KMeans(frame)
    left = pd.Series([1, 2])
    right = pd.Series([1, 4])
    assert model.distance(left, right) == np.sqrt(4 / 2)
def treeClassification(data):
    """Cluster *data* into 6 groups with KMeans and print a per-cluster report."""
    model = KMeans(n_clusters=6, max_iter=200)
    model.fit(data.values, True)
    cluster_report(data, model.prediction)
def main():
    """Project the wine dataset to 2-D with PCA, cluster with KMeans and report."""
    dim = 2
    num_class = 3
    dataset_dir = '../input/wine.csv'
    train_x, train_y, raw_data = data_loader(dataset_dir)
    # Reduce to the first `dim` principal components before clustering.
    proj = PCA(first_k=dim, use_threshold=False, threshold=0.5).fit(train_x)
    center, predict_y = KMeans(K=num_class).fit(proj)
    result = evaluate(proj, train_y, predict_y, k=num_class)
    visualization(center, proj, predict_y, dim)
    save_to_csv(raw_data, predict_y)
    print(result)
def attr_analysis(data):
    """Cluster *data* with 6-means and box-plot every attribute per cluster.

    One figure per cluster is shown, laid out as a 3x6 grid of subplots
    titled with the column names.
    """
    model = KMeans(n_clusters=6, max_iter=200)
    model.fit(data.values, True)
    for cluster in model.clusters:
        for attr_index in range(len(cluster.data[0])):
            values = _column(cluster.data, attr_index)
            axis = plt.subplot(3, 6, attr_index + 1)
            axis.set_title(data.columns[attr_index], {'fontsize': 6})
            plt.boxplot(values)
        plt.show()
def loadFileKMeans(file, classNameIndex):
    """Build a KMeans instance seeded with two Iris classes and the vectors from *file*.

    classNameIndex is the 1-based column holding the class label, which is
    removed from every row before the remaining fields are parsed as floats.
    Returns the populated KMeans, or None (implicitly) when reading fails.
    """
    k = KMeans(constants.getN())
    fileHelper = FileHelper()
    c1 = KMeansClass(0, "Iris-setosa")
    c1.setVCenter([4.6, 3.0, 4.0, 0.0])
    c2 = KMeansClass(1, "Iris-versicolor")
    c2.setVCenter([6.8, 3.4, 4.6, 0.7])
    k.addClass(c1)
    k.addClass(c2)
    try:
        f = fileHelper.openReadOnlyFile(file)
        try:
            lineas = f.readlines()
        finally:
            f.close()  # bug fix: the file handle was never closed
        uMatrix = constants.getKMeansInitializeUMAtrix(len(lineas))
        k.setUMatrix(uMatrix)
        for linea in lineas:
            xVector = linea.strip("\r\n").split(",")
            del xVector[classNameIndex - 1]
            xVector = [float(x) for x in xVector]
            k.addXVector(xVector)
        return k
    except Exception:
        # bug fix: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt
        print("Error al leer el fichero")
def sse_plot(X, start=2, stop=20):
    """Plot the KMeans inertia (SSE) for every cluster count in [start, stop)."""
    cluster_counts = range(start, stop)
    inertia = []
    for count in cluster_counts:
        print("====ITERATION:", count)
        model = KMeans(n_clusters=count, max_iter=1000)
        model.fit(X, True)
        inertia.append(model.sum_squared_error())
    plt.figure(figsize=(12, 6))
    plt.plot(cluster_counts, inertia, marker='o')
    plt.xlabel('Number of Clusters')
    plt.ylabel('SSE')
    plt.title('Inertia plot with K')
    plt.xticks(list(cluster_counts))
    plt.show()
def __init__(self, data, ansdict, numCen, tabuindex, pheromap, alpha, beta, max_itter, decay):
    """Initialize one ant: random starting centroids, a KMeans helper, and ACO parameters."""
    # Pick `numCen` random data points (copied) as the starting centroids.
    self.centroid = [data[random.randrange(0, len(data))].copy() for _ in range(numCen)]
    self.defCentroid = self.centroid.copy()
    self.clust = KMeans(data, self.centroid, ansdict)
    # Ant-colony parameters.
    self.alpha = alpha
    self.beta = beta
    self.decay = decay
    self.max_itter = max_itter
    self.pheromap = pheromap
    self.tabu = {}
    self.tabuIndex = tabuindex
    # Bookkeeping.
    self.fitness = 0
    self.numCen = numCen
    self.data = data
def computeGMeans(self, X):
    """Recursively split X with 2-means until the Gaussian test rejects a split.

    Accepted splits recurse into both child clusters; otherwise the mean of
    X is kept as a final centroid in self.centroids.
    """
    centroid = np.mean(X, axis=0)
    km = KMeans()
    km.split(dataSet=X)
    # Project the points onto the axis joining the two child centers.
    axis = km.c_0 - km.c_1
    projection = scale(X.dot(axis) / (axis.dot(axis)))
    if GMeans.checkGaussianStatistic(projection, self.strickLevel):
        self.computeGMeans(km.cluster_0)
        self.computeGMeans(km.cluster_1)
    else:
        self.centroids.append(centroid)
def initialize_(self, X):
    """Initialize the responsibility matrix self.cond_prob_ (n x k).

    With 'kmeans' initialization each point is hard-assigned to its KMeans
    cluster; otherwise rows are random soft assignments normalized to sum
    to one.
    """
    n, p = X.shape
    if self.initialization_ == 'kmeans':
        kmeans_clstr = KMeans(nr_clusters=self.k_, n_init=1)
        kmeans_clstr.fit(X)
        labels = kmeans_clstr.labels_
        # One-hot encode the hard labels.
        self.cond_prob_ = np.zeros((n, self.k_))
        for row, label in enumerate(labels):
            self.cond_prob_[row, int(label)] = 1
    else:
        rand = np.random.rand(n, self.k_)
        self.cond_prob_ = rand / np.sum(rand, axis=1)[:, np.newaxis]
def main():
    """Cluster data2.txt with 5-means and visualize clusters, centers and seeds."""
    # Load data.
    X = handle_data('data2.txt')
    km = KMeans(5)
    km.fit(X)

    colors = 10 * [
        'gold', 'mediumseagreen', 'orangered', 'lightpink', 'coral',
        'mediumslateblue', 'violet', 'magenta'
    ]
    plt.figure(figsize=(10, 10))
    # Points, colored by their assigned cluster.
    for label in km.classes:
        color = colors[label]
        for point in km.classes[label]:
            plt.scatter(point[0], point[1], color=color, s=10)
    # Final cluster centers.
    for center in km.centroids:
        plt.scatter(km.centroids[center][0], km.centroids[center][1],
                    c='k', s=100, marker="x")
    # Initial random seed points.
    for idx in range(km.k):
        plt.scatter(km.randoms[idx][0], km.randoms[idx][1], marker='*', c='k', s=100)
    plt.legend(['* = Initial random points', 'X = Final cluster centers'])
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title('k-Means')
    plt.show()
    print('\t\t\tIteration:', km.iterations)
    print('\n\t\t\tk value: ', km.k)
def main():
    """Cluster the CSV given on the CLI with k-means and k-means++, then plot via PCA.

    Usage: script <csv_path> <K>. Prints the final distances of both
    algorithms and shows both clusterings on the first two principal
    components.
    """
    if len(sys.argv) < 3:
        # Bug fix: `assert Error(...)` always passed because the exception
        # instance is truthy; raise it so bad usage actually stops the program.
        raise Error("need input argument.")
    _, csv_path, K = sys.argv
    K = int(K)
    X = pd.read_csv(csv_path).values
    X = PCAHelper.parse_data(X)
    # Steps 1-5: k-means and k-means++ execution.
    km_clusters, km_centroids, km_distances = KMeans.execute(X, K)
    kmpp_clusters, kmpp_centroids, kmpp_distances = KMeanspp.execute(X, K)
    print("km dist={}, kmpp dist={}".format(km_distances[-1], kmpp_distances[-1]))
    # Reduce to two principal components purely for plotting.
    X = PCAHelper.pca_helper(X, 2)
    Utils.plot_data2(X, K, km_clusters, title="K-means clustering with PCA",
                     xaxis="First Principal Component",
                     yaxis="Second Principal Component")
    Utils.plot_data2(X, K, kmpp_clusters, title="K-means++ clustering with PCA",
                     xaxis="First Principal Component",
                     yaxis="Second Principal Component")
def extract(self, image_path, max_colors=6, method="kmeans", save_output=True, show_img=True):
    # Extract a theme of up to `max_colors` colors from the image at
    # `image_path` using KMeans (default) or MMCQ quantization, paint the
    # swatch onto self.canvas, optionally display it, and save it under
    # ../swatches/ reusing the image's file name.
    # (Python 2 source: uses print statements.)
    print "path: ", image_path
    source = cv2.imread(image_path)
    if method == "kmeans":
        km = KMeans(source, max_colors)
    else:
        # Any method other than "kmeans" falls back to MMCQ.
        km = MMCQ(source, max_colors)
    theme = km.quantize()
    for i, color in enumerate(theme):
        self.addToCanvas(color, i, max_colors)
    if show_img:
        self.canvas.show()
    if save_output:
        # Derive the swatch name from the last path component of the image.
        img_name = image_path.split("/")[-1]
        swatch_path = "../swatches/" + img_name
        self.canvas.save(swatch_path)
def __init__(self, initializer='support', cov_type='full'):
    """Set up an (unfitted) mixture model.

    initializer: 'support' or 'uniform' — scheme used to seed the mixture.
    cov_type: 'full', 'tied', 'diag' or 'spherical' — covariance structure.
    """
    valid_initializers = ['support', 'uniform']
    assert initializer in valid_initializers, \
        'Please select initialization scheme as support or uniform'
    valid_cov_types = ['full', 'tied', 'diag', 'spherical']
    assert cov_type in valid_cov_types, \
        'Please select covariance type as full, tied, diag, or spherical'
    self.initializer_ = initializer
    self.cov_type_ = cov_type
    self.kmeans_cls_ = KMeans()
    # Fitted parameters; populated during training.
    self.means_ = None
    self.cov_ = None
    self.mixture_weights_ = None
    self.membership_weights_ = None
    self.k_ = None
    self.ll_graph_ = []
def main(argv):
    """Dispatch to k-means, k-means++ or 1-D k-means based on the init scheme.

    Writes the resulting cluster assignments to output.csv keyed by movie id.
    """
    X, K, init, movie_ids = DataProcessing.process_input(argv)
    if init == "random":
        clusters, centroids, distances = KMeans.execute(X, K)
        print(
            "Ran k-means. Start Distance={:.0f}, End Distance={:.0f}. Clusters = {}."
            .format(distances[0], distances[-1], clusters))
        Utils.write_output_csv(clusters, "output.csv", movie_ids)
    elif init == "k-means++":
        clusters, centroids, distances = KMeanspp.execute(X, K)
        print(
            "Ran k-means++. Start Distance={:.0f}, End Distance={:.0f}. Clusters = {}."
            .format(distances[0], distances[-1], clusters))
        Utils.write_output_csv(clusters, "output.csv", movie_ids)
    elif init == "1d":
        # Project onto the first principal component and run the 1-D solver.
        X = PCAHelper.pca_helper(X, 1)
        # NOTE(review): astype returns a copy, so this line is a no-op as
        # written — confirm whether `X = X.astype(np.float16)` was intended.
        X.astype(np.float16)
        distances_by_k, cluster, centroids = OneDKmeans(X, K).run()
        print("Ran 1d K-means. Distance={}".format(distances_by_k[-1]))
        Utils.write_output_csv(cluster, "output.csv", movie_ids)
    else:
        # Bug fix: `assert Error(...)` never fired (the exception instance is
        # truthy); raise it so an invalid init parameter is reported.
        raise Error("init parameter was not inputted correctly!")
def test_KMeans_dtype():
    """Constructing KMeans with anything but a pandas dataframe raises TypeError."""
    not_a_frame = "A wrong data type of type string"
    with pytest.raises(TypeError):
        KMeans(not_a_frame)
def execute(X, K):
    """Run k-means on X with K clusters seeded by the k-means++ strategy.

    Returns (clusters as ndarray, final centroids, per-iteration distances).
    """
    seed_centroids = KMeanspp.initialize_centroids(X, K)
    clusters, centroids, k_means_dist = KMeans.run_k_means(X, seed_centroids, n_iter=15)
    return np.array(clusters), centroids, k_means_dist
def main():
    """Generate 4 Gaussian blobs, cluster them with kmeans++ KMeans and plot."""
    random_seed = 0
    iteration = 50
    init_method = 'kmeans++'
    X, y_true = make_blobs(n_samples=300, centers=4, cluster_std=0.60,
                           random_state=random_seed)
    # Raw data in a single color.
    plt.scatter(X[:, 0], X[:, 1], s=4, c='blue')

    kmeans = KMeans()
    kmeans.fit(X, 4, random_seed=random_seed, iteration=iteration,
               init_method=init_method)
    y_pred = kmeans.predict(X)

    # Predicted clusters plus the fitted centroids in red.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(X[:, 0], X[:, 1], c=y_pred, s=4, cmap='viridis')
    centers = kmeans.centroids
    ax.scatter(centers[:, 0], centers[:, 1], c='red', s=15, alpha=0.5)
    plt.show()
def runRegressionAlgorithms(dataset):
    """Run KNN for k in {5, 10, 15} and KMeans with k = ceil(n/4) on *dataset*."""
    for k_value in (5, 10, 15):
        print("Running KNN with K of {}".format(k_value))
        dataset.runAlgorithm(KNearestNeighbor(k_value))
    # NOTE(review): the KMeans instance is discarded — it appears to do its
    # work inside __init__; confirm against the KMeans implementation.
    cluster_count = math.ceil(len(dataset.data) / 4)
    KMeans(dataset, cluster_count)
def main():
    """Cluster test.txt with k-means and build a rescue plan per cluster.

    (Python 2 source: uses print statements.)
    """
    kmeans = KMeans("test.txt")
    clusters = kmeans.k_means()
    plan = {}
    saved_num = 0  # NOTE(review): never updated or read afterwards — dead variable
    ambu_id_start = 1
    plans = []
    # Process clusters in ascending order of their id.
    sorted_cluster = sorted(clusters.items(), key=lambda x: x[0].id)
    for item in sorted_cluster:
        cluster_plan = RescuePlan(item[0], item[1], ambu_id_start)
        print item[0].result_print()
        # Advance the ambulance id range by this cluster's ambulance count.
        ambu_id_start += item[0].ambu
        plans.append(cluster_plan)
    print ""
    for plan in plans:
        # NOTE(review): `num` is overwritten each iteration and never used —
        # plan() is presumably called for its side effects; confirm.
        num = plan.plan()
def clustering(self, x_data):
    """Spectral clustering: embed points via the graph Laplacian, then run KMeans.

    Builds a similarity graph from the data, takes the K eigenvectors of the
    unnormalized Laplacian with the smallest eigenvalues as the embedding,
    and clusters the embedded points with the project's KMeans.

    Returns (clusters, final_grouping): clusters[j] lists the indices of the
    points assigned to cluster j; final_grouping is KMeans' one-hot
    assignment matrix (points x clusters).
    """
    # Pairwise similarities; x_data is transposed here, so presumably
    # samples are columns of x_data — TODO confirm against similarity_matrix().
    similarity_matrix = self.similarity_matrix(x_data.T)
    # Degree of each node = sum of its similarity row.
    degree_vector = []
    for weight_row in similarity_matrix:
        degree = np.sum(weight_row)
        degree_vector.append(degree)
    degree_matrix = np.diag(np.array(degree_vector))
    # Unnormalized graph Laplacian L = D - W.
    laplacian_matrix = degree_matrix - similarity_matrix
    eig_values, eig_vectors = np.linalg.eig(laplacian_matrix)
    # Sort eigenpairs ascending and keep the K smallest eigenvectors
    # as the spectral embedding.
    idx = eig_values.argsort()
    eig_values = eig_values[idx]
    eig_vectors = eig_vectors[:, idx]
    eig_vectors_smallest = eig_vectors[:, 0:self.K]
    # (A commented-out 2-cluster plotting experiment previously lived here.)
    kmeans = KMeans(K=self.K, L=1)
    # np.real: eig() on a numerically non-symmetric matrix can return tiny
    # imaginary components.
    centroids, num_of_points_group, grouping, final_grouping = kmeans.clustering(
        np.real(eig_vectors_smallest.T))
    # Convert the one-hot membership matrix into per-cluster index lists.
    clusters = []
    for j in range(len(final_grouping[0, :])):
        cluster_points = []
        for i in range(len(final_grouping[:, j])):
            if final_grouping[i, j] == 1:
                cluster_points.append(i)
        clusters.append(cluster_points)
    return clusters, final_grouping
def test(self, k, data, random=False):
    """Run KMeans.iterate and plot each codebook entry with its assigned vectors."""
    kmeans = KMeans()
    book = kmeans.iterate(k, data, random)
    print('---Start---')
    for i in range(len(book)):
        # b[0] is the representative vector, b[1] its assigned vectors —
        # presumably; confirm against KMeans.iterate's return shape.
        b = book[i]
        print(b[0])
        listX = []
        listY = []
        repVecX = [b[0][0]]
        repVecY = [b[0][1]]
        for vec in b[1]:
            listX.append(vec[0])
            listY.append(vec[1])
        # NOTE(review): listDX/listDY are not defined in this method — this
        # line raises NameError unless they are globals defined elsewhere in
        # the module; confirm where they come from.
        plt.plot(listDX, listDY, 'ro', listX, listY, 'g^', repVecX, repVecY, 'bs')
        plt.axis([-5, 30, -5, 30])
        plt.show()
        plt.clf()
    print('---End---')
def visualization_2d(data):
    """Project *data* onto two PCA components, cluster with KMeans and scatter-plot."""
    # Reduce dimensions of the dataset based on data variance (PCA).
    pca_data = PCA(n_components=2).fit_transform(data)
    # Cluster the projected data; n_clusters is 6 (or 7 with the gray color).
    model = KMeans(n_clusters=6, max_iter=200)
    model.fit(pca_data, True)
    colors = ['red', 'green', 'blue', 'purple', 'orange', 'yellow', 'gray']
    for idx, cluster in enumerate(model.clusters):
        pc1 = [row[0] for row in cluster.data]
        pc2 = [row[1] for row in cluster.data]
        plt.scatter(pc1, pc2, c=colors[idx], label='cluster ' + str(idx))
    plt.show()
def main():
    """Sample Gaussian blobs, cluster them with KMeans, and save comparison plots."""
    logging.basicConfig(filename="result/log.txt", filemode='w',
                        format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S', level=logging.DEBUG)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('-n_clusters', type=int, default=5)
    parser.add_argument('-n_points', type=int, default=100)
    opt = parser.parse_args()

    # Sample points from `n_clusters` 2-D multivariate Gaussians.
    generator = Tester(n_gaussian_clusters=opt.n_clusters)
    data, labels = generator.generate_2d_gaussian_points(
        how_many_per_each_gaussian=opt.n_points)
    logging.info(" Generated {} data points from {} different 2 dimensional "
                 "multivariate gaussian distributions. ({} data points for "
                 "each cluster.)".format(opt.n_clusters * opt.n_points,
                                         opt.n_clusters, opt.n_points))

    # Raw data and ground-truth Gaussian plots.
    utils.draw(data, labels, without_label_color=True, means=None,
               title="Data", save="result/raw.png", show=False)
    utils.draw(data, labels, without_label_color=False, means=generator.means,
               title="Gaussian", save="result/gaussian.png", show=False)

    # KMeans prediction plot.
    kmeans = KMeans(n_cluster=opt.n_clusters)
    prediction_lables, prediction_centers = kmeans.fit(data)
    utils.draw(data, prediction_lables, without_label_color=False,
               means=prediction_centers, title="KMeans",
               save="result/kmeans.png", show=False)

    # Stitch the three plots into one comparison image.
    panels = ["result/raw.png", "result/gaussian.png", "result/kmeans.png"]
    utils.concatenate_pngs(panels, "result/final.png")
def main():
    """Fit 3-means to the iris measurements and print predicted vs. actual species."""
    km = KMeans(3)
    iris = pd.read_csv("iris.csv")
    data = np.array(
        iris[["Sepal.Length", "Sepal.Width", "Petal.Length",
              "Petal.Width"]].values.tolist()
    )
    km.fit(data)
    print("cluster centers: %s" % km.cluster_centers)
    for d in iris.values:
        # Bug fix: the first feature was d[2] passed twice; the four
        # measurement columns are d[1]..d[4] (d[0] is the row index,
        # d[5] the species label, as used below).
        prediction = km.predict([[d[1], d[2], d[3], d[4]]])
        print(d[5] + " - " + str(prediction[0]))
def kmeans_test(vid_src):
    """Run the KMeans background-subtraction model over a video stream.

    Reads frames from *vid_src*, shows each grayscale frame alongside the
    extracted foreground mask, and stops at end-of-video or when 'q' is
    pressed.
    """
    _, frame = vid_src.read()
    used_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Seed the model with the first grayscale frame; 3 is presumably the
    # per-pixel cluster count — confirm against the KMeans model class.
    model = KMeans(used_frame, 3)
    # Apply background detection frame by frame.
    while frame is not None:
        used_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        fg = model.apply(used_frame)
        cv2.imshow('img', used_frame)
        cv2.imshow('fg', fg)
        # Removed: `prev_frame = np.copy(frame)` — the copy was never read
        # (dead store), and the trailing `pass` was redundant.
        _, frame = vid_src.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
""" A working demo using KMeans """ import numpy as np import scipy.io as sio import matplotlib.pyplot as plt data = sio.loadmat('data.mat') X = np.array(data['X']) from KMeans import KMeans k = 3 est = KMeans(k) c = est.train(X) colors=np.array(['green', 'red', 'blue']) # lets plot on matplotlib for i in range(k): x = X[np.where(c == i)[0]] plt.scatter(x[:, 0], x[:, 1], color=colors[i]) # plt.savefig('clustering_example.png') plt.show()
from scipy import io ############# FILE STUFF ############# trainFileMNIST = "./mnist_data/images.mat" trainMatrix = io.loadmat(trainFileMNIST) # Dictionary ############# GET DATA ############# print 20 * "#", "Getting Data", 20 * "#" imageData = np.array(trainMatrix['images']) imageData = np.rollaxis(imageData, 2, 0) # move the index axis to be the first dataShape = np.shape(imageData) print "Image Data Shape", dataShape imageDataFlat = [] for elem in imageData: imageDataFlat.append(elem.flatten()) dataShape = np.shape(imageDataFlat) print "Image Data Flat Shape", dataShape num_clusters = [5, 10, 20] for cluster in num_clusters: print 20 * "#", "Num Clusters:", cluster, 20 * "#" KM = KMeans(cluster, max_iter=10) KM.fit(imageDataFlat) visualize(KM.cluster_centers_, cluster)
from KMeans import KMeans

# Build the k-means model for record id '2295420'.
model = KMeans()
model.build_kMeans('2295420')
'''
Created on 21-May-2015

@author: amilgeorge
'''
import numpy as np

from KMeans import KMeans

if __name__ == '__main__':
    # NOTE(review): cov is 2x3 and therefore not a valid covariance matrix,
    # but it is only referenced by the commented-out sampling call below.
    mean = [0, 0, 0]
    cov = [[1, 1, 1], [0, 1, 0]]
    import matplotlib.pyplot as plt
    #x = np.random.multivariate_normal(mean,cov,5000)
    # Smoke test: initialize 2 centroids from a 2x3 matrix of ones.
    k = KMeans()
    s = np.ones((2, 3))
    k.init_centroids(2, s)
    #plt.plot(x,y,'x'); plt.axis('equal'); plt.show()
    # (Python 2 source: print statement.)
    print "Theheheh"
import random
import numpy as np
import matplotlib.pyplot as plt
import scipy.spatial as sp

# Jitter amplitude for the synthetic points.
RAND = 4

# Three noisy diagonals of 100 points each.
# NOTE(review): this X is immediately overwritten by giveit() below, so the
# three builds are dead code — confirm intent.
X = [(i + random.random() * RAND, i + random.random() * RAND) for i in range(100)]
X += [(i + random.random() * RAND, i + random.random() * RAND) for i in range(100)]
X += [(i + random.random() * RAND, i + random.random() * RAND) for i in range(100)]

# NOTE(review): giveit and KMeans are not imported in this chunk — presumably
# defined elsewhere in the module; confirm.
data, X = giveit()

plt.figure(0)
plt.plot([e[0] for e in X], [e[1] for e in X], 'ro')

# 100 random initial centers in [0,250] x [0,100].
# (Python 2 source: uses print statements.)
k = KMeans(X, [[random.randint(0, 250), random.randint(0, 100)] for e in range(100)])
new_centers = k.compute()
print new_centers
clus = k.get_clusters()
# Keep only the clusters that received at least one point.
clus = [k for k, v in k.get_clusters().items() if len(v) > 0]
new_centers = np.array(new_centers)[clus]

plt.figure(0)
plt.plot([e[0] for e in new_centers], [e[1] for e in new_centers], 'gd')

# NOTE(review): these two rebinds discard the computed centers and shadow the
# KMeans result — confirm whether code below this chunk depends on them.
new_centers = X
k = 6