def make_clusters_KMeans(dat_to_cluster, nb_clust, n=15000):
    """Cluster ``dat_to_cluster`` with KMeans, fitting on a subsample if it is large.

    When the data holds more than ``n`` items, the estimator is fitted on a
    random sample of ``n`` items and labels are then predicted for the whole
    data set in consecutive chunks of at most ``n`` items. Otherwise the
    estimator is fitted on, and predicts, the full data in one call.

    Args:
        dat_to_cluster: Sized collection of samples. In the subsampling
            branch it is materialised with ``list(...)``, so iterating it
            must yield one sample per item (e.g. a 2-D array or a list of
            rows) -- NOTE(review): confirm against callers.
        nb_clust: Number of clusters passed to ``KMeans(n_clusters=...)``.
        n: Maximum number of samples used for fitting and per prediction
            chunk.

    Returns:
        A ``(labels, estimator)`` tuple. ``labels`` is a list in the
        subsampling branch and whatever ``fit_predict`` returns otherwise;
        ``estimator`` is the fitted KMeans object.

    Raises:
        ValueError: If subsampling is required but ``n`` is not positive.
    """
    estimator = KMeans(n_clusters=nb_clust)
    total = len(dat_to_cluster)

    if n >= total:
        # Small enough to fit and label everything in a single pass.
        res = estimator.fit_predict(dat_to_cluster)
        return res, estimator

    if n <= 0:
        raise ValueError("n must be a positive integer, got %r" % (n,))

    # Materialise once; the same list serves both sampling and chunking.
    rows = list(dat_to_cluster)

    # np.float64 is what the np.float_ alias pointed to before NumPy 2.0
    # removed it.
    training = np.asarray(sample(rows, k=n), dtype=np.float64)
    estimator = estimator.fit(training)

    res = []
    # Stepping by n never yields an empty trailing chunk, even when the data
    # length is an exact multiple of n (predict rejects empty input).
    for start in range(0, total, n):
        chunk = np.asarray(rows[start:start + n])
        res.extend(estimator.predict(chunk))
    return res, estimator
def make_clusters(dat_to_cluster, nb_clust=4):
    """Fit a KMeans model on ``dat_to_cluster`` and label every sample.

    Args:
        dat_to_cluster: Data handed unchanged to ``KMeans.fit_predict``.
        nb_clust: Number of clusters passed to ``KMeans(n_clusters=...)``.

    Returns:
        A ``(labels, estimator)`` tuple: the result of ``fit_predict`` and
        the fitted KMeans object.
    """
    model = KMeans(n_clusters=nb_clust)
    labels = model.fit_predict(dat_to_cluster)
    return labels, model