def gen_km(meigaras=None, cluster_centers_csv=""):
    """Build a ``MiniBatchKMeans`` sized from the row count of ``trade.kabuka``.

    The number of clusters is ``int(sqrt(row_count * SAMPLE_RATE))``.

    Args:
        meigaras: Optional collection of codes used to restrict the row count
            via ``kf.where_col_in(meigaras, "code")``. ``None`` or an empty
            collection counts every row.
        cluster_centers_csv: Optional file name, relative to ``CSV_DIR``, of
            previously saved cluster centres. When the name is non-empty and
            the path exists, the centres are loaded with ``f.csv2arr`` and
            assigned to ``km.cluster_centers_``.

    Returns:
        The configured ``MiniBatchKMeans`` instance.
    """
    # Avoid a shared mutable default; None stands for "no filter".
    if meigaras is None:
        meigaras = []

    strsql = "select count(*) from trade.kabuka "
    if len(meigaras) > 0:
        # The condition needs an explicit " where " keyword in front of it,
        # exactly as _gen_km builds the same query.
        strsql = strsql + " where " + kf.where_col_in(meigaras, "code")

    # NOTE(review): _gen_km indexes the result with [0] before doing
    # arithmetic on it, so exec_selsql presumably returns a row rather than a
    # bare scalar -- confirm against the sql helper.
    cnt = sql.exec_selsql(strsql, 0)[0]

    # np.sqrt yields a float; n_clusters must be an integer.
    km = MiniBatchKMeans(
        n_clusters=int(np.sqrt(cnt * SAMPLE_RATE)),
        batch_size=1000,
    )

    if cluster_centers_csv != "":
        path = "%s/%s" % (CSV_DIR, cluster_centers_csv)
        if os.path.exists(path):
            # Store the centres as an ndarray, matching what _gen_km does.
            km.cluster_centers_ = np.asarray(f.csv2arr(path))
    return km
def _gen_km(self, bolverbose=False):
    """Create a ``MiniBatchKMeans`` and store it on ``self.km``.

    The number of clusters is ``int(sqrt(row_count * SAMPLE_RATE))``, where
    ``row_count`` is the number of rows in ``trade.kabuka``, optionally
    restricted to the codes in ``self.meigaras``.

    If ``self.cluster_centers_csv`` names an existing file under ``CSV_DIR``,
    its contents are loaded with ``f.csv2arr`` and used both as the ``init``
    array of the estimator and as its ``cluster_centers_`` attribute.

    Args:
        bolverbose: Forwarded to ``MiniBatchKMeans(verbose=...)``.
    """
    strsql = "select count(*) from trade.kabuka "
    if len(self.meigaras) > 0:
        strsql = strsql + " where " + kf.where_col_in(self.meigaras, "code")
    cnt = sql.exec_selsql(strsql, 0)[0]

    # Computed once instead of being repeated in each constructor call.
    n_clusters = int(np.sqrt(cnt * SAMPLE_RATE))

    X = None
    # An empty file name would make the path resolve to CSV_DIR itself, which
    # exists as a directory; require a non-empty name and a regular file so a
    # directory is never handed to csv2arr.
    if self.cluster_centers_csv:
        path = "%s/%s" % (CSV_DIR, self.cluster_centers_csv)
        if os.path.isfile(path):
            X = np.array(f.csv2arr(path))

    if X is None:
        self.km = MiniBatchKMeans(
            n_clusters=n_clusters,
            batch_size=1000,
            verbose=bolverbose,
        )
        return

    # NOTE(review): the saved centres are used as-is; if their row count
    # differs from n_clusters the estimator may reject them when fitted --
    # confirm the CSV is regenerated whenever the row count changes.
    km = MiniBatchKMeans(
        n_clusters=n_clusters,
        batch_size=1000,
        init=X,
        verbose=bolverbose,
    )
    km.cluster_centers_ = X
    self.km = km