def _gen_sgdcls(self):
    """Build an SGDClassifier and store it on ``self.sgdcls``.

    Two optional CSV files under ``CSV_DIR`` are consulted:

    * ``self.coef_csv`` -- loaded into the classifier's ``coef_``.
    * ``self.coef_intercept`` -- loaded into the classifier's ``intercept_``.

    The saved parameters are attached only when the coefficient file exists;
    otherwise the classifier is stored exactly as constructed.
    """
    coef_path = "%s/%s" % (CSV_DIR, self.coef_csv)
    intercept_path = "%s/%s" % (CSV_DIR, self.coef_intercept)

    coef = None
    if os.path.exists(coef_path):
        coef = np.array(f.csv2arr(coef_path))

    intercept = None
    if os.path.exists(intercept_path):
        # Fix: this value used to be stored in ``coef``, which overwrote the
        # loaded weights and left ``intercept`` as None.
        intercept = np.array(f.csv2arr(intercept_path))

    # NOTE(review): ``n_iter`` is only accepted by older scikit-learn
    # releases (newer ones use ``max_iter``) -- confirm against the version
    # this project pins before upgrading.
    sgdcls = SGDClassifier(
        alpha=0.0001,
        average=False,
        class_weight=None,
        epsilon=0.1,
        eta0=0.0,
        fit_intercept=True,
        l1_ratio=0.15,
        learning_rate='optimal',
        loss='hinge',
        n_iter=5,
        n_jobs=N_JOBS,
        penalty='l2',
        power_t=0.5,
        random_state=None,
        shuffle=True,
        verbose=0,
        warm_start=False,
    )

    if coef is not None:
        sgdcls.coef_ = coef
        sgdcls.intercept_ = intercept

    self.sgdcls = sgdcls
def gen_km(meigaras=None, cluster_centers_csv=""):
    """Create a MiniBatchKMeans sized from the row count of ``trade.kabuka``.

    Args:
        meigaras: Optional collection of codes handed to
            ``kf.where_col_in`` to restrict the counted rows. ``None`` or an
            empty collection counts every row.
        cluster_centers_csv: Optional file name under ``CSV_DIR``. When the
            file exists, its contents are assigned to ``cluster_centers_``.

    Returns:
        The constructed ``MiniBatchKMeans`` instance; this function does not
        call ``fit`` on it.
    """
    strsql = "select count(*) from trade.kabuka "
    if meigaras is not None and len(meigaras) > 0:
        # Assumes kf.where_col_in returns only the predicate, which is how
        # the _gen_km method in this file uses it -- TODO confirm.
        strsql = strsql + " where " + kf.where_col_in(meigaras, "code")

    # Assumes exec_selsql returns a row whose first element is the count,
    # matching its use in _gen_km -- TODO confirm.
    cnt = sql.exec_selsql(strsql, 0)[0]

    # n_clusters must be an integer; np.sqrt yields a float.
    km = MiniBatchKMeans(
        n_clusters=int(np.sqrt(cnt * SAMPLE_RATE)),
        batch_size=1000,
    )

    if cluster_centers_csv != "":
        path = "%s/%s" % (CSV_DIR, cluster_centers_csv)
        if os.path.exists(path):
            # Convert to an ndarray so the centres behave like fitted ones.
            km.cluster_centers_ = np.array(f.csv2arr(path))
    return km
def _gen_km(self, bolverbose=False):
    """Construct a MiniBatchKMeans and store it on ``self.km``.

    The cluster count is ``int(sqrt(row_count * SAMPLE_RATE))``, where
    ``row_count`` comes from a ``count(*)`` query on ``trade.kabuka``,
    restricted via ``kf.where_col_in`` when ``self.meigaras`` is non-empty.

    If the CSV named by ``self.cluster_centers_csv`` exists under
    ``CSV_DIR``, its contents are used both as the ``init`` argument and as
    the estimator's ``cluster_centers_``.

    Args:
        bolverbose: Forwarded to ``MiniBatchKMeans`` as ``verbose``.
    """
    query = "select count(*) from trade.kabuka "
    if len(self.meigaras) > 0:
        query = query + " where " + kf.where_col_in(self.meigaras, "code")
    row_count = sql.exec_selsql(query, 0)[0]

    centers_path = "%s/%s" % (CSV_DIR, self.cluster_centers_csv)
    if os.path.exists(centers_path):
        saved_centers = np.array(f.csv2arr(centers_path))
    else:
        saved_centers = None

    # Shared constructor arguments; ``init`` is added only when previously
    # saved centres were found.
    km_kwargs = {
        "n_clusters": int(np.sqrt(row_count * SAMPLE_RATE)),
        "batch_size": 1000,
        "verbose": bolverbose,
    }
    if saved_centers is not None:
        km_kwargs["init"] = saved_centers

    model = MiniBatchKMeans(**km_kwargs)
    if saved_centers is not None:
        model.cluster_centers_ = saved_centers

    self.km = model