def _gen_sgdcls(self):
    """Build an ``SGDClassifier`` and restore saved parameters if present.

    Looks under ``CSV_DIR`` for the files named by ``self.coef_csv`` and
    ``self.coef_intercept``. Each file that exists is read with
    ``f.csv2arr`` and converted to a NumPy array. The classifier is stored
    on ``self.sgdcls``; nothing is returned.
    """
    coef_path = "%s/%s" % (CSV_DIR, self.coef_csv)
    intercept_path = "%s/%s" % (CSV_DIR, self.coef_intercept)

    coef = None
    if os.path.exists(coef_path):
        coef = np.array(f.csv2arr(coef_path))

    intercept = None
    if os.path.exists(intercept_path):
        # Must land in `intercept`, not `coef`: the two files hold different
        # parameters and assigning to `coef` would discard the weights.
        intercept = np.array(f.csv2arr(intercept_path))

    sgdcls = SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
                           eta0=0.0, fit_intercept=True, l1_ratio=0.15,
                           learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=N_JOBS,
                           penalty='l2', power_t=0.5, random_state=None, shuffle=True,
                           verbose=0, warm_start=False)

    # Saved parameters are only restored when the coefficient file was found.
    # NOTE(review): the shapes of the loaded arrays are not validated here.
    if coef is not None:
        sgdcls.coef_ = coef
        if intercept is not None:
            # Never store None in `intercept_`; leave it unset instead.
            sgdcls.intercept_ = intercept
    self.sgdcls = sgdcls
Exemple #2
0
def gen_km(meigaras=None, cluster_centers_csv=""):
    """Create a ``MiniBatchKMeans`` sized from the row count of ``trade.kabuka``.

    Args:
        meigaras: Optional collection handed to
            ``kf.where_col_in(meigaras, "code")`` to restrict the counted
            rows. ``None`` or an empty collection counts every row.
        cluster_centers_csv: File name under ``CSV_DIR``. When non-empty and
            the file exists, its content is loaded with ``f.csv2arr`` and
            assigned to ``cluster_centers_``.

    Returns:
        The configured ``MiniBatchKMeans`` instance.
    """
    strsql = "select count(*) from trade.kabuka "
    if meigaras is not None and len(meigaras) > 0:
        # NOTE(review): the fragment from kf.where_col_in is appended as-is;
        # confirm it already contains the WHERE keyword.
        strsql = strsql + kf.where_col_in(meigaras, "code")
    # NOTE(review): assumes exec_selsql(..., 0) yields a plain number -- confirm.
    cnt = sql.exec_selsql(strsql, 0)

    # np.sqrt returns a float, but n_clusters has to be an integer.
    n_clusters = int(np.sqrt(cnt * SAMPLE_RATE))
    km = MiniBatchKMeans(n_clusters=n_clusters, batch_size=1000)

    if cluster_centers_csv != "":
        path = "%s/%s" % (CSV_DIR, cluster_centers_csv)
        if os.path.exists(path):
            # Store the centres as an ndarray rather than the raw csv2arr result.
            km.cluster_centers_ = np.array(f.csv2arr(path))

    return km
    def _gen_km(self, bolverbose=False):
        """Build a ``MiniBatchKMeans`` and store it on ``self.km``.

        The cluster count is ``int(sqrt(row_count * SAMPLE_RATE))``, where
        ``row_count`` comes from ``trade.kabuka`` (restricted to
        ``self.meigaras`` when that is non-empty). If the file named by
        ``self.cluster_centers_csv`` exists under ``CSV_DIR``, its content is
        used both as the ``init`` array and as ``cluster_centers_``.

        Args:
            bolverbose: Forwarded to ``MiniBatchKMeans`` as ``verbose``.
        """
        strsql = "select count(*) from trade.kabuka "
        if len(self.meigaras) > 0:
            strsql = strsql + " where " + kf.where_col_in(self.meigaras, "code")
        cnt = sql.exec_selsql(strsql, 0)[0]

        path = "%s/%s" % (CSV_DIR, self.cluster_centers_csv)
        X = None
        if os.path.exists(path):
            X = np.array(f.csv2arr(path))

        # Arguments shared by both construction paths.
        params = dict(
            n_clusters=int(np.sqrt(cnt * SAMPLE_RATE)),
            batch_size=1000,
            verbose=bolverbose,
        )
        if X is None:
            # No saved centres: leave `cluster_centers_` unset so the estimator
            # does not look fitted while holding None.
            km = MiniBatchKMeans(**params)
        else:
            # NOTE(review): the row count of X is not checked against n_clusters.
            km = MiniBatchKMeans(init=X, **params)
            km.cluster_centers_ = X
        self.km = km