import numpy as np
from KS import KS
import matplotlib.pyplot as plt
import matplotlib.animation as animation

L = 16           # domain is 0 to 2.*np.pi*L
N = 128          # number of collocation points
dt = 0.5         # time step
diffusion = 1.0
ks = KS(L=L, diffusion=diffusion, N=N, dt=dt)  # instantiate model

# define initial condition
#u = np.cos(x/L)*(1.0+np.sin(x/L)) # smooth IC
u = 0.01 * np.random.normal(size=N)  # noisy IC
# remove zonal mean
u = u - u.mean()
# spectral space variable.
ks.xspec[0] = np.fft.rfft(u)

# time stepping loop.
nmin = 1000; nmax = 5000
uu = []; tt = []
vspec = np.zeros(ks.xspec.shape[1], float)
x = np.arange(N)

fig, ax = plt.subplots()
line, = ax.plot(x, ks.x.squeeze())
ax.set_xlim(0, N - 1)
ax.set_ylim(-3, 3)

# Init only required for blitting to give a clean slate.
def init():
    global line
    line.set_ydata(np.ma.array(x, mask=True))  # blank the line (standard blit pattern)
    return line,
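# The original script breaks off inside init(); below is a minimal sketch of
# the rest of the animation loop, following matplotlib's standard
# FuncAnimation blitting pattern. The animate callback and the interval value
# are assumptions, not from the original; only KS.advance() is grounded in
# the other scripts in this collection.
def animate(i):
    ks.advance()                    # step the model forward
    line.set_ydata(ks.x.squeeze())  # redraw the current solution
    return line,

ani = animation.FuncAnimation(fig, animate, init_func=init,
                              interval=25, blit=True)
plt.show()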
# for gaussian, smooth_len is standard deviation.
thresh = 0.99  # threshold for modulated ensemble eigenvalue truncation.

# model parameters...
# for truth run
dt = 0.5; npts = 128
diffusion_truth = 1.0
# for forecast model (same as above for perfect model expt)
# for simplicity, assume dt and npts stay the same.
#diffusion = 0.9
diffusion = diffusion_truth
rstruth = np.random.RandomState(42)  # fixed seed for truth run
rsens = np.random.RandomState()  # varying seed for ob noise and ensemble initial conditions

# model instance for truth (nature) run
model = KS(N=npts, dt=dt, diffusion=diffusion_truth, rs=rstruth)
# model instance for forecast ensemble
ensemble = KS(N=npts, members=nens, dt=dt, diffusion=diffusion, rs=rsens)

for nt in range(ntstart):  # spinup truth run
    model.advance()

# sample obs from truth, compute climo stats for model.
xx = []; tt = []
for nt in range(ntimes):
    model.advance()
    xx.append(model.x[0])  # single member
    tt.append(float(nt) * model.dt)
xtruth = np.array(xx, float)
timetruth = np.array(tt, float)
xtruth_mean = xtruth.mean()
xprime = xtruth - xtruth_mean
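# nens, ntstart, and ntimes are defined elsewhere in the original experiment
# script. Placeholder values such as the following (illustrative assumptions,
# not taken from the source) would make the excerpt above self-contained:
#
#     nens = 20       # ensemble size
#     ntstart = 1000  # spin-up steps for the truth run
#     ntimes = 5000   # number of sampled analysis times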
import xgboost as xgb
from sklearn import metrics


def XGBoost_part(dtrain=None, test=None, dtest_X=None, test_y=None, k=0,
                 gamma=0.02, min_child_weight=1.1, max_depth=5, lamda=100,
                 subsamp=0.7, col_bytree=0.7, col_bylevel=0.7, eta=0.01):
    param = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'gamma': gamma,
        'min_child_weight': min_child_weight,
        'max_depth': max_depth,
        'lambda': lamda,
        'subsample': subsamp,
        'colsample_bytree': col_bytree,
        'colsample_bylevel': col_bylevel,
        'eta': eta,
        'tree_method': 'exact',
        'seed': 0,
        'nthread': 12
    }

    # cross-validate to pick the number of boosting rounds
    cv_log = xgb.cv(param, dtrain, num_boost_round=3500, nfold=5,
                    early_stopping_rounds=50, seed=0)
    num_round = cv_log.shape[0]
    cf = './featurescore/cvg{}.csv'.format(str(num_round))
    cv_log.to_csv(cf)

    watchlist = [(dtrain, 'train')]
    #auc = cv_log['test-auc-mean'].max()
    bst = xgb.train(param, dtrain, num_round, evals=watchlist,
                    early_stopping_rounds=50)

    # make predictions on the submission set
    dtest = xgb.DMatrix(test, missing=-9999)
    preds = bst.predict(dtest)

    # score the held-out set for K-S / AUC evaluation
    scores = bst.predict(dtest_X, ntree_limit=bst.best_ntree_limit)
    fp, tp, thresholds = metrics.roc_curve(test_y, scores, pos_label=1)
    ks = KS(y=test_y, score=scores)
    kk = int(ks * 10000000000) % 1000  # short tag derived from the K-S value
    print("K-S:{}".format(ks))
    print("AUC:{}".format(metrics.auc(fp, tp)))

    # log the hyperparameters and scores for this run
    with open('./featurescore/a.txt', 'a') as f:
        S = "gamma= " + str(gamma) + \
            " min_child_weight= " + str(min_child_weight) + \
            " max_depth= " + str(max_depth) + \
            " lamda= " + str(lamda) + \
            "\n" + \
            "subsamp= " + str(subsamp) + \
            " col_bytree= " + str(col_bytree) + \
            " col_bylevel= " + str(col_bylevel) + \
            " eta= " + str(eta) + \
            " ntree= " + str(bst.best_ntree_limit) + \
            "\nfeatures scores: " + str(kk)
        f.writelines("{}\n".format(S))
        f.writelines("K-S:{}\n".format(ks))
        f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))

    # write predictions to file
    writeDatas(preds, test, "xgk{}".format(str(ks)))

    # dump per-feature importance scores
    feature_score = bst.get_fscore()
    feature_score = sorted(feature_score.items(), key=lambda x: x[1],
                           reverse=True)
    fs = []
    for (key, value) in feature_score:
        fs.append("{0},{1}\n".format(key, value))
    print("features scores:", kk)
    ff = './featurescore/feature_score_{0}.csv'.format(kk)
    with open(ff, 'w') as f:
        f.writelines("feature,score\n")
        f.writelines(fs)
    return kk
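# The KS scorer used above (called as KS(y=..., score=...) here and as
# KS(y=..., pred=...) in the functions below) is a project helper not shown
# in this file. A minimal sketch of a Kolmogorov-Smirnov statistic derived
# from the ROC curve; the dual-keyword interface is inferred from the call
# sites, and the implementation itself is an assumption:
def KS(y=None, score=None, pred=None):
    # K-S statistic for a binary classifier: the maximum vertical gap
    # between the TPR and FPR curves over all thresholds.
    s = score if score is not None else pred
    fpr, tpr, _ = metrics.roc_curve(y, s, pos_label=1)
    return (tpr - fpr).max()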
from KS import KS, KSAssim
import numpy as np

"""
Author Benjamin Pachev <*****@*****.**> 2020
"""


def fourier_projector(spec, modes=21):
    # Zero out all but the lowest `modes` Fourier modes, then return to grid space.
    mod_spec = spec.copy()
    mod_spec[:, modes:] = 0
    return np.fft.irfft(mod_spec, axis=-1)


if __name__ == "__main__":
    # See if the data assimilation works
    true = KS()
    assimilator = KSAssim(fourier_projector, mu=1, diffusion=3,
                          update_params=True)
    max_n = 100
    for n in range(max_n):
        target = fourier_projector(true.xspec)  # observe only the low modes of the truth
        assimilator.set_target(target)
        assimilator.advance()
        true.advance()
        print(assimilator.error(true))
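# With update_params=True the assimilator is asked to recover model parameters
# as well as the state (it starts from diffusion=3 while the truth uses the
# KS default). A hypothetical end-of-run diagnostic; the attribute name
# `diffusion` is an assumption inferred from the constructor keyword:
#
#     print("estimated diffusion:", assimilator.diffusion)
#     print("true diffusion:", true.diffusion)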
import pandas as pd


def XGBoost_(train=None, y=None, test=None, dtest_X=None, test_y=None, k=0,
             num_round=3500, gamma=0.02, min_child_weight=1.1, max_depth=5,
             lamda=10, scale_pos_weight=3, subsamp=0.7, col_bytree=0.7,
             col_bylevel=0.7, eta=0.01, file="aac"):
    param = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        #'eval_metric':'auc',
        'gamma': gamma,
        'min_child_weight': min_child_weight,
        'max_depth': max_depth,
        'lambda': lamda,
        'subsample': subsamp,
        'colsample_bytree': col_bytree,
        'colsample_bylevel': col_bylevel,
        'eta': eta,
        'tree_method': 'exact',
        'seed': 0,
        'nthread': 12
    }

    # log the hyperparameters for this run
    with open('./test/a{}.txt'.format(file), 'a') as f:
        S = "gamma= " + str(gamma) + \
            " scale_pos_weight= " + str(scale_pos_weight) + \
            " min_child_weight= " + str(min_child_weight) + \
            " max_depth= " + str(max_depth) + \
            " lamda= " + str(lamda) + \
            "\n" + \
            "subsamp= " + str(subsamp) + \
            " col_bytree= " + str(col_bytree) + \
            " col_bylevel= " + str(col_bylevel) + \
            " eta= " + str(eta)
        f.writelines("{}\n".format(S))

    dtrain = xgb.DMatrix(train, label=y, missing=-9999)
    #cv_log = xgb.cv(param, dtrain, show_stdv=True, verbose_eval=1, feval=evalerror,
    #                num_boost_round=3500, nfold=5, early_stopping_rounds=10, seed=0)
    #num_round = 21  # cv_log.shape[0]
    #cf = './featurescore/acvg{}.csv'.format(str(num_round))
    #cv_log.to_csv(cf)
    watchlist = [(dtrain, 'train'), (dtest_X, 'eval')]
    #auc = cv_log['test-auc-mean'].max()
    # maximize=True because the custom eval (K-S, see evalerror below) is
    # better when larger
    bst = xgb.train(param, dtrain, num_round, watchlist, maximize=True,
                    feval=evalerror, early_stopping_rounds=50)

    # make predictions
    dtest = xgb.DMatrix(test, missing=-9999)
    preds = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)
    p = bst.predict(dtrain, ntree_limit=bst.best_ntree_limit)
    scores = bst.predict(dtest_X, ntree_limit=bst.best_ntree_limit)

    fp, tp, thresholds = metrics.roc_curve(test_y, scores, pos_label=1)
    auc = metrics.auc(fp, tp)
    ks = KS(y=test_y.label, pred=scores)
    kk = int(ks * 10000000000) % 10000  # short tag derived from the K-S value
    print("K-S:{}".format(ks))
    print("AUC:{}".format(auc))

    with open('./test/a{}.txt'.format(file), 'a') as f:
        S = " best_ntree_limit:" + str(bst.best_ntree_limit) + \
            " best_iteration= " + str(bst.best_iteration) + \
            "\nfeatures scores: " + str(kk)
        f.writelines("{}\n".format(S))
        f.writelines("K-S:{}\n".format(ks))
        f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))

    # accumulate this run's train/test predictions into ./test/y/a{file}.csv
    res = writeDatas(preds, test, "xgk_{}".format(str(kk)))
    res.columns = ['label' + str(kk)]
    y['label' + str(kk)] = p
    y = pd.concat([y, res])
    y.drop('label', axis=1, inplace=True)
    y = y.reset_index()
    try:
        ypred = pd.read_csv("./test/y/a{}.csv".format(file))
        y = pd.merge(y, ypred, on='userid')
    except FileNotFoundError:
        pass  # first run for this file: nothing to merge yet
    finally:
        y.to_csv("./test/y/a{}.csv".format(file), index=None)

    # dump per-feature importance scores
    feature_score = bst.get_fscore()
    feature_score = sorted(feature_score.items(), key=lambda x: x[1],
                           reverse=True)
    fs = []
    for (key, value) in feature_score:
        fs.append("{0},{1}\n".format(key, value))
    print("features scores:", kk)
    ff = './test/feature_score_{0}.csv'.format(kk)
    with open(ff, 'w') as f:
        f.writelines("feature,score\n")
        f.writelines(fs)
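# writeDatas is a project helper not shown in this file. A hypothetical
# sketch consistent with how it is called here: it must persist the test-set
# predictions and return a one-column DataFrame, since the caller renames
# res.columns and later merges on 'userid'. Every name below is an assumption:
def writeDatas(preds, test, name):
    res = pd.DataFrame({'label': preds}, index=test.index)  # assumes test is indexed by userid
    res.to_csv('./result/{}.csv'.format(name))
    return res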
def evalerror(preds, d):
    # Custom xgboost eval function: score predictions on DMatrix `d` by the
    # K-S statistic.
    labels = d.get_label()
    return 'KS', KS(pred=preds, y=labels)
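# xgboost calls feval with (predictions, DMatrix) after each boosting round
# and reports the returned ('KS', value) pair alongside the watchlist output.
# Because a larger K-S statistic is better, the xgb.train calls above and
# below pass maximize=True so early stopping keeps the best-scoring round
# rather than the lowest.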
def XGBoost_gbdm(train=None, y=None, test=None, dtest_X=None, test_y=None,
                 k=0, num_round=3500, gamma=0.02, min_child_weight=1.1,
                 max_depth=5, lamda=10, scale_pos_weight=3, subsamp=0.7,
                 col_bytree=0.7, col_bylevel=0.7, eta=0.01, file="aac"):
    param = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        #'eval_metric':'auc',
        'scale_pos_weight': scale_pos_weight,
        'gamma': gamma,
        'min_child_weight': min_child_weight,
        'max_depth': max_depth,
        'lambda': lamda,
        'subsample': subsamp,
        'colsample_bytree': col_bytree,
        'colsample_bylevel': col_bylevel,
        'eta': eta,
        'tree_method': 'exact',
        'seed': 0,
        'nthread': 12
    }

    # log the hyperparameters for this run
    with open('./findx/af{}.txt'.format(file), 'a') as f:
        S = "gamma= " + str(gamma) + \
            " scale_pos_weight= " + str(scale_pos_weight) + \
            " min_child_weight= " + str(min_child_weight) + \
            " max_depth= " + str(max_depth) + \
            " lamda= " + str(lamda) + \
            "\n" + \
            "subsamp= " + str(subsamp) + \
            " col_bytree= " + str(col_bytree) + \
            " col_bylevel= " + str(col_bylevel) + \
            " eta= " + str(eta)
        f.writelines("{}\n".format(S))

    dtrain = xgb.DMatrix(train, label=y.label, missing=-9999)
    #cv_log = xgb.cv(param, dtrain, show_stdv=True, verbose_eval=1, feval=evalerror,
    #                num_boost_round=3500, nfold=5, early_stopping_rounds=10, seed=0)
    #num_round = 21  # cv_log.shape[0]
    #cf = './featurescore/acvg{}.csv'.format(str(num_round))
    #cv_log.to_csv(cf)
    watchlist = [(dtrain, 'train'), (dtest_X, 'eval')]
    bst = xgb.train(param, dtrain, num_round, watchlist, maximize=True,
                    feval=evalerror, early_stopping_rounds=50)

    # evaluate on the held-out set
    scores = bst.predict(dtest_X, ntree_limit=bst.best_ntree_limit)
    fp, tp, thresholds = metrics.roc_curve(test_y, scores, pos_label=1)
    auc = metrics.auc(fp, tp)
    ks = KS(y=test_y.label, pred=scores)
    kk = int(ks * 10000000000) % 10000  # short tag derived from the K-S value
    print("K-S:{}".format(ks))
    print("AUC:{}".format(auc))

    with open('./findx/af{}.txt'.format(file), 'a') as f:
        S = " best_ntree_limit:" + str(bst.best_ntree_limit) + \
            " best_iteration= " + str(bst.best_iteration) + \
            "\nfeatures scores: " + str(kk)
        f.writelines("{}\n".format(S))
        f.writelines("K-S:{}\n".format(ks))
        f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))
    return ks, auc, bst.best_ntree_limit
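# XGBoost_gbdm returns (ks, auc, best_ntree_limit) and logs to ./findx/,
# which suggests it is the inner call of a hyperparameter search. A
# hypothetical driver loop; the grid values and variable names here are
# illustrative assumptions, not from the source:
best = (0.0, None)
for md in (4, 5, 6):
    for e in (0.01, 0.02):
        ks, auc, ntree = XGBoost_gbdm(train=train, y=y, test=test,
                                      dtest_X=dtest_X, test_y=test_y,
                                      max_depth=md, eta=e, file="sweep")
        if ks > best[0]:
            best = (ks, {'max_depth': md, 'eta': e, 'ntree': ntree})
print("best K-S and params:", best)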
# Fragment of a random-forest run: the loop supplying c (criterion),
# n (n_estimators), md (max_depth) and the data splits precedes this excerpt;
# the opening of the constructor call is reconstructed here so the fragment
# reads as valid code.
try:
    rf = RandomForestClassifier(n_estimators=n,
                                criterion=c,
                                warm_start=True,
                                max_depth=md,
                                max_features=0.6,
                                min_samples_leaf=5,
                                n_jobs=12,
                                random_state=0)
    rf.fit(train_X, train_y)
    score = rf.predict_proba(test_X)[:, 1]  # P(label=1) on the held-out set
    fp, tp, thresholds = metrics.roc_curve(test_y.values, score, pos_label=1)
    ks = KS(y=test_y, score=score)
    print("K-S:{}".format(ks))
    print("AUC:{}".format(metrics.auc(fp, tp)))

    ans = rf.predict_proba(test)[:, 1]  # predictions for the submission set
    with open('./featurescore/a.txt', 'a') as f:
        S = "criterion= " + str(c) + \
            " n_estimators= " + str(n) + \
            " max_depth= " + str(md)
        f.writelines("{}\n".format(S))
        f.writelines("K-S:{}\n".format(ks))
        f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))
    writeDatas(ans, test, "rf{}".format(str(ks)))
except:
    pass  # the original excerpt ends at this bare except