Example #1
import numpy as np
from KS import KS
import matplotlib.pyplot as plt
import matplotlib.animation as animation

L   = 16           # domain is 0 to 2.*np.pi*L
N   = 128          # number of collocation points
dt  = 0.5          # time step
diffusion = 1.0
ks = KS(L=L,diffusion=diffusion,N=N,dt=dt) # instantiate model

# define initial condition
#u = np.cos(x/L)*(1.0+np.sin(x/L)) # smooth IC
u = 0.01*np.random.normal(size=N) # noisy IC
# remove zonal mean
u = u - u.mean()
# spectral space variable.
ks.xspec[0] = np.fft.rfft(u)

# time stepping loop.
nmin = 1000; nmax = 5000
uu = []; tt = []
vspec = np.zeros(ks.xspec.shape[1], float)
x = np.arange(N)
fig, ax = plt.subplots()
line, = ax.plot(x, ks.x.squeeze())
ax.set_xlim(0,N-1)
ax.set_ylim(-3,3)
#Init only required for blitting to give a clean slate.
def init():
    global line
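    # (the listing is truncated here; a hedged completion follows, using the
    # stock matplotlib FuncAnimation pattern. `updatefig` and the animation
    # keyword values are illustrative, not the original code.)
    line.set_ydata(np.ma.array(x, mask=True))
    return line,

def updatefig(n):
    ks.advance()                    # step the KS model forward in time
    line.set_ydata(ks.x.squeeze())  # redraw the current solution
    return line,

ani = animation.FuncAnimation(fig, updatefig, init_func=init,
                              interval=25, blit=True)
plt.show()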
Example #2
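# (imports and run-length constants are not part of this fragment; the
#  values chosen here for nens, ntstart and ntimes are illustrative only)
import numpy as np
from KS import KS

nens = 20       # ensemble size (illustrative)
ntstart = 1000  # number of spinup steps (illustrative)
ntimes = 5000   # number of sampled times (illustrative)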
# for gaussian, smooth_len is standard deviation.
thresh = 0.99 # threshold for modulated ensemble eigenvalue truncation.
# model parameters...
# for truth run
dt = 0.5; npts = 128
diffusion_truth = 1.0
# for forecast model (same as above for perfect model expt)
# for simplicity, assume dt and npts stay the same.
#diffusion = 0.9
diffusion = diffusion_truth

rstruth = np.random.RandomState(42) # fixed seed for truth run
rsens = np.random.RandomState() # varying seed for ob noise and ensemble initial conditions

# model instance for truth (nature) run
model = KS(N=npts,dt=dt,diffusion=diffusion_truth,rs=rstruth)
# model instance for forecast ensemble
ensemble = KS(N=npts,members=nens,dt=dt,diffusion=diffusion,rs=rsens)
for nt in range(ntstart): # spinup truth run
    model.advance()

# sample obs from truth, compute climo stats for model.
xx = []; tt = []
for nt in range(ntimes):
    model.advance()
    xx.append(model.x[0]) # single member
    tt.append(float(nt)*model.dt)
xtruth = np.array(xx, float)
timetruth = np.array(tt, float)
xtruth_mean = xtruth.mean()
xprime = xtruth - xtruth_mean
Example #3
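# (imports are not part of this fragment; KS, the Kolmogorov-Smirnov
#  statistic, and writeDatas are project-local helpers)
import xgboost as xgb
from sklearn import metrics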
def XGBoost_part(dtrain=None,
                 test=None,
                 dtest_X=None,
                 test_y=None,
                 k=0,
                 gamma=0.02,
                 min_child_weight=1.1,
                 max_depth=5,
                 lamda=100,
                 subsamp=0.7,
                 col_bytree=0.7,
                 col_bylevel=0.7,
                 eta=0.01):

    param = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'gamma': gamma,
        'min_child_weight': min_child_weight,
        'max_depth': max_depth,
        'lambda': lamda,
        'subsample': subsamp,
        'colsample_bytree': col_bytree,
        'colsample_bylevel': col_bylevel,
        'eta': eta,
        'tree_method': 'exact',
        'seed': 0,
        'nthread': 12
    }
    cv_log = xgb.cv(param,
                    dtrain,
                    num_boost_round=3500,
                    nfold=5,
                    early_stopping_rounds=50,
                    seed=0)
    num_round = cv_log.shape[0]
    cf = './featurescore/cvg{}.csv'.format(str(num_round))
    cv_log.to_csv(cf)
    watchlist = [(dtrain, 'train')]
    #auc = cv_log['test-auc-mean'].max()
    bst = xgb.train(param,
                    dtrain,
                    num_round,
                    evals=watchlist,
                    early_stopping_rounds=50)
    # make prediction
    dtest = xgb.DMatrix(test, missing=-9999)
    preds = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)

    # evaluate on the held-out validation set
    scores = bst.predict(dtest_X, ntree_limit=bst.best_ntree_limit)
    fp, tp, thresholds = metrics.roc_curve(test_y, scores, pos_label=1)
    ks = KS(y=test_y, score=scores)
    kk = int(ks * 10000000000) % 1000  # short tag derived from the K-S score
    print("K-S:{}".format(ks))
    print("AUC:{}".format(metrics.auc(fp, tp)))

    with open('./featurescore/a.txt', 'a') as f:
        S = "gamma= "+str(gamma)+\
         "  min_child_weight= "+str(min_child_weight)+\
         "  max_depth= "+str(max_depth)+\
         "  lamda= "+str(lamda)+\
         "\n" + \
         "subsamp= "+str(subsamp)+\
         "  col_bytree= "+str(col_bytree)+\
         "  col_bylevel= "+str(col_bylevel)+\
         "  eta= "+str(eta) + \
         "  ntree= "+str(bst.best_ntree_limit)+ \
         "\nfeatures scores: " + str(kk)
        f.writelines("{}\n".format(S))
        f.writelines("K-S:{}\n".format(ks))
        f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))
    # write predictions to file
    writeDatas(preds, test, "xgk{}".format(str(ks)))

    # get feature score
    feature_score = bst.get_fscore()
    feature_score = sorted(feature_score.items(),
                           key=lambda x: x[1],
                           reverse=True)
    fs = []

    for (key, value) in feature_score:
        fs.append("{0},{1}\n".format(key, value))

    print "features scores:", kk
    ff = './featurescore/feature_score_{0}.csv'.format(kk)
    with open(ff, 'w') as f:
        f.writelines("feature,score\n")
        f.writelines(fs)
    return kk
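# A typical call might look like this (illustrative names; dtrain and
# dtest_X are xgb.DMatrix objects, test is the raw test frame):
#   dtrain = xgb.DMatrix(train_X, label=train_y, missing=-9999)
#   dtest_X = xgb.DMatrix(valid_X, missing=-9999)
#   tag = XGBoost_part(dtrain=dtrain, test=test, dtest_X=dtest_X,
#                      test_y=valid_y, max_depth=6, eta=0.05)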
Example #5
"""
Author: Benjamin Pachev <*****@*****.**>, 2020
"""
from KS import KS, KSAssim
import numpy as np


def fourier_projector(spec, modes=21):
    # keep only the lowest `modes` Fourier coefficients, zero the rest,
    # and map back to grid space (a coarse "observation" of the field)
    mod_spec = spec.copy()
    mod_spec[:, modes:] = 0
    return np.fft.irfft(mod_spec, axis=-1)


if __name__ == "__main__":
    #See if the data assimilation works
    true = KS()
    assimilator = KSAssim(fourier_projector,
                          mu=1,
                          diffusion=3,
                          update_params=True)
    max_n = 100
    for n in range(max_n):
        target = fourier_projector(true.xspec)
        assimilator.set_target(target)
        assimilator.advance()
        true.advance()
        print(assimilator.error(true))
Example #6
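# (as in Example #2, imports and run-length constants are not part of this
#  fragment; illustrative values are supplied for nens, ntstart, ntimes)
import numpy as np
from KS import KS

nens = 20       # ensemble size (illustrative)
ntstart = 1000  # number of spinup steps (illustrative)
ntimes = 5000   # number of sampled times (illustrative)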
# model parameters...
# for truth run
dt = 0.5
npts = 128
diffusion_truth = 1.0
# for forecast model (same as above for perfect model expt)
# for simplicity, assume dt and npts stay the same.
#diffusion = 0.9
diffusion = diffusion_truth

rstruth = np.random.RandomState(42)  # fixed seed for truth run
rsens = np.random.RandomState()  # varying seed for ob noise and ensemble initial conditions

# model instance for truth (nature) run
model = KS(N=npts, dt=dt, diffusion=diffusion_truth, rs=rstruth)
# model instance for forecast ensemble
ensemble = KS(N=npts, members=nens, dt=dt, diffusion=diffusion, rs=rsens)
for nt in range(ntstart):  # spinup truth run
    model.advance()

# sample obs from truth, compute climo stats for model.
xx = []
tt = []
for nt in range(ntimes):
    model.advance()
    xx.append(model.x[0])  # single member
    tt.append(float(nt) * model.dt)
xtruth = np.array(xx, float)
timetruth = np.array(tt, float)
xtruth_mean = xtruth.mean()
Example #7
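# (imports are not part of this fragment; KS, writeDatas and the evalerror
#  metric are project-local helpers, cf. the other examples)
import pandas as pd
import xgboost as xgb
from sklearn import metrics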
def XGBoost_(train=None,
             y=None,
             test=None,
             dtest_X=None,
             test_y=None,
             k=0,
             num_round=3500,
             gamma=0.02,
             min_child_weight=1.1,
             max_depth=5,
             lamda=10,
             scale_pos_weight=3,
             subsamp=0.7,
             col_bytree=0.7,
             col_bylevel=0.7,
             eta=0.01,
             file="aac"):

    param = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        #'eval_metric':'auc',
        'gamma': gamma,
        'min_child_weight': min_child_weight,
        'max_depth': max_depth,
        'lambda': lamda,
        'subsample': subsamp,
        'colsample_bytree': col_bytree,
        'colsample_bylevel': col_bylevel,
        'eta': eta,
        'tree_method': 'exact',
        'seed': 0,
        'nthread': 12
    }
    with open('./test/a{}.txt'.format(file), 'a') as f:
        S = "gamma= " + str(gamma) + \
         " scale_pos_weight= " + str(scale_pos_weight) + \
         "  min_child_weight= " + str(min_child_weight) + \
         "  max_depth= " + str(max_depth) + \
         "  lamda= " + str(lamda) + \
         "\n" + \
         "subsamp= " + str(subsamp) + \
         "  col_bytree= " + str(col_bytree) + \
         "  col_bylevel= " + str(col_bylevel) + \
         "  eta= " + str(eta)
        f.writelines("{}\n".format(S))
    dtrain = xgb.DMatrix(train, label=y.label, missing=-9999)  # y is a DataFrame with a 'label' column
    #cv_log = xgb.cv(param, dtrain,show_stdv=True,verbose_eval=1,feval=evalerror,num_boost_round=3500, nfold=5,early_stopping_rounds=10, seed=0)
    #num_round = 21#cv_log.shape[0]
    #cf = './featurescore/acvg{}.csv'.format(str(num_round))
    #cv_log.to_csv(cf)

    watchlist = [(dtrain, 'train'), (dtest_X, 'eval')]
    #auc = cv_log['test-auc-mean'].max()
    bst = xgb.train(param,
                    dtrain,
                    num_round,
                    watchlist,
                    maximize=True,
                    feval=evalerror,
                    early_stopping_rounds=50)
    # make prediction
    dtest = xgb.DMatrix(test, missing=-9999)
    preds = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)
    p = bst.predict(dtrain, ntree_limit=bst.best_ntree_limit)

    scores = bst.predict(dtest_X, ntree_limit=bst.best_ntree_limit)
    fp, tp, thresholds = metrics.roc_curve(test_y, scores, pos_label=1)
    auc = metrics.auc(fp, tp)
    ks = KS(y=test_y.label, pred=scores)
    kk = int(ks * 10000000000) % 10000
    print "K-S:{}".format(ks)
    print "AUC:{}".format(auc)

    with open('./test/a{}.txt'.format(file), 'a') as f:
        S =  "  best_ntree_limit:" + str(bst.best_ntree_limit) + \
          "   best_iteration= "+str(bst.best_iteration)+ \
         "\nfeatures scores: " + str(kk)
        f.writelines("{}\n".format(S))
        f.writelines("K-S:{}\n".format(ks))
        f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))

    res = writeDatas(preds, test, "xgk_{}".format(str(kk)))

    res.columns = ['label' + str(kk)]
    y['label' + str(kk)] = p
    y = pd.concat([y, res])
    y.drop('label', axis=1, inplace=True)
    y = y.reset_index()
    try:
        ypred = pd.read_csv("./test/y/a{}.csv".format(file))
        y = pd.merge(y, ypred, on='userid')
    except Exception:
        pass  # first run: no previous prediction file to merge yet
    finally:
        y.to_csv("./test/y/a{}.csv".format(file), index=None)

    # get feature score
    feature_score = bst.get_fscore()
    feature_score = sorted(feature_score.items(),
                           key=lambda x: x[1],
                           reverse=True)
    fs = []
    for (key, value) in feature_score:
        fs.append("{0},{1}\n".format(key, value))
    print "features scores:", kk
    ff = './test/feature_score_{0}.csv'.format(kk)
    with open(ff, 'w') as f:
        f.writelines("feature,score\n")
        f.writelines(fs)
Example #8
def evalerror(preds, d):
    # custom xgboost evaluation metric: the Kolmogorov-Smirnov statistic
    labels = d.get_label()
    return 'KS', KS(pred=preds, y=labels)
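# Usage: pass as a custom evaluation metric to xgb.train or xgb.cv, with
# maximize=True since a larger K-S statistic is better, e.g.
#   bst = xgb.train(param, dtrain, num_round, watchlist,
#                   feval=evalerror, maximize=True,
#                   early_stopping_rounds=50)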
Example #9
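# (imports are not part of this fragment; KS and evalerror are
#  project-local helpers)
import xgboost as xgb
from sklearn import metrics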
def XGBoost_gbdm(train=None,
                 y=None,
                 test=None,
                 dtest_X=None,
                 test_y=None,
                 k=0,
                 num_round=3500,
                 gamma=0.02,
                 min_child_weight=1.1,
                 max_depth=5,
                 lamda=10,
                 scale_pos_weight=3,
                 subsamp=0.7,
                 col_bytree=0.7,
                 col_bylevel=0.7,
                 eta=0.01,
                 file="aac"):

    param = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        #'eval_metric':'auc',
        'scale_pos_weight': scale_pos_weight,
        'gamma': gamma,
        'min_child_weight': min_child_weight,
        'max_depth': max_depth,
        'lambda': lamda,
        'subsample': subsamp,
        'colsample_bytree': col_bytree,
        'colsample_bylevel': col_bylevel,
        'eta': eta,
        'tree_method': 'exact',
        'seed': 0,
        'nthread': 12
    }
    with open('./findx/af{}.txt'.format(file), 'a') as f:
        S = "gamma= " + str(gamma) + \
         "  scale_pos_weight= " + str(scale_pos_weight) + \
         "  min_child_weight= " + str(min_child_weight) + \
         "  max_depth= " + str(max_depth) + \
         "  lamda= " + str(lamda) + \
         "\n" + \
         "subsamp= " + str(subsamp) + \
         "  col_bytree= " + str(col_bytree) + \
         "  col_bylevel= " + str(col_bylevel) + \
         "  eta= " + str(eta)
        f.writelines("{}\n".format(S))
    dtrain = xgb.DMatrix(train, label=y.label, missing=-9999)
    #cv_log = xgb.cv(param, dtrain,show_stdv=True,verbose_eval=1,feval=evalerror,num_boost_round=3500, nfold=5,early_stopping_rounds=10, seed=0)
    #num_round = 21#cv_log.shape[0]
    #cf = './featurescore/acvg{}.csv'.format(str(num_round))
    #cv_log.to_csv(cf)

    watchlist = [(dtrain, 'train'), (dtest_X, 'eval')]
    bst = xgb.train(param,
                    dtrain,
                    num_round,
                    watchlist,
                    maximize=True,
                    feval=evalerror,
                    early_stopping_rounds=50)

    scores = bst.predict(dtest_X, ntree_limit=bst.best_ntree_limit)
    fp, tp, thresholds = metrics.roc_curve(test_y, scores, pos_label=1)
    auc = metrics.auc(fp, tp)
    ks = KS(y=test_y.label, pred=scores)
    kk = int(ks * 10000000000) % 10000
    print "K-S:{}".format(ks)
    print "AUC:{}".format(auc)

    with open('./findx/af{}.txt'.format(file), 'a') as f:
        S =  "  best_ntree_limit:" + str(bst.best_ntree_limit) + \
          "   best_iteration= "+str(bst.best_iteration)+ \
         "\nfeatures scores: " + str(kk)
        f.writelines("{}\n".format(S))
        f.writelines("K-S:{}\n".format(ks))
        f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))

    return ks, auc, bst.best_ntree_limit
Example #10
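# (this fragment begins mid-call; a plausible opening is sketched below.
#  The grid values for c, n and md, and the data variables train_X,
#  train_y, test_X, test_y and test, are illustrative assumptions;
#  KS and writeDatas are project-local helpers.)
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier

for c in ('gini', 'entropy'):      # criterion grid (illustrative)
    for n in (500, 1000, 2000):    # n_estimators grid (illustrative)
        for md in (6, 8, 10):      # max_depth grid (illustrative)
            try:
                rf = RandomForestClassifier(n_estimators=n,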
                                            criterion=c,
                                            warm_start=True,
                                            max_depth=md,
                                            max_features=0.6,
                                            min_samples_leaf=5,
                                            n_jobs=12,
                                            random_state=0)

                rf.fit(train_X, train_y)

                score = rf.predict_proba(test_X)[:, 1]

                fp, tp, thresholds = metrics.roc_curve(test_y.values,
                                                       score,
                                                       pos_label=1)
                ks = KS(y=test_y, score=score)
                print "K-S:{}".format(ks)
                print "AUC:{}".format(metrics.auc(fp, tp))

                ans = rf.predict_proba(test)[:, 1]

                with open('./featurescore/a.txt', 'a') as f:
                    S = "criterion= " + str(c) + \
                        "  n_estimators= " + str(n) + \
                        "  max_depth= " + str(md)
                    f.writelines("{}\n".format(S))
                    f.writelines("K-S:{}\n".format(ks))
                    f.writelines("AUC:{}\n\n".format(metrics.auc(fp, tp)))

                writeDatas(ans, test, "rf{}".format(str(ks)))
            except Exception:
                pass  # skip parameter combinations that fail to fit