# Best-effort export of the current figure (plt is presumably
# matplotlib.pyplot -- confirm against the file's imports): first as PNG,
# then as PDF. Each attempt is independent, and anything raised by savefig is
# swallowed by the bare except, so a failed export never stops the script.
try:
            plt.savefig('%s/%s_%s_%s.png' % (plots, dataset, name, plot))
        except:
            pass
        try:
            plt.savefig('%s/%s_%s_%s.pdf' % (plots, dataset, name, plot))
        except:
            pass
        # Clear the figure once both save attempts are done.
        plt.clf()

#compute separation with a BDT
from sklearn.ensemble import GradientBoostingClassifier
from datasets import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score

# Hyper-parameters shared by both separation classifiers, kept in one place
# so the two models are guaranteed to be configured identically.
separation_bdt_settings = {
    'n_estimators': 50,
    'learning_rate': 0.1,
    'max_depth': 4,
    'random_state': 42,
    'verbose': 1,
}

# Split the sample for the separation study; the meaning of the two integer
# arguments is defined by datasets.train_test_split.
train_bdt, test_bdt = train_test_split(data, 10, 5)

# Fit the first classifier on the reweighting features against the is_e label.
pre_separation = GradientBoostingClassifier(**separation_bdt_settings)
pre_separation.fit(train_bdt[reweight_feats], train_bdt.is_e)

# Score the test split: column 1 of predict_proba is kept as the score, and
# only the first two arrays returned by roc_curve are stored.
test_truth = test_bdt[['is_e']]
test_proba = pre_separation.predict_proba(test_bdt[reweight_feats])[:, 1]
roc_pre = roc_curve(test_truth, test_proba)[:2]
auc_pre = roc_auc_score(test_truth, test_proba)

# Second, still unfitted classifier with the same settings (presumably
# trained after reweighting -- that code is outside this excerpt).
post_separation = GradientBoostingClassifier(**separation_bdt_settings)
# Columns to load: the common features and labels, plus one set of extra
# columns chosen by the 94X flag and by whether this is a seeding training.
fields = features + labeling
use_94X_seed_extras = args.SW94X and 'seeding' in args.what
fields += seed_94X_additional if use_94X_seed_extras else additional

# gsf_pt is always loaded, whichever column set was selected above.
if 'gsf_pt' not in fields:
    fields += ['gsf_pt']

data = pre_process_data(
    dataset,
    fields,
    for_seeding=('seeding' in args.what),
    keep_nonmatch=args.usenomatch,
)

# Optionally replace the per-row weight by a constant 1.
if args.noweight:
    data.weight = 1

# First split off the test sample and store it ...
train, test = train_test_split(data, 10, 8)
test.to_hdf(
    '%s/nn_bo_%s_testdata.hdf' % (opti_dir, args.what),
    'data'
)

# ... then split what remains into training and validation samples and store
# both next to the test sample.
train, validation = train_test_split(train, 10, 6)
train.to_hdf(
    '%s/nn_bo_%s_traindata.hdf' % (opti_dir, args.what),
    'data'
)
validation.to_hdf(
    '%s/nn_bo_%s_valdata.hdf' % (opti_dir, args.what),
    'data'
)

import xgboost as xgb
from sklearn.externals import joblib
from sklearn.metrics import roc_curve, roc_auc_score
# Module-level counter starting at 0; train_model declares it `global` and
# prints it on entry.
iteration_idx = 0


def train_model(**kwargs):
    global iteration_idx
    print iteration_idx
Beispiel #3
0
        # Best-effort export of the current figure, first as PNG and then as
        # PDF. A failure for one format must not prevent the other from being
        # written, so each attempt is guarded separately.
        for image_format in ('png', 'pdf'):
            try:
                plt.savefig('%s/%s_%s_%s.%s' % (plots, dataset, name, plot,
                                                image_format))
            except Exception:
                # Still deliberately ignored, but no longer a bare `except:`,
                # so KeyboardInterrupt and SystemExit propagate and the script
                # can be interrupted while plots are being written.
                pass
        # Clear the figure once both save attempts are done.
        plt.clf()

#compute separation with a BDT
from sklearn.ensemble import GradientBoostingClassifier
from datasets import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score

# Hyper-parameters shared by both separation classifiers, kept in one place
# so the two models are guaranteed to be configured identically.
separation_bdt_settings = {
    'n_estimators': 50,
    'learning_rate': 0.1,
    'max_depth': 4,
    'random_state': 42,
    'verbose': 1,
}

# Only the first 1,000,000 rows of the sample enter the separation study; the
# meaning of the two integer arguments is defined by
# datasets.train_test_split.
train_bdt, test_bdt = train_test_split(data.head(1000000), 10, 5)

# Fit the first classifier on the reweighting features against the is_e label.
pre_separation = GradientBoostingClassifier(**separation_bdt_settings)
pre_separation.fit(train_bdt[reweight_feats], train_bdt.is_e)

# Score the test split: column 1 of predict_proba is kept as the score, and
# only the first two arrays returned by roc_curve are stored.
test_truth = test_bdt[['is_e']]
test_proba = pre_separation.predict_proba(test_bdt[reweight_feats])[:, 1]
roc_pre = roc_curve(test_truth, test_proba)[:2]
auc_pre = roc_auc_score(test_truth, test_proba)

# Second, still unfitted classifier with the same settings (presumably
# trained after reweighting -- that code is outside this excerpt).
post_separation = GradientBoostingClassifier(**separation_bdt_settings)