# Save the current figure as PNG and PDF. Saving stays best-effort (a failed
# write must not abort the script), but only ordinary exceptions are caught
# so that Ctrl-C / SystemExit still propagate, and failures are reported
# instead of being silently discarded.
for _ext in ('png', 'pdf'):
    _plot_path = '%s/%s_%s_%s.%s' % (plots, dataset, name, plot, _ext)
    try:
        plt.savefig(_plot_path)
    except Exception as err:
        print('WARNING: could not save %s: %s' % (_plot_path, err))
plt.clf()

# compute separation with a BDT
from sklearn.ensemble import GradientBoostingClassifier
from datasets import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score

# NOTE(review): train_test_split is the project helper from `datasets`; the
# meaning of the positional arguments (10, 5) is not visible here.
train_bdt, test_bdt = train_test_split(data, 10, 5)

# Hyper-parameters shared by the "pre" and "post" classifiers so the two
# separation measurements are directly comparable.
_bdt_params = dict(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=4,
    random_state=42,
    verbose=1,
)

# Classifier trained on the reweighting features to tell is_e == 1 from
# is_e == 0.
pre_separation = GradientBoostingClassifier(**_bdt_params)
pre_separation.fit(train_bdt[reweight_feats], train_bdt.is_e)

# Probability of the positive class (second column of predict_proba).
test_proba = pre_separation.predict_proba(test_bdt[reweight_feats])[:, 1]

# Labels are passed as a 1-D column, matching the form used in fit() above.
# roc_curve returns (fpr, tpr, thresholds); only the first two are kept.
roc_pre = roc_curve(test_bdt.is_e, test_proba)[:2]
auc_pre = roc_auc_score(test_bdt.is_e, test_proba)

# Second classifier with identical settings; it is only constructed here,
# not fitted within this excerpt.
post_separation = GradientBoostingClassifier(**_bdt_params)
# Build the list of columns to load: the training features and labels, plus
# the extra columns appropriate for the selected configuration.
fields = features + labeling
if args.SW94X and 'seeding' in args.what:
    fields += seed_94X_additional
else:
    fields += additional
# gsf_pt is always loaded, even when no list above already contains it.
if 'gsf_pt' not in fields:
    fields.append('gsf_pt')

data = pre_process_data(
    dataset,
    fields,
    for_seeding=('seeding' in args.what),
    keep_nonmatch=args.usenomatch
)
if args.noweight:
    # Item assignment always writes the column. Attribute assignment
    # (data.weight = 1) only does so when the column already exists and
    # otherwise just sets a plain attribute on the DataFrame object.
    data['weight'] = 1

# Split into train / validation / test and persist each part, all under the
# HDF key 'data'.
# NOTE(review): train_test_split is a project helper; the meaning of the
# positional arguments (10, 8) and (10, 6) is not visible here.
train, test = train_test_split(data, 10, 8)
test.to_hdf('%s/nn_bo_%s_testdata.hdf' % (opti_dir, args.what), 'data')

train, validation = train_test_split(train, 10, 6)
train.to_hdf('%s/nn_bo_%s_traindata.hdf' % (opti_dir, args.what), 'data')
validation.to_hdf('%s/nn_bo_%s_valdata.hdf' % (opti_dir, args.what), 'data')

import xgboost as xgb
# NOTE(review): sklearn.externals.joblib was deprecated in scikit-learn 0.21
# and removed in 0.23; this import requires an older scikit-learn.
from sklearn.externals import joblib
from sklearn.metrics import roc_curve, roc_auc_score

# Module-level counter that train_model declares as global.
iteration_idx = 0


# NOTE(review): only the opening statements of this function are visible in
# the reviewed excerpt; they are reproduced as-is.
def train_model(**kwargs):
    global iteration_idx
    print(iteration_idx)
# Write the current figure out as PNG and PDF. This remains best-effort (a
# failed save does not stop the script), but the handler is narrowed to
# Exception so KeyboardInterrupt / SystemExit are not swallowed, and each
# failure is reported rather than ignored.
for _ext in ('png', 'pdf'):
    _plot_path = '%s/%s_%s_%s.%s' % (plots, dataset, name, plot, _ext)
    try:
        plt.savefig(_plot_path)
    except Exception as err:
        print('WARNING: could not save %s: %s' % (_plot_path, err))
plt.clf()

# compute separation with a BDT
from sklearn.ensemble import GradientBoostingClassifier
from datasets import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score

# Only the first 1,000,000 rows of `data` are used for this study.
# NOTE(review): train_test_split is the project helper from `datasets`; the
# meaning of the positional arguments (10, 5) is not visible here.
train_bdt, test_bdt = train_test_split(data.head(1000000), 10, 5)

# One set of hyper-parameters for both classifiers, so the "pre" and "post"
# separation numbers come from identically configured models.
_bdt_params = dict(
    n_estimators=50,
    learning_rate=0.1,
    max_depth=4,
    random_state=42,
    verbose=1,
)

# Classifier trained on the reweighting features against the is_e label.
pre_separation = GradientBoostingClassifier(**_bdt_params)
pre_separation.fit(train_bdt[reweight_feats], train_bdt.is_e)

# Keep the positive-class probability (column 1 of predict_proba).
test_proba = pre_separation.predict_proba(test_bdt[reweight_feats])[:, 1]

# Labels are given as a 1-D column, the same form fit() receives above.
# roc_curve returns (fpr, tpr, thresholds); the thresholds are dropped.
roc_pre = roc_curve(test_bdt.is_e, test_proba)[:2]
auc_pre = roc_auc_score(test_bdt.is_e, test_proba)

# Identically configured classifier, constructed but not fitted within this
# excerpt.
post_separation = GradientBoostingClassifier(**_bdt_params)