def test_cross_val_meta_stack(self):
    """Exercise ``CrossValStack.cross_val_meta_stack`` on the digits data.

    Best parameters for the XGBoost, random-forest and extra-trees models
    are searched on the training split and handed to the meta-stacker with
    ``csvstack_cv=3``. ROC AUC is printed for each standalone model and for
    the mean of the three stacked columns. The only assertion is that the
    stacker returns exactly three results.
    """
    data, labels = DataGenerator.get_digits_data()
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.33, random_state=42
    )

    # Initial parameter sets first, then the tuned ones, in the same order
    # for every model family.
    init_xgb = ParamsGenerator.get_xgb_init_param()
    init_rf = ParamsGenerator.get_rf_init_param()
    init_ext = ParamsGenerator.get_ext_init_param()
    best_xgb = CrossValStack.get_best_xgbopt(x_train, y_train, init_xgb)
    best_rf = CrossValStack.get_best_sklopt(x_train, y_train, init_rf)
    best_ext = CrossValStack.get_best_etopt(x_train, y_train, init_ext)

    stacked = CrossValStack.cross_val_meta_stack(
        x_train, y_train, x_test, best_xgb, best_rf, best_ext, csvstack_cv=3
    )

    # One frame column per stacked result, built from column 1 of each array
    # (presumably the positive-class probability -- TODO confirm).
    stack_frame = pd.DataFrame(
        [stacked[idx][:, 1] for idx in range(3)]
    ).transpose()
    stack_frame.columns = ['p1', 'p2', 'p3']

    # Standalone predictions from each tuned model, used as a baseline.
    standalone = [
        (XGBOpt.XGBOpt, best_xgb),
        (SklearnOpt.SklearnOpt, best_rf),
        (SklearnOpt.SklearnOpt, best_ext),
    ]
    baseline_preds = [
        CrossValStack.predict_opt_clf(
            make_opt(x_train, y_train), best, x_test, x_test
        )[0]
        for make_opt, best in standalone
    ]

    # print(expr) with one argument behaves the same on Python 2 and 3.
    for pred in baseline_preds:
        print(metrics.roc_auc_score(y_test, pred))
    averaged = (stack_frame.p1 + stack_frame.p2 + stack_frame.p3).values / 3
    print(metrics.roc_auc_score(y_test, averaged))

    self.assertEqual(len(stacked), 3)
def test_cross_val_stack(self):
    """Exercise ``CrossValStack.cross_val_stack`` on the digits data.

    Best parameters for the XGBoost, random-forest and extra-trees models
    are searched on the training split and passed to the stacker. ROC AUC
    is printed for each standalone model, for each of the first three
    stacked columns, and for their mean. The only assertion is that the
    stacker returns five results; just the first three are inspected here.
    """
    x, y = DataGenerator.get_digits_data()
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.33, random_state=42
    )

    xgb_initparam = ParamsGenerator.get_xgb_init_param()
    rf_initparam = ParamsGenerator.get_rf_init_param()
    ext_initparam = ParamsGenerator.get_ext_init_param()
    xgb_bestparam = CrossValStack.get_best_xgbopt(x_train, y_train, xgb_initparam)
    rf_bestparam = CrossValStack.get_best_sklopt(x_train, y_train, rf_initparam)
    ext_bestparam = CrossValStack.get_best_etopt(x_train, y_train, ext_initparam)

    res = CrossValStack.cross_val_stack(
        x_train, y_train, x_test, xgb_bestparam, rf_bestparam, ext_bestparam
    )

    # Column 1 of each of the first three results becomes one frame column
    # (presumably the positive-class probability -- TODO confirm).
    dfres = pd.DataFrame([res[0][:, 1], res[1][:, 1], res[2][:, 1]]).transpose()
    dfres.columns = ['p1', 'p2', 'p3']

    # Standalone predictions from each tuned model, used as a baseline.
    y_test_xgb = CrossValStack.predict_opt_clf(
        XGBOpt.XGBOpt(x_train, y_train), xgb_bestparam, x_test, x_test
    )[0]
    y_test_skl = CrossValStack.predict_opt_clf(
        SklearnOpt.SklearnOpt(x_train, y_train), rf_bestparam, x_test, x_test
    )[0]
    y_test_ext = CrossValStack.predict_opt_clf(
        SklearnOpt.SklearnOpt(x_train, y_train), ext_bestparam, x_test, x_test
    )[0]

    # Diagnostic output only. Each figure is printed once; the trailing
    # repeats of the p1 and averaged scores were redundant and are gone.
    # print(expr) with one argument behaves the same on Python 2 and 3.
    print(metrics.roc_auc_score(y_test, y_test_xgb))
    print(metrics.roc_auc_score(y_test, y_test_skl))
    print(metrics.roc_auc_score(y_test, y_test_ext))
    print(metrics.roc_auc_score(y_test, dfres.p1.values))
    print(metrics.roc_auc_score(y_test, dfres.p2.values))
    print(metrics.roc_auc_score(y_test, dfres.p3.values))
    print(metrics.roc_auc_score(y_test, (dfres.p1 + dfres.p2 + dfres.p3).values / 3))

    self.assertEqual(len(res), 5)
def test_cross_val_stack(self):
    """Check chaos feature importance when a shadow copy of the rows is appended.

    Two digit columns are cast to strings so the data contains categorical
    features. The training rows are tagged ``source = 0``, a copy tagged
    ``source = 1`` is concatenated below them, and
    ``ChaosGeneration.chaos_feature_importance`` fills ``dic``. The test
    asserts that ``dic`` ends up with more entries than there are columns.

    NOTE(review): the name mentions cross-val stacking, but the body only
    exercises ``ChaosGeneration`` -- consider renaming.
    """
    x, y = DataGenerator.get_digits_data()
    # In order to obtain some categorical columns
    x['i63'] = x['i63'].map(str)
    x['i62'] = x['i62'].map(str)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.33, random_state=42
    )

    dic = {}
    # Copy before tagging so the two frames carry different 'source' values.
    x_shadow = x_train.copy()
    x_train.loc[:, 'source'] = 0
    x_shadow.loc[:, 'source'] = 1
    x_all = pd.concat([x_train, x_shadow])
    # True for the original training rows, False for the appended copy.
    shadow_selector = x_all['source'] == 0

    ChaosGeneration.chaos_feature_importance(
        x_all, y_train, shadow_selector,
        feat_dic=dic, feat_iter=10, nb_features=20, chaos_gen_iter=30
    )

    # The sorted view of ``dic`` was never read, so it is no longer computed.
    self.assertGreater(len(dic), len(x_train.columns))
    self.assertGreater(len(dic), len(x_shadow.columns))
def test_kerasopt_auc(self):
    """Check that the Keras DNN hyper-parameter search meets the AUC bound.

    Runs ``KerasOpt.run_hp`` on the digits data with ``eval_metric='auc'``
    and asserts that a best parameter set is returned and that
    ``kerasopt.score`` is below -0.85.
    """
    x, y = DataGenerator.get_digits_data()
    kerasopt = KerasOpt.KerasOpt(x, y)
    # Edit a shallow copy: the search space lives on
    # HyperoptParam.HyperoptParam, and changing it in place would carry this
    # test's settings into every later user of the same space.
    param = dict(HyperoptParam.HyperoptParam.param_space_reg_keras_dnn)
    param['eval_metric'] = 'auc'
    best = kerasopt.run_hp(param)
    self.assertIsNotNone(best)
    # NOTE(review): the negative bound suggests the score is a negated AUC
    # (lower is better) -- confirm against KerasOpt.
    self.assertLess(kerasopt.score, -0.85)
def test_xgbopt_tree_auc(self):
    """Check that the XGBoost tree hyper-parameter search meets the AUC bound.

    Runs ``XGBOpt.run_hp`` on the digits data with ``eval_metric='auc'`` and
    asserts that a best parameter set is returned and that ``xgbopt.score``
    is below -0.99.
    """
    x, y = DataGenerator.get_digits_data()
    xgbopt = XGBOpt.XGBOpt(x, y)
    # Edit a shallow copy: the search space lives on
    # HyperoptParam.HyperoptParam, and changing it in place would carry this
    # test's settings into every later user of the same space.
    param = dict(HyperoptParam.HyperoptParam.param_space_reg_xgb_tree)
    param['eval_metric'] = 'auc'
    best = xgbopt.run_hp(param)
    self.assertIsNotNone(best)
    # NOTE(review): the negative bound suggests the score is a negated AUC
    # (lower is better) -- confirm against XGBOpt.
    self.assertLess(xgbopt.score, -0.99)
def test_lropt_logloss(self):
    """Check that the logistic-regression search meets the log-loss bound.

    Runs ``SklearnOpt.run_hp`` on the digits data with
    ``eval_metric='logloss'`` and ``type='logistic_regression'``, then
    asserts that a best parameter set is returned and that ``skopt.score``
    is below 0.011.
    """
    x, y = DataGenerator.get_digits_data()
    skopt = SklearnOpt.SklearnOpt(x, y)
    # Edit a shallow copy: the search space lives on
    # HyperoptParam.HyperoptParam, and changing it in place would carry this
    # test's settings into every later user of the same space.
    param = dict(HyperoptParam.HyperoptParam.param_space_clf_skl_lr)
    param['eval_metric'] = 'logloss'
    param['type'] = 'logistic_regression'
    best = skopt.run_hp(param)
    self.assertIsNotNone(best)
    self.assertLess(skopt.score, 0.011)
def test_etopt_logloss(self):
    """Check that the extra-trees search meets the log-loss bound.

    Runs ``SklearnOpt.run_hp`` on the digits data with
    ``eval_metric='logloss'`` and ``type='extra_trees'``, starting from the
    random-forest search space, then asserts that a best parameter set is
    returned and that ``skopt.score`` is below 0.03.
    """
    x, y = DataGenerator.get_digits_data()
    skopt = SklearnOpt.SklearnOpt(x, y)
    # Edit a shallow copy: this space lives on HyperoptParam.HyperoptParam
    # and is also used by the random-forest test, so changing it in place
    # would leave 'extra_trees' / 'logloss' set for later users.
    param = dict(HyperoptParam.HyperoptParam.param_space_reg_skl_rf)
    param['eval_metric'] = 'logloss'
    param['type'] = 'extra_trees'
    best = skopt.run_hp(param)
    self.assertIsNotNone(best)
    self.assertLess(skopt.score, 0.03)
def test_rfopt_auc(self):
    """Check that the random-forest search meets the AUC bound.

    Runs ``SklearnOpt.run_hp`` on the digits data with ``eval_metric='auc'``
    and ``type='random_forest'``, then asserts that a best parameter set is
    returned and that ``skopt.score`` is below -0.99.
    """
    x, y = DataGenerator.get_digits_data()
    skopt = SklearnOpt.SklearnOpt(x, y)
    # Edit a shallow copy: this space lives on HyperoptParam.HyperoptParam
    # and is also used by the extra-trees test, so changing it in place
    # would leave 'random_forest' / 'auc' set for later users.
    param = dict(HyperoptParam.HyperoptParam.param_space_reg_skl_rf)
    param['eval_metric'] = 'auc'
    param['type'] = 'random_forest'
    best = skopt.run_hp(param)
    self.assertIsNotNone(best)
    # NOTE(review): the negative bound suggests the score is a negated AUC
    # (lower is better) -- confirm against SklearnOpt.
    self.assertLess(skopt.score, -0.99)
def test_xgbopt_tree_logloss(self):
    """Check that the XGBoost tree search meets the log-loss bound.

    Runs ``XGBOpt.run_hp`` on the digits data with ``max_evals=10`` and
    ``eval_metric='logloss'``, then asserts that a best parameter set is
    returned and that ``xgbopt.score`` is below 0.04.
    """
    x, y = DataGenerator.get_digits_data()
    xgbopt = XGBOpt.XGBOpt(x, y)
    # Edit a shallow copy: this space lives on HyperoptParam.HyperoptParam
    # and is also used by the XGBoost AUC test, so changing it in place
    # would leave max_evals=10 / 'logloss' set for later users.
    param = dict(HyperoptParam.HyperoptParam.param_space_reg_xgb_tree)
    param['max_evals'] = 10
    param['eval_metric'] = 'logloss'
    best = xgbopt.run_hp(param)
    self.assertIsNotNone(best)
    self.assertLess(xgbopt.score, 0.04)
def test_cross_val_stack_none(self):
    """Check chaos feature importance when no shadow rows are appended.

    Two digit columns are cast to strings so the data contains categorical
    features. Every training row is tagged ``source = 0``, so the selector
    passed to ``ChaosGeneration.chaos_feature_importance`` is True for all
    rows. The test asserts that ``dic`` ends up with more entries than
    there are columns.

    NOTE(review): the name mentions cross-val stacking, but the body only
    exercises ``ChaosGeneration`` -- consider renaming.
    """
    x, y = DataGenerator.get_digits_data()
    # In order to obtain some categorical columns
    x["i63"] = x["i63"].map(str)
    x["i62"] = x["i62"].map(str)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.33, random_state=42
    )

    dic = {}
    x_train.loc[:, "source"] = 0
    shadow_selector = x_train["source"] == 0

    ChaosGeneration.chaos_feature_importance(
        x_train, y_train, shadow_selector,
        feat_dic=dic, feat_iter=10, nb_features=20, chaos_gen_iter=30
    )

    # The sorted view of ``dic`` was never read, so it is no longer computed.
    self.assertGreater(len(dic), len(x_train.columns))
def test_handle_nocategoric_nonreg(self):
    """Non-regression check: chaos importance on data left uncast.

    The digits data is used as returned by ``DataGenerator`` (no columns
    are converted to strings). Every training row is tagged ``source = 0``
    and ``ChaosGeneration.chaos_feature_importance`` fills the result
    dictionary, which must end up with more entries than there are columns.
    """
    features, target = DataGenerator.get_digits_data()
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.33, random_state=42
    )

    importance_by_feature = {}
    x_train.loc[:, 'source'] = 0
    # Every row was just tagged 0, so this mask is True throughout.
    is_original_row = x_train['source'] == 0

    chaos_options = dict(
        feat_dic=importance_by_feature,
        feat_iter=10,
        nb_features=20,
        chaos_gen_iter=30,
    )
    ChaosGeneration.chaos_feature_importance(
        x_train, y_train, is_original_row, **chaos_options
    )

    column_count = len(x_train.columns)
    self.assertGreater(len(importance_by_feature), column_count)