Esempio n. 1
0
    def test_cross_val_meta_stack(self):
        x, y = DataGenerator.get_digits_data()

        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

        xgb_initparam = ParamsGenerator.get_xgb_init_param()
        rf_initparam = ParamsGenerator.get_rf_init_param()
        ext_initparam = ParamsGenerator.get_ext_init_param()

        xgb_bestparam = CrossValStack.get_best_xgbopt(x_train, y_train, xgb_initparam)
        rf_bestparam = CrossValStack.get_best_sklopt(x_train, y_train, rf_initparam)
        ext_bestparam = CrossValStack.get_best_etopt(x_train, y_train, ext_initparam)

        res = CrossValStack.cross_val_meta_stack(x_train, y_train, x_test, xgb_bestparam, rf_bestparam, ext_bestparam,
                                                 csvstack_cv=3)
        dfres = pd.DataFrame([res[0][:, 1], res[1][:, 1], res[2][:, 1]]).transpose()
        dfres.columns = ['p1', 'p2', 'p3']

        y_test_xgb = CrossValStack.predict_opt_clf(XGBOpt.XGBOpt(x_train, y_train), xgb_bestparam, x_test, x_test)[0]
        y_test_skl = CrossValStack.predict_opt_clf(SklearnOpt.SklearnOpt(x_train, y_train), rf_bestparam, x_test, x_test)[0]
        y_test_ext = CrossValStack.predict_opt_clf(SklearnOpt.SklearnOpt(x_train, y_train), ext_bestparam, x_test, x_test)[0]

        print metrics.roc_auc_score(y_test, y_test_xgb)
        print metrics.roc_auc_score(y_test, y_test_skl)
        print metrics.roc_auc_score(y_test, y_test_ext)

        print metrics.roc_auc_score(y_test, (dfres.p1+dfres.p2+dfres.p3).values/3)

        self.assertEqual(len(res), 3)
    def test_cross_val_stack(self):
        x, y = DataGenerator.get_digits_data()

        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

        xgb_initparam = ParamsGenerator.get_xgb_init_param()
        rf_initparam = ParamsGenerator.get_rf_init_param()
        ext_initparam = ParamsGenerator.get_ext_init_param()

        xgb_bestparam = CrossValStack.get_best_xgbopt(x_train, y_train, xgb_initparam)
        rf_bestparam = CrossValStack.get_best_sklopt(x_train, y_train, rf_initparam)
        ext_bestparam = CrossValStack.get_best_etopt(x_train, y_train, ext_initparam)

        res = CrossValStack.cross_val_stack(x_train, y_train, x_test, xgb_bestparam, rf_bestparam, ext_bestparam)
        dfres = pd.DataFrame([res[0][:, 1], res[1][:, 1], res[2][:, 1]]).transpose()
        dfres.columns = ['p1', 'p2', 'p3']

        y_test_xgb = CrossValStack.predict_opt_clf(XGBOpt.XGBOpt(x_train, y_train), xgb_bestparam, x_test, x_test)[0]
        y_test_skl = CrossValStack.predict_opt_clf(SklearnOpt.SklearnOpt(x_train, y_train), rf_bestparam, x_test, x_test)[0]
        y_test_ext = CrossValStack.predict_opt_clf(SklearnOpt.SklearnOpt(x_train, y_train), ext_bestparam, x_test, x_test)[0]

        print metrics.roc_auc_score(y_test, y_test_xgb)
        print metrics.roc_auc_score(y_test, y_test_skl)
        print metrics.roc_auc_score(y_test, y_test_ext)

        print metrics.roc_auc_score(y_test, dfres.p1.values)
        print metrics.roc_auc_score(y_test, dfres.p2.values)
        print metrics.roc_auc_score(y_test, dfres.p3.values)

        print metrics.roc_auc_score(y_test, (dfres.p1+dfres.p2+dfres.p3).values/3)

        print metrics.roc_auc_score(y_test, dfres.p1.values)
        print metrics.roc_auc_score(y_test, (dfres.p1+dfres.p2+dfres.p3).values/3)

        self.assertEqual(len(res), 5)
Esempio n. 3
0
    def test_cross_val_stack(self):
        """Chaos feature importance run over real + shadow training rows
        should generate more features than either frame originally has."""
        x, y = DataGenerator.get_digits_data()

        # In order to obtain some categorical columns
        x['i63'] = x['i63'].map(str)
        x['i62'] = x['i62'].map(str)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.33, random_state=42)

        importance = {}
        shadow = x_train.copy()

        # Tag real rows with source=0 and shadow copies with source=1,
        # then select the real rows back out of the concatenation.
        x_train.loc[:, 'source'] = 0
        shadow.loc[:, 'source'] = 1
        combined = pd.concat([x_train, shadow])
        real_row_selector = combined['source'] == 0
        ChaosGeneration.chaos_feature_importance(
            combined,
            y_train,
            real_row_selector,
            feat_dic=importance,
            feat_iter=10,
            nb_features=20,
            chaos_gen_iter=30)
        ranked = sorted(importance.items(), key=operator.itemgetter(1))
        self.assertGreater(len(importance), len(x_train.columns))
        self.assertGreater(len(importance), len(shadow.columns))
Esempio n. 4
0
 def test_kerasopt_auc(self):
     """Hyperopt search over the Keras DNN space with the AUC metric.

     NOTE(review): the score is asserted below -0.85, which suggests the
     optimizer stores a negated AUC — confirm in KerasOpt.
     """
     features, target = DataGenerator.get_digits_data()
     optimizer = KerasOpt.KerasOpt(features, target)
     search_space = HyperoptParam.HyperoptParam.param_space_reg_keras_dnn
     search_space['eval_metric'] = 'auc'
     best = optimizer.run_hp(search_space)
     self.assertIsNotNone(best)
     self.assertLess(optimizer.score, -0.85)
Esempio n. 5
0
 def test_xgbopt_tree_auc(self):
     """Hyperopt search over the XGB tree space with the AUC metric.

     NOTE(review): the score is asserted below -0.99, which suggests the
     optimizer stores a negated AUC — confirm in XGBOpt.
     """
     features, target = DataGenerator.get_digits_data()
     optimizer = XGBOpt.XGBOpt(features, target)
     search_space = HyperoptParam.HyperoptParam.param_space_reg_xgb_tree
     search_space['eval_metric'] = 'auc'
     best = optimizer.run_hp(search_space)
     self.assertIsNotNone(best)
     self.assertLess(optimizer.score, -0.99)
Esempio n. 6
0
 def test_lropt_logloss(self):
     """Logistic-regression hyperopt run scored by logloss must end
     with a best config and a loss under 0.011."""
     features, target = DataGenerator.get_digits_data()
     optimizer = SklearnOpt.SklearnOpt(features, target)
     search_space = HyperoptParam.HyperoptParam.param_space_clf_skl_lr
     search_space['eval_metric'] = 'logloss'
     search_space['type'] = 'logistic_regression'
     best = optimizer.run_hp(search_space)
     self.assertIsNotNone(best)
     self.assertLess(optimizer.score, 0.011)
Esempio n. 7
0
 def test_etopt_logloss(self):
     """Extra-trees hyperopt run scored by logloss must end with a
     best config and a loss under 0.03.

     The RF parameter space is reused here with type='extra_trees',
     matching the sibling random-forest test.
     """
     features, target = DataGenerator.get_digits_data()
     optimizer = SklearnOpt.SklearnOpt(features, target)
     search_space = HyperoptParam.HyperoptParam.param_space_reg_skl_rf
     search_space['eval_metric'] = 'logloss'
     search_space['type'] = 'extra_trees'
     best = optimizer.run_hp(search_space)
     self.assertIsNotNone(best)
     self.assertLess(optimizer.score, 0.03)
Esempio n. 8
0
 def test_rfopt_auc(self):
     """Random-forest hyperopt run scored by AUC.

     NOTE(review): the score is asserted below -0.99, which suggests the
     optimizer stores a negated AUC — confirm in SklearnOpt.
     """
     features, target = DataGenerator.get_digits_data()
     optimizer = SklearnOpt.SklearnOpt(features, target)
     search_space = HyperoptParam.HyperoptParam.param_space_reg_skl_rf
     search_space['eval_metric'] = 'auc'
     search_space['type'] = 'random_forest'
     best = optimizer.run_hp(search_space)
     self.assertIsNotNone(best)
     self.assertLess(optimizer.score, -0.99)
Esempio n. 9
0
 def test_xgbopt_tree_logloss(self):
     """XGB tree hyperopt run (10 evals) scored by logloss must end
     with a best config and a loss under 0.04."""
     features, target = DataGenerator.get_digits_data()
     optimizer = XGBOpt.XGBOpt(features, target)
     search_space = HyperoptParam.HyperoptParam.param_space_reg_xgb_tree
     search_space['max_evals'] = 10
     search_space['eval_metric'] = 'logloss'
     best = optimizer.run_hp(search_space)
     self.assertIsNotNone(best)
     self.assertLess(optimizer.score, 0.04)
    def test_cross_val_stack_none(self):
        """Chaos feature importance on the training rows alone should
        still generate more features than the frame originally has."""
        x, y = DataGenerator.get_digits_data()

        # In order to obtain some categorical columns
        x["i63"] = x["i63"].map(str)
        x["i62"] = x["i62"].map(str)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.33, random_state=42)

        importance = {}

        # Every row is a "real" row here: no shadow copy is appended.
        x_train.loc[:, "source"] = 0
        real_row_selector = x_train["source"] == 0
        ChaosGeneration.chaos_feature_importance(
            x_train,
            y_train,
            real_row_selector,
            feat_dic=importance,
            feat_iter=10,
            nb_features=20,
            chaos_gen_iter=30,
        )
        ranked = sorted(importance.items(), key=operator.itemgetter(1))
        self.assertGreater(len(importance), len(x_train.columns))
Esempio n. 11
0
    def test_handle_nocategoric_nonreg(self):
        """Chaos feature importance on purely numeric columns (no
        categorical casts) should still enlarge the feature dictionary."""
        x, y = DataGenerator.get_digits_data()

        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.33, random_state=42)

        importance = {}

        # Every row counts as a "real" row: selector matches everything.
        x_train.loc[:, 'source'] = 0
        real_row_selector = x_train['source'] == 0
        ChaosGeneration.chaos_feature_importance(
            x_train,
            y_train,
            real_row_selector,
            feat_dic=importance,
            feat_iter=10,
            nb_features=20,
            chaos_gen_iter=30)
        self.assertGreater(len(importance), len(x_train.columns))