Example #1
0
    def test_random_forest(self):
        """Smoke-test the random-forest path on the adult data set.

        Runs the load / clean / split / numerize / tune / predict sequence and
        prints the ROC AUC and log loss of the validation predictions. No
        assertion is made on the scores.
        """
        # loading
        x, y = DataGenerator.get_adult_data()

        # cleaning: called for its effect on ``x``; the return value is unused
        MissingValues.add_miss_val_indicator(x)

        # Fixed seed so the 80/20 split is the same on every run.
        x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=42)

        x_train_1, x_valid_1 = Automaton.numerize(x_train, x_valid)

        # Parameter search starts from the project's initial random-forest parameters.
        sklparam = Cvs.get_best_sklopt(x_train_1, y_train, ParamsGenerator.get_rf_init_param())
        skopt = SklearnOpt.SklearnOpt(x_train_1, y_train)
        # NOTE(review): the validation frame is passed for both trailing
        # arguments and the second result is discarded -- presumably the last
        # argument is meant for a separate test set; confirm against Cvs.
        y_pred_valid, _ = Cvs.predict_opt_clf(skopt, sklparam, x_valid_1, x_valid_1)

        # Single-argument print(...) emits the same text under Python 2 and
        # Python 3, whereas the bare print statement is a SyntaxError on 3.
        print('Random Forest')
        print(metrics.roc_auc_score(y_valid, y_pred_valid))
        print(metrics.log_loss(y_valid, y_pred_valid))
Example #2
0
    def test_full_robot(self):
        """Smoke-test ``Automaton.robot`` end to end on the adult data set.

        Prints the ROC AUC and log loss of the combined validation prediction.
        No assertion is made on the scores.
        """
        # Show INFO-level log records while the pipeline runs.
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
        # loading
        x, y = DataGenerator.get_adult_data()

        # cleaning: called for its effect on ``x``; the return value is unused
        MissingValues.add_miss_val_indicator(x)

        # Fixed seed so the 80/20 split is the same on every run.
        x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=42)

        ext_ip, rf_ip, robot_args, xgb_ip = self.get_params()

        # The robot receives the raw (non-numerized) frames plus the three
        # initial parameter sets.
        res = Automaton.robot(x_train, y_train, x_valid, rf_ip, ext_ip, xgb_ip, **robot_args)

        # Collapse the robot's result into a single prediction for scoring.
        y_pred_valid = Misc.stacking_res_to_one_pred(res)

        # Single-argument print(...) emits the same text under Python 2 and
        # Python 3, whereas the bare print statement is a SyntaxError on 3.
        print('Full Robot')
        print(metrics.roc_auc_score(y_valid, y_pred_valid))
        print(metrics.log_loss(y_valid, y_pred_valid))
Example #3
0
    def test_tiny_robot(self):
        """Smoke-test ``Automaton.tiny_robot`` with pre-tuned parameters.

        Tunes the three parameter sets on numerized training data, passes them
        to ``Automaton.tiny_robot``, and prints the ROC AUC and log loss of the
        combined validation prediction. No assertion is made on the scores.
        """
        # Show INFO-level log records while the pipeline runs.
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
        # loading
        x, y = DataGenerator.get_adult_data()

        # cleaning: called for its effect on ``x``; the return value is unused
        MissingValues.add_miss_val_indicator(x)

        # Fixed seed so the 80/20 split is the same on every run.
        x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=42)

        ext_ip, rf_ip, robot_args, xgb_ip = self.get_params()

        # Only the numerized training frame is needed for the parameter
        # searches; the numerized validation frame is discarded.
        x_train_num, _ = Automaton.numerize(x_train, x_valid, **robot_args)
        rf_rp = Misc.enhance_param(Cvs.get_best_sklopt(x_train_num, y_train, rf_ip), **robot_args)
        ext_rp = Misc.enhance_param(Cvs.get_best_etopt(x_train_num, y_train, ext_ip), **robot_args)
        xgb_rp = Misc.enhance_param(Cvs.get_best_xgbopt(x_train_num, y_train, xgb_ip), **robot_args)

        # tiny_robot is handed the raw (non-numerized) frames together with
        # the tuned parameter sets.
        res = Automaton.tiny_robot(x_train, y_train, x_valid, rf_rp, ext_rp, xgb_rp, **robot_args)

        # Collapse the robot's result into a single prediction for scoring.
        y_pred_valid = Misc.stacking_res_to_one_pred(res)

        # Single-argument print(...) emits the same text under Python 2 and
        # Python 3, whereas the bare print statement is a SyntaxError on 3.
        print('Tiny Robot')
        print(metrics.roc_auc_score(y_valid, y_pred_valid))
        print(metrics.log_loss(y_valid, y_pred_valid))
 def test_missingValue(self):
     """Check that a ``b_is_nan`` column flags the missing entry of ``b``."""
     # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
     df = pd.DataFrame({"a": [1, 2, 3], "b": [1, np.nan, 4]})
     # Called for its effect on ``df``; the return value is unused.
     MissingValues.add_miss_val_indicator(df)
     self.assertTrue((df.b_is_nan.values == [0, 1, 0]).all())
 def test_fill_mean_object(self):
     """Check that ``fill_with_mean`` leaves the NaN in a mixed-type column."""
     # Column ``b`` mixes a string, a NaN and an int.
     # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
     df = pd.DataFrame({"a": [1, 2, 3], "b": ["1", np.nan, 4]})
     # Called for its effect on ``df``; the return value is unused.
     MissingValues.fill_with_mean(df)
     # The missing entry must still be NaN afterwards.
     self.assertTrue(np.isnan(df.b.values[1]).all())
 def test_fill_mean(self):
     """Check that ``fill_with_mean`` replaces the NaN in ``b`` with 2.5."""
     # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
     df = pd.DataFrame({"a": [1, 2, 3], "b": [1, np.nan, 4]})
     # Called for its effect on ``df``; the return value is unused.
     MissingValues.fill_with_mean(df)
     # 2.5 is the mean of the two present values, 1 and 4.
     self.assertTrue((df.b.values == [1, 2.5, 4]).all())
 def test_GivenMissingValueDic(self):
     """Check the indicator built from an explicit missing-value dictionary.

     With ``miss_dic={"a": 1}`` the expected ``a_is_nan`` column flags only
     the row where ``a`` equals 1.
     """
     # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
     df = pd.DataFrame({"a": [1, 2, 3], "b": [1, np.nan, 4]})
     # Called for its effect on ``df``; the return value is unused.
     MissingValues.add_miss_val_indicator_from_dic(df, miss_dic={"a": 1})
     self.assertTrue((df.a_is_nan.values == [1, 0, 0]).all())
Example #8
0
 def test_missingValue(self):
     """Verify the ``b_is_nan`` indicator marks only the row where ``b`` is missing."""
     # The np.NaN alias no longer exists in NumPy 2.0; np.nan works on every version.
     df = pd.DataFrame({'a': [1, 2, 3], 'b': [1, np.nan, 4]})
     # Return value is ignored; the assertion reads the new column from ``df``.
     MissingValues.add_miss_val_indicator(df)
     self.assertTrue((df.b_is_nan.values == [0, 1, 0]).all())
Example #9
0
 def test_fill_mean_object(self):
     """Verify a column mixing strings and numbers keeps its NaN after ``fill_with_mean``."""
     # The np.NaN alias no longer exists in NumPy 2.0; np.nan works on every version.
     df = pd.DataFrame({'a': [1, 2, 3], 'b': ['1', np.nan, 4]})
     # Return value is ignored; the assertion reads ``df`` afterwards.
     MissingValues.fill_with_mean(df)
     # Position 1 of ``b`` is expected to remain NaN.
     self.assertTrue(np.isnan(df.b.values[1]).all())
Example #10
0
 def test_fill_mean(self):
     """Verify the NaN in numeric column ``b`` becomes 2.5 after ``fill_with_mean``."""
     # The np.NaN alias no longer exists in NumPy 2.0; np.nan works on every version.
     df = pd.DataFrame({'a': [1, 2, 3], 'b': [1, np.nan, 4]})
     # Return value is ignored; the assertion reads ``df`` afterwards.
     MissingValues.fill_with_mean(df)
     # (1 + 4) / 2 == 2.5, the mean of the non-missing values.
     self.assertTrue((df.b.values == [1, 2.5, 4]).all())
Example #11
0
 def test_GivenMissingValueDic(self):
     """Verify the indicator column produced from a caller-supplied dictionary.

     ``miss_dic={'a': 1}`` is expected to yield an ``a_is_nan`` column that
     is 1 only on the row where ``a`` equals 1.
     """
     # The np.NaN alias no longer exists in NumPy 2.0; np.nan works on every version.
     df = pd.DataFrame({'a': [1, 2, 3], 'b': [1, np.nan, 4]})
     # Return value is ignored; the assertion reads the new column from ``df``.
     MissingValues.add_miss_val_indicator_from_dic(df, miss_dic={'a': 1})
     self.assertTrue((df.a_is_nan.values == [1, 0, 0]).all())