Esempio n. 1
0
def test_regression():
    """Check the AST assembled for a small XGBoost regressor.

    A regressor with two depth-one trees is trained and assembled, and the
    result is compared with a hand-written expression: one subroutine
    holding ``(base_score + first_tree) + second_tree``.
    """
    base_score = 0.6
    model = xgboost.XGBRegressor(base_score=base_score, max_depth=1,
                                 n_estimators=2, random_state=1)
    utils.train_model_regression(model)

    actual = assemblers.XGBoostModelAssembler(model).assemble()

    add = ast.BinNumOpType.ADD
    gte = ast.CompOpType.GTE

    # Each tree is a single comparison selecting one of two constants.
    first_tree = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(12), ast.NumVal(9.72500038), gte),
        ast.NumVal(1.67318344),
        ast.NumVal(2.92757893))
    second_tree = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(5), ast.NumVal(6.94099998), gte),
        ast.NumVal(3.3400948),
        ast.NumVal(1.72118247))

    base_plus_first = ast.BinNumExpr(ast.NumVal(base_score), first_tree, add)
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(base_plus_first, second_tree, add))

    assert utils.cmp_exprs(actual, expected)
Esempio n. 2
0
def test_multi_class():
    """Check the softmax AST assembled for a three-class classifier.

    The classifier is fitted on three one-feature samples with three
    distinct labels. The expected output is a vector holding the same
    softmax expression three times, built from a reusable ``exp(0.5 + 0.0)``
    term divided by the sum of three such terms.
    """
    model = xgboost.XGBClassifier(max_depth=1, n_estimators=1,
                                  random_state=1)
    samples = np.array([[1], [2], [3]])
    labels = np.array([1, 2, 3])
    model.fit(samples, labels)

    actual = assemblers.XGBoostModelAssembler(model).assemble()

    add = ast.BinNumOpType.ADD

    # One shared exponent node is used for every class.
    raw_score = ast.BinNumExpr(ast.NumVal(0.5), ast.NumVal(0.0), add)
    exponent = ast.ExpExpr(ast.SubroutineExpr(raw_score), to_reuse=True)

    first_two = ast.BinNumExpr(exponent, exponent, add)
    exponent_sum = ast.BinNumExpr(first_two, exponent, add, to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)

    # The very same softmax node appears in all three vector slots.
    expected = ast.VectorVal([softmax, softmax, softmax])

    assert utils.cmp_exprs(actual, expected)
Esempio n. 3
0
def test_regression_saved_without_feature_names():
    """Check assembling a regressor that was saved and then reloaded.

    A two-tree, depth-one regressor is trained, written to a temporary
    file with ``save_model`` and read back into a fresh ``XGBRegressor``
    with ``load_model``. The reloaded estimator is assembled and compared
    with the expected AST, in which each tree sits in its own subroutine.
    """
    base_score = 0.6
    trained = xgboost.XGBRegressor(base_score=base_score, max_depth=1,
                                   n_estimators=2, random_state=1)
    utils.train_model_regression(trained)

    # Round-trip the model through a file inside a temporary directory.
    with utils.tmp_dir() as tmp_dirpath:
        model_path = os.path.join(tmp_dirpath, "tmp.file")
        trained.save_model(model_path)
        restored = xgboost.XGBRegressor(base_score=base_score)
        restored.load_model(model_path)

    actual = assemblers.XGBoostModelAssembler(restored).assemble()

    add = ast.BinNumOpType.ADD
    gte = ast.CompOpType.GTE

    first_tree = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(12), ast.NumVal(9.72500038), gte),
            ast.NumVal(1.6614188),
            ast.NumVal(2.91697121)))
    second_tree = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(5), ast.NumVal(6.94099998), gte),
            ast.NumVal(3.33810854),
            ast.NumVal(1.71813202)))

    base_plus_first = ast.BinNumExpr(ast.NumVal(base_score), first_tree, add)
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(base_plus_first, second_tree, add))

    assert utils.cmp_exprs(actual, expected)
Esempio n. 4
0
def test_regression_best_ntree_limit():
    """Check the assembled AST when ``best_ntree_limit`` is set.

    The regressor is configured with three estimators while
    ``best_ntree_limit`` is set to 2; the expected AST contains only two
    trees, each wrapped in its own subroutine, added to the base score.
    """
    base_score = 0.6
    model = xgboost.XGBRegressor(base_score=base_score, max_depth=1,
                                 n_estimators=3, random_state=1)

    # The limit is assigned on the estimator before it is trained.
    model.best_ntree_limit = 2

    utils.train_model_regression(model)

    actual = assemblers.XGBoostModelAssembler(model).assemble()

    add = ast.BinNumOpType.ADD
    gte = ast.CompOpType.GTE

    first_tree = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(12), ast.NumVal(9.72500038), gte),
            ast.NumVal(1.6614188),
            ast.NumVal(2.91697121)))
    second_tree = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(5), ast.NumVal(6.94099998), gte),
            ast.NumVal(3.33810854),
            ast.NumVal(1.71813202)))

    base_plus_first = ast.BinNumExpr(ast.NumVal(base_score), first_tree, add)
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(base_plus_first, second_tree, add))

    assert utils.cmp_exprs(actual, expected)
Esempio n. 5
0
def test_multi_class_best_ntree_limit():
    """Check the multi-class AST when ``best_ntree_limit`` is 1.

    The classifier is configured with 100 estimators while
    ``best_ntree_limit`` is set to 1; the expected AST holds one tree per
    class. Every class score is ``exp(0.5 + tree)`` and the output vector
    divides each of those by their sum.
    """
    base_score = 0.5
    model = xgboost.XGBClassifier(base_score=base_score, max_depth=1,
                                  n_estimators=100, random_state=1)

    # The limit is assigned on the estimator before it is trained.
    model.best_ntree_limit = 1

    utils.train_model_classification(model)

    actual = assemblers.XGBoostModelAssembler(model).assemble()

    add = ast.BinNumOpType.ADD
    div = ast.BinNumOpType.DIV

    def class_exponent(feature_idx, threshold, when_true, when_false):
        # exp(0.5 + tree), where the tree is one split inside a subroutine.
        tree = ast.SubroutineExpr(
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(feature_idx),
                             ast.NumVal(threshold),
                             ast.CompOpType.GTE),
                ast.NumVal(when_true),
                ast.NumVal(when_false)))
        score = ast.BinNumExpr(ast.NumVal(0.5), tree, add)
        return ast.ExpExpr(ast.SubroutineExpr(score), to_reuse=True)

    exp_class1 = class_exponent(2, 2.45000005, -0.0733167157, 0.143414631)
    exp_class2 = class_exponent(2, 2.45000005, 0.0344139598, -0.0717073306)
    exp_class3 = class_exponent(3, 1.6500001, 0.13432835, -0.0644444525)

    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(exp_class1, exp_class2, add),
        exp_class3,
        add,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(class_exp, exp_sum, div)
        for class_exp in (exp_class1, exp_class2, exp_class3)
    ])

    assert utils.cmp_exprs(actual, expected)
Esempio n. 6
0
def test_multi_class_best_ntree_limit():
    """Check the multi-class AST when ``best_ntree_limit`` is 1.

    The classifier is configured with 100 estimators while
    ``best_ntree_limit`` is set to 1; the expected AST holds one tree per
    class. Every class score is ``exp(0.5 + tree)`` and the output vector
    divides each of those by their sum. In this variant the trees are not
    wrapped in their own subroutines.
    """
    base_score = 0.5
    model = xgboost.XGBClassifier(base_score=base_score, max_depth=1,
                                  n_estimators=100, random_state=1)

    # The limit is assigned on the estimator before it is trained.
    model.best_ntree_limit = 1

    utils.train_model_classification(model)

    actual = assemblers.XGBoostModelAssembler(model).assemble()

    add = ast.BinNumOpType.ADD
    div = ast.BinNumOpType.DIV

    def class_exponent(feature_idx, threshold, when_true, when_false):
        # exp(0.5 + tree), where the tree is a single bare split.
        tree = ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(feature_idx),
                         ast.NumVal(threshold),
                         ast.CompOpType.GTE),
            ast.NumVal(when_true),
            ast.NumVal(when_false))
        score = ast.BinNumExpr(ast.NumVal(0.5), tree, add)
        return ast.ExpExpr(ast.SubroutineExpr(score), to_reuse=True)

    exp_class1 = class_exponent(2, 2.5999999, -0.0731707439, 0.142857149)
    exp_class2 = class_exponent(2, 2.5999999, 0.0341463387, -0.0714285821)
    exp_class3 = class_exponent(2, 4.85000038, 0.129441619, -0.0681440532)

    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(exp_class1, exp_class2, add),
        exp_class3,
        add,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(class_exp, exp_sum, div)
        for class_exp in (exp_class1, exp_class2, exp_class3)
    ])

    assert utils.cmp_exprs(actual, expected)
Esempio n. 7
0
def test_leaves_cutoff_threshold():
    """Check the binary-classifier AST with ``leaves_cutoff_threshold=1``.

    A two-tree, depth-one classifier is trained and assembled with the
    cutoff threshold set to 1. In the expected AST every tree is wrapped in
    two nested subroutines, and the output vector is
    ``[1 - sigmoid, sigmoid]`` with ``sigmoid = 1 / (1 + exp(0 - margin))``.
    """
    model = xgboost.XGBClassifier(max_depth=1, n_estimators=2,
                                  random_state=1)
    utils.train_model_classification_binary(model)

    actual = assemblers.XGBoostModelAssembler(
        model, leaves_cutoff_threshold=1).assemble()

    add = ast.BinNumOpType.ADD
    sub = ast.BinNumOpType.SUB
    gte = ast.CompOpType.GTE

    # Each tree is a single split wrapped in two subroutine layers.
    first_tree = ast.SubroutineExpr(
        ast.SubroutineExpr(
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(20),
                             ast.NumVal(16.7950001),
                             gte),
                ast.NumVal(-0.173057005),
                ast.NumVal(0.163440868))))
    second_tree = ast.SubroutineExpr(
        ast.SubroutineExpr(
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(27),
                             ast.NumVal(0.142349988),
                             gte),
                ast.NumVal(-0.161026895),
                ast.NumVal(0.149405137))))

    # margin = (-0.0 + first_tree) + second_tree, inside one subroutine.
    margin = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(ast.NumVal(-0.0), first_tree, add),
            second_tree,
            add))

    negated_exp = ast.ExpExpr(ast.BinNumExpr(ast.NumVal(0), margin, sub))
    denominator = ast.BinNumExpr(ast.NumVal(1), negated_exp, add)
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        denominator,
        ast.BinNumOpType.DIV,
        to_reuse=True)

    # The same sigmoid node is shared by both vector entries.
    complement = ast.BinNumExpr(ast.NumVal(1), sigmoid, sub)
    expected = ast.VectorVal([complement, sigmoid])

    assert utils.cmp_exprs(actual, expected)
Esempio n. 8
0
def test_binary_classification():
    """Check the AST assembled for a binary XGBoost classifier.

    A two-tree, depth-one classifier is trained and assembled. The expected
    output vector is ``[1 - sigmoid, sigmoid]`` with
    ``sigmoid = 1 / (1 + exp(0 - margin))`` and the margin being the sum of
    ``-0.0`` and both trees inside one subroutine.
    """
    model = xgboost.XGBClassifier(max_depth=1, n_estimators=2,
                                  random_state=1)
    utils.train_model_classification_binary(model)

    actual = assemblers.XGBoostModelAssembler(model).assemble()

    add = ast.BinNumOpType.ADD
    sub = ast.BinNumOpType.SUB
    gte = ast.CompOpType.GTE

    # Each tree is a single comparison selecting one of two constants.
    first_tree = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(20), ast.NumVal(16.7950001), gte),
        ast.NumVal(-0.17062147),
        ast.NumVal(0.1638484))
    second_tree = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(27), ast.NumVal(0.142349988), gte),
        ast.NumVal(-0.16087772),
        ast.NumVal(0.149866998))

    # margin = (-0.0 + first_tree) + second_tree, inside one subroutine.
    margin = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(ast.NumVal(-0.0), first_tree, add),
            second_tree,
            add))

    negated_exp = ast.ExpExpr(ast.BinNumExpr(ast.NumVal(0), margin, sub))
    denominator = ast.BinNumExpr(ast.NumVal(1), negated_exp, add)
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        denominator,
        ast.BinNumOpType.DIV,
        to_reuse=True)

    # The same sigmoid node is shared by both vector entries.
    complement = ast.BinNumExpr(ast.NumVal(1), sigmoid, sub)
    expected = ast.VectorVal([complement, sigmoid])

    assert utils.cmp_exprs(actual, expected)