Beispiel #1
0
def test_regression_saved_without_feature_names():
    """A model saved to disk and reloaded (losing feature names in the
    process) must still assemble into the expected expression tree."""
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2,
                                     random_state=1,
                                     max_depth=1,
                                     base_score=base_score)
    utils.train_model_regression(estimator)

    # Round-trip through save_model/load_model; the reloaded booster has
    # no feature names, so the assembler must rely on feature indices.
    with utils.tmp_dir() as tmp_dirpath:
        filename = os.path.join(tmp_dirpath, "tmp.file")
        estimator.save_model(filename)
        estimator = xgboost.XGBRegressor(base_score=base_score)
        estimator.load_model(filename)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # base_score + tree1 + tree2; each tree is a single split (max_depth=1).
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(ast.FeatureRef(12),
                                     ast.NumVal(9.72500038),
                                     ast.CompOpType.GTE),
                        ast.NumVal(1.6614188), ast.NumVal(2.91697121))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(5), ast.NumVal(6.94099998),
                                 ast.CompOpType.GTE), ast.NumVal(3.33810854),
                    ast.NumVal(1.71813202))), ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_multi_class():
    """Random forest classifier output is the average of the per-tree
    class-probability vectors: (tree1 * 0.5) + (tree2 * 0.5)."""
    estimator = ensemble.RandomForestClassifier(n_estimators=2,
                                                random_state=13)

    estimator.fit([[1], [2], [3]], [1, -1, 1])

    assembler = assemblers.RandomForestModelAssembler(estimator)
    actual = assembler.assemble()

    # Each tree yields a probability vector over the two classes; the
    # forest scales each by 1/n_estimators (0.5) and sums them.
    expected = ast.BinVectorExpr(
        ast.BinVectorNumExpr(
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(0), ast.NumVal(1.5),
                                 ast.CompOpType.LTE),
                    ast.VectorVal([ast.NumVal(0.0),
                                   ast.NumVal(1.0)]),
                    ast.VectorVal([ast.NumVal(1.0),
                                   ast.NumVal(0.0)]))), ast.NumVal(0.5),
            ast.BinNumOpType.MUL),
        ast.BinVectorNumExpr(
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(0), ast.NumVal(2.5),
                                 ast.CompOpType.LTE),
                    ast.VectorVal([ast.NumVal(1.0),
                                   ast.NumVal(0.0)]),
                    ast.VectorVal([ast.NumVal(0.0),
                                   ast.NumVal(1.0)]))), ast.NumVal(0.5),
            ast.BinNumOpType.MUL), ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
Beispiel #3
0
def test_regression_random_forest():
    """LightGBM in random-forest mode: the tree sum is multiplied by
    1/n_estimators (0.5) rather than used directly."""
    estimator = lightgbm.LGBMRegressor(boosting_type="rf",
                                       n_estimators=2,
                                       random_state=1,
                                       max_depth=1,
                                       subsample=0.7,
                                       subsample_freq=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    # (0 + tree1 + tree2) * 0.5 — the trailing MUL is the RF averaging.
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.IfExpr(
                            ast.CompExpr(ast.FeatureRef(5),
                                         ast.NumVal(6.954000000000001),
                                         ast.CompOpType.GT),
                            ast.NumVal(37.24347877367631),
                            ast.NumVal(19.936999995530854))),
                    ast.BinNumOpType.ADD),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(ast.FeatureRef(5),
                                     ast.NumVal(6.971500000000001),
                                     ast.CompOpType.GT),
                        ast.NumVal(38.48600037864964),
                        ast.NumVal(20.183783757300255))),
                ast.BinNumOpType.ADD), ast.NumVal(0.5), ast.BinNumOpType.MUL))

    assert utils.cmp_exprs(actual, expected)
Beispiel #4
0
def test_regression():
    """Plain LightGBM regression assembles into base (0) plus the two
    boosted trees, each a single split (max_depth=1)."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2,
                                       random_state=1,
                                       max_depth=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(ast.FeatureRef(5), ast.NumVal(6.918),
                                     ast.CompOpType.GT),
                        ast.NumVal(24.011454621684155),
                        ast.NumVal(22.289277544391084))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(12), ast.NumVal(9.63),
                                 ast.CompOpType.GT),
                    ast.NumVal(-0.49461212269771115),
                    ast.NumVal(0.7174324413014594))), ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
Beispiel #5
0
def test_regression_random_forest():
    """XGBoost random-forest regressor (XGBRFRegressor) is dispatched by
    the assembler selector and assembles like a tree sum over base_score."""
    base_score = 0.6
    estimator = xgboost.XGBRFRegressor(n_estimators=2,
                                       random_state=1,
                                       max_depth=1,
                                       base_score=base_score)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # base_score (0.6) + tree1 + tree2.
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0.6),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(ast.FeatureRef(5), ast.NumVal(6.8375001),
                                     ast.CompOpType.GTE),
                        ast.NumVal(17.3671646), ast.NumVal(9.48354053))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(12), ast.NumVal(9.72500038),
                                 ast.CompOpType.GTE), ast.NumVal(8.31587982),
                    ast.NumVal(14.7766275))), ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
Beispiel #6
0
def test_multi_class():
    """Multiclass XGBoost produces a softmax over per-class margins.

    With one tree of depth 1 on a degenerate fit, every class gets the same
    margin, so all three softmax components are structurally identical.
    """
    estimator = xgboost.XGBClassifier(n_estimators=1,
                                      random_state=1,
                                      max_depth=1)
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # exp(0.5 + 0.0); to_reuse=True: the same expression object is shared
    # in both the numerator and the denominator sum below.
    exponent = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(ast.NumVal(0.5), ast.SubroutineExpr(ast.NumVal(0.0)),
                       ast.BinNumOpType.ADD)),
                           to_reuse=True)

    exponent_sum = ast.BinNumExpr(ast.BinNumExpr(exponent, exponent,
                                                 ast.BinNumOpType.ADD),
                                  exponent,
                                  ast.BinNumOpType.ADD,
                                  to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)

    expected = ast.VectorVal([softmax] * 3)

    assert utils.cmp_exprs(actual, expected)
Beispiel #7
0
def test_regression():
    """XGBoost regression assembles into base_score plus the two boosted
    trees, each a single split (max_depth=1)."""
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2,
                                     random_state=1,
                                     max_depth=1,
                                     base_score=base_score)
    utils.train_model_regression(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(ast.FeatureRef(12),
                                     ast.NumVal(9.72500038),
                                     ast.CompOpType.GTE),
                        ast.NumVal(1.6614188), ast.NumVal(2.91697121))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(5), ast.NumVal(6.94099998),
                                 ast.CompOpType.GTE), ast.NumVal(3.33810854),
                    ast.NumVal(1.71813202))), ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
Beispiel #8
0
    def _split_into_subroutines(self, trees_ast, trees_num_leaves):
        """Group tree expressions into subroutines of bounded size.

        Trees are accumulated in order; whenever adding the next tree
        would push the running leaf count past
        ``self._leaves_cutoff_threshold``, the accumulated group is summed
        and emitted as one ``SubroutineExpr``. Returns the list of
        subroutine expressions.
        """
        subroutines = []
        pending = []
        pending_leaves = 0

        def _flush():
            # Sum the pending trees and wrap them into one subroutine.
            summed = utils.apply_op_to_expressions(
                ast.BinNumOpType.ADD, *pending)
            subroutines.append(ast.SubroutineExpr(summed))

        for tree_expr, leaves in zip(trees_ast, trees_num_leaves):
            over_limit = pending_leaves + leaves > self._leaves_cutoff_threshold
            if pending and over_limit:
                # Current group is full — emit it and start a new one.
                _flush()
                pending = []
                pending_leaves = 0
            pending.append(tree_expr)
            pending_leaves += leaves

        if pending:
            _flush()
        return subroutines
Beispiel #9
0
def test_multi_class_best_ntree_limit():
    """Setting ``best_ntree_limit = 1`` on a 100-estimator multiclass
    model must limit assembly to a single tree per class (softmax over
    three one-split margins)."""
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100,
                                      random_state=1,
                                      max_depth=1,
                                      base_score=base_score)

    # Only the first tree of each class should be assembled.
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # Per-class exp(margin); to_reuse=True because each appears both as a
    # softmax numerator and inside the shared denominator sum.
    estimator_exp_class1 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(2), ast.NumVal(2.5999999),
                             ast.CompOpType.GTE), ast.NumVal(-0.0731707439),
                ast.NumVal(0.142857149)), ast.BinNumOpType.ADD)),
                                       to_reuse=True)

    estimator_exp_class2 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(2), ast.NumVal(2.5999999),
                             ast.CompOpType.GTE), ast.NumVal(0.0341463387),
                ast.NumVal(-0.0714285821)), ast.BinNumOpType.ADD)),
                                       to_reuse=True)

    estimator_exp_class3 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(2), ast.NumVal(4.85000038),
                             ast.CompOpType.GTE), ast.NumVal(0.129441619),
                ast.NumVal(-0.0681440532)), ast.BinNumOpType.ADD)),
                                       to_reuse=True)

    exp_sum = ast.BinNumExpr(ast.BinNumExpr(estimator_exp_class1,
                                            estimator_exp_class2,
                                            ast.BinNumOpType.ADD),
                             estimator_exp_class3,
                             ast.BinNumOpType.ADD,
                             to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)
    ])

    assert utils.cmp_exprs(actual, expected)
Beispiel #10
0
def test_multi_class():
    """Multiclass logistic regression assembles into one linear
    subroutine per class: intercept + x0*coef0 + x1*coef1."""
    estimator = linear_model.LogisticRegression()
    estimator.coef_ = np.array([[1, 2], [3, 4], [5, 6]])
    estimator.intercept_ = np.array([7, 8, 9])

    assembler = assemblers.LinearModelAssembler(estimator)
    actual = assembler.assemble()

    def class_expr(intercept, coef0, coef1):
        # (intercept + x0 * coef0) + x1 * coef1, wrapped as a subroutine.
        return ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(intercept),
                    ast.BinNumExpr(
                        ast.FeatureRef(0),
                        ast.NumVal(coef0),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(
                    ast.FeatureRef(1),
                    ast.NumVal(coef1),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD))

    expected = ast.VectorVal([
        class_expr(7, 1, 2),
        class_expr(8, 3, 4),
        class_expr(9, 5, 6),
    ])

    assert utils.cmp_exprs(actual, expected)
Beispiel #11
0
def test_leaves_cutoff_threshold():
    """With ``leaves_cutoff_threshold=1`` every LightGBM tree must be
    wrapped in its own (doubly nested) subroutine inside the sigmoid."""
    estimator = lightgbm.LGBMClassifier(n_estimators=2, random_state=1,
                                        max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator,
                                                  leaves_cutoff_threshold=1)
    actual = assembler.assemble()

    # sigmoid = 1 / (1 + exp(0 - raw_score)); shared between both output
    # components via to_reuse=True.
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(0),
                                ast.SubroutineExpr(
                                    ast.SubroutineExpr(
                                        ast.IfExpr(
                                            ast.CompExpr(
                                                ast.FeatureRef(23),
                                                ast.NumVal(868.2000000000002),
                                                ast.CompOpType.GT),
                                            ast.NumVal(0.25986931215073095),
                                            ast.NumVal(0.6237178414050242)))),
                                ast.BinNumOpType.ADD),
                            ast.SubroutineExpr(
                                ast.SubroutineExpr(
                                    ast.IfExpr(
                                        ast.CompExpr(
                                            ast.FeatureRef(7),
                                            ast.NumVal(0.05142),
                                            ast.CompOpType.GT),
                                        ast.NumVal(-0.1909605544006228),
                                        ast.NumVal(0.1293965108676673)))),
                            ast.BinNumOpType.ADD)),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    # Binary classification: [P(class 0), P(class 1)] = [1 - s, s].
    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)
Beispiel #12
0
def test_leaves_cutoff_threshold():
    """With ``leaves_cutoff_threshold=1`` every XGBoost tree must be
    wrapped in its own (doubly nested) subroutine inside the sigmoid."""
    estimator = xgboost.XGBClassifier(n_estimators=2,
                                      random_state=1,
                                      max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator,
                                                 leaves_cutoff_threshold=1)
    actual = assembler.assemble()

    # sigmoid = 1 / (1 + exp(0 - raw_score)); shared between both output
    # components via to_reuse=True.
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(-0.0),
                                ast.SubroutineExpr(
                                    ast.SubroutineExpr(
                                        ast.IfExpr(
                                            ast.CompExpr(
                                                ast.FeatureRef(20),
                                                ast.NumVal(16.7950001),
                                                ast.CompOpType.GTE),
                                            ast.NumVal(-0.173057005),
                                            ast.NumVal(0.163440868)))),
                                ast.BinNumOpType.ADD),
                            ast.SubroutineExpr(
                                ast.SubroutineExpr(
                                    ast.IfExpr(
                                        ast.CompExpr(ast.FeatureRef(27),
                                                     ast.NumVal(0.142349988),
                                                     ast.CompOpType.GTE),
                                        ast.NumVal(-0.161026895),
                                        ast.NumVal(0.149405137)))),
                            ast.BinNumOpType.ADD)), ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    # Binary classification: [P(class 0), P(class 1)] = [1 - s, s].
    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB), sigmoid
    ])

    assert utils.cmp_exprs(actual, expected)
Beispiel #13
0
def test_leaves_cutoff_threshold():
    """LightGBM binary classifier with ``leaves_cutoff_threshold=1``:
    trees end up in individual subroutines inside the sigmoid."""
    estimator = lightgbm.LGBMClassifier(n_estimators=2,
                                        random_state=1,
                                        max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator,
                                                  leaves_cutoff_threshold=1)
    actual = assembler.assemble()

    # sigmoid = 1 / (1 + exp(0 - raw_score)); shared between both output
    # components via to_reuse=True.
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(0),
                                ast.SubroutineExpr(
                                    ast.IfExpr(
                                        ast.CompExpr(
                                            ast.FeatureRef(23),
                                            ast.NumVal(868.2000000000002),
                                            ast.CompOpType.GT),
                                        ast.NumVal(0.2762557140263451),
                                        ast.NumVal(0.6399134166614473))),
                                ast.BinNumOpType.ADD),
                            ast.SubroutineExpr(
                                ast.IfExpr(
                                    ast.CompExpr(
                                        ast.FeatureRef(27),
                                        ast.NumVal(0.14205000000000004),
                                        ast.CompOpType.GT),
                                    ast.NumVal(-0.2139321843285849),
                                    ast.NumVal(0.1151466338793227))),
                            ast.BinNumOpType.ADD)), ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    # Binary classification: [P(class 0), P(class 1)] = [1 - s, s].
    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB), sigmoid
    ])

    assert utils.cmp_exprs(actual, expected)
Beispiel #14
0
def test_multi_output():
    """Vector-valued expressions must translate into C# array locals
    assigned in each branch of the conditional."""
    expr = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(
                ast.NumVal(1),
                ast.NumVal(1),
                ast.CompOpType.EQ),
            ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
            ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])))

    # Expected C# output, compared verbatim by assert_code_equal.
    expected_code = """
namespace ML {
    public static class Model {
        public static double[] Score(double[] input) {
            double[] var0;
            if ((1) == (1)) {
                var0 = new double[2] {1, 2};
            } else {
                var0 = new double[2] {3, 4};
            }
            return var0;
        }
    }
}
"""

    interpreter = CSharpInterpreter()
    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
Beispiel #15
0
def test_regression():
    """LightGBM regression without subroutine wrapping of individual
    trees: base (0) plus two one-split trees summed directly."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.train_model_regression(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(5),
                        ast.NumVal(6.8455),
                        ast.CompOpType.GT),
                    ast.NumVal(24.007392728914056),
                    ast.NumVal(22.35695742616179)),
                ast.BinNumOpType.ADD),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(12),
                    ast.NumVal(9.63),
                    ast.CompOpType.GT),
                ast.NumVal(-0.4903836928981587),
                ast.NumVal(0.7222498915097475)),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
        # NOTE(review): nested helper — relies on `coef` and
        # `TreeModelAssembler` from the enclosing scope (not visible in
        # this excerpt); confirm against the surrounding function.
        def assemble_tree_expr(t):
            """Assemble tree `t` and scale its subroutine by `coef`."""
            assembler = TreeModelAssembler(t)

            return utils.apply_bin_op(
                ast.SubroutineExpr(assembler.assemble()),
                ast.NumVal(coef),
                ast.BinNumOpType.MUL)
Beispiel #17
0
def test_multi_output():
    """Vector-valued expressions must translate into Visual Basic array
    locals assigned in each branch of the conditional."""
    expr = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.NumVal(1), ast.NumVal(1), ast.CompOpType.EQ),
            ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
            ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])))

    # Expected VB output, compared verbatim by assert_code_equal.
    # NOTE(review): `If (1) == (1) Then` uses `==`, which is not valid VB
    # syntax (`=` is the equality operator) — this mirrors what the
    # interpreter currently emits; confirm against VisualBasicInterpreter.
    expected_code = """
Module Model
Function score(ByRef input_vector() As Double) As Double()
    Dim var0() As Double
    If (1) == (1) Then
        Dim var1(1) As Double
        var1(0) = 1
        var1(1) = 2
        var0 = var1
    Else
        Dim var2(1) As Double
        var2(0) = 3
        var2(1) = 4
        var0 = var2
    End If
    score = var0
End Function
End Module
"""

    interpreter = VisualBasicInterpreter()
    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
Beispiel #18
0
def test_regression_best_ntree_limit():
    """Setting ``best_ntree_limit = 2`` on a 3-estimator regressor must
    limit assembly to the first two trees."""
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=3, random_state=1,
                                     max_depth=1, base_score=base_score)

    # The third tree must be ignored by the assembler.
    estimator.best_ntree_limit = 2

    utils.train_model_regression(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # base_score + tree1 + tree2 (tree3 excluded by the limit).
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(12),
                        ast.NumVal(9.72500038),
                        ast.CompOpType.GTE),
                    ast.NumVal(1.67318344),
                    ast.NumVal(2.92757893)),
                ast.BinNumOpType.ADD),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.94099998),
                    ast.CompOpType.GTE),
                ast.NumVal(3.3400948),
                ast.NumVal(1.72118247)),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
Beispiel #19
0
    def _assemble_single_output(self, trees, base_score=0):
        """Sum `base_score` and all (optionally limited) tree expressions
        into a single subroutine expression."""
        limit = self._tree_limit
        selected = trees[:limit] if limit else trees

        tree_exprs = [self._assemble_tree(tree) for tree in selected]
        total = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD, ast.NumVal(base_score), *tree_exprs)
        return ast.SubroutineExpr(total)
Beispiel #20
0
def test_single_condition():
    """Two-tree random forest regressor: one degenerate tree (constant
    leaf) and one single-split tree, averaged with weight 0.5."""
    estimator = ensemble.RandomForestRegressor(n_estimators=2, random_state=1)

    estimator.fit([[1], [2]], [1, 2])

    assembler = assemblers.RandomForestModelAssembler(estimator)
    actual = assembler.assemble()

    # (tree1 + tree2) * 0.5 — the MUL is the forest averaging.
    expected = ast.BinNumExpr(
        ast.BinNumExpr(
            ast.SubroutineExpr(ast.NumVal(1.0)),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(0),
                                 ast.NumVal(1.5), ast.CompOpType.LTE),
                    ast.NumVal(1.0), ast.NumVal(2.0))), ast.BinNumOpType.ADD),
        ast.NumVal(0.5), ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected)
Beispiel #21
0
 # NOTE(review): nested helper — `negative_gamma_ast` comes from the
 # enclosing scope (not visible in this excerpt); confirm against the
 # surrounding function.
 def kernel_ast(sup_vec_value):
     """RBF kernel AST for one support vector: exp(-gamma * (sv - x0)^2)."""
     return ast.SubroutineExpr(
         ast.ExpExpr(
             ast.BinNumExpr(
                 negative_gamma_ast,
                 ast.PowExpr(
                     ast.BinNumExpr(ast.NumVal(sup_vec_value),
                                    ast.FeatureRef(0),
                                    ast.BinNumOpType.SUB), ast.NumVal(2)),
                 ast.BinNumOpType.MUL)))
Beispiel #22
0
    def _assemble_single_output(self, trees, base_score=0):
        """Sum base score and all tree expressions, apply the final
        transform, and wrap the result into a subroutine."""
        if self._tree_limit:
            trees = trees[:self._tree_limit]

        tree_exprs = [ast.SubroutineExpr(self._assemble_tree(t))
                      for t in trees]

        # Large models are split into several subroutines to stay within
        # Java method size limits, see
        # https://github.com/BayesWitnesses/m2cgen/issues/103.
        leaf_counts = [self._count_leaves(t) for t in trees]
        summands = (self._split_into_subroutines(tree_exprs, leaf_counts)
                    if sum(leaf_counts) > self._leaves_cutoff_threshold
                    else tree_exprs)

        summed = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD, ast.NumVal(base_score), *summands)

        return ast.SubroutineExpr(self._final_transform(summed))
Beispiel #23
0
 # NOTE(review): nested helper — `estimator` comes from the enclosing
 # scope (not visible in this excerpt); confirm against the surrounding
 # function.
 def kernel_ast(sup_vec_value):
     """Polynomial kernel AST: (gamma * (sv * x0) + 0.0) ** degree."""
     return ast.SubroutineExpr(
         ast.PowExpr(
             ast.BinNumExpr(
                 ast.BinNumExpr(
                     ast.NumVal(estimator.gamma),
                     ast.BinNumExpr(ast.NumVal(sup_vec_value),
                                    ast.FeatureRef(0),
                                    ast.BinNumOpType.MUL),
                     ast.BinNumOpType.MUL), ast.NumVal(0.0),
                 ast.BinNumOpType.ADD), ast.NumVal(estimator.degree)))
Beispiel #24
0
    def _build_ast(self):
        """Build the AST for a linear model.

        Single-output models return the bare linear expression; multiclass
        models return a vector with one subroutine per class.
        """
        coef = utils.to_2d_array(self._get_coef())
        intercept = utils.to_1d_array(self._get_intercept())

        n_outputs = coef.shape[0]
        if n_outputs == 1:
            return _linear_to_ast(coef[0], intercept[0])

        return ast.VectorVal([
            ast.SubroutineExpr(_linear_to_ast(coef[i], intercept[i]))
            for i in range(n_outputs)])
Beispiel #25
0
def test_multi_class_best_ntree_limit():
    """Setting ``best_ntree_limit = 1`` on a 100-estimator multiclass
    model must limit assembly to a single (subroutine-wrapped) tree per
    class, combined by softmax."""
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100,
                                      random_state=1,
                                      max_depth=1,
                                      base_score=base_score)

    # Only the first tree of each class should be assembled.
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # Per-class exp(margin); to_reuse=True because each appears both as a
    # softmax numerator and inside the shared denominator sum.
    estimator_exp_class1 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(2),
                                 ast.NumVal(2.45000005), ast.CompOpType.GTE),
                    ast.NumVal(-0.0733167157), ast.NumVal(0.143414631))),
            ast.BinNumOpType.ADD)),
                                       to_reuse=True)

    estimator_exp_class2 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(2), ast.NumVal(2.45000005),
                                 ast.CompOpType.GTE), ast.NumVal(0.0344139598),
                    ast.NumVal(-0.0717073306))), ast.BinNumOpType.ADD)),
                                       to_reuse=True)

    estimator_exp_class3 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(3), ast.NumVal(1.6500001),
                                 ast.CompOpType.GTE), ast.NumVal(0.13432835),
                    ast.NumVal(-0.0644444525))), ast.BinNumOpType.ADD)),
                                       to_reuse=True)

    exp_sum = ast.BinNumExpr(ast.BinNumExpr(estimator_exp_class1,
                                            estimator_exp_class2,
                                            ast.BinNumOpType.ADD),
                             estimator_exp_class3,
                             ast.BinNumOpType.ADD,
                             to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)
    ])

    assert utils.cmp_exprs(actual, expected)
Beispiel #26
0
    def _assemble_single_output(self,
                                estimator_params,
                                base_score=0,
                                split_idx=0):
        """Assemble one output: sum base score and estimator expressions,
        apply the final transform, and wrap into a subroutine."""
        estimator_exprs = self._assemble_estimators(estimator_params,
                                                    split_idx)

        summed = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD, ast.NumVal(base_score), *estimator_exprs)

        return ast.SubroutineExpr(self._final_transform(summed))
Beispiel #27
0
    def _assemble_estimators(self, trees, split_idx):
        """Assemble each (optionally limited) tree into a subroutine,
        regrouping into larger subroutines when the total leaf count
        exceeds the cutoff threshold."""
        if self._tree_limit:
            trees = trees[:self._tree_limit]

        tree_exprs = [ast.SubroutineExpr(self._assemble_tree(t))
                      for t in trees]
        leaf_counts = [self._count_leaves(t) for t in trees]

        # Large models get split into several subroutines to stay within
        # Java method size limits, see
        # https://github.com/BayesWitnesses/m2cgen/issues/103.
        if sum(leaf_counts) <= self._leaves_cutoff_threshold:
            return tree_exprs
        return self._split_into_subroutines(tree_exprs, leaf_counts)
Beispiel #28
0
def _rbf_kernel_ast(estimator, sup_vec_value, to_reuse=False):
    """Build the AST of an RBF kernel term: exp(-gamma * (sv - x0) ** 2)."""
    # -gamma is expressed as (0 - gamma) and marked reusable.
    neg_gamma = ast.BinNumExpr(ast.NumVal(0),
                               ast.NumVal(estimator.gamma),
                               ast.BinNumOpType.SUB,
                               to_reuse=True)

    diff = ast.BinNumExpr(ast.NumVal(sup_vec_value),
                          ast.FeatureRef(0),
                          ast.BinNumOpType.SUB)
    squared_diff = ast.PowExpr(diff, ast.NumVal(2))

    kernel = ast.ExpExpr(
        ast.BinNumExpr(neg_gamma, squared_diff, ast.BinNumOpType.MUL))

    return ast.SubroutineExpr(kernel, to_reuse=to_reuse)
Beispiel #29
0
def test_linear_model():
    """gblinear booster must assemble into a weighted sum of features."""
    estimator = xgboost.XGBRegressor(n_estimators=2,
                                     random_state=1,
                                     feature_selector="shuffle",
                                     booster="gblinear")
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Learned gblinear coefficients, one per input feature (index order).
    coefficients = [
        -0.00999326, 0.0520094, 0.10447, 0.17387, 0.691745, 0.296357,
        0.0288206, 0.417822, 0.0551116, 0.00242449, 0.109585, 0.00744202,
        0.0731089,
    ]
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(index), ast.NumVal(coef),
                       ast.BinNumOpType.MUL)
        for index, coef in enumerate(coefficients)
    ]

    # base_score (0.5) + bias (3.13109) + sum of feature * weight terms.
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            assemblers.utils.apply_op_to_expressions(ast.BinNumOpType.ADD,
                                                     ast.NumVal(3.13109),
                                                     *feature_weight_mul),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
Beispiel #30
0
def test_binary_classification():
    """Binary XGBClassifier must assemble into [1 - sigmoid, sigmoid]."""
    estimator = xgboost.XGBClassifier(n_estimators=2, random_state=1,
                                      max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # First tree stump, summed with the implicit -0.0 base margin.
    first_tree = ast.BinNumExpr(
        ast.NumVal(-0.0),
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(20),
                         ast.NumVal(16.7950001),
                         ast.CompOpType.GTE),
            ast.NumVal(-0.17062147),
            ast.NumVal(0.1638484)),
        ast.BinNumOpType.ADD)

    # Second tree stump.
    second_tree = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(27),
                     ast.NumVal(0.142349988),
                     ast.CompOpType.GTE),
        ast.NumVal(-0.16087772),
        ast.NumVal(0.149866998))

    # Raw margin: sum of both trees, wrapped as a subroutine.
    raw_score = ast.SubroutineExpr(
        ast.BinNumExpr(first_tree, second_tree, ast.BinNumOpType.ADD))

    # sigmoid(raw_score) = 1 / (1 + exp(0 - raw_score)), shared via to_reuse.
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(ast.NumVal(0),
                               raw_score,
                               ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)