Ejemplo n.º 1
0
    def _split_into_subroutines(self, trees_ast, trees_num_leaves):
        """Group tree expressions into subroutines.

        Each subroutine accumulates trees until adding the next tree would
        push its total leaf count over ``self._leaves_cutoff_threshold``.
        The first tree of a group is always kept, even if it alone exceeds
        the threshold, so every tree ends up in exactly one subroutine.
        """
        def finalize(group):
            # Sum the group's trees and wrap them as one subroutine.
            summed = utils.apply_op_to_expressions(
                ast.BinNumOpType.ADD, *group)
            return ast.SubroutineExpr(summed)

        subroutines = []
        current_group = []
        current_leaves = 0
        for tree_expr, leaves in zip(trees_ast, trees_num_leaves):
            over_limit = (
                current_leaves + leaves > self._leaves_cutoff_threshold)
            if current_group and over_limit:
                # Current subroutine is full — emit it and start fresh.
                subroutines.append(finalize(current_group))
                current_group = []
                current_leaves = 0
            current_group.append(tree_expr)
            current_leaves += leaves

        if current_group:
            subroutines.append(finalize(current_group))
        return subroutines
Ejemplo n.º 2
0
    def _assemble_multi_class_output(self):
        """Assemble one-vs-one decision expressions for a multi-class SVM.

        Returns an ``ast.VectorVal`` with one decision expression per
        (i, j) class pair, each being intercept + sum of kernel * dual-coef
        products over both classes' support vectors.
        """
        support_vectors = self.model.support_vectors_
        coef = self.model.dual_coef_
        intercept = self.model.intercept_

        n_support = self.model.n_support_
        n_support_len = len(n_support)

        kernel_exprs = self._apply_kernel(support_vectors, to_reuse=True)

        # Contiguous [start, end) index ranges of each class's support
        # vectors. Computed with a running total instead of the O(n^2)
        # repeated sum(n_support[:i]).
        support_ranges = []
        range_start = 0
        for class_size in n_support:
            support_ranges.append((range_start, range_start + class_size))
            range_start += class_size

        # One-vs-one decisions. For the (i, j) pair, dual_coef_ row
        # indexing follows scikit-learn's layout: row i holds the
        # coefficients applied to class j's support vectors, row j-1
        # those applied to class i's.
        decisions = []
        for i in range(n_support_len):
            for j in range(i + 1, n_support_len):
                kernel_weight_mul_ops = [
                    utils.mul(kernel_exprs[k], ast.NumVal(coef[i][k]))
                    for k in range(*support_ranges[j])
                ]
                kernel_weight_mul_ops.extend([
                    utils.mul(kernel_exprs[k], ast.NumVal(coef[j - 1][k]))
                    for k in range(*support_ranges[i])
                ])
                # len(decisions) is the flat index of the current pair,
                # matching the ordering of model.intercept_.
                decision = utils.apply_op_to_expressions(
                    ast.BinNumOpType.ADD,
                    ast.NumVal(intercept[len(decisions)]),
                    *kernel_weight_mul_ops)
                decisions.append(decision)

        return ast.VectorVal(decisions)
Ejemplo n.º 3
0
def _linear_to_ast(coef, intercept):
    """Build an expression computing intercept + sum(coef[i] * feature[i])."""
    weighted_features = [
        utils.mul(ast.FeatureRef(feature_idx), ast.NumVal(weight))
        for feature_idx, weight in enumerate(coef)
    ]
    return utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(intercept),
        *weighted_features)
Ejemplo n.º 4
0
    def _assemble_single_output(self, trees, base_score=0):
        """Sum assembled tree expressions on top of *base_score* and wrap
        the result in a subroutine expression.
        """
        limit = self._tree_limit
        if limit:
            # Honor a configured cap on the number of trees used.
            trees = trees[:limit]

        tree_exprs = []
        for tree in trees:
            tree_exprs.append(self._assemble_tree(tree))

        summed = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD,
            ast.NumVal(base_score),
            *tree_exprs)
        return ast.SubroutineExpr(summed)
Ejemplo n.º 5
0
def softmax(exprs):
    """Build expressions computing softmax over *exprs*.

    Each exp(e) node and the shared denominator are marked ``to_reuse``
    so generated code evaluates them only once.
    """
    exponents = []
    for expr in exprs:
        exponents.append(ast.ExpExpr(expr, to_reuse=True))

    denominator = utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD, *exponents, to_reuse=True)

    result = []
    for numerator in exponents:
        result.append(
            ast.BinNumExpr(numerator, denominator, ast.BinNumOpType.DIV))
    return result
Ejemplo n.º 6
0
 def _rbf_kernel(self, support_vector):
     """RBF kernel: exp(-gamma * ||support_vector - x||^2) as an AST."""
     squared_diffs = []
     for idx, sv_elem in enumerate(support_vector):
         diff = utils.sub(ast.NumVal(sv_elem), ast.FeatureRef(idx))
         squared_diffs.append(ast.PowExpr(diff, ast.NumVal(2)))

     # Squared Euclidean distance between the support vector and input.
     squared_dist = utils.apply_op_to_expressions(
         ast.BinNumOpType.ADD, *squared_diffs)
     scaled = utils.mul(self._neg_gamma_expr, squared_dist)
     return ast.ExpExpr(scaled)
Ejemplo n.º 7
0
    def _assemble_single_output(self, estimator_params,
                                base_score=0, split_idx=0):
        """Sum assembled estimators on top of *base_score* and apply the
        model's final transform to the total.
        """
        estimator_exprs = self._assemble_estimators(
            estimator_params, split_idx)

        summed = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD,
            ast.NumVal(base_score),
            *estimator_exprs)

        return self._final_transform(summed)
Ejemplo n.º 8
0
    def assemble(self):
        """Average the assembled per-estimator tree expressions:
        (tree_1 + ... + tree_n) * (1 / n_estimators).
        """
        assembled_trees = [
            TreeModelAssembler(estimator).assemble()
            for estimator in self.model.estimators_
        ]
        total = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD, *assembled_trees)
        return utils.apply_bin_op(
            total,
            ast.NumVal(1 / self.model.n_estimators),
            ast.BinNumOpType.MUL)
    def assemble(self):
        """Sum per-tree expressions, each pre-scaled by 1 / n_estimators
        and wrapped in its own subroutine.
        """
        coef = 1.0 / self.model.n_estimators

        # One NumVal node per tree keeps the expression tree structure
        # identical for each scaled term.
        scaled_trees = [
            utils.apply_bin_op(
                ast.SubroutineExpr(TreeModelAssembler(estimator).assemble()),
                ast.NumVal(coef),
                ast.BinNumOpType.MUL)
            for estimator in self.model.estimators_
        ]
        return utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD, *scaled_trees)
Ejemplo n.º 10
0
    def _assemble_single_output(self):
        """Assemble the binary-SVM decision expression:
        intercept + sum(dual_coef[k] * K(sv_k, x)).
        """
        support_vectors = self.model.support_vectors_
        coef = self.model.dual_coef_[0]
        intercept = self.model.intercept_[0]

        kernel_exprs = self._apply_kernel(support_vectors)

        # Comprehension instead of a manual append loop, matching the
        # multi-output variant of this method.
        kernel_weight_mul_ops = [
            utils.mul(kernel_exprs[index], ast.NumVal(value))
            for index, value in enumerate(coef)
        ]

        return utils.apply_op_to_expressions(ast.BinNumOpType.ADD,
                                             ast.NumVal(intercept),
                                             *kernel_weight_mul_ops)
Ejemplo n.º 11
0
    def _assemble_single_output(self, idx=0):
        """Assemble the decision expression for output *idx*:
        intercept + sum(dual_coef[k] * K(sv_k, x)).
        """
        support_vectors = self.model.support_vectors_
        coef = self._get_single_coef(idx)
        intercept = self._get_single_intercept(idx)

        kernel_exprs = self._apply_kernel(support_vectors)

        weighted_kernels = []
        for sv_idx, dual_coef in enumerate(coef):
            weighted_kernels.append(
                utils.mul(kernel_exprs[sv_idx], ast.NumVal(dual_coef)))

        return utils.apply_op_to_expressions(ast.BinNumOpType.ADD,
                                             ast.NumVal(intercept),
                                             *weighted_kernels)
Ejemplo n.º 12
0
 def _cosine_kernel(self, support_vector):
     # Cosine similarity kernel: <sv, x> / (||sv|| * ||x||).
     # ||sv|| is a plain number computed here; ||x|| must be built
     # symbolically since the feature vector only exists at runtime.
     support_vector_norm = np.linalg.norm(support_vector)
     if support_vector_norm == 0.0:
         # Avoid dividing by zero for an all-zero support vector.
         support_vector_norm = 1.0
     # sqrt(sum(x_i * x_i)) over all features; to_reuse so generated
     # code evaluates the norm only once despite the IfExpr below
     # referencing it twice.
     feature_norm = ast.SqrtExpr(utils.apply_op_to_expressions(
         ast.BinNumOpType.ADD, *[
             utils.mul(ast.FeatureRef(i), ast.FeatureRef(i))
             for i in range(len(support_vector))
         ]),
                                 to_reuse=True)
     # Runtime guard mirroring the numeric one above: a zero feature
     # norm is replaced by 1.0 before dividing.
     safe_feature_norm = ast.IfExpr(utils.eq(feature_norm, ast.NumVal(0.0)),
                                    ast.NumVal(1.0), feature_norm)
     # Fold ||sv|| into the support vector itself, then divide the
     # resulting dot product by ||x||.
     kernel = self._linear_kernel(support_vector / support_vector_norm)
     kernel = utils.div(kernel, safe_feature_norm)
     return kernel
Ejemplo n.º 13
0
def test_linear_model():
    # The default "shotgun" updater is nondeterministic; pin a
    # deterministic coordinate-descent configuration instead.
    estimator = xgb.XGBRegressor(n_estimators=2,
                                 random_state=1,
                                 updater="coord_descent",
                                 feature_selector="shuffle",
                                 booster="gblinear")
    utils.get_regression_model_trainer()(estimator)

    assembler = XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Expected learned weight per feature index.
    weights = [
        -0.154567,
        0.0815865,
        -0.0979713,
        4.80472,
        1.35478,
        0.327222,
        0.0610654,
        0.46989,
        -0.0674318,
        -0.000506212,
        0.0732867,
        0.0108842,
        -0.140096,
    ]
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(index), ast.NumVal(weight),
                       ast.BinNumOpType.MUL)
        for index, weight in enumerate(weights)
    ]

    expected = ast.BinNumExpr(
        ast.NumVal(0.5),
        apply_op_to_expressions(ast.BinNumOpType.ADD, ast.NumVal(11.138),
                                *feature_weight_mul), ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
Ejemplo n.º 14
0
    def _assemble_single_output(self, trees, base_score=0):
        """Sum assembled trees on top of *base_score*, splitting very large
        ensembles into multiple subroutines, and apply the final transform.
        """
        if self._tree_limit:
            trees = trees[:self._tree_limit]

        trees_ast = [ast.SubroutineExpr(self._assemble_tree(t))
                     for t in trees]

        # Large ensembles are chunked into several subroutines to work
        # around target-language method size limits, see
        # https://github.com/BayesWitnesses/m2cgen/issues/103.
        leaf_counts = [self._count_leaves(t) for t in trees]
        if sum(leaf_counts) <= self._leaves_cutoff_threshold:
            summands = trees_ast
        else:
            summands = self._split_into_subroutines(trees_ast, leaf_counts)

        combined = utils.apply_op_to_expressions(ast.BinNumOpType.ADD,
                                                 ast.NumVal(base_score),
                                                 *summands)

        return ast.SubroutineExpr(self._final_transform(combined))
Ejemplo n.º 15
0
 def _linear_kernel(self, support_vector):
     """Dot product of the support vector with the feature vector."""
     products = []
     for idx, sv_elem in enumerate(support_vector):
         products.append(utils.mul(ast.NumVal(sv_elem), ast.FeatureRef(idx)))
     return utils.apply_op_to_expressions(ast.BinNumOpType.ADD, *products)