def test_bin_vector_expr(): expr = ast.BinVectorExpr(ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]), ast.BinNumOpType.ADD) expected_code = """ score <- function(input) { return((c(1.0, 2.0)) + (c(3.0, 4.0))) } """ interpreter = RInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_vector_expr():
    expr = ast.BinVectorExpr(
        ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
        ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]),
        ast.BinNumOpType.MUL)

    interpreter = interpreters.PythonInterpreter()

    expected_code = """
import numpy as np
def score(input):
    return (np.asarray([1, 2])) * (np.asarray([3, 4]))
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_bin_vector_expr(): expr = ast.BinVectorExpr( ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]), ast.BinNumOpType.ADD) expected_code = """ Module Model Function AddVectors(ByRef v1() As Double, ByRef v2() As Double) As Double() Dim resLength As Integer resLength = UBound(v1) - LBound(v1) Dim result() As Double ReDim result(resLength) Dim i As Integer For i = LBound(v1) To UBound(v1) result(i) = v1(i) + v2(i) Next i AddVectors = result End Function Function MulVectorNumber(ByRef v1() As Double, ByVal num As Double) As Double() Dim resLength As Integer resLength = UBound(v1) - LBound(v1) Dim result() As Double ReDim result(resLength) Dim i As Integer For i = LBound(v1) To UBound(v1) result(i) = v1(i) * num Next i MulVectorNumber = result End Function Function Score(ByRef inputVector() As Double) As Double() Dim var0(1) As Double var0(0) = 1.0 var0(1) = 2.0 Dim var1(1) As Double var1(0) = 3.0 var1(1) = 4.0 Score = AddVectors(var0, var1) End Function End Module """ interpreter = VisualBasicInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def _assemble_multi_class_output(self):
    support_vectors = self.model.support_vectors_
    coef = self.model.dual_coef_
    intercept = self.model.intercept_

    n_support = self.model.n_support_
    n_support_len = len(n_support)

    kernel_exprs = self._apply_kernel(support_vectors, to_reuse=True)

    # Index ranges of the support vectors that belong to each class.
    support_ranges = []
    for i in range(n_support_len):
        range_start = sum(n_support[:i])
        range_end = range_start + n_support[i]
        support_ranges.append((range_start, range_end))

    # One-vs-one decisions.
    decisions = []
    for i in range(n_support_len):
        for j in range(i + 1, n_support_len):
            kernel_weight_mul_ops = [
                utils.mul(kernel_exprs[k], ast.NumVal(coef[i][k]))
                for k in range(*support_ranges[j])
            ]
            kernel_weight_mul_ops.extend([
                utils.mul(kernel_exprs[k], ast.NumVal(coef[j - 1][k]))
                for k in range(*support_ranges[i])
            ])

            decision = utils.apply_op_to_expressions(
                ast.BinNumOpType.ADD,
                ast.NumVal(intercept[len(decisions)]),
                *kernel_weight_mul_ops)

            decisions.append(decision)

    return ast.VectorVal(decisions)

def test_bin_vector_expr(): expr = ast.BinVectorExpr(ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]), ast.BinNumOpType.ADD) expected_code = """ let private addVectors v1 v2 = List.map2 (+) v1 v2 let private mulVectorNumber v1 num = List.map (fun i -> i * num) v1 let score (input : double list) = addVectors ([1.0; 2.0]) ([3.0; 4.0]) """ interpreter = FSharpInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_class_sigmoid_output_transform():
    estimator = lightgbm.LGBMClassifier(n_estimators=1, random_state=1,
                                        max_depth=1, sigmoid=0.5)
    utils.get_binary_classification_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.BinNumExpr(
                        ast.NumVal(0.5),
                        ast.IfExpr(
                            ast.CompExpr(
                                ast.FeatureRef(20),
                                ast.NumVal(16.795),
                                ast.CompOpType.GT),
                            ast.NumVal(0.5500419366076967),
                            ast.NumVal(1.2782342253678096)),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)

def test_multi_class():
    estimator = linear_model.LogisticRegression()
    estimator.coef_ = np.array([[1, 2], [3, 4], [5, 6]])
    estimator.intercept_ = np.array([7, 8, 9])

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.VectorVal([
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(7),
                ast.BinNumExpr(
                    ast.FeatureRef(0),
                    ast.NumVal(1),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(
                ast.FeatureRef(1),
                ast.NumVal(2),
                ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD),
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(8),
                ast.BinNumExpr(
                    ast.FeatureRef(0),
                    ast.NumVal(3),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(
                ast.FeatureRef(1),
                ast.NumVal(4),
                ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD),
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(9),
                ast.BinNumExpr(
                    ast.FeatureRef(0),
                    ast.NumVal(5),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(
                ast.FeatureRef(1),
                ast.NumVal(6),
                ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD)])

    assert utils.cmp_exprs(actual, expected)

def test_multi_class():
    estimator = lightgbm.LGBMClassifier(n_estimators=1, random_state=1,
                                        max_depth=1)
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    exponent = ast.ExpExpr(
        ast.BinNumExpr(
            ast.NumVal(0.0),
            ast.NumVal(-1.0986122886681098),
            ast.BinNumOpType.ADD),
        to_reuse=True)

    exponent_sum = ast.BinNumExpr(
        ast.BinNumExpr(exponent, exponent, ast.BinNumOpType.ADD),
        exponent,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)

    expected = ast.VectorVal([softmax] * 3)

    assert utils.cmp_exprs(actual, expected)

def test_bin_vector_num_expr(): expr = ast.BinVectorNumExpr( ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.NumVal(1), ast.BinNumOpType.MUL) expected_code = """ <?php function addVectors(array $v1, array $v2) { $result = array(); for ($i = 0; $i < count($v1); ++$i) { $result[] = $v1[$i] + $v2[$i]; } return $result; } function mulVectorNumber(array $v1, $num) { $result = array(); for ($i = 0; $i < count($v1); ++$i) { $result[] = $v1[$i] * $num; } return $result; } function score(array $input) { return mulVectorNumber(array(1, 2), 1); } """ interpreter = PhpInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_vector_num_expr(): expr = ast.BinVectorNumExpr( ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.NumVal(1), ast.BinNumOpType.MUL) expected_code = """ function Add-Vectors([double[]] $v1, [double[]] $v2) { [int] $length = $v1.Length [double[]] $result = @(0) * $length for ([int] $i = 0; $i -lt $length; ++$i) { $result[$i] = $v1[$i] + $v2[$i] } return $result } function Mul-Vector-Number([double[]] $v1, [double] $num) { [int] $length = $v1.Length [double[]] $result = @(0) * $length for ([int] $i = 0; $i -lt $length; ++$i) { $result[$i] = $v1[$i] * $num } return $result } function Score([double[]] $InputVector) { return Mul-Vector-Number $(@($(1), $(2))) $(1) } """ interpreter = PowershellInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_vector_num_expr():
    expr = ast.BinVectorNumExpr(
        ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
        ast.NumVal(1),
        ast.BinNumOpType.MUL)

    interpreter = GoInterpreter()

    expected_code = """
func addVectors(v1, v2 []float64) []float64 {
    result := make([]float64, len(v1))
    for i := 0; i < len(v1); i++ {
        result[i] = v1[i] + v2[i]
    }
    return result
}
func mulVectorNumber(v1 []float64, num float64) []float64 {
    result := make([]float64, len(v1))
    for i := 0; i < len(v1); i++ {
        result[i] = v1[i] * num
    }
    return result
}
func score(input []float64) []float64 {
    return mulVectorNumber([]float64{1.0, 2.0}, 1.0)
}"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_bin_vector_num_expr():
    expr = ast.BinVectorNumExpr(
        ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
        ast.NumVal(1),
        ast.BinNumOpType.MUL)

    interpreter = interpreters.CInterpreter()

    expected_code = """
void assign_array(double source[], double *target, int size) {
    for(int i = 0; i < size; ++i)
        target[i] = source[i];
}
void add_vectors(double *v1, double *v2, int size, double *result) {
    for(int i = 0; i < size; ++i)
        result[i] = v1[i] + v2[i];
}
void mul_vector_number(double *v1, double num, int size, double *result) {
    for(int i = 0; i < size; ++i)
        result[i] = v1[i] * num;
}
void score(double * input, double * output) {
    double var0[2];
    mul_vector_number((double[]){1, 2}, 1, 2, var0);
    assign_array(var0, output, 2);
}"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_bin_vector_num_expr(): expr = ast.BinVectorNumExpr(ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.NumVal(1), ast.BinNumOpType.MUL) expected_code = """ namespace ML { public static class Model { public static double[] Score(double[] input) { return MulVectorNumber(new double[2] {1.0, 2.0}, 1.0); } private static double[] AddVectors(double[] v1, double[] v2) { double[] result = new double[v1.Length]; for (int i = 0; i < v1.Length; ++i) { result[i] = v1[i] + v2[i]; } return result; } private static double[] MulVectorNumber(double[] v1, double num) { double[] result = new double[v1.Length]; for (int i = 0; i < v1.Length; ++i) { result[i] = v1[i] * num; } return result; } } } """ interpreter = CSharpInterpreter() assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_vector_num_expr(): expr = ast.BinVectorNumExpr(ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.NumVal(1), ast.BinNumOpType.MUL) expected_code = """ List<double> score(List<double> input) { return mulVectorNumber([1.0, 2.0], 1.0); } List<double> addVectors(List<double> v1, List<double> v2) { List<double> result = new List<double>(v1.length); for (int i = 0; i < v1.length; i++) { result[i] = v1[i] + v2[i]; } return result; } List<double> mulVectorNumber(List<double> v1, double num) { List<double> result = new List<double>(v1.length); for (int i = 0; i < v1.length; i++) { result[i] = v1[i] * num; } return result; } """ interpreter = DartInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_multi_class():
    estimator = xgboost.XGBClassifier(n_estimators=1, random_state=1,
                                      max_depth=1)
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    exponent = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.NumVal(0.0),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    exponent_sum = ast.BinNumExpr(
        ast.BinNumExpr(exponent, exponent, ast.BinNumOpType.ADD),
        exponent,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)

    expected = ast.VectorVal([softmax] * 3)

    assert utils.cmp_exprs(actual, expected)

def test_bin_vector_num_expr():
    expr = ast.BinVectorNumExpr(
        ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
        ast.NumVal(1),
        ast.BinNumOpType.MUL)

    interpreter = interpreters.JavascriptInterpreter()

    expected_code = """
function score(input) {
    return mulVectorNumber([1, 2], 1);
}
function addVectors(v1, v2) {
    let result = new Array(v1.length);
    for (let i = 0; i < v1.length; i++) {
        result[i] = v1[i] + v2[i];
    }
    return result;
}
function mulVectorNumber(v1, num) {
    let result = new Array(v1.length);
    for (let i = 0; i < v1.length; i++) {
        result[i] = v1[i] * num;
    }
    return result;
}
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_multi_class_sigmoid_output_transform():
    estimator = lightgbm.LGBMClassifier(n_estimators=1, random_state=1,
                                        max_depth=1, sigmoid=0.5,
                                        objective="ovr")
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.BinNumExpr(
                        ast.NumVal(0.5),
                        ast.NumVal(-1.3862943611),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV)

    expected = ast.VectorVal([sigmoid] * 3)

    assert utils.cmp_exprs(actual, expected)

def test_bin_vector_num_expr():
    expr = ast.BinVectorNumExpr(
        ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
        ast.NumVal(1),
        ast.BinNumOpType.MUL)

    interpreter = JavaInterpreter()

    expected_code = """
public class Model {
    public static double[] score(double[] input) {
        return mulVectorNumber(new double[] {1.0, 2.0}, 1.0);
    }
    private static double[] addVectors(double[] v1, double[] v2) {
        double[] result = new double[v1.length];
        for (int i = 0; i < v1.length; i++) {
            result[i] = v1[i] + v2[i];
        }
        return result;
    }
    private static double[] mulVectorNumber(double[] v1, double num) {
        double[] result = new double[v1.length];
        for (int i = 0; i < v1.length; i++) {
            result[i] = v1[i] * num;
        }
        return result;
    }
}"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_count_exprs():
    assert ast.count_exprs(
        ast.BinNumExpr(ast.NumVal(1), ast.NumVal(2), ast.BinNumOpType.ADD)
    ) == 3

    assert ast.count_exprs(
        ast.ExpExpr(ast.NumVal(2))
    ) == 2

    assert ast.count_exprs(
        ast.VectorVal([
            ast.NumVal(2),
            ast.TanhExpr(ast.NumVal(3))
        ])
    ) == 4

    assert ast.count_exprs(
        ast.IfExpr(
            ast.CompExpr(ast.NumVal(2), ast.NumVal(0), ast.CompOpType.GT),
            ast.NumVal(3),
            ast.NumVal(4),
        )
    ) == 6

    assert ast.count_exprs(ast.NumVal(1)) == 1

def test_multi_class_best_ntree_limit():
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100, random_state=1,
                                      max_depth=1, base_score=base_score)
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    estimator_exp_class1 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(2),
                            ast.NumVal(2.45000005),
                            ast.CompOpType.GTE),
                        ast.NumVal(-0.0733167157),
                        ast.NumVal(0.143414631))),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class2 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(2),
                            ast.NumVal(2.45000005),
                            ast.CompOpType.GTE),
                        ast.NumVal(0.0344139598),
                        ast.NumVal(-0.0717073306))),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class3 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(3),
                            ast.NumVal(1.6500001),
                            ast.CompOpType.GTE),
                        ast.NumVal(0.13432835),
                        ast.NumVal(-0.0644444525))),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(
            estimator_exp_class1,
            estimator_exp_class2,
            ast.BinNumOpType.ADD),
        estimator_exp_class3,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)])

    assert utils.cmp_exprs(actual, expected)

def test_multi_output(): expr = ast.IfExpr( ast.CompExpr(ast.NumVal(1), ast.NumVal(1), ast.CompOpType.EQ), ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])) expected_code = """ def score(input): if (1.0) == (1.0): var0 = [1.0, 2.0] else: var0 = [3.0, 4.0] return var0 """ interpreter = interpreters.PythonInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_multi_output(): expr = ast.IfExpr( ast.CompExpr(ast.NumVal(1), ast.NumVal(1), ast.CompOpType.EQ), ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])) expected_code = """ func score(input []float64) []float64 { var var0 []float64 if (1.0) == (1.0) { var0 = []float64{1.0, 2.0} } else { var0 = []float64{3.0, 4.0} } return var0 }""" interpreter = GoInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_vector_expr():
    expr = ast.BinVectorExpr(
        ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
        ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]),
        ast.BinNumOpType.ADD)

    interpreter = PythonInterpreter()

    expected_code = """
def add_vectors(v1, v2):
    return [sum(i) for i in zip(v1, v2)]
def mul_vector_number(v1, num):
    return [i * num for i in v1]
def score(input):
    return add_vectors([1.0, 2.0], [3.0, 4.0])
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_multi_output(): expr = ast.IfExpr( ast.CompExpr(ast.NumVal(1), ast.NumVal(1), ast.CompOpType.EQ), ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])) expected_code = """ let score (input : double list) = let func0 = if ((1.0) = (1.0)) then [1.0; 2.0] else [3.0; 4.0] func0 """ interpreter = FSharpInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_raw_array(): expr = ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]) expected_code = """ func score(input []float64) []float64 { return []float64{3.0, 4.0} }""" interpreter = GoInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def softmax(exprs):
    # Mark each exp(expr) as reusable so the generated code evaluates it once.
    exp_exprs = [ast.ExpExpr(e, to_reuse=True) for e in exprs]
    exp_sum_expr = utils.apply_op_to_expressions(ast.BinNumOpType.ADD,
                                                 *exp_exprs,
                                                 to_reuse=True)
    # Normalize every exponent by the shared sum: exp(e_i) / sum_j exp(e_j).
    return ast.VectorVal([
        ast.BinNumExpr(e, exp_sum_expr, ast.BinNumOpType.DIV)
        for e in exp_exprs
    ])

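A minimal usage sketch, not part of the library code above; it only assumes the same `ast` module and shows the shape of the result that the multi-class tests compare against.

# Illustrative only: softmax over two raw class scores builds a VectorVal
# whose components divide each reused exponent by one shared exponent sum,
# matching the exponent/exponent_sum structure asserted in the tests above.
raw_scores = [ast.NumVal(0.3), ast.NumVal(-1.2)]
probabilities = softmax(raw_scores)
assert isinstance(probabilities, ast.VectorVal)
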
def _assemble_leaf(self, node_id):
    scores = self._tree.value[node_id][0]
    if self._is_vector_output:
        # Normalize raw per-class scores into probabilities; fall back to 1.0
        # to avoid division by zero when all scores are zero.
        score_sum = scores.sum() or 1.0
        outputs = [ast.NumVal(s / score_sum) for s in scores]
        return ast.VectorVal(outputs)
    else:
        assert len(scores) == 1, "Unexpected number of outputs"
        return ast.NumVal(scores[0])

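A hypothetical worked example of the vector-output branch above; the variable names are invented for illustration and only the `ast` module is assumed.

# Illustrative only: a leaf holding class counts [2.0, 6.0] is normalized to
# the probabilities [0.25, 0.75] before being wrapped in a VectorVal.
leaf_counts = [2.0, 6.0]
leaf_sum = sum(leaf_counts) or 1.0
leaf_expr = ast.VectorVal([ast.NumVal(c / leaf_sum) for c in leaf_counts])
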
def test_multi_output(): expr = ast.IfExpr( ast.CompExpr(ast.NumVal(1), ast.NumVal(1), ast.CompOpType.EQ), ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]), ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])) expected_code = """ def score(input) if (1.0) == (1.0) var0 = [1.0, 2.0] else var0 = [3.0, 4.0] end var0 end """ interpreter = RubyInterpreter() assert_code_equal(interpreter.interpret(expr), expected_code)
def test_raw_array(): expr = ast.VectorVal([ast.NumVal(3), ast.NumVal(4)]) expected_code = """ #include <string.h> void score(double * input, double * output) { memcpy(output, (double[]){3.0, 4.0}, 2 * sizeof(double)); }""" interpreter = interpreters.CInterpreter() utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def _assemble_multi_class_output(self, trees):
    # Multi-class output is calculated based on discussion in
    # https://github.com/dmlc/xgboost/issues/1746#issuecomment-295962863
    splits = _split_trees_by_classes(trees, self._output_size)

    base_score = self._base_score
    exprs = [self._assemble_single_output(t, base_score) for t in splits]

    proba_exprs = utils.softmax_exprs(exprs)
    return ast.VectorVal(proba_exprs)