def test_bin_num_expr():
    """Nested DIV inside MUL is rendered as parenthesized VB arithmetic."""
    quotient = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(-2), ast.BinNumOpType.DIV)
    expr = ast.BinNumExpr(quotient, ast.NumVal(2), ast.BinNumOpType.MUL)

    interpreter = VisualBasicInterpreter()

    expected_code = """
Module Model
Function Score(ByRef inputVector() As Double) As Double
    Score = ((inputVector(0)) / (-2.0)) * (2.0)
End Function
End Module
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_ignores_subroutine_expr():
    """Java interpreter inlines a nested BinNumExpr instead of emitting a
    separate subroutine for it."""
    nested_sum = ast.BinNumExpr(
        ast.NumVal(1), ast.NumVal(2), ast.BinNumOpType.ADD)
    expr = ast.BinNumExpr(
        ast.FeatureRef(0), nested_sum, ast.BinNumOpType.MUL)

    interpreter = interpreters.JavaInterpreter()

    expected_code = """
public class Model {

    public static double score(double[] input) {
        return (input[0]) * ((1) + (2));
    }
}"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_num_expr():
    """Nested DIV inside MUL is rendered as parenthesized Java arithmetic."""
    quotient = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(-2), ast.BinNumOpType.DIV)
    expr = ast.BinNumExpr(quotient, ast.NumVal(2), ast.BinNumOpType.MUL)

    interpreter = JavaInterpreter()

    expected_code = """
public class Model {

    public static double score(double[] input) {
        return ((input[0]) / (-2.0)) * (2.0);
    }
}"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_count_all_exprs_types():
    """count_exprs recursively counts every node in a tree that mixes
    vector, arithmetic, unary and conditional expression types."""
    expr = ast.BinVectorNumExpr(
        ast.BinVectorExpr(
            ast.VectorVal([
                ast.ExpExpr(ast.NumVal(2)),
                ast.SqrtExpr(ast.NumVal(2)),
                ast.PowExpr(ast.NumVal(2), ast.NumVal(3)),
                ast.TanhExpr(ast.NumVal(1)),
                ast.BinNumExpr(ast.NumVal(0), ast.FeatureRef(0),
                               ast.BinNumOpType.ADD)
            ]),
            ast.VectorVal([
                ast.NumVal(1),
                ast.NumVal(2),
                ast.NumVal(3),
                ast.NumVal(4),
                ast.FeatureRef(1)
            ]),
            ast.BinNumOpType.SUB),
        ast.IfExpr(
            ast.CompExpr(ast.NumVal(2), ast.NumVal(0), ast.CompOpType.GT),
            ast.NumVal(3),
            ast.NumVal(4),
        ),
        ast.BinNumOpType.MUL)

    # 27 = total number of AST nodes in the tree above, including leaves.
    assert ast.count_exprs(expr) == 27
def test_rbf_kernel():
    """SVC with an RBF kernel is assembled as exp(-gamma * (sv - x)^2)
    per support vector, wrapped in a SubroutineExpr."""
    estimator = svm.SVC(kernel="rbf", random_state=1, gamma=2.0)
    estimator.fit([[1], [2]], [1, 2])

    assembler = assemblers.SVMModelAssembler(estimator)
    actual = assembler.assemble()

    # -gamma is modeled as (0 - gamma) in the AST.
    negative_gamma_ast = ast.BinNumExpr(
        ast.NumVal(0),
        ast.NumVal(estimator.gamma),
        ast.BinNumOpType.SUB)

    def kernel_ast(sup_vec_value):
        # One RBF kernel term for a single support vector value.
        return ast.SubroutineExpr(
            ast.ExpExpr(
                ast.BinNumExpr(
                    negative_gamma_ast,
                    ast.PowExpr(
                        ast.BinNumExpr(
                            ast.NumVal(sup_vec_value),
                            ast.FeatureRef(0),
                            ast.BinNumOpType.SUB),
                        ast.NumVal(2)),
                    ast.BinNumOpType.MUL)))

    # Both training points become support vectors for this tiny dataset.
    expected = _create_expected_ast(
        estimator, [kernel_ast(1.0), kernel_ast(2.0)])

    assert utils.cmp_exprs(actual, expected)
def test_regression():
    """A depth-1, two-tree LightGBM regressor assembles into the sum of
    two single-split IfExpr trees."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    # Leaf values below are the trained model's outputs; the two trees'
    # predictions are ADDed to form the ensemble score.
    expected = ast.BinNumExpr(
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(5),
                ast.NumVal(6.918),
                ast.CompOpType.GT),
            ast.NumVal(24.011454621684155),
            ast.NumVal(22.289277544391084)),
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(12),
                ast.NumVal(9.63),
                ast.CompOpType.GT),
            ast.NumVal(-0.49461212269771115),
            ast.NumVal(0.7174324413014594)),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_bin_num_expr():
    """Nested DIV inside MUL is rendered as parenthesized PHP arithmetic."""
    quotient = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(-2), ast.BinNumOpType.DIV)
    expr = ast.BinNumExpr(quotient, ast.NumVal(2), ast.BinNumOpType.MUL)

    interpreter = PhpInterpreter()

    expected_code = """
<?php
function score(array $input) {
    return (($input[0]) / (-2.0)) * (2.0);
}
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_log1p_exp_output_transform():
    """The cross_entropy_lambda objective wraps the raw ensemble sum in
    log1p(exp(...))."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1,
                                       objective="cross_entropy_lambda")
    utils.get_bounded_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    # Two depth-1 trees ADDed, then transformed by Log1pExpr(ExpExpr(...)).
    expected = ast.Log1pExpr(
        ast.ExpExpr(
            ast.BinNumExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(12),
                        ast.NumVal(19.23),
                        ast.CompOpType.GT),
                    ast.NumVal(0.6623502468),
                    ast.NumVal(0.6683497987)),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(12),
                        ast.NumVal(15.145),
                        ast.CompOpType.GT),
                    ast.NumVal(0.1405181490),
                    ast.NumVal(0.1453602134)),
                ast.BinNumOpType.ADD)))

    assert utils.cmp_exprs(actual, expected)
def test_exp_output_transform():
    """The poisson objective wraps the raw ensemble sum in exp(...)."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1, objective="poisson")
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    # Two depth-1 trees ADDed, then transformed by ExpExpr.
    expected = ast.ExpExpr(
        ast.BinNumExpr(
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.918),
                    ast.CompOpType.GT),
                ast.NumVal(3.1480683932),
                ast.NumVal(3.1101554907)),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(12),
                    ast.NumVal(9.63),
                    ast.CompOpType.GT),
                ast.NumVal(-0.0111969636),
                ast.NumVal(0.0160298303)),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_regression_categorical():
    """Regression with a declared categorical feature still assembles to
    an IfExpr-based tree ensemble."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1, min_data_per_group=1000)
    # 10000 samples, 2 columns: feature 0 in {0..5}, feature 1 in {0..3};
    # feature 1 is trained as categorical.
    X = np.column_stack((
        np.random.choice([0, 1, 2, 3, 4, 5], size=[10000, 1]),
        np.random.choice([0, 1, 2, 3], size=[10000, 1])))
    y = np.random.normal(size=[10000, 1])
    estimator.fit(X, y, categorical_feature=[1])

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    # NOTE(review): this expected tree references FeatureRef(5) and
    # FeatureRef(12) although X has only 2 features, and its thresholds and
    # leaf values are identical to test_regression's — it looks copy-pasted
    # from that test. Verify against the actual assembled output.
    expected = ast.BinNumExpr(
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(5),
                ast.NumVal(6.918),
                ast.CompOpType.GT),
            ast.NumVal(24.011454621684155),
            ast.NumVal(22.289277544391084)),
        ast.IfExpr(
            ast.CompExpr(
                ast.FeatureRef(12),
                ast.NumVal(9.63),
                ast.CompOpType.GT),
            ast.NumVal(-0.49461212269771115),
            ast.NumVal(0.7174324413014594)),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_reused_expr():
    """An expression flagged to_reuse is computed once into a var and then
    referenced twice in the generated VB code."""
    shared = ast.ExpExpr(ast.NumVal(1.0), to_reuse=True)
    expr = ast.BinNumExpr(shared, shared, ast.BinNumOpType.DIV)

    interpreter = VisualBasicInterpreter()

    expected_code = """
Module Model
Function Tanh(ByVal number As Double) As Double
    ' Implementation is taken from
    ' https://stackoverflow.com/questions/2840798/tanh-function-in-vba
    If number > 44.0 Then ' exp(2*x) <= 2^127
        Tanh = 1.0
        Exit Function
    End If
    If number < -44.0 Then
        Tanh = -1.0
        Exit Function
    End If
    Tanh = (Math.Exp(2 * number) - 1) / (Math.Exp(2 * number) + 1)
End Function
Function score(ByRef input_vector() As Double) As Double
    Dim var0 As Double
    var0 = Math.Exp(1.0)
    score = (var0) / (var0)
End Function
End Module
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_bin_num_expr():
    """Nested DIV inside MUL is rendered as parenthesized Haskell code."""
    quotient = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(-2), ast.BinNumOpType.DIV)
    expr = ast.BinNumExpr(quotient, ast.NumVal(2), ast.BinNumOpType.MUL)

    interpreter = HaskellInterpreter()

    expected_code = """
module Model where
score :: [Double] -> Double
score input =
    (((input) !! (0)) / (-2.0)) * (2.0)
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_nested_condition():
    """A condition reused inside its own branch is hoisted into F# helper
    bindings (func0/func1) rather than duplicated inline."""
    inner_if = ast.IfExpr(
        ast.CompExpr(ast.NumVal(1), ast.NumVal(1), ast.CompOpType.EQ),
        ast.NumVal(1),
        ast.NumVal(2))
    left = ast.BinNumExpr(inner_if, ast.NumVal(2), ast.BinNumOpType.ADD)

    bool_test = ast.CompExpr(ast.NumVal(1), left, ast.CompOpType.EQ)
    expr_nested = ast.IfExpr(bool_test, ast.FeatureRef(2), ast.NumVal(2))
    expr = ast.IfExpr(bool_test, expr_nested, ast.NumVal(2))

    interpreter = FSharpInterpreter()

    expected_code = """
let score (input : double list) =
    let func0 =
        if ((1.0) = (1.0)) then
            1.0
        else
            2.0
    let func1 =
        if ((1.0) = ((func0) + (2.0))) then
            if ((1.0) = ((func0) + (2.0))) then
                input.[2]
            else
                2.0
        else
            2.0
    func1
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_deep_mixed_exprs_not_reaching_threshold():
    """Below the caching threshold, nested conditions stay inline in the
    generated F# code (single func0 binding, no extracted sub-expressions)."""
    expr = ast.NumVal(1)
    for _ in range(4):
        # Two chained ADDs keep each condition under the custom threshold.
        inner = ast.NumVal(1)
        for __ in range(2):
            inner = ast.BinNumExpr(
                ast.NumVal(1), inner, ast.BinNumOpType.ADD)
        condition = ast.CompExpr(inner, ast.NumVal(1), ast.CompOpType.EQ)
        expr = ast.IfExpr(condition, ast.NumVal(1), expr)

    interpreter = CustomFSharpInterpreter()

    expected_code = """
let score (input : double list) =
    let func0 =
        if (((1.0) + ((1.0) + (1.0))) = (1.0)) then
            1.0
        else
            if (((1.0) + ((1.0) + (1.0))) = (1.0)) then
                1.0
            else
                if (((1.0) + ((1.0) + (1.0))) = (1.0)) then
                    1.0
                else
                    if (((1.0) + ((1.0) + (1.0))) = (1.0)) then
                        1.0
                    else
                        1.0
    func0
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_nested_condition():
    """A condition reused inside its own branch is emitted as separate
    var1/var2 assignments in the generated Python code."""
    inner_if = ast.IfExpr(
        ast.CompExpr(ast.NumVal(1), ast.NumVal(1), ast.CompOpType.EQ),
        ast.NumVal(1),
        ast.NumVal(2))
    left = ast.BinNumExpr(inner_if, ast.NumVal(2), ast.BinNumOpType.ADD)

    bool_test = ast.CompExpr(ast.NumVal(1), left, ast.CompOpType.EQ)
    expr_nested = ast.IfExpr(bool_test, ast.FeatureRef(2), ast.NumVal(2))
    expr = ast.IfExpr(bool_test, expr_nested, ast.NumVal(2))

    interpreter = interpreters.PythonInterpreter()

    expected_code = """
def score(input):
    if (1.0) == (1.0):
        var1 = 1.0
    else:
        var1 = 2.0
    if (1.0) == ((var1) + (2.0)):
        if (1.0) == (1.0):
            var2 = 1.0
        else:
            var2 = 2.0
        if (1.0) == ((var2) + (2.0)):
            var0 = input[2]
        else:
            var0 = 2.0
    else:
        var0 = 2.0
    return var0
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_deep_mixed_exprs_not_reaching_threshold():
    """Below the caching threshold, nested conditions stay inline in the
    generated Python code (no extracted temporary variables)."""
    expr = ast.NumVal(1)
    for _ in range(4):
        # Two chained ADDs keep each condition under the custom threshold.
        inner = ast.NumVal(1)
        for __ in range(2):
            inner = ast.BinNumExpr(
                ast.NumVal(1), inner, ast.BinNumOpType.ADD)
        condition = ast.CompExpr(inner, ast.NumVal(1), ast.CompOpType.EQ)
        expr = ast.IfExpr(condition, ast.NumVal(1), expr)

    interpreter = CustomPythonInterpreter()

    expected_code = """
def score(input):
    if ((1.0) + ((1.0) + (1.0))) == (1.0):
        var0 = 1.0
    else:
        if ((1.0) + ((1.0) + (1.0))) == (1.0):
            var0 = 1.0
        else:
            if ((1.0) + ((1.0) + (1.0))) == (1.0):
                var0 = 1.0
            else:
                if ((1.0) + ((1.0) + (1.0))) == (1.0):
                    var0 = 1.0
                else:
                    var0 = 1.0
    return var0
"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_deep_mixed_exprs_exceeding_threshold():
    """Above the caching threshold, sub-expressions are extracted into
    var1..var4 temporaries in the generated Python code.

    Fix: the original used ``for i in range(4)`` for BOTH the outer and the
    inner loop, shadowing the outer index. The values were unused so behavior
    was unaffected, but the shadowing is a bug magnet; the throwaway names
    ``_``/``__`` match the sibling tests (not-reaching-threshold variants).
    """
    expr = ast.NumVal(1)
    for _ in range(4):
        # Four chained ADDs push each condition over the custom threshold.
        inner = ast.NumVal(1)
        for __ in range(4):
            inner = ast.BinNumExpr(ast.NumVal(1), inner, ast.BinNumOpType.ADD)
        expr = ast.IfExpr(
            ast.CompExpr(inner, ast.NumVal(1), ast.CompOpType.EQ),
            ast.NumVal(1),
            expr)

    interpreter = CustomPythonInterpreter()

    expected_code = """
def score(input):
    var1 = (1) + ((1) + (1))
    if ((1) + ((1) + (var1))) == (1):
        var0 = 1
    else:
        var2 = (1) + ((1) + (1))
        if ((1) + ((1) + (var2))) == (1):
            var0 = 1
        else:
            var3 = (1) + ((1) + (1))
            if ((1) + ((1) + (var3))) == (1):
                var0 = 1
            else:
                var4 = (1) + ((1) + (1))
                if ((1) + ((1) + (var4))) == (1):
                    var0 = 1
                else:
                    var0 = 1
    return var0"""

    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_count_exprs():
    """count_exprs counts every node, including leaves, for each basic
    expression shape."""
    # Binary expr: operator node + two operands.
    bin_expr = ast.BinNumExpr(
        ast.NumVal(1), ast.NumVal(2), ast.BinNumOpType.ADD)
    assert ast.count_exprs(bin_expr) == 3

    # Unary expr: wrapper + operand.
    assert ast.count_exprs(ast.ExpExpr(ast.NumVal(2))) == 2

    # Vector: container + 1 plain value + (tanh + its operand).
    vector = ast.VectorVal([
        ast.NumVal(2),
        ast.TanhExpr(ast.NumVal(3))
    ])
    assert ast.count_exprs(vector) == 4

    # If: IfExpr + CompExpr + its 2 operands + 2 branches.
    conditional = ast.IfExpr(
        ast.CompExpr(ast.NumVal(2), ast.NumVal(0), ast.CompOpType.GT),
        ast.NumVal(3),
        ast.NumVal(4),
    )
    assert ast.count_exprs(conditional) == 6

    # A bare leaf counts as one.
    assert ast.count_exprs(ast.NumVal(1)) == 1
def test_log1p_exp_output_transform():
    """The cross_entropy_lambda objective wraps the raw ensemble sum in
    log1p(exp(...))."""
    estimator = lgb.LGBMRegressor(n_estimators=2, random_state=1, max_depth=1,
                                  objective="cross_entropy_lambda")
    utils.get_bounded_regression_model_trainer()(estimator)

    assembler = LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    # Two depth-1 trees ADDed, then transformed by Log1pExpr(ExpExpr(...)).
    expected = ast.Log1pExpr(
        ast.ExpExpr(
            ast.BinNumExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(12),
                                 ast.NumVal(19.23),
                                 ast.CompOpType.GT),
                    ast.NumVal(0.6622623010380544),
                    ast.NumVal(0.6684065452877841)),
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(12),
                                 ast.NumVal(15.145),
                                 ast.CompOpType.GT),
                    ast.NumVal(0.1404975120475147),
                    ast.NumVal(0.14535916856709272)),
                ast.BinNumOpType.ADD)))

    assert utils.cmp_exprs(actual, expected)
def test_statsmodels_glm_identity_link_func():
    """A Tweedie GLM with Power(1) (identity) link assembles to a plain
    linear combination with no output transform."""
    family = sm.families.Tweedie(sm.families.links.Power(1))
    estimator = utils.StatsmodelsSklearnLikeWrapper(
        sm.GLM,
        dict(init=dict(family=family),
             fit=dict(maxiter=1)))
    estimator = estimator.fit([[1], [2], [3]], [0.1, 0.2, 0.2])

    assembler = assemblers.StatsmodelsModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # intercept (0.0) + coef * feature, with no link wrapper.
    term = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(0.0791304348), ast.BinNumOpType.MUL)
    expected = ast.BinNumExpr(ast.NumVal(0.0), term, ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_statsmodels_glm_log_link_func():
    """A Poisson GLM with a log link assembles to exp(linear combination)."""
    family = sm.families.Poisson(sm.families.links.log())
    estimator = utils.StatsmodelsSklearnLikeWrapper(
        sm.GLM,
        dict(init=dict(family=family),
             fit=dict(maxiter=1)))
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])

    assembler = assemblers.StatsmodelsModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # exp(intercept (0.0) + coef * feature) — the log link's inverse.
    term = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(-1.0242053933), ast.BinNumOpType.MUL)
    expected = ast.ExpExpr(
        ast.BinNumExpr(ast.NumVal(0.0), term, ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_sklearn_glm_identity_link_func():
    """A sklearn TweedieRegressor with an identity link assembles to a
    plain linear combination with no output transform."""
    estimator = linear_model.TweedieRegressor(
        power=0, link="identity", max_iter=10)
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])

    assembler = assemblers.SklearnGLMModelAssembler(estimator)
    actual = assembler.assemble()

    # intercept + coef * feature, with no link wrapper.
    term = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(0.02), ast.BinNumOpType.MUL)
    expected = ast.BinNumExpr(ast.NumVal(0.12), term, ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_binary_class():
    """A binary LogisticRegression assembles to the raw decision function:
    intercept plus one MUL term per coefficient, left-folded with ADD."""
    estimator = linear_model.LogisticRegression()
    # Bypass fitting: inject coefficients and intercept directly.
    estimator.coef_ = np.array([[1, 2]])
    estimator.intercept_ = np.array([3])

    assembler = assemblers.LinearModelAssembler(estimator)
    actual = assembler.assemble()

    term0 = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(1), ast.BinNumOpType.MUL)
    term1 = ast.BinNumExpr(
        ast.FeatureRef(1), ast.NumVal(2), ast.BinNumOpType.MUL)
    expected = ast.BinNumExpr(
        ast.BinNumExpr(ast.NumVal(3), term0, ast.BinNumOpType.ADD),
        term1,
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_statsmodels_glm_sqr_power_link_func():
    """A Tweedie GLM with Power(2) link assembles to sqrt(linear
    combination) — the inverse of the square link."""
    family = sm.families.Tweedie(sm.families.links.Power(2))
    estimator = utils.StatsmodelsSklearnLikeWrapper(
        sm.GLM,
        dict(init=dict(family=family),
             fit=dict(maxiter=1)))
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])

    assembler = assemblers.StatsmodelsGLMModelAssembler(estimator)
    actual = assembler.assemble()

    # sqrt(intercept (0.0) + coef * feature).
    term = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(0.0154915480), ast.BinNumOpType.MUL)
    expected = ast.SqrtExpr(
        ast.BinNumExpr(ast.NumVal(0.0), term, ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_two_features():
    """A two-feature LinearRegression assembles to intercept plus one MUL
    term per coefficient, left-folded with ADD."""
    estimator = linear_model.LinearRegression()
    # Bypass fitting: inject coefficients and intercept directly.
    estimator.coef_ = [1, 2]
    estimator.intercept_ = 3

    assembler = assemblers.LinearModelAssembler(estimator)
    actual = assembler.assemble()

    term0 = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(1), ast.BinNumOpType.MUL)
    term1 = ast.BinNumExpr(
        ast.FeatureRef(1), ast.NumVal(2), ast.BinNumOpType.MUL)
    expected = ast.BinNumExpr(
        ast.BinNumExpr(ast.NumVal(3), term0, ast.BinNumOpType.ADD),
        term1,
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_single_feature():
    """A single-feature linear model assembles to intercept + coef * x0."""
    estimator = linear_model.LinearRegression()
    # Bypass fitting: inject coefficient and intercept directly.
    estimator.coef_ = np.array([1])
    estimator.intercept_ = np.array([3])

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    term = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(1), ast.BinNumOpType.MUL)
    expected = ast.BinNumExpr(ast.NumVal(3), term, ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_sklearn_glm_log_link_func():
    """A sklearn TweedieRegressor with a log link and no intercept
    assembles to exp(linear combination)."""
    estimator = linear_model.TweedieRegressor(power=1,
                                              link="log",
                                              fit_intercept=False,
                                              max_iter=10)
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])

    assembler = assemblers.SklearnGLMModelAssembler(estimator)
    actual = assembler.assemble()

    # exp(0.0 + coef * feature) — intercept is 0.0 (fit_intercept=False).
    term = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(-0.4619711397), ast.BinNumOpType.MUL)
    expected = ast.ExpExpr(
        ast.BinNumExpr(ast.NumVal(0.0), term, ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_multi_class_best_ntree_limit():
    """With best_ntree_limit=1, only the first boosting round per class is
    assembled; the 3-class output is the softmax of the three per-class
    scores."""
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100, random_state=1,
                                      max_depth=1, base_score=base_score)
    # Limit assembly to the first tree of each class despite 100 estimators.
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # Per-class raw score: exp(base_score + single-tree leaf value).
    # to_reuse=True because each exp appears in both numerator and the
    # shared denominator sum.
    estimator_exp_class1 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(2),
                                 ast.NumVal(2.5999999),
                                 ast.CompOpType.GTE),
                    ast.NumVal(-0.0731707439),
                    ast.NumVal(0.142857149)),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class2 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(2),
                                 ast.NumVal(2.5999999),
                                 ast.CompOpType.GTE),
                    ast.NumVal(0.0341463387),
                    ast.NumVal(-0.0714285821)),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class3 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(2),
                                 ast.NumVal(4.85000038),
                                 ast.CompOpType.GTE),
                    ast.NumVal(0.129441619),
                    ast.NumVal(-0.0681440532)),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    # Softmax denominator: sum of the three class exponentials.
    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(estimator_exp_class1, estimator_exp_class2,
                       ast.BinNumOpType.ADD),
        estimator_exp_class3,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)
    ])

    assert utils.cmp_exprs(actual, expected)
def test_bin_num_expr():
    """Nested DIV inside MUL is rendered as parenthesized C# arithmetic."""
    quotient = ast.BinNumExpr(
        ast.FeatureRef(0), ast.NumVal(-2), ast.BinNumOpType.DIV)
    expr = ast.BinNumExpr(quotient, ast.NumVal(2), ast.BinNumOpType.MUL)

    interpreter = CSharpInterpreter()

    expected_code = """
namespace ML {
    public static class Model {
        public static double Score(double[] input) {
            return ((input[0]) / (-2.0)) * (2.0);
        }
    }
}
"""

    assert_code_equal(interpreter.interpret(expr), expected_code)
def softmax_exprs(exprs):
    """Build softmax over *exprs*: each result is exp(e) / sum(exp(e_i)).

    Each exponential and the shared denominator are flagged to_reuse so
    interpreters compute them once instead of duplicating code.
    """
    exponents = [ast.ExpExpr(expr, to_reuse=True) for expr in exprs]
    denominator = apply_op_to_expressions(
        ast.BinNumOpType.ADD, *exponents, to_reuse=True)
    return [
        ast.BinNumExpr(exponent, denominator, ast.BinNumOpType.DIV)
        for exponent in exponents
    ]